From 6d313ace2d4b04f38aba75c91125a2d5b8d5b0df Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Fri, 9 Sep 2022 11:09:52 +0100 Subject: [PATCH 1/6] C++: Copy the new use-use flow code to experimental. --- .../semmle/code/cpp/ir/dataflow/DataFlow.qll | 26 + .../semmle/code/cpp/ir/dataflow/DataFlow2.qll | 16 + .../semmle/code/cpp/ir/dataflow/DataFlow3.qll | 16 + .../semmle/code/cpp/ir/dataflow/DataFlow4.qll | 16 + .../code/cpp/ir/dataflow/ResolveCall.qll | 23 + .../code/cpp/ir/dataflow/TaintTracking.qll | 23 + .../code/cpp/ir/dataflow/TaintTracking2.qll | 15 + .../code/cpp/ir/dataflow/TaintTracking3.qll | 15 + .../ir/dataflow/internal/DataFlowDispatch.qll | 273 + .../cpp/ir/dataflow/internal/DataFlowImpl.qll | 4450 +++++++++++++++++ .../ir/dataflow/internal/DataFlowImpl2.qll | 4450 +++++++++++++++++ .../ir/dataflow/internal/DataFlowImpl3.qll | 4450 +++++++++++++++++ .../ir/dataflow/internal/DataFlowImpl4.qll | 4450 +++++++++++++++++ .../dataflow/internal/DataFlowImplCommon.qll | 1374 +++++ .../internal/DataFlowImplConsistency.qll | 212 + .../internal/DataFlowImplSpecific.qll | 11 + .../ir/dataflow/internal/DataFlowPrivate.qll | 560 +++ .../cpp/ir/dataflow/internal/DataFlowUtil.qll | 1346 +++++ .../cpp/ir/dataflow/internal/ModelUtil.qll | 93 + .../ir/dataflow/internal/PrintIRLocalFlow.qll | 136 + .../dataflow/internal/PrintIRStoreSteps.qll | 33 + .../ir/dataflow/internal/PrintIRUtilities.qll | 39 + .../cpp/ir/dataflow/internal/SsaInternals.qll | 547 ++ .../dataflow/internal/SsaInternalsCommon.qll | 268 + .../dataflow/internal/TaintTrackingUtil.qll | 208 + .../dataflow/internal/ssa0/SsaInternals.qll | 314 ++ .../tainttracking1/TaintTrackingImpl.qll | 186 + .../tainttracking1/TaintTrackingParameter.qll | 5 + .../tainttracking2/TaintTrackingImpl.qll | 186 + .../tainttracking2/TaintTrackingParameter.qll | 5 + .../tainttracking3/TaintTrackingImpl.qll | 186 + .../tainttracking3/TaintTrackingParameter.qll | 5 + 32 files changed, 23937 insertions(+) create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow2.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow3.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow4.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/ResolveCall.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking2.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking3.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/PrintIRLocalFlow.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/PrintIRStoreSteps.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/PrintIRUtilities.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingImpl.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingParameter.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingImpl.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingParameter.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingImpl.qll create mode 100644 cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingParameter.qll diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow.qll new file mode 100644 index 000000000000..acb2fbc68086 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow.qll @@ -0,0 +1,26 @@ +/** + * Provides a library for local (intra-procedural) and global (inter-procedural) + * data flow analysis: deciding whether data can flow from a _source_ to a + * _sink_. This library differs from the one in `semmle.code.cpp.dataflow` in that + * this library uses the IR (Intermediate Representation) library, which provides + * a more precise semantic representation of the program, whereas the other dataflow + * library uses the more syntax-oriented ASTs. This library should provide more accurate + * results than the AST-based library in most scenarios. + * + * Unless configured otherwise, _flow_ means that the exact value of + * the source may reach the sink. We do not track flow across pointer + * dereferences or array indexing. + * + * To use global (interprocedural) data flow, extend the class + * `DataFlow::Configuration` as documented on that class. To use local + * (intraprocedural) data flow between expressions, call + * `DataFlow::localExprFlow`. For more general cases of local data flow, call + * `DataFlow::localFlow` or `DataFlow::localFlowStep` with arguments of type + * `DataFlow::Node`. + */ + +import cpp + +module DataFlow { + import experimental.semmle.code.cpp.ir.dataflow.internal.DataFlowImpl +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow2.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow2.qll new file mode 100644 index 000000000000..0513bd4ebcb8 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow2.qll @@ -0,0 +1,16 @@ +/** + * Provides a `DataFlow2` module, which is a copy of the `DataFlow` module. Use + * this class when data-flow configurations must depend on each other. Two + * classes extending `DataFlow::Configuration` should never depend on each + * other, but one of them should instead depend on a + * `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a + * `DataFlow4::Configuration`. + * + * See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation. + */ + +import cpp + +module DataFlow2 { + import experimental.semmle.code.cpp.ir.dataflow.internal.DataFlowImpl2 +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow3.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow3.qll new file mode 100644 index 000000000000..571b75b37c49 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow3.qll @@ -0,0 +1,16 @@ +/** + * Provides a `DataFlow3` module, which is a copy of the `DataFlow` module. Use + * this class when data-flow configurations must depend on each other. Two + * classes extending `DataFlow::Configuration` should never depend on each + * other, but one of them should instead depend on a + * `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a + * `DataFlow4::Configuration`. + * + * See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation. + */ + +import cpp + +module DataFlow3 { + import experimental.semmle.code.cpp.ir.dataflow.internal.DataFlowImpl3 +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow4.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow4.qll new file mode 100644 index 000000000000..18c21e554f6f --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/DataFlow4.qll @@ -0,0 +1,16 @@ +/** + * Provides a `DataFlow4` module, which is a copy of the `DataFlow` module. Use + * this class when data-flow configurations must depend on each other. Two + * classes extending `DataFlow::Configuration` should never depend on each + * other, but one of them should instead depend on a + * `DataFlow2::Configuration`, a `DataFlow3::Configuration`, or a + * `DataFlow4::Configuration`. + * + * See `semmle.code.cpp.ir.dataflow.DataFlow` for the full documentation. + */ + +import cpp + +module DataFlow4 { + import experimental.semmle.code.cpp.ir.dataflow.internal.DataFlowImpl4 +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/ResolveCall.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/ResolveCall.qll new file mode 100644 index 000000000000..bcf2fa8c7db0 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/ResolveCall.qll @@ -0,0 +1,23 @@ +/** + * Provides a predicate for non-contextual virtual dispatch and function + * pointer resolution. + */ + +import cpp +private import semmle.code.cpp.ir.ValueNumbering +private import internal.DataFlowDispatch +private import semmle.code.cpp.ir.IR + +/** + * Resolve potential target function(s) for `call`. + * + * If `call` is a call through a function pointer (`ExprCall`) or its target is + * a virtual member function, simple data flow analysis is performed in order + * to identify the possible target(s). + */ +Function resolveCall(Call call) { + exists(CallInstruction callInstruction | + callInstruction.getAst() = call and + result = viableCallable(callInstruction) + ) +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking.qll new file mode 100644 index 000000000000..7eada0bb2443 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking.qll @@ -0,0 +1,23 @@ +/** + * Provides classes for performing local (intra-procedural) and + * global (inter-procedural) taint-tracking analyses. + * + * We define _taint propagation_ informally to mean that a substantial part of + * the information from the source is preserved at the sink. For example, taint + * propagates from `x` to `x + 100`, but it does not propagate from `x` to `x > + * 100` since we consider a single bit of information to be too little. + * + * To use global (interprocedural) taint tracking, extend the class + * `TaintTracking::Configuration` as documented on that class. To use local + * (intraprocedural) taint tracking between expressions, call + * `TaintTracking::localExprTaint`. For more general cases of local taint + * tracking, call `TaintTracking::localTaint` or + * `TaintTracking::localTaintStep` with arguments of type `DataFlow::Node`. + */ + +import semmle.code.cpp.ir.dataflow.DataFlow +import semmle.code.cpp.ir.dataflow.DataFlow2 + +module TaintTracking { + import semmle.code.cpp.ir.dataflow.internal.tainttracking1.TaintTrackingImpl +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking2.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking2.qll new file mode 100644 index 000000000000..3ef03a3bd2cb --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking2.qll @@ -0,0 +1,15 @@ +/** + * Provides a `TaintTracking2` module, which is a copy of the `TaintTracking` + * module. Use this class when data-flow configurations or taint-tracking + * configurations must depend on each other. Two classes extending + * `DataFlow::Configuration` should never depend on each other, but one of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. The + * `TaintTracking::Configuration` class extends `DataFlow::Configuration`, and + * `TaintTracking2::Configuration` extends `DataFlow2::Configuration`. + * + * See `semmle.code.cpp.ir.dataflow.TaintTracking` for the full documentation. + */ +module TaintTracking2 { + import semmle.code.cpp.ir.dataflow.internal.tainttracking2.TaintTrackingImpl +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking3.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking3.qll new file mode 100644 index 000000000000..98e1caebf388 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking3.qll @@ -0,0 +1,15 @@ +/** + * Provides a `TaintTracking3` module, which is a copy of the `TaintTracking` + * module. Use this class when data-flow configurations or taint-tracking + * configurations must depend on each other. Two classes extending + * `DataFlow::Configuration` should never depend on each other, but one of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. The + * `TaintTracking::Configuration` class extends `DataFlow::Configuration`, and + * `TaintTracking2::Configuration` extends `DataFlow2::Configuration`. + * + * See `semmle.code.cpp.ir.dataflow.TaintTracking` for the full documentation. + */ +module TaintTracking3 { + import semmle.code.cpp.ir.dataflow.internal.tainttracking3.TaintTrackingImpl +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll new file mode 100644 index 000000000000..f92612e77af1 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowDispatch.qll @@ -0,0 +1,273 @@ +private import cpp +private import semmle.code.cpp.ir.IR +private import experimental.semmle.code.cpp.ir.dataflow.DataFlow +private import experimental.semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate +private import experimental.semmle.code.cpp.ir.dataflow.internal.DataFlowUtil +private import DataFlowImplCommon as DataFlowImplCommon + +/** + * Gets a function that might be called by `call`. + */ +cached +Function viableCallable(CallInstruction call) { + DataFlowImplCommon::forceCachingInSameStage() and + result = call.getStaticCallTarget() + or + // If the target of the call does not have a body in the snapshot, it might + // be because the target is just a header declaration, and the real target + // will be determined at run time when the caller and callee are linked + // together by the operating system's dynamic linker. In case a _unique_ + // function with the right signature is present in the database, we return + // that as a potential callee. + exists(string qualifiedName, int nparams | + callSignatureWithoutBody(qualifiedName, nparams, call) and + functionSignatureWithBody(qualifiedName, nparams, result) and + strictcount(Function other | functionSignatureWithBody(qualifiedName, nparams, other)) = 1 + ) + or + // Virtual dispatch + result = call.(VirtualDispatch::DataSensitiveCall).resolve() +} + +/** + * Provides virtual dispatch support compatible with the original + * implementation of `semmle.code.cpp.security.TaintTracking`. + */ +private module VirtualDispatch { + /** A call that may dispatch differently depending on the qualifier value. */ + abstract class DataSensitiveCall extends DataFlowCall { + /** + * Gets the node whose value determines the target of this call. This node + * could be the qualifier of a virtual dispatch or the function-pointer + * expression in a call to a function pointer. What they have in common is + * that we need to find out which data flows there, and then it's up to the + * `resolve` predicate to stitch that information together and resolve the + * call. + */ + abstract DataFlow::Node getDispatchValue(); + + /** Gets a candidate target for this call. */ + abstract Function resolve(); + + /** + * Whether `src` can flow to this call. + * + * Searches backwards from `getDispatchValue()` to `src`. The `allowFromArg` + * parameter is true when the search is allowed to continue backwards into + * a parameter; non-recursive callers should pass `_` for `allowFromArg`. + */ + predicate flowsFrom(DataFlow::Node src, boolean allowFromArg) { + src = this.getDispatchValue() and allowFromArg = true + or + exists(DataFlow::Node other, boolean allowOtherFromArg | + this.flowsFrom(other, allowOtherFromArg) + | + // Call argument + exists(DataFlowCall call, Position i | + other + .(DataFlow::ParameterNode) + .isParameterOf(pragma[only_bind_into](call).getStaticCallTarget(), i) and + src.(ArgumentNode).argumentOf(call, pragma[only_bind_into](pragma[only_bind_out](i))) + ) and + allowOtherFromArg = true and + allowFromArg = true + or + // Call return + exists(DataFlowCall call, ReturnKind returnKind | + other = getAnOutNode(call, returnKind) and + returnNodeWithKindAndEnclosingCallable(src, returnKind, call.getStaticCallTarget()) + ) and + allowFromArg = false + or + // Local flow + DataFlow::localFlowStep(src, other) and + allowFromArg = allowOtherFromArg + or + // Flow from global variable to load. + exists(LoadInstruction load, GlobalOrNamespaceVariable var | + var = src.asVariable() and + other.asInstruction() = load and + addressOfGlobal(load.getSourceAddress(), var) and + // The `allowFromArg` concept doesn't play a role when `src` is a + // global variable, so we just set it to a single arbitrary value for + // performance. + allowFromArg = true + ) + or + // Flow from store to global variable. + exists(StoreInstruction store, GlobalOrNamespaceVariable var | + var = other.asVariable() and + store = src.asInstruction() and + storeIntoGlobal(store, var) and + // Setting `allowFromArg` to `true` like in the base case means we + // treat a store to a global variable like the dispatch itself: flow + // may come from anywhere. + allowFromArg = true + ) + ) + } + } + + pragma[noinline] + private predicate storeIntoGlobal(StoreInstruction store, GlobalOrNamespaceVariable var) { + addressOfGlobal(store.getDestinationAddress(), var) + } + + /** Holds if `addressInstr` is an instruction that produces the address of `var`. */ + private predicate addressOfGlobal(Instruction addressInstr, GlobalOrNamespaceVariable var) { + // Access directly to the global variable + addressInstr.(VariableAddressInstruction).getAstVariable() = var + or + // Access to a field on a global union + exists(FieldAddressInstruction fa | + fa = addressInstr and + fa.getObjectAddress().(VariableAddressInstruction).getAstVariable() = var and + fa.getField().getDeclaringType() instanceof Union + ) + } + + /** + * A ReturnNode with its ReturnKind and its enclosing callable. + * + * Used to fix a join ordering issue in flowsFrom. + */ + pragma[noinline] + private predicate returnNodeWithKindAndEnclosingCallable( + ReturnNode node, ReturnKind kind, DataFlowCallable callable + ) { + node.getKind() = kind and + node.getEnclosingCallable() = callable + } + + /** Call through a function pointer. */ + private class DataSensitiveExprCall extends DataSensitiveCall { + DataSensitiveExprCall() { not exists(this.getStaticCallTarget()) } + + override DataFlow::Node getDispatchValue() { result.asInstruction() = this.getCallTarget() } + + override Function resolve() { + exists(FunctionInstruction fi | + this.flowsFrom(DataFlow::instructionNode(fi), _) and + result = fi.getFunctionSymbol() + ) and + ( + this.getNumberOfArguments() <= result.getEffectiveNumberOfParameters() and + this.getNumberOfArguments() >= result.getEffectiveNumberOfParameters() + or + result.isVarargs() + ) + } + } + + /** Call to a virtual function. */ + private class DataSensitiveOverriddenFunctionCall extends DataSensitiveCall { + DataSensitiveOverriddenFunctionCall() { + exists(this.getStaticCallTarget().(VirtualFunction).getAnOverridingFunction()) + } + + override DataFlow::Node getDispatchValue() { result.asInstruction() = this.getThisArgument() } + + override MemberFunction resolve() { + exists(Class overridingClass | + this.overrideMayAffectCall(overridingClass, result) and + this.hasFlowFromCastFrom(overridingClass) + ) + } + + /** + * Holds if `this` is a virtual function call whose static target is + * overridden by `overridingFunction` in `overridingClass`. + */ + pragma[noinline] + private predicate overrideMayAffectCall(Class overridingClass, MemberFunction overridingFunction) { + overridingFunction.getAnOverriddenFunction+() = this.getStaticCallTarget().(VirtualFunction) and + overridingFunction.getDeclaringType() = overridingClass + } + + /** + * Holds if the qualifier of `this` has flow from an upcast from + * `derivedClass`. + */ + pragma[noinline] + private predicate hasFlowFromCastFrom(Class derivedClass) { + exists(ConvertToBaseInstruction toBase | + this.flowsFrom(DataFlow::instructionNode(toBase), _) and + derivedClass = toBase.getDerivedClass() + ) + } + } +} + +/** + * Holds if `f` is a function with a body that has name `qualifiedName` and + * `nparams` parameter count. See `functionSignature`. + */ +private predicate functionSignatureWithBody(string qualifiedName, int nparams, Function f) { + functionSignature(f, qualifiedName, nparams) and + exists(f.getBlock()) +} + +/** + * Holds if the target of `call` is a function _with no definition_ that has + * name `qualifiedName` and `nparams` parameter count. See `functionSignature`. + */ +pragma[noinline] +private predicate callSignatureWithoutBody(string qualifiedName, int nparams, CallInstruction call) { + exists(Function target | + target = call.getStaticCallTarget() and + not exists(target.getBlock()) and + functionSignature(target, qualifiedName, nparams) + ) +} + +/** + * Holds if `f` has name `qualifiedName` and `nparams` parameter count. This is + * an approximation of its signature for the purpose of matching functions that + * might be the same across link targets. + */ +private predicate functionSignature(Function f, string qualifiedName, int nparams) { + qualifiedName = f.getQualifiedName() and + nparams = f.getNumberOfParameters() and + not f.isStatic() +} + +/** + * Holds if the set of viable implementations that can be called by `call` + * might be improved by knowing the call context. + */ +predicate mayBenefitFromCallContext(CallInstruction call, Function f) { + mayBenefitFromCallContext(call, f, _) +} + +/** + * Holds if `call` is a call through a function pointer, and the pointer + * value is given as the `arg`'th argument to `f`. + */ +private predicate mayBenefitFromCallContext( + VirtualDispatch::DataSensitiveCall call, Function f, int arg +) { + f = pragma[only_bind_out](call).getEnclosingCallable() and + exists(InitializeParameterInstruction init | + not exists(call.getStaticCallTarget()) and + init.getEnclosingFunction() = f and + call.flowsFrom(DataFlow::instructionNode(init), _) and + init.getParameter().getIndex() = arg + ) +} + +/** + * Gets a viable dispatch target of `call` in the context `ctx`. This is + * restricted to those `call`s for which a context might make a difference. + */ +Function viableImplInCallContext(CallInstruction call, CallInstruction ctx) { + result = viableCallable(call) and + exists(int i, Function f | + mayBenefitFromCallContext(pragma[only_bind_into](call), f, i) and + f = ctx.getStaticCallTarget() and + result = ctx.getArgument(i).getUnconvertedResultExpression().(FunctionAccess).getTarget() + ) +} + +/** Holds if arguments at position `apos` match parameters at position `ppos`. */ +pragma[inline] +predicate parameterMatch(ParameterPosition ppos, ArgumentPosition apos) { ppos = apos } diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll new file mode 100644 index 000000000000..468f8640a784 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl.qll @@ -0,0 +1,4450 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from + * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed + * through the `Configuration` class. This file exists in several identical + * copies, allowing queries to use multiple `Configuration` classes that depend + * on each other without introducing mutual recursion among those configurations. + */ + +private import DataFlowImplCommon +private import DataFlowImplSpecific::Private +import DataFlowImplSpecific::Public +import DataFlowImplCommonPublic + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ```ql + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. + * } + * ``` + * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and + * the edges are those data-flow steps that preserve the value of the node + * along with any additional edges defined by `isAdditionalFlowStep`. + * Specifying nodes in `isBarrier` will remove those nodes from the graph, and + * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going + * and/or out-going edges from those nodes, respectively. + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + predicate isSource(Node source) { none() } + + /** + * Holds if `source` is a relevant data flow source with the given initial + * `state`. + */ + predicate isSource(Node source, FlowState state) { none() } + + /** + * Holds if `sink` is a relevant data flow sink. + */ + predicate isSink(Node sink) { none() } + + /** + * Holds if `sink` is a relevant data flow sink accepting `state`. + */ + predicate isSink(Node source, FlowState state) { none() } + + /** + * Holds if data flow through `node` is prohibited. This completely removes + * `node` from the data flow graph. + */ + predicate isBarrier(Node node) { none() } + + /** + * Holds if data flow through `node` is prohibited when the flow state is + * `state`. + */ + predicate isBarrier(Node node, FlowState state) { none() } + + /** Holds if data flow into `node` is prohibited. */ + predicate isBarrierIn(Node node) { none() } + + /** Holds if data flow out of `node` is prohibited. */ + predicate isBarrierOut(Node node) { none() } + + /** + * DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead. + * + * Holds if data flow through nodes guarded by `guard` is prohibited. + */ + deprecated predicate isBarrierGuard(BarrierGuard guard) { none() } + + /** + * DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead. + * + * Holds if data flow through nodes guarded by `guard` is prohibited when + * the flow state is `state` + */ + deprecated predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() } + + /** + * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps. + * This step is only applicable in `state1` and updates the flow state to `state2`. + */ + predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) { + none() + } + + /** + * Holds if an arbitrary number of implicit read steps of content `c` may be + * taken at `node`. + */ + predicate allowImplicitRead(Node node, ContentSet c) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Gets a data flow configuration feature to add restrictions to the set of + * valid flow paths. + * + * - `FeatureHasSourceCallContext`: + * Assume that sources have some existing call context to disallow + * conflicting return-flow directly following the source. + * - `FeatureHasSinkCallContext`: + * Assume that sinks have some existing call context to disallow + * conflicting argument-to-parameter flow directly preceding the sink. + * - `FeatureEqualSourceSinkCallContext`: + * Implies both of the above and additionally ensures that the entire flow + * path preserves the call context. + */ + FlowFeature getAFeature() { none() } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. + */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { this.hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(DataFlowExpr sink) { this.hasFlowTo(exprNode(sink)) } + + /** + * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev` + * measured in approximate number of interprocedural steps. + */ + int explorationLimit() { none() } + + /** + * Holds if hidden nodes should be included in the data flow graph. + * + * This feature should only be used for debugging or when the data flow graph + * is not visualized (for example in a `path-problem` query). + */ + predicate includeHiddenNodes() { none() } + + /** + * Holds if there is a partial data flow path from `source` to `node`. The + * approximate distance between `node` and the closest source is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards sink definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sources is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + */ + final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + partialFlow(source, node, this) and + dist = node.getSourceDistance() + } + + /** + * Holds if there is a partial data flow path from `node` to `sink`. The + * approximate distance between `node` and the closest sink is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards source definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sinks is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + * + * Note that reverse flow has slightly lower precision than the corresponding + * forward flow, as reverse flow disregards type pruning among other features. + */ + final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) { + revPartialFlow(node, sink, this) and + dist = node.getSinkDistance() + } +} + +/** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. + */ +abstract private class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSource(n, _)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n | this.isSink(n, _)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, _, n2, _)) < 0 + or + super.hasFlow(source, sink) + } +} + +private newtype TNodeEx = + TNodeNormal(Node n) or + TNodeImplicitRead(Node n, boolean hasRead) { + any(Configuration c).allowImplicitRead(n, _) and hasRead = [false, true] + } + +private class NodeEx extends TNodeEx { + string toString() { + result = this.asNode().toString() + or + exists(Node n | this.isImplicitReadNode(n, _) | result = n.toString() + " [Ext]") + } + + Node asNode() { this = TNodeNormal(result) } + + predicate isImplicitReadNode(Node n, boolean hasRead) { this = TNodeImplicitRead(n, hasRead) } + + Node projectToNode() { this = TNodeNormal(result) or this = TNodeImplicitRead(result, _) } + + pragma[nomagic] + private DataFlowCallable getEnclosingCallable0() { + nodeEnclosingCallable(this.projectToNode(), result) + } + + pragma[inline] + DataFlowCallable getEnclosingCallable() { + pragma[only_bind_out](this).getEnclosingCallable0() = pragma[only_bind_into](result) + } + + pragma[nomagic] + private DataFlowType getDataFlowType0() { nodeDataFlowType(this.asNode(), result) } + + pragma[inline] + DataFlowType getDataFlowType() { + pragma[only_bind_out](this).getDataFlowType0() = pragma[only_bind_into](result) + } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.projectToNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +private class ArgNodeEx extends NodeEx { + ArgNodeEx() { this.asNode() instanceof ArgNode } +} + +private class ParamNodeEx extends NodeEx { + ParamNodeEx() { this.asNode() instanceof ParamNode } + + predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) { + this.asNode().(ParamNode).isParameterOf(c, pos) + } + + ParameterPosition getPosition() { this.isParameterOf(_, result) } + + predicate allowParameterReturnInSelf() { allowParameterReturnInSelfCached(this.asNode()) } +} + +private class RetNodeEx extends NodeEx { + RetNodeEx() { this.asNode() instanceof ReturnNodeExt } + + ReturnPosition getReturnPosition() { result = getReturnPosition(this.asNode()) } + + ReturnKindExt getKind() { result = this.asNode().(ReturnNodeExt).getKind() } +} + +private predicate inBarrier(NodeEx node, Configuration config) { + exists(Node n | + node.asNode() = n and + config.isBarrierIn(n) + | + config.isSource(n) or config.isSource(n, _) + ) +} + +private predicate outBarrier(NodeEx node, Configuration config) { + exists(Node n | + node.asNode() = n and + config.isBarrierOut(n) + | + config.isSink(n) or config.isSink(n, _) + ) +} + +/** A bridge class to access the deprecated `isBarrierGuard`. */ +private class BarrierGuardGuardedNodeBridge extends Unit { + abstract predicate guardedNode(Node n, Configuration config); + + abstract predicate guardedNode(Node n, FlowState state, Configuration config); +} + +private class BarrierGuardGuardedNode extends BarrierGuardGuardedNodeBridge { + deprecated override predicate guardedNode(Node n, Configuration config) { + exists(BarrierGuard g | + config.isBarrierGuard(g) and + n = g.getAGuardedNode() + ) + } + + deprecated override predicate guardedNode(Node n, FlowState state, Configuration config) { + exists(BarrierGuard g | + config.isBarrierGuard(g, state) and + n = g.getAGuardedNode() + ) + } +} + +pragma[nomagic] +private predicate fullBarrier(NodeEx node, Configuration config) { + exists(Node n | node.asNode() = n | + config.isBarrier(n) + or + config.isBarrierIn(n) and + not config.isSource(n) and + not config.isSource(n, _) + or + config.isBarrierOut(n) and + not config.isSink(n) and + not config.isSink(n, _) + or + any(BarrierGuardGuardedNodeBridge b).guardedNode(n, config) + ) +} + +pragma[nomagic] +private predicate stateBarrier(NodeEx node, FlowState state, Configuration config) { + exists(Node n | node.asNode() = n | + config.isBarrier(n, state) + or + any(BarrierGuardGuardedNodeBridge b).guardedNode(n, state, config) + ) +} + +pragma[nomagic] +private predicate sourceNode(NodeEx node, FlowState state, Configuration config) { + ( + config.isSource(node.asNode()) and state instanceof FlowStateEmpty + or + config.isSource(node.asNode(), state) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) +} + +pragma[nomagic] +private predicate sinkNode(NodeEx node, FlowState state, Configuration config) { + ( + config.isSink(node.asNode()) and state instanceof FlowStateEmpty + or + config.isSink(node.asNode(), state) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) +} + +/** Provides the relevant barriers for a step from `node1` to `node2`. */ +pragma[inline] +private predicate stepFilter(NodeEx node1, NodeEx node2, Configuration config) { + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if data can flow in one local step from `node1` to `node2`. + */ +private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + simpleLocalFlowStepExt(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + stepFilter(node1, node2, config) + ) + or + exists(Node n | + config.allowImplicitRead(n, _) and + node1.asNode() = n and + node2.isImplicitReadNode(n, false) and + not fullBarrier(node1, config) + ) +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. + */ +private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) + ) + or + exists(Node n | + config.allowImplicitRead(n, _) and + node1.isImplicitReadNode(n, true) and + node2.asNode() = n and + not fullBarrier(node2, config) + ) +} + +private predicate additionalLocalStateStep( + NodeEx node1, FlowState s1, NodeEx node2, FlowState s2, Configuration config +) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), s1, pragma[only_bind_into](n2), s2) and + getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not stateBarrier(node1, s1, config) and + not stateBarrier(node2, s2, config) + ) +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. + */ +private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + jumpStepCached(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + stepFilter(node1, node2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. + */ +private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +private predicate additionalJumpStateStep( + NodeEx node1, FlowState s1, NodeEx node2, FlowState s2, Configuration config +) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), s1, pragma[only_bind_into](n2), s2) and + getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not stateBarrier(node1, s1, config) and + not stateBarrier(node2, s2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +pragma[nomagic] +private predicate readSet(NodeEx node1, ContentSet c, NodeEx node2, Configuration config) { + readSet(pragma[only_bind_into](node1.asNode()), c, pragma[only_bind_into](node2.asNode())) and + stepFilter(node1, node2, config) + or + exists(Node n | + node2.isImplicitReadNode(n, true) and + node1.isImplicitReadNode(n, _) and + config.allowImplicitRead(n, c) + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(ContentSet cs | + readSet(node1, cs, node2, config) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate clearsContentEx(NodeEx n, Content c) { + exists(ContentSet cs | + clearsContentCached(n.asNode(), cs) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate expectsContentEx(NodeEx n, Content c) { + exists(ContentSet cs | + expectsContentCached(n.asNode(), cs) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +pragma[nomagic] +private predicate notExpectsContent(NodeEx n) { not expectsContentCached(n.asNode(), _) } + +pragma[nomagic] +private predicate store( + NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config +) { + store(pragma[only_bind_into](node1.asNode()), tc, pragma[only_bind_into](node2.asNode()), + contentType) and + read(_, tc.getContent(), _, config) and + stepFilter(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutEx(DataFlowCall call, ReturnPosition pos, NodeEx out) { + viableReturnPosOut(call, pos, out.asNode()) +} + +pragma[nomagic] +private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx arg) { + viableParamArg(call, p.asNode(), arg.asNode()) +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +private predicate hasSourceCallCtx(Configuration config) { + exists(FlowFeature feature | feature = config.getAFeature() | + feature instanceof FeatureHasSourceCallContext or + feature instanceof FeatureEqualSourceSinkCallContext + ) +} + +private predicate hasSinkCallCtx(Configuration config) { + exists(FlowFeature feature | feature = config.getAFeature() | + feature instanceof FeatureHasSinkCallContext or + feature instanceof FeatureEqualSourceSinkCallContext + ) +} + +private module Stage1 implements StageSig { + class ApApprox = Unit; + + class Ap = Unit; + + class ApOption = Unit; + + class Cc = boolean; + + /* Begin: Stage 1 logic. */ + /** + * Holds if `node` is reachable from a source in the configuration `config`. + * + * The Boolean `cc` records whether the node is reached through an + * argument in a call. + */ + predicate fwdFlow(NodeEx node, Cc cc, Configuration config) { + sourceNode(node, _, config) and + if hasSourceCallCtx(config) then cc = true else cc = false + or + exists(NodeEx mid | fwdFlow(mid, cc, config) | + localFlowStep(mid, node, config) or + additionalLocalFlowStep(mid, node, config) or + additionalLocalStateStep(mid, _, node, _, config) + ) + or + exists(NodeEx mid | fwdFlow(mid, _, config) and cc = false | + jumpStep(mid, node, config) or + additionalJumpStep(mid, node, config) or + additionalJumpStateStep(mid, _, node, _, config) + ) + or + // store + exists(NodeEx mid | + useFieldFlow(config) and + fwdFlow(mid, cc, config) and + store(mid, _, node, _, config) + ) + or + // read + exists(ContentSet c | + fwdFlowReadSet(c, node, cc, config) and + fwdFlowConsCandSet(c, _, config) + ) + or + // flow into a callable + exists(NodeEx arg | + fwdFlow(arg, _, config) and + viableParamArgEx(_, node, arg) and + cc = true and + not fullBarrier(node, config) + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, false, config) and + cc = false + or + fwdFlowOutFromArg(call, node, config) and + fwdFlowIsEntered(call, cc, config) + ) + } + + private predicate fwdFlow(NodeEx node, Configuration config) { fwdFlow(node, _, config) } + + pragma[nomagic] + private predicate fwdFlowReadSet(ContentSet c, NodeEx node, Cc cc, Configuration config) { + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + readSet(mid, c, node, config) + ) + } + + /** + * Holds if `c` is the target of a store in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Content c, Configuration config) { + exists(NodeEx mid, NodeEx node, TypedContent tc | + not fullBarrier(node, config) and + useFieldFlow(config) and + fwdFlow(mid, _, config) and + store(mid, tc, node, _, config) and + c = tc.getContent() + ) + } + + /** + * Holds if `cs` may be interpreted in a read as the target of some store + * into `c`, in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowConsCandSet(ContentSet cs, Content c, Configuration config) { + fwdFlowConsCand(c, config) and + c = cs.getAReadContent() + } + + pragma[nomagic] + private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) { + exists(RetNodeEx ret | + fwdFlow(ret, cc, config) and + ret.getReturnPosition() = pos + ) + } + + pragma[nomagic] + private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) { + exists(ReturnPosition pos | + fwdFlowReturnPosition(pos, cc, config) and + viableReturnPosOutEx(call, pos, out) and + not fullBarrier(out, config) + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg(DataFlowCall call, NodeEx out, Configuration config) { + fwdFlowOut(call, out, true, config) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) { + exists(ArgNodeEx arg | + fwdFlow(arg, cc, config) and + viableParamArgEx(call, _, arg) + ) + } + + private predicate stateStepFwd(FlowState state1, FlowState state2, Configuration config) { + exists(NodeEx node1 | + additionalLocalStateStep(node1, state1, _, state2, config) or + additionalJumpStateStep(node1, state1, _, state2, config) + | + fwdFlow(node1, config) + ) + } + + private predicate fwdFlowState(FlowState state, Configuration config) { + sourceNode(_, state, config) + or + exists(FlowState state0 | + fwdFlowState(state0, config) and + stateStepFwd(state0, state, config) + ) + } + + /** + * Holds if `node` is part of a path from a source to a sink in the + * configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink. + */ + pragma[nomagic] + predicate revFlow(NodeEx node, boolean toReturn, Configuration config) { + revFlow0(node, toReturn, config) and + fwdFlow(node, config) + } + + pragma[nomagic] + private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) { + exists(FlowState state | + fwdFlow(node, pragma[only_bind_into](config)) and + sinkNode(node, state, config) and + fwdFlowState(state, pragma[only_bind_into](config)) and + if hasSinkCallCtx(config) then toReturn = true else toReturn = false + ) + or + exists(NodeEx mid | revFlow(mid, toReturn, config) | + localFlowStep(node, mid, config) or + additionalLocalFlowStep(node, mid, config) or + additionalLocalStateStep(node, _, mid, _, config) + ) + or + exists(NodeEx mid | revFlow(mid, _, config) and toReturn = false | + jumpStep(node, mid, config) or + additionalJumpStep(node, mid, config) or + additionalJumpStateStep(node, _, mid, _, config) + ) + or + // store + exists(Content c | + revFlowStore(c, node, toReturn, config) and + revFlowConsCand(c, config) + ) + or + // read + exists(NodeEx mid, ContentSet c | + readSet(node, c, mid, config) and + fwdFlowConsCandSet(c, _, pragma[only_bind_into](config)) and + revFlow(mid, toReturn, pragma[only_bind_into](config)) + ) + or + // flow into a callable + exists(DataFlowCall call | + revFlowIn(call, node, false, config) and + toReturn = false + or + revFlowInToReturn(call, node, config) and + revFlowIsReturned(call, toReturn, config) + ) + or + // flow out of a callable + exists(ReturnPosition pos | + revFlowOut(pos, config) and + node.(RetNodeEx).getReturnPosition() = pos and + toReturn = true + ) + } + + /** + * Holds if `c` is the target of a read in the flow covered by `revFlow`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Content c, Configuration config) { + exists(NodeEx mid, NodeEx node, ContentSet cs | + fwdFlow(node, pragma[only_bind_into](config)) and + readSet(node, cs, mid, config) and + fwdFlowConsCandSet(cs, c, pragma[only_bind_into](config)) and + revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + private predicate revFlowStore(Content c, NodeEx node, boolean toReturn, Configuration config) { + exists(NodeEx mid, TypedContent tc | + revFlow(mid, toReturn, pragma[only_bind_into](config)) and + fwdFlowConsCand(c, pragma[only_bind_into](config)) and + store(node, tc, mid, _, config) and + c = tc.getContent() + ) + } + + /** + * Holds if `c` is the target of both a read and a store in the flow covered + * by `revFlow`. + */ + pragma[nomagic] + predicate revFlowIsReadAndStored(Content c, Configuration conf) { + revFlowConsCand(c, conf) and + revFlowStore(c, _, _, conf) + } + + pragma[nomagic] + predicate viableReturnPosOutNodeCandFwd1( + DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config + ) { + fwdFlowReturnPosition(pos, _, config) and + viableReturnPosOutEx(call, pos, out) + } + + pragma[nomagic] + private predicate revFlowOut(ReturnPosition pos, Configuration config) { + exists(DataFlowCall call, NodeEx out | + revFlow(out, _, config) and + viableReturnPosOutNodeCandFwd1(call, pos, out, config) + ) + } + + pragma[nomagic] + predicate viableParamArgNodeCandFwd1( + DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config + ) { + viableParamArgEx(call, p, arg) and + fwdFlow(arg, config) + } + + pragma[nomagic] + private predicate revFlowIn( + DataFlowCall call, ArgNodeEx arg, boolean toReturn, Configuration config + ) { + exists(ParamNodeEx p | + revFlow(p, toReturn, config) and + viableParamArgNodeCandFwd1(call, p, arg, config) + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn(DataFlowCall call, ArgNodeEx arg, Configuration config) { + revFlowIn(call, arg, true, config) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) { + exists(NodeEx out | + revFlow(out, toReturn, config) and + fwdFlowOutFromArg(call, out, config) + ) + } + + private predicate stateStepRev(FlowState state1, FlowState state2, Configuration config) { + exists(NodeEx node1, NodeEx node2 | + additionalLocalStateStep(node1, state1, node2, state2, config) or + additionalJumpStateStep(node1, state1, node2, state2, config) + | + revFlow(node1, _, pragma[only_bind_into](config)) and + revFlow(node2, _, pragma[only_bind_into](config)) and + fwdFlowState(state1, pragma[only_bind_into](config)) and + fwdFlowState(state2, pragma[only_bind_into](config)) + ) + } + + predicate revFlowState(FlowState state, Configuration config) { + exists(NodeEx node | + sinkNode(node, state, config) and + revFlow(node, _, pragma[only_bind_into](config)) and + fwdFlowState(state, pragma[only_bind_into](config)) + ) + or + exists(FlowState state0 | + revFlowState(state0, config) and + stateStepRev(state, state0, config) + ) + } + + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Content c | + revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + revFlow(node2, pragma[only_bind_into](config)) and + store(node1, tc, node2, contentType, config) and + c = tc.getContent() and + exists(ap1) + ) + } + + pragma[nomagic] + predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) { + revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + read(n1, c, n2, pragma[only_bind_into](config)) and + revFlow(n2, pragma[only_bind_into](config)) + } + + pragma[nomagic] + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, config) } + + bindingset[node, state, config] + predicate revFlow(NodeEx node, FlowState state, Ap ap, Configuration config) { + revFlow(node, _, pragma[only_bind_into](config)) and + exists(state) and + exists(ap) + } + + private predicate throughFlowNodeCand(NodeEx node, Configuration config) { + revFlow(node, true, config) and + fwdFlow(node, true, config) and + not inBarrier(node, config) and + not outBarrier(node, config) + } + + /** Holds if flow may return from `callable`. */ + pragma[nomagic] + private predicate returnFlowCallableNodeCand( + DataFlowCallable callable, ReturnKindExt kind, Configuration config + ) { + exists(RetNodeEx ret | + throughFlowNodeCand(ret, config) and + callable = ret.getEnclosingCallable() and + kind = ret.getKind() + ) + } + + /** + * Holds if flow may enter through `p` and reach a return node making `p` a + * candidate for the origin of a summary. + */ + pragma[nomagic] + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(ReturnKindExt kind | + throughFlowNodeCand(p, config) and + returnFlowCallableNodeCand(c, kind, config) and + p.getEnclosingCallable() = c and + exists(ap) and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition() + or + p.allowParameterReturnInSelf() + ) + ) + } + + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists(ArgNodeEx arg, boolean toReturn | + revFlow(arg, toReturn, config) and + revFlowInToReturn(call, arg, config) and + revFlowIsReturned(call, toReturn, config) + ) + } + + predicate stats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples, Configuration config + ) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, config)) and + fields = count(Content f0 | fwdFlowConsCand(f0, config)) and + conscand = -1 and + states = count(FlowState state | fwdFlowState(state, config)) and + tuples = count(NodeEx n, boolean b | fwdFlow(n, b, config)) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, config)) and + fields = count(Content f0 | revFlowConsCand(f0, config)) and + conscand = -1 and + states = count(FlowState state | revFlowState(state, config)) and + tuples = count(NodeEx n, boolean b | revFlow(n, b, config)) + } + /* End: Stage 1 logic. */ +} + +pragma[noinline] +private predicate localFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) { + Stage1::revFlow(node2, config) and + localFlowStep(node1, node2, config) +} + +pragma[noinline] +private predicate additionalLocalFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) { + Stage1::revFlow(node2, config) and + additionalLocalFlowStep(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutNodeCand1( + DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config +) { + Stage1::revFlow(out, config) and + Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, RetNodeEx ret, NodeEx out, Configuration config +) { + viableReturnPosOutNodeCand1(call, ret.getReturnPosition(), out, config) and + Stage1::revFlow(ret, config) and + not outBarrier(ret, config) and + not inBarrier(out, config) +} + +pragma[nomagic] +private predicate viableParamArgNodeCand1( + DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config +) { + Stage1::viableParamArgNodeCandFwd1(call, p, arg, config) and + Stage1::revFlow(arg, config) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, Configuration config +) { + viableParamArgNodeCand1(call, p, arg, config) and + Stage1::revFlow(p, config) and + not outBarrier(arg, config) and + not inBarrier(p, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(NodeEx n1, Configuration conf) { + result = + strictcount(NodeEx n | + flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf) + ) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(NodeEx n2, Configuration conf) { + result = + strictcount(NodeEx n | + flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf) + ) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, ret, out, config) and + exists(int b, int j | + b = branch(ret, config) and + j = join(out, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config +) { + flowIntoCallNodeCand1(call, arg, p, config) and + exists(int b, int j | + b = branch(arg, config) and + j = join(p, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +private signature module StageSig { + class Ap; + + predicate revFlow(NodeEx node, Configuration config); + + bindingset[node, state, config] + predicate revFlow(NodeEx node, FlowState state, Ap ap, Configuration config); + + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config); + + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config); + + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ); + + predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config); +} + +private module MkStage { + class ApApprox = PrevStage::Ap; + + signature module StageParam { + class Ap; + + class ApNil extends Ap; + + bindingset[result, ap] + ApApprox getApprox(Ap ap); + + ApNil getApNil(NodeEx node); + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail); + + Content getHeadContent(Ap ap); + + class ApOption; + + ApOption apNone(); + + ApOption apSome(Ap ap); + + class Cc; + + class CcCall extends Cc; + + // TODO: member predicate on CcCall + predicate matchesCall(CcCall cc, DataFlowCall call); + + class CcNoCall extends Cc; + + Cc ccNone(); + + CcCall ccSomeCall(); + + class LocalCc; + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc); + + bindingset[call, c, innercc] + CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc); + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc); + + bindingset[node1, state1, config] + bindingset[node2, state2, config] + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ); + + predicate flowOutOfCall( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + ); + + predicate flowIntoCall( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ); + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config); + + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType); + } + + module Stage implements StageSig { + import Param + + /* Begin: Stage logic. */ + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + pragma[only_bind_out](apa) = pragma[only_bind_out](result) + } + + pragma[nomagic] + private predicate flowThroughOutOfCall( + DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, + Configuration config + ) { + flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and + PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, + pragma[only_bind_into](config)) and + matchesCall(ccc, call) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. + */ + pragma[nomagic] + predicate fwdFlow( + NodeEx node, FlowState state, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + fwdFlow0(node, state, cc, argAp, ap, config) and + PrevStage::revFlow(node, state, unbindApa(getApprox(ap)), config) and + filter(node, state, ap, config) + } + + pragma[nomagic] + private predicate fwdFlow0( + NodeEx node, FlowState state, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + sourceNode(node, state, config) and + (if hasSourceCallCtx(config) then cc = ccSomeCall() else cc = ccNone()) and + argAp = apNone() and + ap = getApNil(node) + or + exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc | + fwdFlow(mid, state0, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc) + | + localStep(mid, state0, node, state, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, state0, node, state, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(NodeEx mid | + fwdFlow(mid, pragma[only_bind_into](state), _, _, ap, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(mid, state, _, _, nil, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(mid, state0, _, _, nil, pragma[only_bind_into](config)) and + additionalJumpStateStep(mid, state0, node, state, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, state, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap ap0, Content c | + fwdFlowRead(ap0, c, _, node, state, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, state, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + fwdFlowOutNotFromArg(node, state, cc, argAp, ap, config) + or + exists(DataFlowCall call, Ap argAp0 | + fwdFlowOutFromArg(call, node, state, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, argAp0, config) + ) + } + + pragma[nomagic] + private predicate fwdFlowStore( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, FlowState state, Cc cc, ApOption argAp, + Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, state, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, NodeEx node1, NodeEx node2, FlowState state, Cc cc, ApOption argAp, + Configuration config + ) { + fwdFlow(node1, state, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNodeEx p, FlowState state, Cc outercc, Cc innercc, ApOption argAp, + Ap ap, Configuration config + ) { + exists(ArgNodeEx arg, boolean allowsFieldFlow | + fwdFlow(arg, state, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate fwdFlowOutNotFromArg( + NodeEx out, FlowState state, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists( + DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc, + DataFlowCallable inner + | + fwdFlow(ret, state, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = ret.getEnclosingCallable() and + ccOut = getCallContextReturn(inner, call, innercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, NodeEx out, FlowState state, Ap argAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | + fwdFlow(ret, state, ccc, apSome(argAp), ap, config) and + flowThroughOutOfCall(call, ccc, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p | + fwdFlowIn(call, p, _, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) + ) + } + + pragma[nomagic] + private predicate storeStepFwd( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, _, config) + } + + private predicate readStepFwd( + NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config + ) { + fwdFlowRead(ap1, c, n1, n2, _, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + + pragma[nomagic] + private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) { + exists(Ap argAp0, NodeEx out, FlowState state, Cc cc, ApOption argAp, Ap ap | + fwdFlow(out, state, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, + pragma[only_bind_into](config)) and + fwdFlowOutFromArg(call, out, state, argAp0, ap, config) and + fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), + pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0), + pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + private predicate flowThroughIntoCall( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ) { + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + fwdFlow(arg, _, _, _, _, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and + callMayFlowThroughFwd(call, pragma[only_bind_into](config)) + } + + pragma[nomagic] + private predicate returnNodeMayFlowThrough( + RetNodeEx ret, FlowState state, Ap ap, Configuration config + ) { + fwdFlow(ret, state, any(CcCall ccc), apSome(_), ap, config) + } + + /** + * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + revFlow0(node, state, toReturn, returnAp, ap, config) and + fwdFlow(node, state, _, _, ap, config) + } + + pragma[nomagic] + private predicate revFlow0( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, state, _, _, ap, config) and + sinkNode(node, state, config) and + (if hasSinkCallCtx(config) then toReturn = true else toReturn = false) and + returnAp = apNone() and + ap instanceof ApNil + or + exists(NodeEx mid, FlowState state0 | + localStep(node, state, mid, state0, true, _, config, _) and + revFlow(mid, state0, toReturn, returnAp, ap, config) + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(node, pragma[only_bind_into](state), _, _, ap, pragma[only_bind_into](config)) and + localStep(node, pragma[only_bind_into](state), mid, state0, false, _, config, _) and + revFlow(mid, state0, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + exists(NodeEx mid | + jumpStep(node, mid, config) and + revFlow(mid, state, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), state, _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(node, _, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStateStep(node, state, mid, state0, config) and + revFlow(pragma[only_bind_into](mid), pragma[only_bind_into](state0), _, _, nil, + pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, state, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(NodeEx mid, Ap ap0 | + revFlow(mid, state, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + revFlowInNotToReturn(node, state, returnAp, ap, config) and + toReturn = false + or + exists(DataFlowCall call, Ap returnAp0 | + revFlowInToReturn(call, node, state, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + or + // flow out of a callable + revFlowOut(_, node, state, _, _, ap, config) and + toReturn = true and + if returnNodeMayFlowThrough(node, state, ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, NodeEx node, FlowState state, TypedContent tc, NodeEx mid, + boolean toReturn, ApOption returnAp, Configuration config + ) { + revFlow(mid, state, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(NodeEx mid, Ap tail0 | + revFlow(mid, _, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, RetNodeEx ret, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(NodeEx out, boolean allowsFieldFlow | + revFlow(out, state, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate revFlowInNotToReturn( + ArgNodeEx arg, FlowState state, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, state, false, returnAp, ap, config) and + flowIntoCall(_, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNodeEx arg, FlowState state, Ap returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, state, true, apSome(returnAp), ap, config) and + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, FlowState state, CcCall ccc | + revFlowOut(call, ret, state, toReturn, returnAp, ap, config) and + fwdFlow(ret, state, ccc, apSome(_), ap, config) and + matchesCall(ccc, call) + ) + } + + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Ap ap2, Content c | + PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and + revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + + predicate revFlow(NodeEx node, FlowState state, Configuration config) { + revFlow(node, state, _, _, _, config) + } + + predicate revFlow(NodeEx node, FlowState state, Ap ap, Configuration config) { + revFlow(node, state, _, _, ap, config) + } + + pragma[nomagic] + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, _, config) } + + // use an alias as a workaround for bad functionality-induced joins + pragma[nomagic] + predicate revFlowAlias(NodeEx node, Configuration config) { revFlow(node, _, _, _, _, config) } + + // use an alias as a workaround for bad functionality-induced joins + pragma[nomagic] + predicate revFlowAlias(NodeEx node, FlowState state, Ap ap, Configuration config) { + revFlow(node, state, ap, config) + } + + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + private predicate validAp(Ap ap, Configuration config) { + revFlow(_, _, _, _, ap, config) and ap instanceof ApNil + or + exists(TypedContent head, Ap tail | + consCand(head, tail, config) and + ap = apCons(head, tail) + ) + } + + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + revConsCand(tc, ap, config) and + validAp(ap, config) + } + + pragma[noinline] + private predicate parameterFlow( + ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, _, true, apSome(ap0), ap, config) and + c = p.getEnclosingCallable() + } + + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(RetNodeEx ret, FlowState state, Ap ap0, ReturnKindExt kind, ParameterPosition pos | + parameterFlow(p, ap, ap0, c, config) and + c = ret.getEnclosingCallable() and + revFlow(pragma[only_bind_into](ret), pragma[only_bind_into](state), true, apSome(_), + pragma[only_bind_into](ap0), pragma[only_bind_into](config)) and + fwdFlow(ret, state, any(CcCall ccc), apSome(ap), ap0, config) and + kind = ret.getKind() and + p.getPosition() = pos and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + p.allowParameterReturnInSelf() + ) + ) + } + + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists( + Ap returnAp0, ArgNodeEx arg, FlowState state, boolean toReturn, ApOption returnAp, Ap ap + | + revFlow(arg, state, toReturn, returnAp, ap, config) and + revFlowInToReturn(call, arg, state, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + } + + predicate stats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples, Configuration config + ) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, _, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + states = count(FlowState state | fwdFlow(_, state, _, _, _, config)) and + tuples = + count(NodeEx n, FlowState state, Cc cc, ApOption argAp, Ap ap | + fwdFlow(n, state, cc, argAp, ap, config) + ) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + states = count(FlowState state | revFlow(_, state, _, _, _, config)) and + tuples = + count(NodeEx n, FlowState state, boolean b, ApOption retAp, Ap ap | + revFlow(n, state, b, retAp, ap, config) + ) + } + /* End: Stage logic. */ + } +} + +private module BooleanCallContext { + class Cc extends boolean { + Cc() { this in [true, false] } + } + + class CcCall extends Cc { + CcCall() { this = true } + } + + /** Holds if the call context may be `call`. */ + predicate matchesCall(CcCall cc, DataFlowCall call) { any() } + + class CcNoCall extends Cc { + CcNoCall() { this = false } + } + + Cc ccNone() { result = false } + + CcCall ccSomeCall() { result = true } + + class LocalCc = Unit; + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc) { any() } + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } + + bindingset[call, c, innercc] + CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() } +} + +private module Level1CallContext { + class Cc = CallContext; + + class CcCall = CallContextCall; + + pragma[inline] + predicate matchesCall(CcCall cc, DataFlowCall call) { cc.matchesCall(call) } + + class CcNoCall = CallContextNoCall; + + Cc ccNone() { result instanceof CallContextAny } + + CcCall ccSomeCall() { result instanceof CallContextSomeCall } + + module NoLocalCallContext { + class LocalCc = Unit; + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc) { any() } + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + checkCallContextCall(outercc, call, c) and + if recordDataFlowCallSiteDispatch(call, c) + then result = TSpecificCall(call) + else result = TSomeCall() + } + } + + module LocalCallContext { + class LocalCc = LocalCallContext; + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc) { + result = + getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)), + node.getEnclosingCallable()) + } + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + checkCallContextCall(outercc, call, c) and + if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall() + } + } + + bindingset[call, c, innercc] + CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { + checkCallContextReturn(innercc, c, call) and + if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone() + } +} + +private module Stage2Param implements MkStage::StageParam { + private module PrevStage = Stage1; + + class Ap extends boolean { + Ap() { this in [true, false] } + } + + class ApNil extends Ap { + ApNil() { this = false } + } + + bindingset[result, ap] + PrevStage::Ap getApprox(Ap ap) { any() } + + ApNil getApNil(NodeEx node) { Stage1::revFlow(node, _) and exists(result) } + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) } + + pragma[inline] + Content getHeadContent(Ap ap) { exists(result) and ap = true } + + class ApOption = BooleanOption; + + ApOption apNone() { result = TBooleanNone() } + + ApOption apSome(Ap ap) { result = TBooleanSome(ap) } + + import Level1CallContext + import NoLocalCallContext + + bindingset[node1, state1, config] + bindingset[node2, state2, config] + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + ( + preservesValue = true and + localFlowStepNodeCand1(node1, node2, config) and + state1 = state2 + or + preservesValue = false and + additionalLocalFlowStepNodeCand1(node1, node2, config) and + state1 = state2 + or + preservesValue = false and + additionalLocalStateStep(node1, state1, node2, state2, config) + ) and + exists(ap) and + exists(lcc) + } + + predicate flowOutOfCall = flowOutOfCallNodeCand1/5; + + predicate flowIntoCall = flowIntoCallNodeCand1/5; + + pragma[nomagic] + private predicate expectsContentCand(NodeEx node, Configuration config) { + exists(Content c | + PrevStage::revFlow(node, pragma[only_bind_into](config)) and + PrevStage::revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + expectsContentEx(node, c) + ) + } + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { + PrevStage::revFlowState(state, pragma[only_bind_into](config)) and + exists(ap) and + not stateBarrier(node, state, config) and + ( + notExpectsContent(node) + or + ap = true and + expectsContentCand(node, config) + ) + } + + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } +} + +private module Stage2 implements StageSig { + import MkStage::Stage +} + +pragma[nomagic] +private predicate flowOutOfCallNodeCand2( + DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node1, pragma[only_bind_into](config)) +} + +pragma[nomagic] +private predicate flowIntoCallNodeCand2( + DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow, + Configuration config +) { + flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node1, pragma[only_bind_into](config)) +} + +private module LocalFlowBigStep { + /** + * A node where some checking is required, and hence the big-step relation + * is not allowed to step over. + */ + private class FlowCheckNode extends NodeEx { + FlowCheckNode() { + castNode(this.asNode()) or + clearsContentCached(this.asNode(), _) or + expectsContentCached(this.asNode(), _) + } + } + + /** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) { + Stage2::revFlow(node, state, config) and + ( + sourceNode(node, state, config) + or + jumpStep(_, node, config) + or + additionalJumpStep(_, node, config) + or + additionalJumpStateStep(_, _, node, state, config) + or + node instanceof ParamNodeEx + or + node.asNode() instanceof OutNodeExt + or + Stage2::storeStepCand(_, _, _, node, _, config) + or + Stage2::readStepCand(_, _, node, config) + or + node instanceof FlowCheckNode + or + exists(FlowState s | + additionalLocalStateStep(_, s, node, state, config) and + s != state + ) + ) + } + + /** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowExit(NodeEx node, FlowState state, Configuration config) { + exists(NodeEx next | Stage2::revFlow(next, state, config) | + jumpStep(node, next, config) or + additionalJumpStep(node, next, config) or + flowIntoCallNodeCand1(_, node, next, config) or + flowOutOfCallNodeCand1(_, node, next, config) or + Stage2::storeStepCand(node, _, _, next, _, config) or + Stage2::readStepCand(node, _, next, config) + ) + or + exists(NodeEx next, FlowState s | Stage2::revFlow(next, s, config) | + additionalJumpStateStep(node, state, next, s, config) + or + additionalLocalStateStep(node, state, next, s, config) and + s != state + ) + or + Stage2::revFlow(node, state, config) and + node instanceof FlowCheckNode + or + sinkNode(node, state, config) + } + + pragma[noinline] + private predicate additionalLocalFlowStepNodeCand2( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, Configuration config + ) { + additionalLocalFlowStepNodeCand1(node1, node2, config) and + state1 = state2 and + Stage2::revFlow(node1, pragma[only_bind_into](state1), false, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node2, pragma[only_bind_into](state2), false, + pragma[only_bind_into](config)) + or + additionalLocalStateStep(node1, state1, node2, state2, config) and + Stage2::revFlow(node1, state1, false, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node2, state2, false, pragma[only_bind_into](config)) + } + + /** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `[additional]localFlowStep` beginning + * at `localFlowEntry`. + */ + pragma[nomagic] + private predicate localFlowStepPlus( + NodeEx node1, FlowState state, NodeEx node2, boolean preservesValue, DataFlowType t, + Configuration config, LocalCallContext cc + ) { + not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and + ( + localFlowEntry(node1, pragma[only_bind_into](state), pragma[only_bind_into](config)) and + ( + localFlowStepNodeCand1(node1, node2, config) and + preservesValue = true and + t = node1.getDataFlowType() and // irrelevant dummy value + Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config)) + or + additionalLocalFlowStepNodeCand2(node1, state, node2, state, config) and + preservesValue = false and + t = node2.getDataFlowType() + ) and + node1 != node2 and + cc.relevantFor(node1.getEnclosingCallable()) and + not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) + or + exists(NodeEx mid | + localFlowStepPlus(node1, pragma[only_bind_into](state), mid, preservesValue, t, + pragma[only_bind_into](config), cc) and + localFlowStepNodeCand1(mid, node2, config) and + not mid instanceof FlowCheckNode and + Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config)) + ) + or + exists(NodeEx mid | + localFlowStepPlus(node1, state, mid, _, _, pragma[only_bind_into](config), cc) and + additionalLocalFlowStepNodeCand2(mid, state, node2, state, config) and + not mid instanceof FlowCheckNode and + preservesValue = false and + t = node2.getDataFlowType() + ) + ) + } + + /** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. + */ + pragma[nomagic] + predicate localFlowBigStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + AccessPathFrontNil apf, Configuration config, LocalCallContext callContext + ) { + localFlowStepPlus(node1, state1, node2, preservesValue, apf.getType(), config, callContext) and + localFlowExit(node2, state1, config) and + state1 = state2 + or + additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and + state1 != state2 and + preservesValue = false and + apf = TFrontNil(node2.getDataFlowType()) and + callContext.relevantFor(node1.getEnclosingCallable()) and + not exists(DataFlowCall call | call = callContext.(LocalCallContextSpecificCall).getCall() | + isUnreachableInCallCached(node1.asNode(), call) or + isUnreachableInCallCached(node2.asNode(), call) + ) + } +} + +private import LocalFlowBigStep + +private module Stage3Param implements MkStage::StageParam { + private module PrevStage = Stage2; + + class Ap = AccessPathFront; + + class ApNil = AccessPathFrontNil; + + PrevStage::Ap getApprox(Ap ap) { result = ap.toBoolNonEmpty() } + + ApNil getApNil(NodeEx node) { + PrevStage::revFlow(node, _) and result = TFrontNil(node.getDataFlowType()) + } + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) } + + pragma[noinline] + Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathFrontOption; + + ApOption apNone() { result = TAccessPathFrontNone() } + + ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) } + + import BooleanCallContext + + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, state1, node2, state2, preservesValue, ap, config, _) and exists(lcc) + } + + predicate flowOutOfCall = flowOutOfCallNodeCand2/5; + + predicate flowIntoCall = flowIntoCallNodeCand2/5; + + pragma[nomagic] + private predicate clearSet(NodeEx node, ContentSet c, Configuration config) { + PrevStage::revFlow(node, config) and + clearsContentCached(node.asNode(), c) + } + + pragma[nomagic] + private predicate clearContent(NodeEx node, Content c, Configuration config) { + exists(ContentSet cs | + PrevStage::readStepCand(_, pragma[only_bind_into](c), _, pragma[only_bind_into](config)) and + c = cs.getAReadContent() and + clearSet(node, cs, pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + private predicate clear(NodeEx node, Ap ap, Configuration config) { + clearContent(node, ap.getHead().getContent(), config) + } + + pragma[nomagic] + private predicate expectsContentCand(NodeEx node, Ap ap, Configuration config) { + exists(Content c | + PrevStage::revFlow(node, pragma[only_bind_into](config)) and + PrevStage::readStepCand(_, c, _, pragma[only_bind_into](config)) and + expectsContentEx(node, c) and + c = ap.getHead().getContent() + ) + } + + pragma[nomagic] + private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode } + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { + exists(state) and + exists(config) and + not clear(node, ap, config) and + (if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()) and + ( + notExpectsContent(node) + or + expectsContentCand(node, ap, config) + ) + } + + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType) { + // We need to typecheck stores here, since reverse flow through a getter + // might have a different type here compared to inside the getter. + compatibleTypes(ap.getType(), contentType) + } +} + +private module Stage3 implements StageSig { + import MkStage::Stage +} + +/** + * Holds if `argApf` is recorded as the summary context for flow reaching `node` + * and remains relevant for the following pruning stage. + */ +private predicate flowCandSummaryCtx( + NodeEx node, FlowState state, AccessPathFront argApf, Configuration config +) { + exists(AccessPathFront apf | + Stage3::revFlow(node, state, true, _, apf, config) and + Stage3::fwdFlow(node, state, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config) + ) +} + +/** + * Holds if a length 2 access path approximation with the head `tc` is expected + * to be expensive. + */ +private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) { + exists(int tails, int nodes, int apLimit, int tupleLimit | + tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and + nodes = + strictcount(NodeEx n, FlowState state | + Stage3::revFlow(n, state, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + or + flowCandSummaryCtx(n, state, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + ) and + accessPathApproxCostLimits(apLimit, tupleLimit) and + apLimit < tails and + tupleLimit < (tails - 1) * nodes and + not tc.forceHighPrecision() + ) +} + +private newtype TAccessPathApprox = + TNil(DataFlowType t) or + TConsNil(TypedContent tc, DataFlowType t) { + Stage3::consCand(tc, TFrontNil(t), _) and + not expensiveLen2unfolding(tc, _) + } or + TConsCons(TypedContent tc1, TypedContent tc2, int len) { + Stage3::consCand(tc1, TFrontHead(tc2), _) and + len in [2 .. accessPathLimit()] and + not expensiveLen2unfolding(tc1, _) + } or + TCons1(TypedContent tc, int len) { + len in [1 .. accessPathLimit()] and + expensiveLen2unfolding(tc, _) + } + +/** + * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only + * the first two elements of the list and its length are tracked. If data flows + * from a source to a given node with a given `AccessPathApprox`, this indicates + * the sequence of dereference operations needed to get from the value in the node + * to the tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPathApprox extends TAccessPathApprox { + abstract string toString(); + + abstract TypedContent getHead(); + + abstract int len(); + + abstract DataFlowType getType(); + + abstract AccessPathFront getFront(); + + /** Gets the access path obtained by popping `head` from this path, if any. */ + abstract AccessPathApprox pop(TypedContent head); +} + +private class AccessPathApproxNil extends AccessPathApprox, TNil { + private DataFlowType t; + + AccessPathApproxNil() { this = TNil(t) } + + override string toString() { result = concat(": " + ppReprType(t)) } + + override TypedContent getHead() { none() } + + override int len() { result = 0 } + + override DataFlowType getType() { result = t } + + override AccessPathFront getFront() { result = TFrontNil(t) } + + override AccessPathApprox pop(TypedContent head) { none() } +} + +abstract private class AccessPathApproxCons extends AccessPathApprox { } + +private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil { + private TypedContent tc; + private DataFlowType t; + + AccessPathApproxConsNil() { this = TConsNil(tc, t) } + + override string toString() { + // The `concat` becomes "" if `ppReprType` has no result. + result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t)) + } + + override TypedContent getHead() { result = tc } + + override int len() { result = 1 } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) } +} + +private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons { + private TypedContent tc1; + private TypedContent tc2; + private int len; + + AccessPathApproxConsCons() { this = TConsCons(tc1, tc2, len) } + + override string toString() { + if len = 2 + then result = "[" + tc1.toString() + ", " + tc2.toString() + "]" + else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc1 } + + override int len() { result = len } + + override DataFlowType getType() { result = tc1.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc1) } + + override AccessPathApprox pop(TypedContent head) { + head = tc1 and + ( + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + } +} + +private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 { + private TypedContent tc; + private int len; + + AccessPathApproxCons1() { this = TCons1(tc, len) } + + override string toString() { + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc } + + override int len() { result = len } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { + head = tc and + ( + exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) | + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + or + exists(DataFlowType t | + len = 1 and + Stage3::consCand(tc, TFrontNil(t), _) and + result = TNil(t) + ) + ) + } +} + +/** Gets the access path obtained by popping `tc` from `ap`, if any. */ +private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) } + +/** Gets the access path obtained by pushing `tc` onto `ap`. */ +private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = pop(tc, result) } + +private newtype TAccessPathApproxOption = + TAccessPathApproxNone() or + TAccessPathApproxSome(AccessPathApprox apa) + +private class AccessPathApproxOption extends TAccessPathApproxOption { + string toString() { + this = TAccessPathApproxNone() and result = "" + or + this = TAccessPathApproxSome(any(AccessPathApprox apa | result = apa.toString())) + } +} + +private module Stage4Param implements MkStage::StageParam { + private module PrevStage = Stage3; + + class Ap = AccessPathApprox; + + class ApNil = AccessPathApproxNil; + + PrevStage::Ap getApprox(Ap ap) { result = ap.getFront() } + + ApNil getApNil(NodeEx node) { + PrevStage::revFlow(node, _) and result = TNil(node.getDataFlowType()) + } + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) } + + pragma[noinline] + Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathApproxOption; + + ApOption apNone() { result = TAccessPathApproxNone() } + + ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) } + + import Level1CallContext + import LocalCallContext + + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, state1, node2, state2, preservesValue, ap.getFront(), config, lcc) + } + + pragma[nomagic] + predicate flowOutOfCall( + DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config + ) { + exists(FlowState state | + flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, pragma[only_bind_into](state), _, pragma[only_bind_into](config)) and + PrevStage::revFlowAlias(node1, pragma[only_bind_into](state), _, + pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + predicate flowIntoCall( + DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow, + Configuration config + ) { + exists(FlowState state | + flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, pragma[only_bind_into](state), _, pragma[only_bind_into](config)) and + PrevStage::revFlowAlias(node1, pragma[only_bind_into](state), _, + pragma[only_bind_into](config)) + ) + } + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { any() } + + // Type checking is not necessary here as it has already been done in stage 3. + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } +} + +private module Stage4 = MkStage::Stage; + +bindingset[conf, result] +private Configuration unbindConf(Configuration conf) { + exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c)) +} + +pragma[nomagic] +private predicate nodeMayUseSummary0( + NodeEx n, DataFlowCallable c, FlowState state, AccessPathApprox apa, Configuration config +) { + exists(AccessPathApprox apa0 | + Stage4::parameterMayFlowThrough(_, c, _, _) and + Stage4::revFlow(n, state, true, _, apa0, config) and + Stage4::fwdFlow(n, state, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and + n.getEnclosingCallable() = c + ) +} + +pragma[nomagic] +private predicate nodeMayUseSummary( + NodeEx n, FlowState state, AccessPathApprox apa, Configuration config +) { + exists(DataFlowCallable c | + Stage4::parameterMayFlowThrough(_, c, apa, config) and + nodeMayUseSummary0(n, c, state, apa, config) + ) +} + +private newtype TSummaryCtx = + TSummaryCtxNone() or + TSummaryCtxSome(ParamNodeEx p, FlowState state, AccessPath ap) { + exists(Configuration config | + Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), config) and + Stage4::revFlow(p, state, _, config) + ) + } + +/** + * A context for generating flow summaries. This represents flow entry through + * a specific parameter with an access path of a specific shape. + * + * Summaries are only created for parameters that may flow through. + */ +abstract private class SummaryCtx extends TSummaryCtx { + abstract string toString(); +} + +/** A summary context from which no flow summary can be generated. */ +private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { + override string toString() { result = "" } +} + +/** A summary context from which a flow summary can be generated. */ +private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { + private ParamNodeEx p; + private FlowState s; + private AccessPath ap; + + SummaryCtxSome() { this = TSummaryCtxSome(p, s, ap) } + + ParameterPosition getParameterPos() { p.isParameterOf(_, result) } + + ParamNodeEx getParamNode() { result = p } + + override string toString() { result = p + ": " + ap } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * Gets the number of length 2 access path approximations that correspond to `apa`. + */ +private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) { + exists(TypedContent tc, int len | + tc = apa.getHead() and + len = apa.len() and + result = + strictcount(AccessPathFront apf | + Stage4::consCand(tc, any(AccessPathApprox ap | ap.getFront() = apf and ap.len() = len - 1), + config) + ) + ) +} + +private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) { + result = + strictcount(NodeEx n, FlowState state | + Stage4::revFlow(n, state, apa, config) or nodeMayUseSummary(n, state, apa, config) + ) +} + +/** + * Holds if a length 2 access path approximation matching `apa` is expected + * to be expensive. + */ +private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) { + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = count1to2unfold(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + apLimit < aps and + tupleLimit < (aps - 1) * nodes + ) +} + +private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) { + exists(TypedContent head | + apa.pop(head) = result and + Stage4::consCand(head, result, config) + ) +} + +/** + * Holds with `unfold = false` if a precise head-tail representation of `apa` is + * expected to be expensive. Holds with `unfold = true` otherwise. + */ +private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) { + if apa.getHead().forceHighPrecision() + then unfold = true + else + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = countPotentialAps(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true + ) +} + +/** + * Gets the number of `AccessPath`s that correspond to `apa`. + */ +private int countAps(AccessPathApprox apa, Configuration config) { + evalUnfold(apa, false, config) and + result = 1 and + (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config)) + or + evalUnfold(apa, false, config) and + result = count1to2unfold(apa, config) and + not expensiveLen1to2unfolding(apa, config) + or + evalUnfold(apa, true, config) and + result = countPotentialAps(apa, config) +} + +/** + * Gets the number of `AccessPath`s that would correspond to `apa` assuming + * that it is expanded to a precise head-tail representation. + */ +language[monotonicAggregates] +private int countPotentialAps(AccessPathApprox apa, Configuration config) { + apa instanceof AccessPathApproxNil and result = 1 + or + result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config)) +} + +private newtype TAccessPath = + TAccessPathNil(DataFlowType t) or + TAccessPathCons(TypedContent head, AccessPath tail) { + exists(AccessPathApproxCons apa | + not evalUnfold(apa, false, _) and + head = apa.getHead() and + tail.getApprox() = getATail(apa, _) + ) + } or + TAccessPathCons2(TypedContent head1, TypedContent head2, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + not expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head1 = apa.getHead() and + head2 = getATail(apa, _).getHead() + ) + } or + TAccessPathCons1(TypedContent head, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head = apa.getHead() + ) + } + +private newtype TPathNode = + TPathNodeMid( + NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config + ) { + // A PathNode is introduced by a source ... + Stage4::revFlow(node, state, config) and + sourceNode(node, state, config) and + ( + if hasSourceCallCtx(config) + then cc instanceof CallContextSomeCall + else cc instanceof CallContextAny + ) and + sc instanceof SummaryCtxNone and + ap = TAccessPathNil(node.getDataFlowType()) + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + pathStep(mid, node, state, cc, sc, ap) and + pragma[only_bind_into](config) = mid.getConfiguration() and + Stage4::revFlow(node, state, ap.getApprox(), pragma[only_bind_into](config)) + ) + } or + TPathNodeSink(NodeEx node, FlowState state, Configuration config) { + exists(PathNodeMid sink | + sink.isAtSink() and + node = sink.getNodeEx() and + state = sink.getState() and + config = sink.getConfiguration() + ) + } + +/** + * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a + * source to a given node with a given `AccessPath`, this indicates the sequence + * of dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ +private class AccessPath extends TAccessPath { + /** Gets the head of this access path, if any. */ + abstract TypedContent getHead(); + + /** Gets the tail of this access path, if any. */ + abstract AccessPath getTail(); + + /** Gets the front of this access path. */ + abstract AccessPathFront getFront(); + + /** Gets the approximation of this access path. */ + abstract AccessPathApprox getApprox(); + + /** Gets the length of this access path. */ + abstract int length(); + + /** Gets a textual representation of this access path. */ + abstract string toString(); + + /** Gets the access path obtained by popping `tc` from this access path, if any. */ + final AccessPath pop(TypedContent tc) { + result = this.getTail() and + tc = this.getHead() + } + + /** Gets the access path obtained by pushing `tc` onto this access path. */ + final AccessPath push(TypedContent tc) { this = result.pop(tc) } +} + +private class AccessPathNil extends AccessPath, TAccessPathNil { + private DataFlowType t; + + AccessPathNil() { this = TAccessPathNil(t) } + + DataFlowType getType() { result = t } + + override TypedContent getHead() { none() } + + override AccessPath getTail() { none() } + + override AccessPathFrontNil getFront() { result = TFrontNil(t) } + + override AccessPathApproxNil getApprox() { result = TNil(t) } + + override int length() { result = 0 } + + override string toString() { result = concat(": " + ppReprType(t)) } +} + +private class AccessPathCons extends AccessPath, TAccessPathCons { + private TypedContent head; + private AccessPath tail; + + AccessPathCons() { this = TAccessPathCons(head, tail) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { result = tail } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { + result = TConsNil(head, tail.(AccessPathNil).getType()) + or + result = TConsCons(head, tail.getHead(), this.length()) + or + result = TCons1(head, this.length()) + } + + override int length() { result = 1 + tail.length() } + + private string toStringImpl(boolean needsSuffix) { + exists(DataFlowType t | + tail = TAccessPathNil(t) and + needsSuffix = false and + result = head.toString() + "]" + concat(" : " + ppReprType(t)) + ) + or + result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix) + or + exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) | + result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true + or + result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false + ) + or + exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) | + result = head + ", " + tc2 + ", ... (" and len > 1 and needsSuffix = true + or + result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false + ) + } + + override string toString() { + result = "[" + this.toStringImpl(true) + this.length().toString() + ")]" + or + result = "[" + this.toStringImpl(false) + } +} + +private class AccessPathCons2 extends AccessPath, TAccessPathCons2 { + private TypedContent head1; + private TypedContent head2; + private int len; + + AccessPathCons2() { this = TAccessPathCons2(head1, head2, len) } + + override TypedContent getHead() { result = head1 } + + override AccessPath getTail() { + Stage4::consCand(head1, result.getApprox(), _) and + result.getHead() = head2 and + result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head1) } + + override AccessPathApproxCons getApprox() { + result = TConsCons(head1, head2, len) or + result = TCons1(head1, len) + } + + override int length() { result = len } + + override string toString() { + if len = 2 + then result = "[" + head1.toString() + ", " + head2.toString() + "]" + else + result = "[" + head1.toString() + ", " + head2.toString() + ", ... (" + len.toString() + ")]" + } +} + +private class AccessPathCons1 extends AccessPath, TAccessPathCons1 { + private TypedContent head; + private int len; + + AccessPathCons1() { this = TAccessPathCons1(head, len) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { + Stage4::consCand(head, result.getApprox(), _) and result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { result = TCons1(head, len) } + + override int length() { result = len } + + override string toString() { + if len = 1 + then result = "[" + head.toString() + "]" + else result = "[" + head.toString() + ", ... (" + len.toString() + ")]" + } +} + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { none() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + + /** Gets the underlying `Node`. */ + final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result } + + /** Gets the `FlowState` of this node. */ + FlowState getState() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + final PathNode getASuccessor() { + result = this.(PathNodeImpl).getANonHiddenSuccessor() and + reach(this) and + reach(result) + } + + /** Holds if this node is a source. */ + predicate isSource() { none() } +} + +abstract private class PathNodeImpl extends PathNode { + abstract PathNodeImpl getASuccessorImpl(); + + private PathNodeImpl getASuccessorIfHidden() { + this.isHidden() and + result = this.getASuccessorImpl() + } + + final PathNodeImpl getANonHiddenSuccessor() { + result = this.getASuccessorImpl().getASuccessorIfHidden*() and + not this.isHidden() and + not result.isHidden() + } + + abstract NodeEx getNodeEx(); + + predicate isHidden() { + not this.getConfiguration().includeHiddenNodes() and + ( + hiddenNode(this.getNodeEx().asNode()) and + not this.isSource() and + not this instanceof PathNodeSink + or + this.getNodeEx() instanceof TNodeImplicitRead + ) + } + + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + exists(string s | s = this.(PathNodeMid).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + this instanceof PathNodeSink and result = "" + or + result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" + } + + override string toString() { result = this.getNodeEx().toString() + this.ppAp() } + + override string toStringWithContext() { + result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() + } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** Holds if `n` can reach a sink. */ +private predicate directReach(PathNodeImpl n) { + n instanceof PathNodeSink or directReach(n.getANonHiddenSuccessor()) +} + +/** Holds if `n` can reach a sink or is used in a subpath that can reach a sink. */ +private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) } + +/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNodeImpl n1, PathNode n2) { + n1.getANonHiddenSuccessor() = n2 and directReach(n2) +} + +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b } + + /** Holds if `n` is a node in the graph of data flow path explanations. */ + query predicate nodes(PathNode n, string key, string val) { + reach(n) and key = "semmle.label" and val = n.toString() + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through + * a subpath between `par` and `ret` with the connecting edges `arg -> par` and + * `ret -> out` is summarized as the edge `arg -> out`. + */ + query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) { + Subpaths::subpaths(arg, par, ret, out) and + reach(arg) and + reach(par) and + reach(ret) and + reach(out) + } +} + +/** + * An intermediate flow graph node. This is a triple consisting of a `Node`, + * a `CallContext`, and a `Configuration`. + */ +private class PathNodeMid extends PathNodeImpl, TPathNodeMid { + NodeEx node; + FlowState state; + CallContext cc; + SummaryCtx sc; + AccessPath ap; + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, state, cc, sc, ap, config) } + + override NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + CallContext getCallContext() { result = cc } + + SummaryCtx getSummaryCtx() { result = sc } + + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + private PathNodeMid getSuccMid() { + pathStep(this, result.getNodeEx(), result.getState(), result.getCallContext(), + result.getSummaryCtx(), result.getAp()) and + result.getConfiguration() = unbindConf(this.getConfiguration()) + } + + override PathNodeImpl getASuccessorImpl() { + // an intermediate step to another intermediate node + result = this.getSuccMid() + or + // a final step to a sink + result = this.getSuccMid().projectToSink() + } + + override predicate isSource() { + sourceNode(node, state, config) and + ( + if hasSourceCallCtx(config) + then cc instanceof CallContextSomeCall + else cc instanceof CallContextAny + ) and + sc instanceof SummaryCtxNone and + ap = TAccessPathNil(node.getDataFlowType()) + } + + predicate isAtSink() { + sinkNode(node, state, config) and + ap instanceof AccessPathNil and + if hasSinkCallCtx(config) + then + // For `FeatureHasSinkCallContext` the condition `cc instanceof CallContextNoCall` + // is exactly what we need to check. This also implies + // `sc instanceof SummaryCtxNone`. + // For `FeatureEqualSourceSinkCallContext` the initial call context was + // set to `CallContextSomeCall` and jumps are disallowed, so + // `cc instanceof CallContextNoCall` never holds. On the other hand, + // in this case there's never any need to enter a call except to identify + // a summary, so the condition in `pathIntoCallable` enforces this, which + // means that `sc instanceof SummaryCtxNone` holds if and only if we are + // in the call context of the source. + sc instanceof SummaryCtxNone or + cc instanceof CallContextNoCall + else any() + } + + PathNodeSink projectToSink() { + this.isAtSink() and + result.getNodeEx() = node and + result.getState() = state and + result.getConfiguration() = unbindConf(config) + } +} + +/** + * A flow graph node corresponding to a sink. This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNodeImpl, TPathNodeSink { + NodeEx node; + FlowState state; + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, state, config) } + + override NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + override Configuration getConfiguration() { result = config } + + override PathNodeImpl getASuccessorImpl() { none() } + + override predicate isSource() { sourceNode(node, state, config) } +} + +private predicate pathNode( + PathNodeMid mid, NodeEx midnode, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, + Configuration conf, LocalCallContext localCC +) { + midnode = mid.getNodeEx() and + state = mid.getState() and + conf = mid.getConfiguration() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + localCC = + getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)), + midnode.getEnclosingCallable()) and + ap = mid.getAp() +} + +/** + * Holds if data may flow from `mid` to `node`. The last step in or out of + * a callable is recorded by `cc`. + */ +pragma[nomagic] +private predicate pathStep( + PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap +) { + exists(NodeEx midnode, FlowState state0, Configuration conf, LocalCallContext localCC | + pathNode(mid, midnode, state0, cc, sc, ap, conf, localCC) and + localFlowBigStep(midnode, state0, node, state, true, _, conf, localCC) + ) + or + exists( + AccessPath ap0, NodeEx midnode, FlowState state0, Configuration conf, LocalCallContext localCC + | + pathNode(mid, midnode, state0, cc, sc, ap0, conf, localCC) and + localFlowBigStep(midnode, state0, node, state, false, ap.getFront(), conf, localCC) and + ap0 instanceof AccessPathNil + ) + or + jumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and + state = mid.getState() and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = mid.getAp() + or + additionalJumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and + state = mid.getState() and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TAccessPathNil(node.getDataFlowType()) + or + additionalJumpStateStep(mid.getNodeEx(), mid.getState(), node, state, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TAccessPathNil(node.getDataFlowType()) + or + exists(TypedContent tc | pathStoreStep(mid, node, state, ap.pop(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + exists(TypedContent tc | pathReadStep(mid, node, state, ap.push(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + pathIntoCallable(mid, node, state, _, cc, sc, _, _) and ap = mid.getAp() + or + pathOutOfCallable(mid, node, state, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone + or + pathThroughCallable(mid, node, state, cc, ap) and sc = mid.getSummaryCtx() +} + +pragma[nomagic] +private predicate pathReadStep( + PathNodeMid mid, NodeEx node, FlowState state, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + tc = ap0.getHead() and + Stage4::readStepCand(mid.getNodeEx(), tc.getContent(), node, mid.getConfiguration()) and + state = mid.getState() and + cc = mid.getCallContext() +} + +pragma[nomagic] +private predicate pathStoreStep( + PathNodeMid mid, NodeEx node, FlowState state, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + Stage4::storeStepCand(mid.getNodeEx(), _, tc, node, _, mid.getConfiguration()) and + state = mid.getState() and + cc = mid.getCallContext() +} + +private predicate pathOutOfCallable0( + PathNodeMid mid, ReturnPosition pos, FlowState state, CallContext innercc, AccessPathApprox apa, + Configuration config +) { + pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and + state = mid.getState() and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + apa = mid.getAp().getApprox() and + config = mid.getConfiguration() +} + +pragma[nomagic] +private predicate pathOutOfCallable1( + PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, FlowState state, CallContext cc, + AccessPathApprox apa, Configuration config +) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + pathOutOfCallable0(mid, pos, state, innercc, apa, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) +} + +pragma[noinline] +private NodeEx getAnOutNodeFlow( + ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config +) { + result.asNode() = kind.getAnOutNode(call) and + Stage4::revFlow(result, _, apa, config) +} + +/** + * Holds if data may flow from `mid` to `out`. The last step of this path + * is a return from a callable and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, FlowState state, CallContext cc) { + exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config | + pathOutOfCallable1(mid, call, kind, state, cc, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. + */ +pragma[noinline] +private predicate pathIntoArg( + PathNodeMid mid, ParameterPosition ppos, FlowState state, CallContext cc, DataFlowCall call, + AccessPath ap, AccessPathApprox apa, Configuration config +) { + exists(ArgNodeEx arg, ArgumentPosition apos | + pathNode(mid, arg, state, cc, _, ap, config, _) and + arg.asNode().(ArgNode).argumentOf(call, apos) and + apa = ap.getApprox() and + parameterMatch(ppos, apos) + ) +} + +pragma[nomagic] +private predicate parameterCand( + DataFlowCallable callable, ParameterPosition pos, AccessPathApprox apa, Configuration config +) { + exists(ParamNodeEx p | + Stage4::revFlow(p, _, apa, config) and + p.isParameterOf(callable, pos) + ) +} + +pragma[nomagic] +private predicate pathIntoCallable0( + PathNodeMid mid, DataFlowCallable callable, ParameterPosition pos, FlowState state, + CallContext outercc, DataFlowCall call, AccessPath ap, Configuration config +) { + exists(AccessPathApprox apa | + pathIntoArg(mid, pragma[only_bind_into](pos), state, outercc, call, ap, + pragma[only_bind_into](apa), pragma[only_bind_into](config)) and + callable = resolveCall(call, outercc) and + parameterCand(callable, pragma[only_bind_into](pos), pragma[only_bind_into](apa), + pragma[only_bind_into](config)) + ) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + */ +pragma[nomagic] +private predicate pathIntoCallable( + PathNodeMid mid, ParamNodeEx p, FlowState state, CallContext outercc, CallContextCall innercc, + SummaryCtx sc, DataFlowCall call, Configuration config +) { + exists(ParameterPosition pos, DataFlowCallable callable, AccessPath ap | + pathIntoCallable0(mid, callable, pos, state, outercc, call, ap, config) and + p.isParameterOf(callable, pos) and + ( + sc = TSummaryCtxSome(p, state, ap) + or + not exists(TSummaryCtxSome(p, state, ap)) and + sc = TSummaryCtxNone() and + // When the call contexts of source and sink needs to match then there's + // never any reason to enter a callable except to find a summary. See also + // the comment in `PathNodeMid::isAtSink`. + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) +} + +/** Holds if data may flow from a parameter given by `sc` to a return of kind `kind`. */ +pragma[nomagic] +private predicate paramFlowsThrough( + ReturnKindExt kind, FlowState state, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, + AccessPathApprox apa, Configuration config +) { + exists(PathNodeMid mid, RetNodeEx ret, ParameterPosition pos | + pathNode(mid, ret, state, cc, sc, ap, config, _) and + kind = ret.getKind() and + apa = ap.getApprox() and + pos = sc.getParameterPos() and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + sc.getParamNode().allowParameterReturnInSelf() + ) + ) +} + +pragma[nomagic] +private predicate pathThroughCallable0( + DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, FlowState state, CallContext cc, + AccessPath ap, AccessPathApprox apa, Configuration config +) { + exists(CallContext innercc, SummaryCtx sc | + pathIntoCallable(mid, _, _, cc, innercc, sc, call, config) and + paramFlowsThrough(kind, state, innercc, sc, ap, apa, config) + ) +} + +/** + * Holds if data may flow from `mid` through a callable to the node `out`. + * The context `cc` is restored to its value prior to entering the callable. + */ +pragma[noinline] +private predicate pathThroughCallable( + PathNodeMid mid, NodeEx out, FlowState state, CallContext cc, AccessPath ap +) { + exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa, Configuration config | + pathThroughCallable0(call, mid, kind, state, cc, ap, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) + ) +} + +private module Subpaths { + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by + * `kind`, `sc`, `apout`, and `innercc`. + */ + pragma[nomagic] + private predicate subpaths01( + PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, + NodeEx out, FlowState sout, AccessPath apout + ) { + exists(Configuration config | + pathThroughCallable(arg, out, pragma[only_bind_into](sout), _, pragma[only_bind_into](apout)) and + pathIntoCallable(arg, par, _, _, innercc, sc, _, config) and + paramFlowsThrough(kind, pragma[only_bind_into](sout), innercc, sc, + pragma[only_bind_into](apout), _, unbindConf(config)) and + not arg.isHidden() + ) + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by + * `kind`, `sc`, `sout`, `apout`, and `innercc`. + */ + pragma[nomagic] + private predicate subpaths02( + PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, + NodeEx out, FlowState sout, AccessPath apout + ) { + subpaths01(arg, par, sc, innercc, kind, out, sout, apout) and + out.asNode() = kind.getAnOutNode(_) + } + + pragma[nomagic] + private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple. + */ + pragma[nomagic] + private predicate subpaths03( + PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout + ) { + exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode | + subpaths02(arg, par, sc, innercc, kind, out, sout, apout) and + pathNode(ret, retnode, sout, innercc, sc, apout, unbindConf(getPathNodeConf(arg)), _) and + kind = retnode.getKind() + ) + } + + private PathNodeImpl localStepToHidden(PathNodeImpl n) { + n.getASuccessorImpl() = result and + result.isHidden() and + exists(NodeEx n1, NodeEx n2 | n1 = n.getNodeEx() and n2 = result.getNodeEx() | + localFlowBigStep(n1, _, n2, _, _, _, _, _) or + store(n1, _, n2, _, _) or + readSet(n1, _, n2, _) + ) + } + + pragma[nomagic] + private predicate hasSuccessor(PathNodeImpl pred, PathNodeMid succ, NodeEx succNode) { + succ = pred.getANonHiddenSuccessor() and + succNode = succ.getNodeEx() + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through + * a subpath between `par` and `ret` with the connecting edges `arg -> par` and + * `ret -> out` is summarized as the edge `arg -> out`. + */ + predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) { + exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 | + pragma[only_bind_into](arg).getANonHiddenSuccessor() = pragma[only_bind_into](out0) and + subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and + hasSuccessor(pragma[only_bind_into](arg), par, p) and + not ret.isHidden() and + pathNode(out0, o, sout, _, _, apout, _, _) + | + out = out0 or out = out0.projectToSink() + ) + } + + /** + * Holds if `n` can reach a return node in a summarized subpath that can reach a sink. + */ + predicate retReach(PathNodeImpl n) { + exists(PathNode out | subpaths(_, _, n, out) | directReach(out) or retReach(out)) + or + exists(PathNodeImpl mid | + retReach(mid) and + n.getANonHiddenSuccessor() = mid and + not subpaths(_, mid, _, _) + ) + } +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration +) { + flowsource.isSource() and + flowsource.getConfiguration() = configuration and + flowsource.(PathNodeImpl).getNodeEx().asNode() = source and + (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and + flowsink.getNodeEx().asNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} + +private predicate finalStats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples +) { + fwd = true and + nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0)) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and + states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state)) and + tuples = count(PathNode pn) + or + fwd = false and + nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and + states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state and reach(pn))) and + tuples = count(PathNode pn | reach(pn)) +} + +/** + * INTERNAL: Only for debugging. + * + * Calculates per-stage metrics for data flow. + */ +predicate stageStats( + int n, string stage, int nodes, int fields, int conscand, int states, int tuples, + Configuration config +) { + stage = "1 Fwd" and + n = 10 and + Stage1::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "1 Rev" and + n = 15 and + Stage1::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "2 Fwd" and + n = 20 and + Stage2::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "2 Rev" and + n = 25 and + Stage2::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "3 Fwd" and + n = 30 and + Stage3::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "3 Rev" and + n = 35 and + Stage3::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "4 Fwd" and + n = 40 and + Stage4::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "4 Rev" and + n = 45 and + Stage4::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "5 Fwd" and n = 50 and finalStats(true, nodes, fields, conscand, states, tuples) + or + stage = "5 Rev" and n = 55 and finalStats(false, nodes, fields, conscand, states, tuples) +} + +private module FlowExploration { + private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { + exists(NodeEx node1, NodeEx node2 | + jumpStep(node1, node2, config) + or + additionalJumpStep(node1, node2, config) + or + additionalJumpStateStep(node1, _, node2, _, config) + or + // flow into callable + viableParamArgEx(_, node2, node1) + or + // flow out of a callable + viableReturnPosOutEx(_, node1.(RetNodeEx).getReturnPosition(), node2) + | + c1 = node1.getEnclosingCallable() and + c2 = node2.getEnclosingCallable() and + c1 != c2 + ) + } + + private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSource(n) or config.isSource(n, _) | c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSrc(mid, config) and callableStep(mid, c, config) + ) + } + + private predicate interestingCallableSink(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSink(n) or config.isSink(n, _) | c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSink(mid, config) and callableStep(c, mid, config) + ) + } + + private newtype TCallableExt = + TCallable(DataFlowCallable c, Configuration config) { + interestingCallableSrc(c, config) or + interestingCallableSink(c, config) + } or + TCallableSrc() or + TCallableSink() + + private predicate callableExtSrc(TCallableSrc src) { any() } + + private predicate callableExtSink(TCallableSink sink) { any() } + + private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { + exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | + callableStep(c1, c2, config) and + ce1 = TCallable(c1, pragma[only_bind_into](config)) and + ce2 = TCallable(c2, pragma[only_bind_into](config)) + ) + or + exists(Node n, Configuration config | + ce1 = TCallableSrc() and + (config.isSource(n) or config.isSource(n, _)) and + ce2 = TCallable(getNodeEnclosingCallable(n), config) + ) + or + exists(Node n, Configuration config | + ce2 = TCallableSink() and + (config.isSink(n) or config.isSink(n, _)) and + ce1 = TCallable(getNodeEnclosingCallable(n), config) + ) + } + + private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) { + callableExtStepFwd(ce2, ce1) + } + + private int distSrcExt(TCallableExt c) = + shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) + + private int distSinkExt(TCallableExt c) = + shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result) + + private int distSrc(DataFlowCallable c, Configuration config) { + result = distSrcExt(TCallable(c, config)) - 1 + } + + private int distSink(DataFlowCallable c, Configuration config) { + result = distSinkExt(TCallable(c, config)) - 1 + } + + private newtype TPartialAccessPath = + TPartialNil(DataFlowType t) or + TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ + private class PartialAccessPath extends TPartialAccessPath { + abstract string toString(); + + TypedContent getHead() { this = TPartialCons(result, _) } + + int len() { + this = TPartialNil(_) and result = 0 + or + this = TPartialCons(_, result) + } + + DataFlowType getType() { + this = TPartialNil(result) + or + exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType()) + } + } + + private class PartialAccessPathNil extends PartialAccessPath, TPartialNil { + override string toString() { + exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t))) + } + } + + private class PartialAccessPathCons extends PartialAccessPath, TPartialCons { + override string toString() { + exists(TypedContent tc, int len | this = TPartialCons(tc, len) | + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private newtype TRevPartialAccessPath = + TRevPartialNil() or + TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `Content`s, but only the first + * element of the list and its length are tracked. + */ + private class RevPartialAccessPath extends TRevPartialAccessPath { + abstract string toString(); + + Content getHead() { this = TRevPartialCons(result, _) } + + int len() { + this = TRevPartialNil() and result = 0 + or + this = TRevPartialCons(_, result) + } + } + + private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil { + override string toString() { result = "" } + } + + private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons { + override string toString() { + exists(Content c, int len | this = TRevPartialCons(c, len) | + if len = 1 + then result = "[" + c.toString() + "]" + else result = "[" + c.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private predicate relevantState(FlowState state) { + sourceNode(_, state, _) or + sinkNode(_, state, _) or + additionalLocalStateStep(_, state, _, _, _) or + additionalLocalStateStep(_, _, _, state, _) or + additionalJumpStateStep(_, state, _, _, _) or + additionalJumpStateStep(_, _, _, state, _) + } + + private newtype TSummaryCtx1 = + TSummaryCtx1None() or + TSummaryCtx1Param(ParamNodeEx p) + + private newtype TSummaryCtx2 = + TSummaryCtx2None() or + TSummaryCtx2Some(FlowState s) { relevantState(s) } + + private newtype TSummaryCtx3 = + TSummaryCtx3None() or + TSummaryCtx3Some(PartialAccessPath ap) + + private newtype TRevSummaryCtx1 = + TRevSummaryCtx1None() or + TRevSummaryCtx1Some(ReturnPosition pos) + + private newtype TRevSummaryCtx2 = + TRevSummaryCtx2None() or + TRevSummaryCtx2Some(FlowState s) { relevantState(s) } + + private newtype TRevSummaryCtx3 = + TRevSummaryCtx3None() or + TRevSummaryCtx3Some(RevPartialAccessPath ap) + + private newtype TPartialPathNode = + TPartialPathNodeFwd( + NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + sourceNode(node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap = TPartialNil(node.getDataFlowType()) and + exists(config.explorationLimit()) + or + partialPathNodeMk0(node, state, cc, sc1, sc2, sc3, ap, config) and + distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit() + } or + TPartialPathNodeRev( + NodeEx node, FlowState state, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, TRevSummaryCtx3 sc3, + RevPartialAccessPath ap, Configuration config + ) { + sinkNode(node, state, config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = TRevPartialNil() and + exists(config.explorationLimit()) + or + exists(PartialPathNodeRev mid | + revPartialPathStep(mid, node, state, sc1, sc2, sc3, ap, config) and + not clearsContentEx(node, ap.getHead()) and + ( + notExpectsContent(node) or + expectsContentEx(node, ap.getHead()) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) and + distSink(node.getEnclosingCallable(), config) <= config.explorationLimit() + ) + } + + pragma[nomagic] + private predicate partialPathNodeMk0( + NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStep(mid, node, state, cc, sc1, sc2, sc3, ap, config) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) and + not clearsContentEx(node, ap.getHead().getContent()) and + ( + notExpectsContent(node) or + expectsContentEx(node, ap.getHead().getContent()) + ) and + if node.asNode() instanceof CastingNode + then compatibleTypes(node.getDataFlowType(), ap.getType()) + else any() + ) + } + + /** + * A `Node` augmented with a call context, an access path, and a configuration. + */ + class PartialPathNode extends TPartialPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = this.getNodeEx().toString() + this.ppAp() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { + result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() + } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets the underlying `Node`. */ + final Node getNode() { this.getNodeEx().projectToNode() = result } + + FlowState getState() { none() } + + private NodeEx getNodeEx() { + result = this.(PartialPathNodeFwd).getNodeEx() or + result = this.(PartialPathNodeRev).getNodeEx() + } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + PartialPathNode getASuccessor() { none() } + + /** + * Gets the approximate distance to the nearest source measured in number + * of interprocedural steps. + */ + int getSourceDistance() { + result = distSrc(this.getNodeEx().getEnclosingCallable(), this.getConfiguration()) + } + + /** + * Gets the approximate distance to the nearest sink measured in number + * of interprocedural steps. + */ + int getSinkDistance() { + result = distSink(this.getNodeEx().getEnclosingCallable(), this.getConfiguration()) + } + + private string ppAp() { + exists(string s | + s = this.(PartialPathNodeFwd).getAp().toString() or + s = this.(PartialPathNodeRev).getAp().toString() + | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">" + } + + /** Holds if this is a source in a forward-flow path. */ + predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() } + + /** Holds if this is a sink in a reverse-flow path. */ + predicate isRevSink() { this.(PartialPathNodeRev).isSink() } + } + + /** + * Provides the query predicates needed to include a graph in a path-problem query. + */ + module PartialPathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } + } + + private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd { + NodeEx node; + FlowState state; + CallContext cc; + TSummaryCtx1 sc1; + TSummaryCtx2 sc2; + TSummaryCtx3 sc3; + PartialAccessPath ap; + Configuration config; + + PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, state, cc, sc1, sc2, sc3, ap, config) } + + NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + CallContext getCallContext() { result = cc } + + TSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TSummaryCtx2 getSummaryCtx2() { result = sc2 } + + TSummaryCtx3 getSummaryCtx3() { result = sc3 } + + PartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeFwd getASuccessor() { + partialPathStep(this, result.getNodeEx(), result.getState(), result.getCallContext(), + result.getSummaryCtx1(), result.getSummaryCtx2(), result.getSummaryCtx3(), result.getAp(), + result.getConfiguration()) + } + + predicate isSource() { + sourceNode(node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap instanceof TPartialNil + } + } + + private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev { + NodeEx node; + FlowState state; + TRevSummaryCtx1 sc1; + TRevSummaryCtx2 sc2; + TRevSummaryCtx3 sc3; + RevPartialAccessPath ap; + Configuration config; + + PartialPathNodeRev() { this = TPartialPathNodeRev(node, state, sc1, sc2, sc3, ap, config) } + + NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + TRevSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TRevSummaryCtx2 getSummaryCtx2() { result = sc2 } + + TRevSummaryCtx3 getSummaryCtx3() { result = sc3 } + + RevPartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeRev getASuccessor() { + revPartialPathStep(result, this.getNodeEx(), this.getState(), this.getSummaryCtx1(), + this.getSummaryCtx2(), this.getSummaryCtx3(), this.getAp(), this.getConfiguration()) + } + + predicate isSink() { + sinkNode(node, state, config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = TRevPartialNil() + } + } + + private predicate partialPathStep( + PartialPathNodeFwd mid, NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, + TSummaryCtx2 sc2, TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + not isUnreachableInCallCached(node.asNode(), cc.(CallContextSpecificCall).getCall()) and + ( + localFlowStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + additionalLocalStateStep(mid.getNodeEx(), mid.getState(), node, state, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + ) + or + jumpStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + additionalJumpStateStep(mid.getNodeEx(), mid.getState(), node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + partialPathStoreStep(mid, _, _, node, ap) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() + or + exists(PartialAccessPath ap0, TypedContent tc | + partialPathReadStep(mid, ap0, tc, node, cc, config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + apConsFwd(ap, tc, ap0, config) + ) + or + partialPathIntoCallable(mid, node, state, _, cc, sc1, sc2, sc3, _, ap, config) + or + partialPathOutOfCallable(mid, node, state, cc, ap, config) and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() + or + partialPathThroughCallable(mid, node, state, cc, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() + } + + bindingset[result, i] + private int unbindInt(int i) { pragma[only_bind_out](i) = pragma[only_bind_out](result) } + + pragma[inline] + private predicate partialPathStoreStep( + PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, NodeEx node, + PartialAccessPath ap2 + ) { + exists(NodeEx midNode, DataFlowType contentType | + midNode = mid.getNodeEx() and + ap1 = mid.getAp() and + store(midNode, tc, node, contentType, mid.getConfiguration()) and + ap2.getHead() = tc and + ap2.len() = unbindInt(ap1.len() + 1) and + compatibleTypes(ap1.getType(), contentType) + ) + } + + pragma[nomagic] + private predicate apConsFwd( + PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStoreStep(mid, ap1, tc, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathReadStep( + PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, NodeEx node, CallContext cc, + Configuration config + ) { + exists(NodeEx midNode | + midNode = mid.getNodeEx() and + ap = mid.getAp() and + read(midNode, tc.getContent(), node, pragma[only_bind_into](config)) and + ap.getHead() = tc and + pragma[only_bind_into](config) = mid.getConfiguration() and + cc = mid.getCallContext() + ) + } + + private predicate partialPathOutOfCallable0( + PartialPathNodeFwd mid, ReturnPosition pos, FlowState state, CallContext innercc, + PartialAccessPath ap, Configuration config + ) { + pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and + state = mid.getState() and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + ap = mid.getAp() and + config = mid.getConfiguration() + } + + pragma[nomagic] + private predicate partialPathOutOfCallable1( + PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, FlowState state, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + partialPathOutOfCallable0(mid, pos, state, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) + } + + private predicate partialPathOutOfCallable( + PartialPathNodeFwd mid, NodeEx out, FlowState state, CallContext cc, PartialAccessPath ap, + Configuration config + ) { + exists(ReturnKindExt kind, DataFlowCall call | + partialPathOutOfCallable1(mid, call, kind, state, cc, ap, config) + | + out.asNode() = kind.getAnOutNode(call) + ) + } + + pragma[noinline] + private predicate partialPathIntoArg( + PartialPathNodeFwd mid, ParameterPosition ppos, FlowState state, CallContext cc, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + exists(ArgNode arg, ArgumentPosition apos | + arg = mid.getNodeEx().asNode() and + state = mid.getState() and + cc = mid.getCallContext() and + arg.argumentOf(call, apos) and + ap = mid.getAp() and + config = mid.getConfiguration() and + parameterMatch(ppos, apos) + ) + } + + pragma[nomagic] + private predicate partialPathIntoCallable0( + PartialPathNodeFwd mid, DataFlowCallable callable, ParameterPosition pos, FlowState state, + CallContext outercc, DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + partialPathIntoArg(mid, pos, state, outercc, call, ap, config) and + callable = resolveCall(call, outercc) + } + + private predicate partialPathIntoCallable( + PartialPathNodeFwd mid, ParamNodeEx p, FlowState state, CallContext outercc, + CallContextCall innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, TSummaryCtx3 sc3, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + exists(ParameterPosition pos, DataFlowCallable callable | + partialPathIntoCallable0(mid, callable, pos, state, outercc, call, ap, config) and + p.isParameterOf(callable, pos) and + sc1 = TSummaryCtx1Param(p) and + sc2 = TSummaryCtx2Some(state) and + sc3 = TSummaryCtx3Some(ap) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) + } + + pragma[nomagic] + private predicate paramFlowsThroughInPartialPath( + ReturnKindExt kind, FlowState state, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeFwd mid, RetNodeEx ret | + mid.getNodeEx() = ret and + kind = ret.getKind() and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() and + ap = mid.getAp() + ) + } + + pragma[noinline] + private predicate partialPathThroughCallable0( + DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, FlowState state, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, TSummaryCtx3 sc3 | + partialPathIntoCallable(mid, _, _, cc, innercc, sc1, sc2, sc3, call, _, config) and + paramFlowsThroughInPartialPath(kind, state, innercc, sc1, sc2, sc3, ap, config) + ) + } + + private predicate partialPathThroughCallable( + PartialPathNodeFwd mid, NodeEx out, FlowState state, CallContext cc, PartialAccessPath ap, + Configuration config + ) { + exists(DataFlowCall call, ReturnKindExt kind | + partialPathThroughCallable0(call, mid, kind, state, cc, ap, config) and + out.asNode() = kind.getAnOutNode(call) + ) + } + + pragma[nomagic] + private predicate revPartialPathStep( + PartialPathNodeRev mid, NodeEx node, FlowState state, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, + TRevSummaryCtx3 sc3, RevPartialAccessPath ap, Configuration config + ) { + localFlowStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + additionalLocalStateStep(node, state, mid.getNodeEx(), mid.getState(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + jumpStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + additionalJumpStateStep(node, state, mid.getNodeEx(), mid.getState(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + revPartialPathReadStep(mid, _, _, node, ap) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() + or + exists(RevPartialAccessPath ap0, Content c | + revPartialPathStoreStep(mid, ap0, c, node, config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + apConsRev(ap, c, ap0, config) + ) + or + exists(ParamNodeEx p | + mid.getNodeEx() = p and + viableParamArgEx(_, p, node) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + or + exists(ReturnPosition pos | + revPartialPathIntoReturn(mid, pos, state, sc1, sc2, sc3, _, ap, config) and + pos = getReturnPosition(node.asNode()) + ) + or + revPartialPathThroughCallable(mid, node, state, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() + } + + pragma[inline] + private predicate revPartialPathReadStep( + PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, NodeEx node, + RevPartialAccessPath ap2 + ) { + exists(NodeEx midNode | + midNode = mid.getNodeEx() and + ap1 = mid.getAp() and + read(node, c, midNode, mid.getConfiguration()) and + ap2.getHead() = c and + ap2.len() = unbindInt(ap1.len() + 1) + ) + } + + pragma[nomagic] + private predicate apConsRev( + RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeRev mid | + revPartialPathReadStep(mid, ap1, c, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathStoreStep( + PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, NodeEx node, Configuration config + ) { + exists(NodeEx midNode, TypedContent tc | + midNode = mid.getNodeEx() and + ap = mid.getAp() and + store(node, tc, midNode, _, config) and + ap.getHead() = c and + config = mid.getConfiguration() and + tc.getContent() = c + ) + } + + pragma[nomagic] + private predicate revPartialPathIntoReturn( + PartialPathNodeRev mid, ReturnPosition pos, FlowState state, TRevSummaryCtx1Some sc1, + TRevSummaryCtx2Some sc2, TRevSummaryCtx3Some sc3, DataFlowCall call, RevPartialAccessPath ap, + Configuration config + ) { + exists(NodeEx out | + mid.getNodeEx() = out and + mid.getState() = state and + viableReturnPosOutEx(call, pos, out) and + sc1 = TRevSummaryCtx1Some(pos) and + sc2 = TRevSummaryCtx2Some(state) and + sc3 = TRevSummaryCtx3Some(ap) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathFlowsThrough( + ArgumentPosition apos, FlowState state, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, + TRevSummaryCtx3Some sc3, RevPartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeRev mid, ParamNodeEx p, ParameterPosition ppos | + mid.getNodeEx() = p and + mid.getState() = state and + p.getPosition() = ppos and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() and + parameterMatch(ppos, apos) + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable0( + DataFlowCall call, PartialPathNodeRev mid, ArgumentPosition pos, FlowState state, + RevPartialAccessPath ap, Configuration config + ) { + exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, TRevSummaryCtx3Some sc3 | + revPartialPathIntoReturn(mid, _, _, sc1, sc2, sc3, call, _, config) and + revPartialPathFlowsThrough(pos, state, sc1, sc2, sc3, ap, config) + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable( + PartialPathNodeRev mid, ArgNodeEx node, FlowState state, RevPartialAccessPath ap, + Configuration config + ) { + exists(DataFlowCall call, ArgumentPosition pos | + revPartialPathThroughCallable0(call, mid, pos, state, ap, config) and + node.asNode().(ArgNode).argumentOf(call, pos) + ) + } +} + +import FlowExploration + +private predicate partialFlow( + PartialPathNode source, PartialPathNode node, Configuration configuration +) { + source.getConfiguration() = configuration and + source.isFwdSource() and + node = source.getASuccessor+() +} + +private predicate revPartialFlow( + PartialPathNode node, PartialPathNode sink, Configuration configuration +) { + sink.getConfiguration() = configuration and + sink.isRevSink() and + node.getASuccessor+() = sink +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll new file mode 100644 index 000000000000..468f8640a784 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl2.qll @@ -0,0 +1,4450 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from + * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed + * through the `Configuration` class. This file exists in several identical + * copies, allowing queries to use multiple `Configuration` classes that depend + * on each other without introducing mutual recursion among those configurations. + */ + +private import DataFlowImplCommon +private import DataFlowImplSpecific::Private +import DataFlowImplSpecific::Public +import DataFlowImplCommonPublic + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ```ql + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. + * } + * ``` + * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and + * the edges are those data-flow steps that preserve the value of the node + * along with any additional edges defined by `isAdditionalFlowStep`. + * Specifying nodes in `isBarrier` will remove those nodes from the graph, and + * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going + * and/or out-going edges from those nodes, respectively. + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + predicate isSource(Node source) { none() } + + /** + * Holds if `source` is a relevant data flow source with the given initial + * `state`. + */ + predicate isSource(Node source, FlowState state) { none() } + + /** + * Holds if `sink` is a relevant data flow sink. + */ + predicate isSink(Node sink) { none() } + + /** + * Holds if `sink` is a relevant data flow sink accepting `state`. + */ + predicate isSink(Node source, FlowState state) { none() } + + /** + * Holds if data flow through `node` is prohibited. This completely removes + * `node` from the data flow graph. + */ + predicate isBarrier(Node node) { none() } + + /** + * Holds if data flow through `node` is prohibited when the flow state is + * `state`. + */ + predicate isBarrier(Node node, FlowState state) { none() } + + /** Holds if data flow into `node` is prohibited. */ + predicate isBarrierIn(Node node) { none() } + + /** Holds if data flow out of `node` is prohibited. */ + predicate isBarrierOut(Node node) { none() } + + /** + * DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead. + * + * Holds if data flow through nodes guarded by `guard` is prohibited. + */ + deprecated predicate isBarrierGuard(BarrierGuard guard) { none() } + + /** + * DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead. + * + * Holds if data flow through nodes guarded by `guard` is prohibited when + * the flow state is `state` + */ + deprecated predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() } + + /** + * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps. + * This step is only applicable in `state1` and updates the flow state to `state2`. + */ + predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) { + none() + } + + /** + * Holds if an arbitrary number of implicit read steps of content `c` may be + * taken at `node`. + */ + predicate allowImplicitRead(Node node, ContentSet c) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Gets a data flow configuration feature to add restrictions to the set of + * valid flow paths. + * + * - `FeatureHasSourceCallContext`: + * Assume that sources have some existing call context to disallow + * conflicting return-flow directly following the source. + * - `FeatureHasSinkCallContext`: + * Assume that sinks have some existing call context to disallow + * conflicting argument-to-parameter flow directly preceding the sink. + * - `FeatureEqualSourceSinkCallContext`: + * Implies both of the above and additionally ensures that the entire flow + * path preserves the call context. + */ + FlowFeature getAFeature() { none() } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. + */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { this.hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(DataFlowExpr sink) { this.hasFlowTo(exprNode(sink)) } + + /** + * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev` + * measured in approximate number of interprocedural steps. + */ + int explorationLimit() { none() } + + /** + * Holds if hidden nodes should be included in the data flow graph. + * + * This feature should only be used for debugging or when the data flow graph + * is not visualized (for example in a `path-problem` query). + */ + predicate includeHiddenNodes() { none() } + + /** + * Holds if there is a partial data flow path from `source` to `node`. The + * approximate distance between `node` and the closest source is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards sink definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sources is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + */ + final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + partialFlow(source, node, this) and + dist = node.getSourceDistance() + } + + /** + * Holds if there is a partial data flow path from `node` to `sink`. The + * approximate distance between `node` and the closest sink is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards source definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sinks is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + * + * Note that reverse flow has slightly lower precision than the corresponding + * forward flow, as reverse flow disregards type pruning among other features. + */ + final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) { + revPartialFlow(node, sink, this) and + dist = node.getSinkDistance() + } +} + +/** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. + */ +abstract private class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSource(n, _)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n | this.isSink(n, _)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, _, n2, _)) < 0 + or + super.hasFlow(source, sink) + } +} + +private newtype TNodeEx = + TNodeNormal(Node n) or + TNodeImplicitRead(Node n, boolean hasRead) { + any(Configuration c).allowImplicitRead(n, _) and hasRead = [false, true] + } + +private class NodeEx extends TNodeEx { + string toString() { + result = this.asNode().toString() + or + exists(Node n | this.isImplicitReadNode(n, _) | result = n.toString() + " [Ext]") + } + + Node asNode() { this = TNodeNormal(result) } + + predicate isImplicitReadNode(Node n, boolean hasRead) { this = TNodeImplicitRead(n, hasRead) } + + Node projectToNode() { this = TNodeNormal(result) or this = TNodeImplicitRead(result, _) } + + pragma[nomagic] + private DataFlowCallable getEnclosingCallable0() { + nodeEnclosingCallable(this.projectToNode(), result) + } + + pragma[inline] + DataFlowCallable getEnclosingCallable() { + pragma[only_bind_out](this).getEnclosingCallable0() = pragma[only_bind_into](result) + } + + pragma[nomagic] + private DataFlowType getDataFlowType0() { nodeDataFlowType(this.asNode(), result) } + + pragma[inline] + DataFlowType getDataFlowType() { + pragma[only_bind_out](this).getDataFlowType0() = pragma[only_bind_into](result) + } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.projectToNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +private class ArgNodeEx extends NodeEx { + ArgNodeEx() { this.asNode() instanceof ArgNode } +} + +private class ParamNodeEx extends NodeEx { + ParamNodeEx() { this.asNode() instanceof ParamNode } + + predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) { + this.asNode().(ParamNode).isParameterOf(c, pos) + } + + ParameterPosition getPosition() { this.isParameterOf(_, result) } + + predicate allowParameterReturnInSelf() { allowParameterReturnInSelfCached(this.asNode()) } +} + +private class RetNodeEx extends NodeEx { + RetNodeEx() { this.asNode() instanceof ReturnNodeExt } + + ReturnPosition getReturnPosition() { result = getReturnPosition(this.asNode()) } + + ReturnKindExt getKind() { result = this.asNode().(ReturnNodeExt).getKind() } +} + +private predicate inBarrier(NodeEx node, Configuration config) { + exists(Node n | + node.asNode() = n and + config.isBarrierIn(n) + | + config.isSource(n) or config.isSource(n, _) + ) +} + +private predicate outBarrier(NodeEx node, Configuration config) { + exists(Node n | + node.asNode() = n and + config.isBarrierOut(n) + | + config.isSink(n) or config.isSink(n, _) + ) +} + +/** A bridge class to access the deprecated `isBarrierGuard`. */ +private class BarrierGuardGuardedNodeBridge extends Unit { + abstract predicate guardedNode(Node n, Configuration config); + + abstract predicate guardedNode(Node n, FlowState state, Configuration config); +} + +private class BarrierGuardGuardedNode extends BarrierGuardGuardedNodeBridge { + deprecated override predicate guardedNode(Node n, Configuration config) { + exists(BarrierGuard g | + config.isBarrierGuard(g) and + n = g.getAGuardedNode() + ) + } + + deprecated override predicate guardedNode(Node n, FlowState state, Configuration config) { + exists(BarrierGuard g | + config.isBarrierGuard(g, state) and + n = g.getAGuardedNode() + ) + } +} + +pragma[nomagic] +private predicate fullBarrier(NodeEx node, Configuration config) { + exists(Node n | node.asNode() = n | + config.isBarrier(n) + or + config.isBarrierIn(n) and + not config.isSource(n) and + not config.isSource(n, _) + or + config.isBarrierOut(n) and + not config.isSink(n) and + not config.isSink(n, _) + or + any(BarrierGuardGuardedNodeBridge b).guardedNode(n, config) + ) +} + +pragma[nomagic] +private predicate stateBarrier(NodeEx node, FlowState state, Configuration config) { + exists(Node n | node.asNode() = n | + config.isBarrier(n, state) + or + any(BarrierGuardGuardedNodeBridge b).guardedNode(n, state, config) + ) +} + +pragma[nomagic] +private predicate sourceNode(NodeEx node, FlowState state, Configuration config) { + ( + config.isSource(node.asNode()) and state instanceof FlowStateEmpty + or + config.isSource(node.asNode(), state) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) +} + +pragma[nomagic] +private predicate sinkNode(NodeEx node, FlowState state, Configuration config) { + ( + config.isSink(node.asNode()) and state instanceof FlowStateEmpty + or + config.isSink(node.asNode(), state) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) +} + +/** Provides the relevant barriers for a step from `node1` to `node2`. */ +pragma[inline] +private predicate stepFilter(NodeEx node1, NodeEx node2, Configuration config) { + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if data can flow in one local step from `node1` to `node2`. + */ +private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + simpleLocalFlowStepExt(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + stepFilter(node1, node2, config) + ) + or + exists(Node n | + config.allowImplicitRead(n, _) and + node1.asNode() = n and + node2.isImplicitReadNode(n, false) and + not fullBarrier(node1, config) + ) +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. + */ +private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) + ) + or + exists(Node n | + config.allowImplicitRead(n, _) and + node1.isImplicitReadNode(n, true) and + node2.asNode() = n and + not fullBarrier(node2, config) + ) +} + +private predicate additionalLocalStateStep( + NodeEx node1, FlowState s1, NodeEx node2, FlowState s2, Configuration config +) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), s1, pragma[only_bind_into](n2), s2) and + getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not stateBarrier(node1, s1, config) and + not stateBarrier(node2, s2, config) + ) +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. + */ +private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + jumpStepCached(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + stepFilter(node1, node2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. + */ +private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +private predicate additionalJumpStateStep( + NodeEx node1, FlowState s1, NodeEx node2, FlowState s2, Configuration config +) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), s1, pragma[only_bind_into](n2), s2) and + getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not stateBarrier(node1, s1, config) and + not stateBarrier(node2, s2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +pragma[nomagic] +private predicate readSet(NodeEx node1, ContentSet c, NodeEx node2, Configuration config) { + readSet(pragma[only_bind_into](node1.asNode()), c, pragma[only_bind_into](node2.asNode())) and + stepFilter(node1, node2, config) + or + exists(Node n | + node2.isImplicitReadNode(n, true) and + node1.isImplicitReadNode(n, _) and + config.allowImplicitRead(n, c) + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(ContentSet cs | + readSet(node1, cs, node2, config) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate clearsContentEx(NodeEx n, Content c) { + exists(ContentSet cs | + clearsContentCached(n.asNode(), cs) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate expectsContentEx(NodeEx n, Content c) { + exists(ContentSet cs | + expectsContentCached(n.asNode(), cs) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +pragma[nomagic] +private predicate notExpectsContent(NodeEx n) { not expectsContentCached(n.asNode(), _) } + +pragma[nomagic] +private predicate store( + NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config +) { + store(pragma[only_bind_into](node1.asNode()), tc, pragma[only_bind_into](node2.asNode()), + contentType) and + read(_, tc.getContent(), _, config) and + stepFilter(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutEx(DataFlowCall call, ReturnPosition pos, NodeEx out) { + viableReturnPosOut(call, pos, out.asNode()) +} + +pragma[nomagic] +private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx arg) { + viableParamArg(call, p.asNode(), arg.asNode()) +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +private predicate hasSourceCallCtx(Configuration config) { + exists(FlowFeature feature | feature = config.getAFeature() | + feature instanceof FeatureHasSourceCallContext or + feature instanceof FeatureEqualSourceSinkCallContext + ) +} + +private predicate hasSinkCallCtx(Configuration config) { + exists(FlowFeature feature | feature = config.getAFeature() | + feature instanceof FeatureHasSinkCallContext or + feature instanceof FeatureEqualSourceSinkCallContext + ) +} + +private module Stage1 implements StageSig { + class ApApprox = Unit; + + class Ap = Unit; + + class ApOption = Unit; + + class Cc = boolean; + + /* Begin: Stage 1 logic. */ + /** + * Holds if `node` is reachable from a source in the configuration `config`. + * + * The Boolean `cc` records whether the node is reached through an + * argument in a call. + */ + predicate fwdFlow(NodeEx node, Cc cc, Configuration config) { + sourceNode(node, _, config) and + if hasSourceCallCtx(config) then cc = true else cc = false + or + exists(NodeEx mid | fwdFlow(mid, cc, config) | + localFlowStep(mid, node, config) or + additionalLocalFlowStep(mid, node, config) or + additionalLocalStateStep(mid, _, node, _, config) + ) + or + exists(NodeEx mid | fwdFlow(mid, _, config) and cc = false | + jumpStep(mid, node, config) or + additionalJumpStep(mid, node, config) or + additionalJumpStateStep(mid, _, node, _, config) + ) + or + // store + exists(NodeEx mid | + useFieldFlow(config) and + fwdFlow(mid, cc, config) and + store(mid, _, node, _, config) + ) + or + // read + exists(ContentSet c | + fwdFlowReadSet(c, node, cc, config) and + fwdFlowConsCandSet(c, _, config) + ) + or + // flow into a callable + exists(NodeEx arg | + fwdFlow(arg, _, config) and + viableParamArgEx(_, node, arg) and + cc = true and + not fullBarrier(node, config) + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, false, config) and + cc = false + or + fwdFlowOutFromArg(call, node, config) and + fwdFlowIsEntered(call, cc, config) + ) + } + + private predicate fwdFlow(NodeEx node, Configuration config) { fwdFlow(node, _, config) } + + pragma[nomagic] + private predicate fwdFlowReadSet(ContentSet c, NodeEx node, Cc cc, Configuration config) { + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + readSet(mid, c, node, config) + ) + } + + /** + * Holds if `c` is the target of a store in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Content c, Configuration config) { + exists(NodeEx mid, NodeEx node, TypedContent tc | + not fullBarrier(node, config) and + useFieldFlow(config) and + fwdFlow(mid, _, config) and + store(mid, tc, node, _, config) and + c = tc.getContent() + ) + } + + /** + * Holds if `cs` may be interpreted in a read as the target of some store + * into `c`, in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowConsCandSet(ContentSet cs, Content c, Configuration config) { + fwdFlowConsCand(c, config) and + c = cs.getAReadContent() + } + + pragma[nomagic] + private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) { + exists(RetNodeEx ret | + fwdFlow(ret, cc, config) and + ret.getReturnPosition() = pos + ) + } + + pragma[nomagic] + private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) { + exists(ReturnPosition pos | + fwdFlowReturnPosition(pos, cc, config) and + viableReturnPosOutEx(call, pos, out) and + not fullBarrier(out, config) + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg(DataFlowCall call, NodeEx out, Configuration config) { + fwdFlowOut(call, out, true, config) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) { + exists(ArgNodeEx arg | + fwdFlow(arg, cc, config) and + viableParamArgEx(call, _, arg) + ) + } + + private predicate stateStepFwd(FlowState state1, FlowState state2, Configuration config) { + exists(NodeEx node1 | + additionalLocalStateStep(node1, state1, _, state2, config) or + additionalJumpStateStep(node1, state1, _, state2, config) + | + fwdFlow(node1, config) + ) + } + + private predicate fwdFlowState(FlowState state, Configuration config) { + sourceNode(_, state, config) + or + exists(FlowState state0 | + fwdFlowState(state0, config) and + stateStepFwd(state0, state, config) + ) + } + + /** + * Holds if `node` is part of a path from a source to a sink in the + * configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink. + */ + pragma[nomagic] + predicate revFlow(NodeEx node, boolean toReturn, Configuration config) { + revFlow0(node, toReturn, config) and + fwdFlow(node, config) + } + + pragma[nomagic] + private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) { + exists(FlowState state | + fwdFlow(node, pragma[only_bind_into](config)) and + sinkNode(node, state, config) and + fwdFlowState(state, pragma[only_bind_into](config)) and + if hasSinkCallCtx(config) then toReturn = true else toReturn = false + ) + or + exists(NodeEx mid | revFlow(mid, toReturn, config) | + localFlowStep(node, mid, config) or + additionalLocalFlowStep(node, mid, config) or + additionalLocalStateStep(node, _, mid, _, config) + ) + or + exists(NodeEx mid | revFlow(mid, _, config) and toReturn = false | + jumpStep(node, mid, config) or + additionalJumpStep(node, mid, config) or + additionalJumpStateStep(node, _, mid, _, config) + ) + or + // store + exists(Content c | + revFlowStore(c, node, toReturn, config) and + revFlowConsCand(c, config) + ) + or + // read + exists(NodeEx mid, ContentSet c | + readSet(node, c, mid, config) and + fwdFlowConsCandSet(c, _, pragma[only_bind_into](config)) and + revFlow(mid, toReturn, pragma[only_bind_into](config)) + ) + or + // flow into a callable + exists(DataFlowCall call | + revFlowIn(call, node, false, config) and + toReturn = false + or + revFlowInToReturn(call, node, config) and + revFlowIsReturned(call, toReturn, config) + ) + or + // flow out of a callable + exists(ReturnPosition pos | + revFlowOut(pos, config) and + node.(RetNodeEx).getReturnPosition() = pos and + toReturn = true + ) + } + + /** + * Holds if `c` is the target of a read in the flow covered by `revFlow`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Content c, Configuration config) { + exists(NodeEx mid, NodeEx node, ContentSet cs | + fwdFlow(node, pragma[only_bind_into](config)) and + readSet(node, cs, mid, config) and + fwdFlowConsCandSet(cs, c, pragma[only_bind_into](config)) and + revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + private predicate revFlowStore(Content c, NodeEx node, boolean toReturn, Configuration config) { + exists(NodeEx mid, TypedContent tc | + revFlow(mid, toReturn, pragma[only_bind_into](config)) and + fwdFlowConsCand(c, pragma[only_bind_into](config)) and + store(node, tc, mid, _, config) and + c = tc.getContent() + ) + } + + /** + * Holds if `c` is the target of both a read and a store in the flow covered + * by `revFlow`. + */ + pragma[nomagic] + predicate revFlowIsReadAndStored(Content c, Configuration conf) { + revFlowConsCand(c, conf) and + revFlowStore(c, _, _, conf) + } + + pragma[nomagic] + predicate viableReturnPosOutNodeCandFwd1( + DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config + ) { + fwdFlowReturnPosition(pos, _, config) and + viableReturnPosOutEx(call, pos, out) + } + + pragma[nomagic] + private predicate revFlowOut(ReturnPosition pos, Configuration config) { + exists(DataFlowCall call, NodeEx out | + revFlow(out, _, config) and + viableReturnPosOutNodeCandFwd1(call, pos, out, config) + ) + } + + pragma[nomagic] + predicate viableParamArgNodeCandFwd1( + DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config + ) { + viableParamArgEx(call, p, arg) and + fwdFlow(arg, config) + } + + pragma[nomagic] + private predicate revFlowIn( + DataFlowCall call, ArgNodeEx arg, boolean toReturn, Configuration config + ) { + exists(ParamNodeEx p | + revFlow(p, toReturn, config) and + viableParamArgNodeCandFwd1(call, p, arg, config) + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn(DataFlowCall call, ArgNodeEx arg, Configuration config) { + revFlowIn(call, arg, true, config) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) { + exists(NodeEx out | + revFlow(out, toReturn, config) and + fwdFlowOutFromArg(call, out, config) + ) + } + + private predicate stateStepRev(FlowState state1, FlowState state2, Configuration config) { + exists(NodeEx node1, NodeEx node2 | + additionalLocalStateStep(node1, state1, node2, state2, config) or + additionalJumpStateStep(node1, state1, node2, state2, config) + | + revFlow(node1, _, pragma[only_bind_into](config)) and + revFlow(node2, _, pragma[only_bind_into](config)) and + fwdFlowState(state1, pragma[only_bind_into](config)) and + fwdFlowState(state2, pragma[only_bind_into](config)) + ) + } + + predicate revFlowState(FlowState state, Configuration config) { + exists(NodeEx node | + sinkNode(node, state, config) and + revFlow(node, _, pragma[only_bind_into](config)) and + fwdFlowState(state, pragma[only_bind_into](config)) + ) + or + exists(FlowState state0 | + revFlowState(state0, config) and + stateStepRev(state, state0, config) + ) + } + + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Content c | + revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + revFlow(node2, pragma[only_bind_into](config)) and + store(node1, tc, node2, contentType, config) and + c = tc.getContent() and + exists(ap1) + ) + } + + pragma[nomagic] + predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) { + revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + read(n1, c, n2, pragma[only_bind_into](config)) and + revFlow(n2, pragma[only_bind_into](config)) + } + + pragma[nomagic] + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, config) } + + bindingset[node, state, config] + predicate revFlow(NodeEx node, FlowState state, Ap ap, Configuration config) { + revFlow(node, _, pragma[only_bind_into](config)) and + exists(state) and + exists(ap) + } + + private predicate throughFlowNodeCand(NodeEx node, Configuration config) { + revFlow(node, true, config) and + fwdFlow(node, true, config) and + not inBarrier(node, config) and + not outBarrier(node, config) + } + + /** Holds if flow may return from `callable`. */ + pragma[nomagic] + private predicate returnFlowCallableNodeCand( + DataFlowCallable callable, ReturnKindExt kind, Configuration config + ) { + exists(RetNodeEx ret | + throughFlowNodeCand(ret, config) and + callable = ret.getEnclosingCallable() and + kind = ret.getKind() + ) + } + + /** + * Holds if flow may enter through `p` and reach a return node making `p` a + * candidate for the origin of a summary. + */ + pragma[nomagic] + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(ReturnKindExt kind | + throughFlowNodeCand(p, config) and + returnFlowCallableNodeCand(c, kind, config) and + p.getEnclosingCallable() = c and + exists(ap) and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition() + or + p.allowParameterReturnInSelf() + ) + ) + } + + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists(ArgNodeEx arg, boolean toReturn | + revFlow(arg, toReturn, config) and + revFlowInToReturn(call, arg, config) and + revFlowIsReturned(call, toReturn, config) + ) + } + + predicate stats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples, Configuration config + ) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, config)) and + fields = count(Content f0 | fwdFlowConsCand(f0, config)) and + conscand = -1 and + states = count(FlowState state | fwdFlowState(state, config)) and + tuples = count(NodeEx n, boolean b | fwdFlow(n, b, config)) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, config)) and + fields = count(Content f0 | revFlowConsCand(f0, config)) and + conscand = -1 and + states = count(FlowState state | revFlowState(state, config)) and + tuples = count(NodeEx n, boolean b | revFlow(n, b, config)) + } + /* End: Stage 1 logic. */ +} + +pragma[noinline] +private predicate localFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) { + Stage1::revFlow(node2, config) and + localFlowStep(node1, node2, config) +} + +pragma[noinline] +private predicate additionalLocalFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) { + Stage1::revFlow(node2, config) and + additionalLocalFlowStep(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutNodeCand1( + DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config +) { + Stage1::revFlow(out, config) and + Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, RetNodeEx ret, NodeEx out, Configuration config +) { + viableReturnPosOutNodeCand1(call, ret.getReturnPosition(), out, config) and + Stage1::revFlow(ret, config) and + not outBarrier(ret, config) and + not inBarrier(out, config) +} + +pragma[nomagic] +private predicate viableParamArgNodeCand1( + DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config +) { + Stage1::viableParamArgNodeCandFwd1(call, p, arg, config) and + Stage1::revFlow(arg, config) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, Configuration config +) { + viableParamArgNodeCand1(call, p, arg, config) and + Stage1::revFlow(p, config) and + not outBarrier(arg, config) and + not inBarrier(p, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(NodeEx n1, Configuration conf) { + result = + strictcount(NodeEx n | + flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf) + ) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(NodeEx n2, Configuration conf) { + result = + strictcount(NodeEx n | + flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf) + ) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, ret, out, config) and + exists(int b, int j | + b = branch(ret, config) and + j = join(out, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config +) { + flowIntoCallNodeCand1(call, arg, p, config) and + exists(int b, int j | + b = branch(arg, config) and + j = join(p, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +private signature module StageSig { + class Ap; + + predicate revFlow(NodeEx node, Configuration config); + + bindingset[node, state, config] + predicate revFlow(NodeEx node, FlowState state, Ap ap, Configuration config); + + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config); + + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config); + + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ); + + predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config); +} + +private module MkStage { + class ApApprox = PrevStage::Ap; + + signature module StageParam { + class Ap; + + class ApNil extends Ap; + + bindingset[result, ap] + ApApprox getApprox(Ap ap); + + ApNil getApNil(NodeEx node); + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail); + + Content getHeadContent(Ap ap); + + class ApOption; + + ApOption apNone(); + + ApOption apSome(Ap ap); + + class Cc; + + class CcCall extends Cc; + + // TODO: member predicate on CcCall + predicate matchesCall(CcCall cc, DataFlowCall call); + + class CcNoCall extends Cc; + + Cc ccNone(); + + CcCall ccSomeCall(); + + class LocalCc; + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc); + + bindingset[call, c, innercc] + CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc); + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc); + + bindingset[node1, state1, config] + bindingset[node2, state2, config] + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ); + + predicate flowOutOfCall( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + ); + + predicate flowIntoCall( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ); + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config); + + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType); + } + + module Stage implements StageSig { + import Param + + /* Begin: Stage logic. */ + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + pragma[only_bind_out](apa) = pragma[only_bind_out](result) + } + + pragma[nomagic] + private predicate flowThroughOutOfCall( + DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, + Configuration config + ) { + flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and + PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, + pragma[only_bind_into](config)) and + matchesCall(ccc, call) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. + */ + pragma[nomagic] + predicate fwdFlow( + NodeEx node, FlowState state, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + fwdFlow0(node, state, cc, argAp, ap, config) and + PrevStage::revFlow(node, state, unbindApa(getApprox(ap)), config) and + filter(node, state, ap, config) + } + + pragma[nomagic] + private predicate fwdFlow0( + NodeEx node, FlowState state, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + sourceNode(node, state, config) and + (if hasSourceCallCtx(config) then cc = ccSomeCall() else cc = ccNone()) and + argAp = apNone() and + ap = getApNil(node) + or + exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc | + fwdFlow(mid, state0, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc) + | + localStep(mid, state0, node, state, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, state0, node, state, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(NodeEx mid | + fwdFlow(mid, pragma[only_bind_into](state), _, _, ap, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(mid, state, _, _, nil, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(mid, state0, _, _, nil, pragma[only_bind_into](config)) and + additionalJumpStateStep(mid, state0, node, state, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, state, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap ap0, Content c | + fwdFlowRead(ap0, c, _, node, state, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, state, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + fwdFlowOutNotFromArg(node, state, cc, argAp, ap, config) + or + exists(DataFlowCall call, Ap argAp0 | + fwdFlowOutFromArg(call, node, state, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, argAp0, config) + ) + } + + pragma[nomagic] + private predicate fwdFlowStore( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, FlowState state, Cc cc, ApOption argAp, + Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, state, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, NodeEx node1, NodeEx node2, FlowState state, Cc cc, ApOption argAp, + Configuration config + ) { + fwdFlow(node1, state, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNodeEx p, FlowState state, Cc outercc, Cc innercc, ApOption argAp, + Ap ap, Configuration config + ) { + exists(ArgNodeEx arg, boolean allowsFieldFlow | + fwdFlow(arg, state, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate fwdFlowOutNotFromArg( + NodeEx out, FlowState state, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists( + DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc, + DataFlowCallable inner + | + fwdFlow(ret, state, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = ret.getEnclosingCallable() and + ccOut = getCallContextReturn(inner, call, innercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, NodeEx out, FlowState state, Ap argAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | + fwdFlow(ret, state, ccc, apSome(argAp), ap, config) and + flowThroughOutOfCall(call, ccc, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p | + fwdFlowIn(call, p, _, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) + ) + } + + pragma[nomagic] + private predicate storeStepFwd( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, _, config) + } + + private predicate readStepFwd( + NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config + ) { + fwdFlowRead(ap1, c, n1, n2, _, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + + pragma[nomagic] + private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) { + exists(Ap argAp0, NodeEx out, FlowState state, Cc cc, ApOption argAp, Ap ap | + fwdFlow(out, state, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, + pragma[only_bind_into](config)) and + fwdFlowOutFromArg(call, out, state, argAp0, ap, config) and + fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), + pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0), + pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + private predicate flowThroughIntoCall( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ) { + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + fwdFlow(arg, _, _, _, _, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and + callMayFlowThroughFwd(call, pragma[only_bind_into](config)) + } + + pragma[nomagic] + private predicate returnNodeMayFlowThrough( + RetNodeEx ret, FlowState state, Ap ap, Configuration config + ) { + fwdFlow(ret, state, any(CcCall ccc), apSome(_), ap, config) + } + + /** + * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + revFlow0(node, state, toReturn, returnAp, ap, config) and + fwdFlow(node, state, _, _, ap, config) + } + + pragma[nomagic] + private predicate revFlow0( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, state, _, _, ap, config) and + sinkNode(node, state, config) and + (if hasSinkCallCtx(config) then toReturn = true else toReturn = false) and + returnAp = apNone() and + ap instanceof ApNil + or + exists(NodeEx mid, FlowState state0 | + localStep(node, state, mid, state0, true, _, config, _) and + revFlow(mid, state0, toReturn, returnAp, ap, config) + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(node, pragma[only_bind_into](state), _, _, ap, pragma[only_bind_into](config)) and + localStep(node, pragma[only_bind_into](state), mid, state0, false, _, config, _) and + revFlow(mid, state0, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + exists(NodeEx mid | + jumpStep(node, mid, config) and + revFlow(mid, state, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), state, _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(node, _, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStateStep(node, state, mid, state0, config) and + revFlow(pragma[only_bind_into](mid), pragma[only_bind_into](state0), _, _, nil, + pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, state, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(NodeEx mid, Ap ap0 | + revFlow(mid, state, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + revFlowInNotToReturn(node, state, returnAp, ap, config) and + toReturn = false + or + exists(DataFlowCall call, Ap returnAp0 | + revFlowInToReturn(call, node, state, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + or + // flow out of a callable + revFlowOut(_, node, state, _, _, ap, config) and + toReturn = true and + if returnNodeMayFlowThrough(node, state, ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, NodeEx node, FlowState state, TypedContent tc, NodeEx mid, + boolean toReturn, ApOption returnAp, Configuration config + ) { + revFlow(mid, state, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(NodeEx mid, Ap tail0 | + revFlow(mid, _, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, RetNodeEx ret, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(NodeEx out, boolean allowsFieldFlow | + revFlow(out, state, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate revFlowInNotToReturn( + ArgNodeEx arg, FlowState state, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, state, false, returnAp, ap, config) and + flowIntoCall(_, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNodeEx arg, FlowState state, Ap returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, state, true, apSome(returnAp), ap, config) and + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, FlowState state, CcCall ccc | + revFlowOut(call, ret, state, toReturn, returnAp, ap, config) and + fwdFlow(ret, state, ccc, apSome(_), ap, config) and + matchesCall(ccc, call) + ) + } + + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Ap ap2, Content c | + PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and + revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + + predicate revFlow(NodeEx node, FlowState state, Configuration config) { + revFlow(node, state, _, _, _, config) + } + + predicate revFlow(NodeEx node, FlowState state, Ap ap, Configuration config) { + revFlow(node, state, _, _, ap, config) + } + + pragma[nomagic] + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, _, config) } + + // use an alias as a workaround for bad functionality-induced joins + pragma[nomagic] + predicate revFlowAlias(NodeEx node, Configuration config) { revFlow(node, _, _, _, _, config) } + + // use an alias as a workaround for bad functionality-induced joins + pragma[nomagic] + predicate revFlowAlias(NodeEx node, FlowState state, Ap ap, Configuration config) { + revFlow(node, state, ap, config) + } + + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + private predicate validAp(Ap ap, Configuration config) { + revFlow(_, _, _, _, ap, config) and ap instanceof ApNil + or + exists(TypedContent head, Ap tail | + consCand(head, tail, config) and + ap = apCons(head, tail) + ) + } + + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + revConsCand(tc, ap, config) and + validAp(ap, config) + } + + pragma[noinline] + private predicate parameterFlow( + ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, _, true, apSome(ap0), ap, config) and + c = p.getEnclosingCallable() + } + + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(RetNodeEx ret, FlowState state, Ap ap0, ReturnKindExt kind, ParameterPosition pos | + parameterFlow(p, ap, ap0, c, config) and + c = ret.getEnclosingCallable() and + revFlow(pragma[only_bind_into](ret), pragma[only_bind_into](state), true, apSome(_), + pragma[only_bind_into](ap0), pragma[only_bind_into](config)) and + fwdFlow(ret, state, any(CcCall ccc), apSome(ap), ap0, config) and + kind = ret.getKind() and + p.getPosition() = pos and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + p.allowParameterReturnInSelf() + ) + ) + } + + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists( + Ap returnAp0, ArgNodeEx arg, FlowState state, boolean toReturn, ApOption returnAp, Ap ap + | + revFlow(arg, state, toReturn, returnAp, ap, config) and + revFlowInToReturn(call, arg, state, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + } + + predicate stats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples, Configuration config + ) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, _, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + states = count(FlowState state | fwdFlow(_, state, _, _, _, config)) and + tuples = + count(NodeEx n, FlowState state, Cc cc, ApOption argAp, Ap ap | + fwdFlow(n, state, cc, argAp, ap, config) + ) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + states = count(FlowState state | revFlow(_, state, _, _, _, config)) and + tuples = + count(NodeEx n, FlowState state, boolean b, ApOption retAp, Ap ap | + revFlow(n, state, b, retAp, ap, config) + ) + } + /* End: Stage logic. */ + } +} + +private module BooleanCallContext { + class Cc extends boolean { + Cc() { this in [true, false] } + } + + class CcCall extends Cc { + CcCall() { this = true } + } + + /** Holds if the call context may be `call`. */ + predicate matchesCall(CcCall cc, DataFlowCall call) { any() } + + class CcNoCall extends Cc { + CcNoCall() { this = false } + } + + Cc ccNone() { result = false } + + CcCall ccSomeCall() { result = true } + + class LocalCc = Unit; + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc) { any() } + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } + + bindingset[call, c, innercc] + CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() } +} + +private module Level1CallContext { + class Cc = CallContext; + + class CcCall = CallContextCall; + + pragma[inline] + predicate matchesCall(CcCall cc, DataFlowCall call) { cc.matchesCall(call) } + + class CcNoCall = CallContextNoCall; + + Cc ccNone() { result instanceof CallContextAny } + + CcCall ccSomeCall() { result instanceof CallContextSomeCall } + + module NoLocalCallContext { + class LocalCc = Unit; + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc) { any() } + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + checkCallContextCall(outercc, call, c) and + if recordDataFlowCallSiteDispatch(call, c) + then result = TSpecificCall(call) + else result = TSomeCall() + } + } + + module LocalCallContext { + class LocalCc = LocalCallContext; + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc) { + result = + getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)), + node.getEnclosingCallable()) + } + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + checkCallContextCall(outercc, call, c) and + if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall() + } + } + + bindingset[call, c, innercc] + CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { + checkCallContextReturn(innercc, c, call) and + if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone() + } +} + +private module Stage2Param implements MkStage::StageParam { + private module PrevStage = Stage1; + + class Ap extends boolean { + Ap() { this in [true, false] } + } + + class ApNil extends Ap { + ApNil() { this = false } + } + + bindingset[result, ap] + PrevStage::Ap getApprox(Ap ap) { any() } + + ApNil getApNil(NodeEx node) { Stage1::revFlow(node, _) and exists(result) } + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) } + + pragma[inline] + Content getHeadContent(Ap ap) { exists(result) and ap = true } + + class ApOption = BooleanOption; + + ApOption apNone() { result = TBooleanNone() } + + ApOption apSome(Ap ap) { result = TBooleanSome(ap) } + + import Level1CallContext + import NoLocalCallContext + + bindingset[node1, state1, config] + bindingset[node2, state2, config] + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + ( + preservesValue = true and + localFlowStepNodeCand1(node1, node2, config) and + state1 = state2 + or + preservesValue = false and + additionalLocalFlowStepNodeCand1(node1, node2, config) and + state1 = state2 + or + preservesValue = false and + additionalLocalStateStep(node1, state1, node2, state2, config) + ) and + exists(ap) and + exists(lcc) + } + + predicate flowOutOfCall = flowOutOfCallNodeCand1/5; + + predicate flowIntoCall = flowIntoCallNodeCand1/5; + + pragma[nomagic] + private predicate expectsContentCand(NodeEx node, Configuration config) { + exists(Content c | + PrevStage::revFlow(node, pragma[only_bind_into](config)) and + PrevStage::revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + expectsContentEx(node, c) + ) + } + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { + PrevStage::revFlowState(state, pragma[only_bind_into](config)) and + exists(ap) and + not stateBarrier(node, state, config) and + ( + notExpectsContent(node) + or + ap = true and + expectsContentCand(node, config) + ) + } + + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } +} + +private module Stage2 implements StageSig { + import MkStage::Stage +} + +pragma[nomagic] +private predicate flowOutOfCallNodeCand2( + DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node1, pragma[only_bind_into](config)) +} + +pragma[nomagic] +private predicate flowIntoCallNodeCand2( + DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow, + Configuration config +) { + flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node1, pragma[only_bind_into](config)) +} + +private module LocalFlowBigStep { + /** + * A node where some checking is required, and hence the big-step relation + * is not allowed to step over. + */ + private class FlowCheckNode extends NodeEx { + FlowCheckNode() { + castNode(this.asNode()) or + clearsContentCached(this.asNode(), _) or + expectsContentCached(this.asNode(), _) + } + } + + /** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) { + Stage2::revFlow(node, state, config) and + ( + sourceNode(node, state, config) + or + jumpStep(_, node, config) + or + additionalJumpStep(_, node, config) + or + additionalJumpStateStep(_, _, node, state, config) + or + node instanceof ParamNodeEx + or + node.asNode() instanceof OutNodeExt + or + Stage2::storeStepCand(_, _, _, node, _, config) + or + Stage2::readStepCand(_, _, node, config) + or + node instanceof FlowCheckNode + or + exists(FlowState s | + additionalLocalStateStep(_, s, node, state, config) and + s != state + ) + ) + } + + /** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowExit(NodeEx node, FlowState state, Configuration config) { + exists(NodeEx next | Stage2::revFlow(next, state, config) | + jumpStep(node, next, config) or + additionalJumpStep(node, next, config) or + flowIntoCallNodeCand1(_, node, next, config) or + flowOutOfCallNodeCand1(_, node, next, config) or + Stage2::storeStepCand(node, _, _, next, _, config) or + Stage2::readStepCand(node, _, next, config) + ) + or + exists(NodeEx next, FlowState s | Stage2::revFlow(next, s, config) | + additionalJumpStateStep(node, state, next, s, config) + or + additionalLocalStateStep(node, state, next, s, config) and + s != state + ) + or + Stage2::revFlow(node, state, config) and + node instanceof FlowCheckNode + or + sinkNode(node, state, config) + } + + pragma[noinline] + private predicate additionalLocalFlowStepNodeCand2( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, Configuration config + ) { + additionalLocalFlowStepNodeCand1(node1, node2, config) and + state1 = state2 and + Stage2::revFlow(node1, pragma[only_bind_into](state1), false, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node2, pragma[only_bind_into](state2), false, + pragma[only_bind_into](config)) + or + additionalLocalStateStep(node1, state1, node2, state2, config) and + Stage2::revFlow(node1, state1, false, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node2, state2, false, pragma[only_bind_into](config)) + } + + /** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `[additional]localFlowStep` beginning + * at `localFlowEntry`. + */ + pragma[nomagic] + private predicate localFlowStepPlus( + NodeEx node1, FlowState state, NodeEx node2, boolean preservesValue, DataFlowType t, + Configuration config, LocalCallContext cc + ) { + not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and + ( + localFlowEntry(node1, pragma[only_bind_into](state), pragma[only_bind_into](config)) and + ( + localFlowStepNodeCand1(node1, node2, config) and + preservesValue = true and + t = node1.getDataFlowType() and // irrelevant dummy value + Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config)) + or + additionalLocalFlowStepNodeCand2(node1, state, node2, state, config) and + preservesValue = false and + t = node2.getDataFlowType() + ) and + node1 != node2 and + cc.relevantFor(node1.getEnclosingCallable()) and + not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) + or + exists(NodeEx mid | + localFlowStepPlus(node1, pragma[only_bind_into](state), mid, preservesValue, t, + pragma[only_bind_into](config), cc) and + localFlowStepNodeCand1(mid, node2, config) and + not mid instanceof FlowCheckNode and + Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config)) + ) + or + exists(NodeEx mid | + localFlowStepPlus(node1, state, mid, _, _, pragma[only_bind_into](config), cc) and + additionalLocalFlowStepNodeCand2(mid, state, node2, state, config) and + not mid instanceof FlowCheckNode and + preservesValue = false and + t = node2.getDataFlowType() + ) + ) + } + + /** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. + */ + pragma[nomagic] + predicate localFlowBigStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + AccessPathFrontNil apf, Configuration config, LocalCallContext callContext + ) { + localFlowStepPlus(node1, state1, node2, preservesValue, apf.getType(), config, callContext) and + localFlowExit(node2, state1, config) and + state1 = state2 + or + additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and + state1 != state2 and + preservesValue = false and + apf = TFrontNil(node2.getDataFlowType()) and + callContext.relevantFor(node1.getEnclosingCallable()) and + not exists(DataFlowCall call | call = callContext.(LocalCallContextSpecificCall).getCall() | + isUnreachableInCallCached(node1.asNode(), call) or + isUnreachableInCallCached(node2.asNode(), call) + ) + } +} + +private import LocalFlowBigStep + +private module Stage3Param implements MkStage::StageParam { + private module PrevStage = Stage2; + + class Ap = AccessPathFront; + + class ApNil = AccessPathFrontNil; + + PrevStage::Ap getApprox(Ap ap) { result = ap.toBoolNonEmpty() } + + ApNil getApNil(NodeEx node) { + PrevStage::revFlow(node, _) and result = TFrontNil(node.getDataFlowType()) + } + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) } + + pragma[noinline] + Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathFrontOption; + + ApOption apNone() { result = TAccessPathFrontNone() } + + ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) } + + import BooleanCallContext + + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, state1, node2, state2, preservesValue, ap, config, _) and exists(lcc) + } + + predicate flowOutOfCall = flowOutOfCallNodeCand2/5; + + predicate flowIntoCall = flowIntoCallNodeCand2/5; + + pragma[nomagic] + private predicate clearSet(NodeEx node, ContentSet c, Configuration config) { + PrevStage::revFlow(node, config) and + clearsContentCached(node.asNode(), c) + } + + pragma[nomagic] + private predicate clearContent(NodeEx node, Content c, Configuration config) { + exists(ContentSet cs | + PrevStage::readStepCand(_, pragma[only_bind_into](c), _, pragma[only_bind_into](config)) and + c = cs.getAReadContent() and + clearSet(node, cs, pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + private predicate clear(NodeEx node, Ap ap, Configuration config) { + clearContent(node, ap.getHead().getContent(), config) + } + + pragma[nomagic] + private predicate expectsContentCand(NodeEx node, Ap ap, Configuration config) { + exists(Content c | + PrevStage::revFlow(node, pragma[only_bind_into](config)) and + PrevStage::readStepCand(_, c, _, pragma[only_bind_into](config)) and + expectsContentEx(node, c) and + c = ap.getHead().getContent() + ) + } + + pragma[nomagic] + private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode } + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { + exists(state) and + exists(config) and + not clear(node, ap, config) and + (if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()) and + ( + notExpectsContent(node) + or + expectsContentCand(node, ap, config) + ) + } + + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType) { + // We need to typecheck stores here, since reverse flow through a getter + // might have a different type here compared to inside the getter. + compatibleTypes(ap.getType(), contentType) + } +} + +private module Stage3 implements StageSig { + import MkStage::Stage +} + +/** + * Holds if `argApf` is recorded as the summary context for flow reaching `node` + * and remains relevant for the following pruning stage. + */ +private predicate flowCandSummaryCtx( + NodeEx node, FlowState state, AccessPathFront argApf, Configuration config +) { + exists(AccessPathFront apf | + Stage3::revFlow(node, state, true, _, apf, config) and + Stage3::fwdFlow(node, state, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config) + ) +} + +/** + * Holds if a length 2 access path approximation with the head `tc` is expected + * to be expensive. + */ +private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) { + exists(int tails, int nodes, int apLimit, int tupleLimit | + tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and + nodes = + strictcount(NodeEx n, FlowState state | + Stage3::revFlow(n, state, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + or + flowCandSummaryCtx(n, state, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + ) and + accessPathApproxCostLimits(apLimit, tupleLimit) and + apLimit < tails and + tupleLimit < (tails - 1) * nodes and + not tc.forceHighPrecision() + ) +} + +private newtype TAccessPathApprox = + TNil(DataFlowType t) or + TConsNil(TypedContent tc, DataFlowType t) { + Stage3::consCand(tc, TFrontNil(t), _) and + not expensiveLen2unfolding(tc, _) + } or + TConsCons(TypedContent tc1, TypedContent tc2, int len) { + Stage3::consCand(tc1, TFrontHead(tc2), _) and + len in [2 .. accessPathLimit()] and + not expensiveLen2unfolding(tc1, _) + } or + TCons1(TypedContent tc, int len) { + len in [1 .. accessPathLimit()] and + expensiveLen2unfolding(tc, _) + } + +/** + * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only + * the first two elements of the list and its length are tracked. If data flows + * from a source to a given node with a given `AccessPathApprox`, this indicates + * the sequence of dereference operations needed to get from the value in the node + * to the tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPathApprox extends TAccessPathApprox { + abstract string toString(); + + abstract TypedContent getHead(); + + abstract int len(); + + abstract DataFlowType getType(); + + abstract AccessPathFront getFront(); + + /** Gets the access path obtained by popping `head` from this path, if any. */ + abstract AccessPathApprox pop(TypedContent head); +} + +private class AccessPathApproxNil extends AccessPathApprox, TNil { + private DataFlowType t; + + AccessPathApproxNil() { this = TNil(t) } + + override string toString() { result = concat(": " + ppReprType(t)) } + + override TypedContent getHead() { none() } + + override int len() { result = 0 } + + override DataFlowType getType() { result = t } + + override AccessPathFront getFront() { result = TFrontNil(t) } + + override AccessPathApprox pop(TypedContent head) { none() } +} + +abstract private class AccessPathApproxCons extends AccessPathApprox { } + +private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil { + private TypedContent tc; + private DataFlowType t; + + AccessPathApproxConsNil() { this = TConsNil(tc, t) } + + override string toString() { + // The `concat` becomes "" if `ppReprType` has no result. + result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t)) + } + + override TypedContent getHead() { result = tc } + + override int len() { result = 1 } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) } +} + +private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons { + private TypedContent tc1; + private TypedContent tc2; + private int len; + + AccessPathApproxConsCons() { this = TConsCons(tc1, tc2, len) } + + override string toString() { + if len = 2 + then result = "[" + tc1.toString() + ", " + tc2.toString() + "]" + else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc1 } + + override int len() { result = len } + + override DataFlowType getType() { result = tc1.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc1) } + + override AccessPathApprox pop(TypedContent head) { + head = tc1 and + ( + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + } +} + +private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 { + private TypedContent tc; + private int len; + + AccessPathApproxCons1() { this = TCons1(tc, len) } + + override string toString() { + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc } + + override int len() { result = len } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { + head = tc and + ( + exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) | + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + or + exists(DataFlowType t | + len = 1 and + Stage3::consCand(tc, TFrontNil(t), _) and + result = TNil(t) + ) + ) + } +} + +/** Gets the access path obtained by popping `tc` from `ap`, if any. */ +private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) } + +/** Gets the access path obtained by pushing `tc` onto `ap`. */ +private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = pop(tc, result) } + +private newtype TAccessPathApproxOption = + TAccessPathApproxNone() or + TAccessPathApproxSome(AccessPathApprox apa) + +private class AccessPathApproxOption extends TAccessPathApproxOption { + string toString() { + this = TAccessPathApproxNone() and result = "" + or + this = TAccessPathApproxSome(any(AccessPathApprox apa | result = apa.toString())) + } +} + +private module Stage4Param implements MkStage::StageParam { + private module PrevStage = Stage3; + + class Ap = AccessPathApprox; + + class ApNil = AccessPathApproxNil; + + PrevStage::Ap getApprox(Ap ap) { result = ap.getFront() } + + ApNil getApNil(NodeEx node) { + PrevStage::revFlow(node, _) and result = TNil(node.getDataFlowType()) + } + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) } + + pragma[noinline] + Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathApproxOption; + + ApOption apNone() { result = TAccessPathApproxNone() } + + ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) } + + import Level1CallContext + import LocalCallContext + + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, state1, node2, state2, preservesValue, ap.getFront(), config, lcc) + } + + pragma[nomagic] + predicate flowOutOfCall( + DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config + ) { + exists(FlowState state | + flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, pragma[only_bind_into](state), _, pragma[only_bind_into](config)) and + PrevStage::revFlowAlias(node1, pragma[only_bind_into](state), _, + pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + predicate flowIntoCall( + DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow, + Configuration config + ) { + exists(FlowState state | + flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, pragma[only_bind_into](state), _, pragma[only_bind_into](config)) and + PrevStage::revFlowAlias(node1, pragma[only_bind_into](state), _, + pragma[only_bind_into](config)) + ) + } + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { any() } + + // Type checking is not necessary here as it has already been done in stage 3. + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } +} + +private module Stage4 = MkStage::Stage; + +bindingset[conf, result] +private Configuration unbindConf(Configuration conf) { + exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c)) +} + +pragma[nomagic] +private predicate nodeMayUseSummary0( + NodeEx n, DataFlowCallable c, FlowState state, AccessPathApprox apa, Configuration config +) { + exists(AccessPathApprox apa0 | + Stage4::parameterMayFlowThrough(_, c, _, _) and + Stage4::revFlow(n, state, true, _, apa0, config) and + Stage4::fwdFlow(n, state, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and + n.getEnclosingCallable() = c + ) +} + +pragma[nomagic] +private predicate nodeMayUseSummary( + NodeEx n, FlowState state, AccessPathApprox apa, Configuration config +) { + exists(DataFlowCallable c | + Stage4::parameterMayFlowThrough(_, c, apa, config) and + nodeMayUseSummary0(n, c, state, apa, config) + ) +} + +private newtype TSummaryCtx = + TSummaryCtxNone() or + TSummaryCtxSome(ParamNodeEx p, FlowState state, AccessPath ap) { + exists(Configuration config | + Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), config) and + Stage4::revFlow(p, state, _, config) + ) + } + +/** + * A context for generating flow summaries. This represents flow entry through + * a specific parameter with an access path of a specific shape. + * + * Summaries are only created for parameters that may flow through. + */ +abstract private class SummaryCtx extends TSummaryCtx { + abstract string toString(); +} + +/** A summary context from which no flow summary can be generated. */ +private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { + override string toString() { result = "" } +} + +/** A summary context from which a flow summary can be generated. */ +private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { + private ParamNodeEx p; + private FlowState s; + private AccessPath ap; + + SummaryCtxSome() { this = TSummaryCtxSome(p, s, ap) } + + ParameterPosition getParameterPos() { p.isParameterOf(_, result) } + + ParamNodeEx getParamNode() { result = p } + + override string toString() { result = p + ": " + ap } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * Gets the number of length 2 access path approximations that correspond to `apa`. + */ +private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) { + exists(TypedContent tc, int len | + tc = apa.getHead() and + len = apa.len() and + result = + strictcount(AccessPathFront apf | + Stage4::consCand(tc, any(AccessPathApprox ap | ap.getFront() = apf and ap.len() = len - 1), + config) + ) + ) +} + +private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) { + result = + strictcount(NodeEx n, FlowState state | + Stage4::revFlow(n, state, apa, config) or nodeMayUseSummary(n, state, apa, config) + ) +} + +/** + * Holds if a length 2 access path approximation matching `apa` is expected + * to be expensive. + */ +private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) { + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = count1to2unfold(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + apLimit < aps and + tupleLimit < (aps - 1) * nodes + ) +} + +private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) { + exists(TypedContent head | + apa.pop(head) = result and + Stage4::consCand(head, result, config) + ) +} + +/** + * Holds with `unfold = false` if a precise head-tail representation of `apa` is + * expected to be expensive. Holds with `unfold = true` otherwise. + */ +private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) { + if apa.getHead().forceHighPrecision() + then unfold = true + else + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = countPotentialAps(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true + ) +} + +/** + * Gets the number of `AccessPath`s that correspond to `apa`. + */ +private int countAps(AccessPathApprox apa, Configuration config) { + evalUnfold(apa, false, config) and + result = 1 and + (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config)) + or + evalUnfold(apa, false, config) and + result = count1to2unfold(apa, config) and + not expensiveLen1to2unfolding(apa, config) + or + evalUnfold(apa, true, config) and + result = countPotentialAps(apa, config) +} + +/** + * Gets the number of `AccessPath`s that would correspond to `apa` assuming + * that it is expanded to a precise head-tail representation. + */ +language[monotonicAggregates] +private int countPotentialAps(AccessPathApprox apa, Configuration config) { + apa instanceof AccessPathApproxNil and result = 1 + or + result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config)) +} + +private newtype TAccessPath = + TAccessPathNil(DataFlowType t) or + TAccessPathCons(TypedContent head, AccessPath tail) { + exists(AccessPathApproxCons apa | + not evalUnfold(apa, false, _) and + head = apa.getHead() and + tail.getApprox() = getATail(apa, _) + ) + } or + TAccessPathCons2(TypedContent head1, TypedContent head2, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + not expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head1 = apa.getHead() and + head2 = getATail(apa, _).getHead() + ) + } or + TAccessPathCons1(TypedContent head, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head = apa.getHead() + ) + } + +private newtype TPathNode = + TPathNodeMid( + NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config + ) { + // A PathNode is introduced by a source ... + Stage4::revFlow(node, state, config) and + sourceNode(node, state, config) and + ( + if hasSourceCallCtx(config) + then cc instanceof CallContextSomeCall + else cc instanceof CallContextAny + ) and + sc instanceof SummaryCtxNone and + ap = TAccessPathNil(node.getDataFlowType()) + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + pathStep(mid, node, state, cc, sc, ap) and + pragma[only_bind_into](config) = mid.getConfiguration() and + Stage4::revFlow(node, state, ap.getApprox(), pragma[only_bind_into](config)) + ) + } or + TPathNodeSink(NodeEx node, FlowState state, Configuration config) { + exists(PathNodeMid sink | + sink.isAtSink() and + node = sink.getNodeEx() and + state = sink.getState() and + config = sink.getConfiguration() + ) + } + +/** + * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a + * source to a given node with a given `AccessPath`, this indicates the sequence + * of dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ +private class AccessPath extends TAccessPath { + /** Gets the head of this access path, if any. */ + abstract TypedContent getHead(); + + /** Gets the tail of this access path, if any. */ + abstract AccessPath getTail(); + + /** Gets the front of this access path. */ + abstract AccessPathFront getFront(); + + /** Gets the approximation of this access path. */ + abstract AccessPathApprox getApprox(); + + /** Gets the length of this access path. */ + abstract int length(); + + /** Gets a textual representation of this access path. */ + abstract string toString(); + + /** Gets the access path obtained by popping `tc` from this access path, if any. */ + final AccessPath pop(TypedContent tc) { + result = this.getTail() and + tc = this.getHead() + } + + /** Gets the access path obtained by pushing `tc` onto this access path. */ + final AccessPath push(TypedContent tc) { this = result.pop(tc) } +} + +private class AccessPathNil extends AccessPath, TAccessPathNil { + private DataFlowType t; + + AccessPathNil() { this = TAccessPathNil(t) } + + DataFlowType getType() { result = t } + + override TypedContent getHead() { none() } + + override AccessPath getTail() { none() } + + override AccessPathFrontNil getFront() { result = TFrontNil(t) } + + override AccessPathApproxNil getApprox() { result = TNil(t) } + + override int length() { result = 0 } + + override string toString() { result = concat(": " + ppReprType(t)) } +} + +private class AccessPathCons extends AccessPath, TAccessPathCons { + private TypedContent head; + private AccessPath tail; + + AccessPathCons() { this = TAccessPathCons(head, tail) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { result = tail } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { + result = TConsNil(head, tail.(AccessPathNil).getType()) + or + result = TConsCons(head, tail.getHead(), this.length()) + or + result = TCons1(head, this.length()) + } + + override int length() { result = 1 + tail.length() } + + private string toStringImpl(boolean needsSuffix) { + exists(DataFlowType t | + tail = TAccessPathNil(t) and + needsSuffix = false and + result = head.toString() + "]" + concat(" : " + ppReprType(t)) + ) + or + result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix) + or + exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) | + result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true + or + result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false + ) + or + exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) | + result = head + ", " + tc2 + ", ... (" and len > 1 and needsSuffix = true + or + result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false + ) + } + + override string toString() { + result = "[" + this.toStringImpl(true) + this.length().toString() + ")]" + or + result = "[" + this.toStringImpl(false) + } +} + +private class AccessPathCons2 extends AccessPath, TAccessPathCons2 { + private TypedContent head1; + private TypedContent head2; + private int len; + + AccessPathCons2() { this = TAccessPathCons2(head1, head2, len) } + + override TypedContent getHead() { result = head1 } + + override AccessPath getTail() { + Stage4::consCand(head1, result.getApprox(), _) and + result.getHead() = head2 and + result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head1) } + + override AccessPathApproxCons getApprox() { + result = TConsCons(head1, head2, len) or + result = TCons1(head1, len) + } + + override int length() { result = len } + + override string toString() { + if len = 2 + then result = "[" + head1.toString() + ", " + head2.toString() + "]" + else + result = "[" + head1.toString() + ", " + head2.toString() + ", ... (" + len.toString() + ")]" + } +} + +private class AccessPathCons1 extends AccessPath, TAccessPathCons1 { + private TypedContent head; + private int len; + + AccessPathCons1() { this = TAccessPathCons1(head, len) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { + Stage4::consCand(head, result.getApprox(), _) and result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { result = TCons1(head, len) } + + override int length() { result = len } + + override string toString() { + if len = 1 + then result = "[" + head.toString() + "]" + else result = "[" + head.toString() + ", ... (" + len.toString() + ")]" + } +} + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { none() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + + /** Gets the underlying `Node`. */ + final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result } + + /** Gets the `FlowState` of this node. */ + FlowState getState() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + final PathNode getASuccessor() { + result = this.(PathNodeImpl).getANonHiddenSuccessor() and + reach(this) and + reach(result) + } + + /** Holds if this node is a source. */ + predicate isSource() { none() } +} + +abstract private class PathNodeImpl extends PathNode { + abstract PathNodeImpl getASuccessorImpl(); + + private PathNodeImpl getASuccessorIfHidden() { + this.isHidden() and + result = this.getASuccessorImpl() + } + + final PathNodeImpl getANonHiddenSuccessor() { + result = this.getASuccessorImpl().getASuccessorIfHidden*() and + not this.isHidden() and + not result.isHidden() + } + + abstract NodeEx getNodeEx(); + + predicate isHidden() { + not this.getConfiguration().includeHiddenNodes() and + ( + hiddenNode(this.getNodeEx().asNode()) and + not this.isSource() and + not this instanceof PathNodeSink + or + this.getNodeEx() instanceof TNodeImplicitRead + ) + } + + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + exists(string s | s = this.(PathNodeMid).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + this instanceof PathNodeSink and result = "" + or + result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" + } + + override string toString() { result = this.getNodeEx().toString() + this.ppAp() } + + override string toStringWithContext() { + result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() + } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** Holds if `n` can reach a sink. */ +private predicate directReach(PathNodeImpl n) { + n instanceof PathNodeSink or directReach(n.getANonHiddenSuccessor()) +} + +/** Holds if `n` can reach a sink or is used in a subpath that can reach a sink. */ +private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) } + +/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNodeImpl n1, PathNode n2) { + n1.getANonHiddenSuccessor() = n2 and directReach(n2) +} + +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b } + + /** Holds if `n` is a node in the graph of data flow path explanations. */ + query predicate nodes(PathNode n, string key, string val) { + reach(n) and key = "semmle.label" and val = n.toString() + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through + * a subpath between `par` and `ret` with the connecting edges `arg -> par` and + * `ret -> out` is summarized as the edge `arg -> out`. + */ + query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) { + Subpaths::subpaths(arg, par, ret, out) and + reach(arg) and + reach(par) and + reach(ret) and + reach(out) + } +} + +/** + * An intermediate flow graph node. This is a triple consisting of a `Node`, + * a `CallContext`, and a `Configuration`. + */ +private class PathNodeMid extends PathNodeImpl, TPathNodeMid { + NodeEx node; + FlowState state; + CallContext cc; + SummaryCtx sc; + AccessPath ap; + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, state, cc, sc, ap, config) } + + override NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + CallContext getCallContext() { result = cc } + + SummaryCtx getSummaryCtx() { result = sc } + + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + private PathNodeMid getSuccMid() { + pathStep(this, result.getNodeEx(), result.getState(), result.getCallContext(), + result.getSummaryCtx(), result.getAp()) and + result.getConfiguration() = unbindConf(this.getConfiguration()) + } + + override PathNodeImpl getASuccessorImpl() { + // an intermediate step to another intermediate node + result = this.getSuccMid() + or + // a final step to a sink + result = this.getSuccMid().projectToSink() + } + + override predicate isSource() { + sourceNode(node, state, config) and + ( + if hasSourceCallCtx(config) + then cc instanceof CallContextSomeCall + else cc instanceof CallContextAny + ) and + sc instanceof SummaryCtxNone and + ap = TAccessPathNil(node.getDataFlowType()) + } + + predicate isAtSink() { + sinkNode(node, state, config) and + ap instanceof AccessPathNil and + if hasSinkCallCtx(config) + then + // For `FeatureHasSinkCallContext` the condition `cc instanceof CallContextNoCall` + // is exactly what we need to check. This also implies + // `sc instanceof SummaryCtxNone`. + // For `FeatureEqualSourceSinkCallContext` the initial call context was + // set to `CallContextSomeCall` and jumps are disallowed, so + // `cc instanceof CallContextNoCall` never holds. On the other hand, + // in this case there's never any need to enter a call except to identify + // a summary, so the condition in `pathIntoCallable` enforces this, which + // means that `sc instanceof SummaryCtxNone` holds if and only if we are + // in the call context of the source. + sc instanceof SummaryCtxNone or + cc instanceof CallContextNoCall + else any() + } + + PathNodeSink projectToSink() { + this.isAtSink() and + result.getNodeEx() = node and + result.getState() = state and + result.getConfiguration() = unbindConf(config) + } +} + +/** + * A flow graph node corresponding to a sink. This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNodeImpl, TPathNodeSink { + NodeEx node; + FlowState state; + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, state, config) } + + override NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + override Configuration getConfiguration() { result = config } + + override PathNodeImpl getASuccessorImpl() { none() } + + override predicate isSource() { sourceNode(node, state, config) } +} + +private predicate pathNode( + PathNodeMid mid, NodeEx midnode, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, + Configuration conf, LocalCallContext localCC +) { + midnode = mid.getNodeEx() and + state = mid.getState() and + conf = mid.getConfiguration() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + localCC = + getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)), + midnode.getEnclosingCallable()) and + ap = mid.getAp() +} + +/** + * Holds if data may flow from `mid` to `node`. The last step in or out of + * a callable is recorded by `cc`. + */ +pragma[nomagic] +private predicate pathStep( + PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap +) { + exists(NodeEx midnode, FlowState state0, Configuration conf, LocalCallContext localCC | + pathNode(mid, midnode, state0, cc, sc, ap, conf, localCC) and + localFlowBigStep(midnode, state0, node, state, true, _, conf, localCC) + ) + or + exists( + AccessPath ap0, NodeEx midnode, FlowState state0, Configuration conf, LocalCallContext localCC + | + pathNode(mid, midnode, state0, cc, sc, ap0, conf, localCC) and + localFlowBigStep(midnode, state0, node, state, false, ap.getFront(), conf, localCC) and + ap0 instanceof AccessPathNil + ) + or + jumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and + state = mid.getState() and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = mid.getAp() + or + additionalJumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and + state = mid.getState() and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TAccessPathNil(node.getDataFlowType()) + or + additionalJumpStateStep(mid.getNodeEx(), mid.getState(), node, state, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TAccessPathNil(node.getDataFlowType()) + or + exists(TypedContent tc | pathStoreStep(mid, node, state, ap.pop(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + exists(TypedContent tc | pathReadStep(mid, node, state, ap.push(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + pathIntoCallable(mid, node, state, _, cc, sc, _, _) and ap = mid.getAp() + or + pathOutOfCallable(mid, node, state, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone + or + pathThroughCallable(mid, node, state, cc, ap) and sc = mid.getSummaryCtx() +} + +pragma[nomagic] +private predicate pathReadStep( + PathNodeMid mid, NodeEx node, FlowState state, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + tc = ap0.getHead() and + Stage4::readStepCand(mid.getNodeEx(), tc.getContent(), node, mid.getConfiguration()) and + state = mid.getState() and + cc = mid.getCallContext() +} + +pragma[nomagic] +private predicate pathStoreStep( + PathNodeMid mid, NodeEx node, FlowState state, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + Stage4::storeStepCand(mid.getNodeEx(), _, tc, node, _, mid.getConfiguration()) and + state = mid.getState() and + cc = mid.getCallContext() +} + +private predicate pathOutOfCallable0( + PathNodeMid mid, ReturnPosition pos, FlowState state, CallContext innercc, AccessPathApprox apa, + Configuration config +) { + pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and + state = mid.getState() and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + apa = mid.getAp().getApprox() and + config = mid.getConfiguration() +} + +pragma[nomagic] +private predicate pathOutOfCallable1( + PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, FlowState state, CallContext cc, + AccessPathApprox apa, Configuration config +) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + pathOutOfCallable0(mid, pos, state, innercc, apa, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) +} + +pragma[noinline] +private NodeEx getAnOutNodeFlow( + ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config +) { + result.asNode() = kind.getAnOutNode(call) and + Stage4::revFlow(result, _, apa, config) +} + +/** + * Holds if data may flow from `mid` to `out`. The last step of this path + * is a return from a callable and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, FlowState state, CallContext cc) { + exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config | + pathOutOfCallable1(mid, call, kind, state, cc, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. + */ +pragma[noinline] +private predicate pathIntoArg( + PathNodeMid mid, ParameterPosition ppos, FlowState state, CallContext cc, DataFlowCall call, + AccessPath ap, AccessPathApprox apa, Configuration config +) { + exists(ArgNodeEx arg, ArgumentPosition apos | + pathNode(mid, arg, state, cc, _, ap, config, _) and + arg.asNode().(ArgNode).argumentOf(call, apos) and + apa = ap.getApprox() and + parameterMatch(ppos, apos) + ) +} + +pragma[nomagic] +private predicate parameterCand( + DataFlowCallable callable, ParameterPosition pos, AccessPathApprox apa, Configuration config +) { + exists(ParamNodeEx p | + Stage4::revFlow(p, _, apa, config) and + p.isParameterOf(callable, pos) + ) +} + +pragma[nomagic] +private predicate pathIntoCallable0( + PathNodeMid mid, DataFlowCallable callable, ParameterPosition pos, FlowState state, + CallContext outercc, DataFlowCall call, AccessPath ap, Configuration config +) { + exists(AccessPathApprox apa | + pathIntoArg(mid, pragma[only_bind_into](pos), state, outercc, call, ap, + pragma[only_bind_into](apa), pragma[only_bind_into](config)) and + callable = resolveCall(call, outercc) and + parameterCand(callable, pragma[only_bind_into](pos), pragma[only_bind_into](apa), + pragma[only_bind_into](config)) + ) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + */ +pragma[nomagic] +private predicate pathIntoCallable( + PathNodeMid mid, ParamNodeEx p, FlowState state, CallContext outercc, CallContextCall innercc, + SummaryCtx sc, DataFlowCall call, Configuration config +) { + exists(ParameterPosition pos, DataFlowCallable callable, AccessPath ap | + pathIntoCallable0(mid, callable, pos, state, outercc, call, ap, config) and + p.isParameterOf(callable, pos) and + ( + sc = TSummaryCtxSome(p, state, ap) + or + not exists(TSummaryCtxSome(p, state, ap)) and + sc = TSummaryCtxNone() and + // When the call contexts of source and sink needs to match then there's + // never any reason to enter a callable except to find a summary. See also + // the comment in `PathNodeMid::isAtSink`. + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) +} + +/** Holds if data may flow from a parameter given by `sc` to a return of kind `kind`. */ +pragma[nomagic] +private predicate paramFlowsThrough( + ReturnKindExt kind, FlowState state, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, + AccessPathApprox apa, Configuration config +) { + exists(PathNodeMid mid, RetNodeEx ret, ParameterPosition pos | + pathNode(mid, ret, state, cc, sc, ap, config, _) and + kind = ret.getKind() and + apa = ap.getApprox() and + pos = sc.getParameterPos() and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + sc.getParamNode().allowParameterReturnInSelf() + ) + ) +} + +pragma[nomagic] +private predicate pathThroughCallable0( + DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, FlowState state, CallContext cc, + AccessPath ap, AccessPathApprox apa, Configuration config +) { + exists(CallContext innercc, SummaryCtx sc | + pathIntoCallable(mid, _, _, cc, innercc, sc, call, config) and + paramFlowsThrough(kind, state, innercc, sc, ap, apa, config) + ) +} + +/** + * Holds if data may flow from `mid` through a callable to the node `out`. + * The context `cc` is restored to its value prior to entering the callable. + */ +pragma[noinline] +private predicate pathThroughCallable( + PathNodeMid mid, NodeEx out, FlowState state, CallContext cc, AccessPath ap +) { + exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa, Configuration config | + pathThroughCallable0(call, mid, kind, state, cc, ap, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) + ) +} + +private module Subpaths { + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by + * `kind`, `sc`, `apout`, and `innercc`. + */ + pragma[nomagic] + private predicate subpaths01( + PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, + NodeEx out, FlowState sout, AccessPath apout + ) { + exists(Configuration config | + pathThroughCallable(arg, out, pragma[only_bind_into](sout), _, pragma[only_bind_into](apout)) and + pathIntoCallable(arg, par, _, _, innercc, sc, _, config) and + paramFlowsThrough(kind, pragma[only_bind_into](sout), innercc, sc, + pragma[only_bind_into](apout), _, unbindConf(config)) and + not arg.isHidden() + ) + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by + * `kind`, `sc`, `sout`, `apout`, and `innercc`. + */ + pragma[nomagic] + private predicate subpaths02( + PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, + NodeEx out, FlowState sout, AccessPath apout + ) { + subpaths01(arg, par, sc, innercc, kind, out, sout, apout) and + out.asNode() = kind.getAnOutNode(_) + } + + pragma[nomagic] + private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple. + */ + pragma[nomagic] + private predicate subpaths03( + PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout + ) { + exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode | + subpaths02(arg, par, sc, innercc, kind, out, sout, apout) and + pathNode(ret, retnode, sout, innercc, sc, apout, unbindConf(getPathNodeConf(arg)), _) and + kind = retnode.getKind() + ) + } + + private PathNodeImpl localStepToHidden(PathNodeImpl n) { + n.getASuccessorImpl() = result and + result.isHidden() and + exists(NodeEx n1, NodeEx n2 | n1 = n.getNodeEx() and n2 = result.getNodeEx() | + localFlowBigStep(n1, _, n2, _, _, _, _, _) or + store(n1, _, n2, _, _) or + readSet(n1, _, n2, _) + ) + } + + pragma[nomagic] + private predicate hasSuccessor(PathNodeImpl pred, PathNodeMid succ, NodeEx succNode) { + succ = pred.getANonHiddenSuccessor() and + succNode = succ.getNodeEx() + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through + * a subpath between `par` and `ret` with the connecting edges `arg -> par` and + * `ret -> out` is summarized as the edge `arg -> out`. + */ + predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) { + exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 | + pragma[only_bind_into](arg).getANonHiddenSuccessor() = pragma[only_bind_into](out0) and + subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and + hasSuccessor(pragma[only_bind_into](arg), par, p) and + not ret.isHidden() and + pathNode(out0, o, sout, _, _, apout, _, _) + | + out = out0 or out = out0.projectToSink() + ) + } + + /** + * Holds if `n` can reach a return node in a summarized subpath that can reach a sink. + */ + predicate retReach(PathNodeImpl n) { + exists(PathNode out | subpaths(_, _, n, out) | directReach(out) or retReach(out)) + or + exists(PathNodeImpl mid | + retReach(mid) and + n.getANonHiddenSuccessor() = mid and + not subpaths(_, mid, _, _) + ) + } +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration +) { + flowsource.isSource() and + flowsource.getConfiguration() = configuration and + flowsource.(PathNodeImpl).getNodeEx().asNode() = source and + (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and + flowsink.getNodeEx().asNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} + +private predicate finalStats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples +) { + fwd = true and + nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0)) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and + states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state)) and + tuples = count(PathNode pn) + or + fwd = false and + nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and + states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state and reach(pn))) and + tuples = count(PathNode pn | reach(pn)) +} + +/** + * INTERNAL: Only for debugging. + * + * Calculates per-stage metrics for data flow. + */ +predicate stageStats( + int n, string stage, int nodes, int fields, int conscand, int states, int tuples, + Configuration config +) { + stage = "1 Fwd" and + n = 10 and + Stage1::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "1 Rev" and + n = 15 and + Stage1::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "2 Fwd" and + n = 20 and + Stage2::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "2 Rev" and + n = 25 and + Stage2::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "3 Fwd" and + n = 30 and + Stage3::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "3 Rev" and + n = 35 and + Stage3::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "4 Fwd" and + n = 40 and + Stage4::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "4 Rev" and + n = 45 and + Stage4::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "5 Fwd" and n = 50 and finalStats(true, nodes, fields, conscand, states, tuples) + or + stage = "5 Rev" and n = 55 and finalStats(false, nodes, fields, conscand, states, tuples) +} + +private module FlowExploration { + private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { + exists(NodeEx node1, NodeEx node2 | + jumpStep(node1, node2, config) + or + additionalJumpStep(node1, node2, config) + or + additionalJumpStateStep(node1, _, node2, _, config) + or + // flow into callable + viableParamArgEx(_, node2, node1) + or + // flow out of a callable + viableReturnPosOutEx(_, node1.(RetNodeEx).getReturnPosition(), node2) + | + c1 = node1.getEnclosingCallable() and + c2 = node2.getEnclosingCallable() and + c1 != c2 + ) + } + + private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSource(n) or config.isSource(n, _) | c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSrc(mid, config) and callableStep(mid, c, config) + ) + } + + private predicate interestingCallableSink(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSink(n) or config.isSink(n, _) | c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSink(mid, config) and callableStep(c, mid, config) + ) + } + + private newtype TCallableExt = + TCallable(DataFlowCallable c, Configuration config) { + interestingCallableSrc(c, config) or + interestingCallableSink(c, config) + } or + TCallableSrc() or + TCallableSink() + + private predicate callableExtSrc(TCallableSrc src) { any() } + + private predicate callableExtSink(TCallableSink sink) { any() } + + private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { + exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | + callableStep(c1, c2, config) and + ce1 = TCallable(c1, pragma[only_bind_into](config)) and + ce2 = TCallable(c2, pragma[only_bind_into](config)) + ) + or + exists(Node n, Configuration config | + ce1 = TCallableSrc() and + (config.isSource(n) or config.isSource(n, _)) and + ce2 = TCallable(getNodeEnclosingCallable(n), config) + ) + or + exists(Node n, Configuration config | + ce2 = TCallableSink() and + (config.isSink(n) or config.isSink(n, _)) and + ce1 = TCallable(getNodeEnclosingCallable(n), config) + ) + } + + private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) { + callableExtStepFwd(ce2, ce1) + } + + private int distSrcExt(TCallableExt c) = + shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) + + private int distSinkExt(TCallableExt c) = + shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result) + + private int distSrc(DataFlowCallable c, Configuration config) { + result = distSrcExt(TCallable(c, config)) - 1 + } + + private int distSink(DataFlowCallable c, Configuration config) { + result = distSinkExt(TCallable(c, config)) - 1 + } + + private newtype TPartialAccessPath = + TPartialNil(DataFlowType t) or + TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ + private class PartialAccessPath extends TPartialAccessPath { + abstract string toString(); + + TypedContent getHead() { this = TPartialCons(result, _) } + + int len() { + this = TPartialNil(_) and result = 0 + or + this = TPartialCons(_, result) + } + + DataFlowType getType() { + this = TPartialNil(result) + or + exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType()) + } + } + + private class PartialAccessPathNil extends PartialAccessPath, TPartialNil { + override string toString() { + exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t))) + } + } + + private class PartialAccessPathCons extends PartialAccessPath, TPartialCons { + override string toString() { + exists(TypedContent tc, int len | this = TPartialCons(tc, len) | + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private newtype TRevPartialAccessPath = + TRevPartialNil() or + TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `Content`s, but only the first + * element of the list and its length are tracked. + */ + private class RevPartialAccessPath extends TRevPartialAccessPath { + abstract string toString(); + + Content getHead() { this = TRevPartialCons(result, _) } + + int len() { + this = TRevPartialNil() and result = 0 + or + this = TRevPartialCons(_, result) + } + } + + private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil { + override string toString() { result = "" } + } + + private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons { + override string toString() { + exists(Content c, int len | this = TRevPartialCons(c, len) | + if len = 1 + then result = "[" + c.toString() + "]" + else result = "[" + c.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private predicate relevantState(FlowState state) { + sourceNode(_, state, _) or + sinkNode(_, state, _) or + additionalLocalStateStep(_, state, _, _, _) or + additionalLocalStateStep(_, _, _, state, _) or + additionalJumpStateStep(_, state, _, _, _) or + additionalJumpStateStep(_, _, _, state, _) + } + + private newtype TSummaryCtx1 = + TSummaryCtx1None() or + TSummaryCtx1Param(ParamNodeEx p) + + private newtype TSummaryCtx2 = + TSummaryCtx2None() or + TSummaryCtx2Some(FlowState s) { relevantState(s) } + + private newtype TSummaryCtx3 = + TSummaryCtx3None() or + TSummaryCtx3Some(PartialAccessPath ap) + + private newtype TRevSummaryCtx1 = + TRevSummaryCtx1None() or + TRevSummaryCtx1Some(ReturnPosition pos) + + private newtype TRevSummaryCtx2 = + TRevSummaryCtx2None() or + TRevSummaryCtx2Some(FlowState s) { relevantState(s) } + + private newtype TRevSummaryCtx3 = + TRevSummaryCtx3None() or + TRevSummaryCtx3Some(RevPartialAccessPath ap) + + private newtype TPartialPathNode = + TPartialPathNodeFwd( + NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + sourceNode(node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap = TPartialNil(node.getDataFlowType()) and + exists(config.explorationLimit()) + or + partialPathNodeMk0(node, state, cc, sc1, sc2, sc3, ap, config) and + distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit() + } or + TPartialPathNodeRev( + NodeEx node, FlowState state, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, TRevSummaryCtx3 sc3, + RevPartialAccessPath ap, Configuration config + ) { + sinkNode(node, state, config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = TRevPartialNil() and + exists(config.explorationLimit()) + or + exists(PartialPathNodeRev mid | + revPartialPathStep(mid, node, state, sc1, sc2, sc3, ap, config) and + not clearsContentEx(node, ap.getHead()) and + ( + notExpectsContent(node) or + expectsContentEx(node, ap.getHead()) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) and + distSink(node.getEnclosingCallable(), config) <= config.explorationLimit() + ) + } + + pragma[nomagic] + private predicate partialPathNodeMk0( + NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStep(mid, node, state, cc, sc1, sc2, sc3, ap, config) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) and + not clearsContentEx(node, ap.getHead().getContent()) and + ( + notExpectsContent(node) or + expectsContentEx(node, ap.getHead().getContent()) + ) and + if node.asNode() instanceof CastingNode + then compatibleTypes(node.getDataFlowType(), ap.getType()) + else any() + ) + } + + /** + * A `Node` augmented with a call context, an access path, and a configuration. + */ + class PartialPathNode extends TPartialPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = this.getNodeEx().toString() + this.ppAp() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { + result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() + } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets the underlying `Node`. */ + final Node getNode() { this.getNodeEx().projectToNode() = result } + + FlowState getState() { none() } + + private NodeEx getNodeEx() { + result = this.(PartialPathNodeFwd).getNodeEx() or + result = this.(PartialPathNodeRev).getNodeEx() + } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + PartialPathNode getASuccessor() { none() } + + /** + * Gets the approximate distance to the nearest source measured in number + * of interprocedural steps. + */ + int getSourceDistance() { + result = distSrc(this.getNodeEx().getEnclosingCallable(), this.getConfiguration()) + } + + /** + * Gets the approximate distance to the nearest sink measured in number + * of interprocedural steps. + */ + int getSinkDistance() { + result = distSink(this.getNodeEx().getEnclosingCallable(), this.getConfiguration()) + } + + private string ppAp() { + exists(string s | + s = this.(PartialPathNodeFwd).getAp().toString() or + s = this.(PartialPathNodeRev).getAp().toString() + | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">" + } + + /** Holds if this is a source in a forward-flow path. */ + predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() } + + /** Holds if this is a sink in a reverse-flow path. */ + predicate isRevSink() { this.(PartialPathNodeRev).isSink() } + } + + /** + * Provides the query predicates needed to include a graph in a path-problem query. + */ + module PartialPathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } + } + + private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd { + NodeEx node; + FlowState state; + CallContext cc; + TSummaryCtx1 sc1; + TSummaryCtx2 sc2; + TSummaryCtx3 sc3; + PartialAccessPath ap; + Configuration config; + + PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, state, cc, sc1, sc2, sc3, ap, config) } + + NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + CallContext getCallContext() { result = cc } + + TSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TSummaryCtx2 getSummaryCtx2() { result = sc2 } + + TSummaryCtx3 getSummaryCtx3() { result = sc3 } + + PartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeFwd getASuccessor() { + partialPathStep(this, result.getNodeEx(), result.getState(), result.getCallContext(), + result.getSummaryCtx1(), result.getSummaryCtx2(), result.getSummaryCtx3(), result.getAp(), + result.getConfiguration()) + } + + predicate isSource() { + sourceNode(node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap instanceof TPartialNil + } + } + + private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev { + NodeEx node; + FlowState state; + TRevSummaryCtx1 sc1; + TRevSummaryCtx2 sc2; + TRevSummaryCtx3 sc3; + RevPartialAccessPath ap; + Configuration config; + + PartialPathNodeRev() { this = TPartialPathNodeRev(node, state, sc1, sc2, sc3, ap, config) } + + NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + TRevSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TRevSummaryCtx2 getSummaryCtx2() { result = sc2 } + + TRevSummaryCtx3 getSummaryCtx3() { result = sc3 } + + RevPartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeRev getASuccessor() { + revPartialPathStep(result, this.getNodeEx(), this.getState(), this.getSummaryCtx1(), + this.getSummaryCtx2(), this.getSummaryCtx3(), this.getAp(), this.getConfiguration()) + } + + predicate isSink() { + sinkNode(node, state, config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = TRevPartialNil() + } + } + + private predicate partialPathStep( + PartialPathNodeFwd mid, NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, + TSummaryCtx2 sc2, TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + not isUnreachableInCallCached(node.asNode(), cc.(CallContextSpecificCall).getCall()) and + ( + localFlowStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + additionalLocalStateStep(mid.getNodeEx(), mid.getState(), node, state, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + ) + or + jumpStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + additionalJumpStateStep(mid.getNodeEx(), mid.getState(), node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + partialPathStoreStep(mid, _, _, node, ap) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() + or + exists(PartialAccessPath ap0, TypedContent tc | + partialPathReadStep(mid, ap0, tc, node, cc, config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + apConsFwd(ap, tc, ap0, config) + ) + or + partialPathIntoCallable(mid, node, state, _, cc, sc1, sc2, sc3, _, ap, config) + or + partialPathOutOfCallable(mid, node, state, cc, ap, config) and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() + or + partialPathThroughCallable(mid, node, state, cc, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() + } + + bindingset[result, i] + private int unbindInt(int i) { pragma[only_bind_out](i) = pragma[only_bind_out](result) } + + pragma[inline] + private predicate partialPathStoreStep( + PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, NodeEx node, + PartialAccessPath ap2 + ) { + exists(NodeEx midNode, DataFlowType contentType | + midNode = mid.getNodeEx() and + ap1 = mid.getAp() and + store(midNode, tc, node, contentType, mid.getConfiguration()) and + ap2.getHead() = tc and + ap2.len() = unbindInt(ap1.len() + 1) and + compatibleTypes(ap1.getType(), contentType) + ) + } + + pragma[nomagic] + private predicate apConsFwd( + PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStoreStep(mid, ap1, tc, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathReadStep( + PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, NodeEx node, CallContext cc, + Configuration config + ) { + exists(NodeEx midNode | + midNode = mid.getNodeEx() and + ap = mid.getAp() and + read(midNode, tc.getContent(), node, pragma[only_bind_into](config)) and + ap.getHead() = tc and + pragma[only_bind_into](config) = mid.getConfiguration() and + cc = mid.getCallContext() + ) + } + + private predicate partialPathOutOfCallable0( + PartialPathNodeFwd mid, ReturnPosition pos, FlowState state, CallContext innercc, + PartialAccessPath ap, Configuration config + ) { + pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and + state = mid.getState() and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + ap = mid.getAp() and + config = mid.getConfiguration() + } + + pragma[nomagic] + private predicate partialPathOutOfCallable1( + PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, FlowState state, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + partialPathOutOfCallable0(mid, pos, state, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) + } + + private predicate partialPathOutOfCallable( + PartialPathNodeFwd mid, NodeEx out, FlowState state, CallContext cc, PartialAccessPath ap, + Configuration config + ) { + exists(ReturnKindExt kind, DataFlowCall call | + partialPathOutOfCallable1(mid, call, kind, state, cc, ap, config) + | + out.asNode() = kind.getAnOutNode(call) + ) + } + + pragma[noinline] + private predicate partialPathIntoArg( + PartialPathNodeFwd mid, ParameterPosition ppos, FlowState state, CallContext cc, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + exists(ArgNode arg, ArgumentPosition apos | + arg = mid.getNodeEx().asNode() and + state = mid.getState() and + cc = mid.getCallContext() and + arg.argumentOf(call, apos) and + ap = mid.getAp() and + config = mid.getConfiguration() and + parameterMatch(ppos, apos) + ) + } + + pragma[nomagic] + private predicate partialPathIntoCallable0( + PartialPathNodeFwd mid, DataFlowCallable callable, ParameterPosition pos, FlowState state, + CallContext outercc, DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + partialPathIntoArg(mid, pos, state, outercc, call, ap, config) and + callable = resolveCall(call, outercc) + } + + private predicate partialPathIntoCallable( + PartialPathNodeFwd mid, ParamNodeEx p, FlowState state, CallContext outercc, + CallContextCall innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, TSummaryCtx3 sc3, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + exists(ParameterPosition pos, DataFlowCallable callable | + partialPathIntoCallable0(mid, callable, pos, state, outercc, call, ap, config) and + p.isParameterOf(callable, pos) and + sc1 = TSummaryCtx1Param(p) and + sc2 = TSummaryCtx2Some(state) and + sc3 = TSummaryCtx3Some(ap) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) + } + + pragma[nomagic] + private predicate paramFlowsThroughInPartialPath( + ReturnKindExt kind, FlowState state, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeFwd mid, RetNodeEx ret | + mid.getNodeEx() = ret and + kind = ret.getKind() and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() and + ap = mid.getAp() + ) + } + + pragma[noinline] + private predicate partialPathThroughCallable0( + DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, FlowState state, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, TSummaryCtx3 sc3 | + partialPathIntoCallable(mid, _, _, cc, innercc, sc1, sc2, sc3, call, _, config) and + paramFlowsThroughInPartialPath(kind, state, innercc, sc1, sc2, sc3, ap, config) + ) + } + + private predicate partialPathThroughCallable( + PartialPathNodeFwd mid, NodeEx out, FlowState state, CallContext cc, PartialAccessPath ap, + Configuration config + ) { + exists(DataFlowCall call, ReturnKindExt kind | + partialPathThroughCallable0(call, mid, kind, state, cc, ap, config) and + out.asNode() = kind.getAnOutNode(call) + ) + } + + pragma[nomagic] + private predicate revPartialPathStep( + PartialPathNodeRev mid, NodeEx node, FlowState state, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, + TRevSummaryCtx3 sc3, RevPartialAccessPath ap, Configuration config + ) { + localFlowStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + additionalLocalStateStep(node, state, mid.getNodeEx(), mid.getState(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + jumpStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + additionalJumpStateStep(node, state, mid.getNodeEx(), mid.getState(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + revPartialPathReadStep(mid, _, _, node, ap) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() + or + exists(RevPartialAccessPath ap0, Content c | + revPartialPathStoreStep(mid, ap0, c, node, config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + apConsRev(ap, c, ap0, config) + ) + or + exists(ParamNodeEx p | + mid.getNodeEx() = p and + viableParamArgEx(_, p, node) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + or + exists(ReturnPosition pos | + revPartialPathIntoReturn(mid, pos, state, sc1, sc2, sc3, _, ap, config) and + pos = getReturnPosition(node.asNode()) + ) + or + revPartialPathThroughCallable(mid, node, state, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() + } + + pragma[inline] + private predicate revPartialPathReadStep( + PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, NodeEx node, + RevPartialAccessPath ap2 + ) { + exists(NodeEx midNode | + midNode = mid.getNodeEx() and + ap1 = mid.getAp() and + read(node, c, midNode, mid.getConfiguration()) and + ap2.getHead() = c and + ap2.len() = unbindInt(ap1.len() + 1) + ) + } + + pragma[nomagic] + private predicate apConsRev( + RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeRev mid | + revPartialPathReadStep(mid, ap1, c, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathStoreStep( + PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, NodeEx node, Configuration config + ) { + exists(NodeEx midNode, TypedContent tc | + midNode = mid.getNodeEx() and + ap = mid.getAp() and + store(node, tc, midNode, _, config) and + ap.getHead() = c and + config = mid.getConfiguration() and + tc.getContent() = c + ) + } + + pragma[nomagic] + private predicate revPartialPathIntoReturn( + PartialPathNodeRev mid, ReturnPosition pos, FlowState state, TRevSummaryCtx1Some sc1, + TRevSummaryCtx2Some sc2, TRevSummaryCtx3Some sc3, DataFlowCall call, RevPartialAccessPath ap, + Configuration config + ) { + exists(NodeEx out | + mid.getNodeEx() = out and + mid.getState() = state and + viableReturnPosOutEx(call, pos, out) and + sc1 = TRevSummaryCtx1Some(pos) and + sc2 = TRevSummaryCtx2Some(state) and + sc3 = TRevSummaryCtx3Some(ap) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathFlowsThrough( + ArgumentPosition apos, FlowState state, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, + TRevSummaryCtx3Some sc3, RevPartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeRev mid, ParamNodeEx p, ParameterPosition ppos | + mid.getNodeEx() = p and + mid.getState() = state and + p.getPosition() = ppos and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() and + parameterMatch(ppos, apos) + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable0( + DataFlowCall call, PartialPathNodeRev mid, ArgumentPosition pos, FlowState state, + RevPartialAccessPath ap, Configuration config + ) { + exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, TRevSummaryCtx3Some sc3 | + revPartialPathIntoReturn(mid, _, _, sc1, sc2, sc3, call, _, config) and + revPartialPathFlowsThrough(pos, state, sc1, sc2, sc3, ap, config) + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable( + PartialPathNodeRev mid, ArgNodeEx node, FlowState state, RevPartialAccessPath ap, + Configuration config + ) { + exists(DataFlowCall call, ArgumentPosition pos | + revPartialPathThroughCallable0(call, mid, pos, state, ap, config) and + node.asNode().(ArgNode).argumentOf(call, pos) + ) + } +} + +import FlowExploration + +private predicate partialFlow( + PartialPathNode source, PartialPathNode node, Configuration configuration +) { + source.getConfiguration() = configuration and + source.isFwdSource() and + node = source.getASuccessor+() +} + +private predicate revPartialFlow( + PartialPathNode node, PartialPathNode sink, Configuration configuration +) { + sink.getConfiguration() = configuration and + sink.isRevSink() and + node.getASuccessor+() = sink +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll new file mode 100644 index 000000000000..468f8640a784 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl3.qll @@ -0,0 +1,4450 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from + * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed + * through the `Configuration` class. This file exists in several identical + * copies, allowing queries to use multiple `Configuration` classes that depend + * on each other without introducing mutual recursion among those configurations. + */ + +private import DataFlowImplCommon +private import DataFlowImplSpecific::Private +import DataFlowImplSpecific::Public +import DataFlowImplCommonPublic + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ```ql + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. + * } + * ``` + * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and + * the edges are those data-flow steps that preserve the value of the node + * along with any additional edges defined by `isAdditionalFlowStep`. + * Specifying nodes in `isBarrier` will remove those nodes from the graph, and + * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going + * and/or out-going edges from those nodes, respectively. + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + predicate isSource(Node source) { none() } + + /** + * Holds if `source` is a relevant data flow source with the given initial + * `state`. + */ + predicate isSource(Node source, FlowState state) { none() } + + /** + * Holds if `sink` is a relevant data flow sink. + */ + predicate isSink(Node sink) { none() } + + /** + * Holds if `sink` is a relevant data flow sink accepting `state`. + */ + predicate isSink(Node source, FlowState state) { none() } + + /** + * Holds if data flow through `node` is prohibited. This completely removes + * `node` from the data flow graph. + */ + predicate isBarrier(Node node) { none() } + + /** + * Holds if data flow through `node` is prohibited when the flow state is + * `state`. + */ + predicate isBarrier(Node node, FlowState state) { none() } + + /** Holds if data flow into `node` is prohibited. */ + predicate isBarrierIn(Node node) { none() } + + /** Holds if data flow out of `node` is prohibited. */ + predicate isBarrierOut(Node node) { none() } + + /** + * DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead. + * + * Holds if data flow through nodes guarded by `guard` is prohibited. + */ + deprecated predicate isBarrierGuard(BarrierGuard guard) { none() } + + /** + * DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead. + * + * Holds if data flow through nodes guarded by `guard` is prohibited when + * the flow state is `state` + */ + deprecated predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() } + + /** + * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps. + * This step is only applicable in `state1` and updates the flow state to `state2`. + */ + predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) { + none() + } + + /** + * Holds if an arbitrary number of implicit read steps of content `c` may be + * taken at `node`. + */ + predicate allowImplicitRead(Node node, ContentSet c) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Gets a data flow configuration feature to add restrictions to the set of + * valid flow paths. + * + * - `FeatureHasSourceCallContext`: + * Assume that sources have some existing call context to disallow + * conflicting return-flow directly following the source. + * - `FeatureHasSinkCallContext`: + * Assume that sinks have some existing call context to disallow + * conflicting argument-to-parameter flow directly preceding the sink. + * - `FeatureEqualSourceSinkCallContext`: + * Implies both of the above and additionally ensures that the entire flow + * path preserves the call context. + */ + FlowFeature getAFeature() { none() } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. + */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { this.hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(DataFlowExpr sink) { this.hasFlowTo(exprNode(sink)) } + + /** + * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev` + * measured in approximate number of interprocedural steps. + */ + int explorationLimit() { none() } + + /** + * Holds if hidden nodes should be included in the data flow graph. + * + * This feature should only be used for debugging or when the data flow graph + * is not visualized (for example in a `path-problem` query). + */ + predicate includeHiddenNodes() { none() } + + /** + * Holds if there is a partial data flow path from `source` to `node`. The + * approximate distance between `node` and the closest source is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards sink definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sources is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + */ + final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + partialFlow(source, node, this) and + dist = node.getSourceDistance() + } + + /** + * Holds if there is a partial data flow path from `node` to `sink`. The + * approximate distance between `node` and the closest sink is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards source definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sinks is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + * + * Note that reverse flow has slightly lower precision than the corresponding + * forward flow, as reverse flow disregards type pruning among other features. + */ + final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) { + revPartialFlow(node, sink, this) and + dist = node.getSinkDistance() + } +} + +/** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. + */ +abstract private class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSource(n, _)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n | this.isSink(n, _)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, _, n2, _)) < 0 + or + super.hasFlow(source, sink) + } +} + +private newtype TNodeEx = + TNodeNormal(Node n) or + TNodeImplicitRead(Node n, boolean hasRead) { + any(Configuration c).allowImplicitRead(n, _) and hasRead = [false, true] + } + +private class NodeEx extends TNodeEx { + string toString() { + result = this.asNode().toString() + or + exists(Node n | this.isImplicitReadNode(n, _) | result = n.toString() + " [Ext]") + } + + Node asNode() { this = TNodeNormal(result) } + + predicate isImplicitReadNode(Node n, boolean hasRead) { this = TNodeImplicitRead(n, hasRead) } + + Node projectToNode() { this = TNodeNormal(result) or this = TNodeImplicitRead(result, _) } + + pragma[nomagic] + private DataFlowCallable getEnclosingCallable0() { + nodeEnclosingCallable(this.projectToNode(), result) + } + + pragma[inline] + DataFlowCallable getEnclosingCallable() { + pragma[only_bind_out](this).getEnclosingCallable0() = pragma[only_bind_into](result) + } + + pragma[nomagic] + private DataFlowType getDataFlowType0() { nodeDataFlowType(this.asNode(), result) } + + pragma[inline] + DataFlowType getDataFlowType() { + pragma[only_bind_out](this).getDataFlowType0() = pragma[only_bind_into](result) + } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.projectToNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +private class ArgNodeEx extends NodeEx { + ArgNodeEx() { this.asNode() instanceof ArgNode } +} + +private class ParamNodeEx extends NodeEx { + ParamNodeEx() { this.asNode() instanceof ParamNode } + + predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) { + this.asNode().(ParamNode).isParameterOf(c, pos) + } + + ParameterPosition getPosition() { this.isParameterOf(_, result) } + + predicate allowParameterReturnInSelf() { allowParameterReturnInSelfCached(this.asNode()) } +} + +private class RetNodeEx extends NodeEx { + RetNodeEx() { this.asNode() instanceof ReturnNodeExt } + + ReturnPosition getReturnPosition() { result = getReturnPosition(this.asNode()) } + + ReturnKindExt getKind() { result = this.asNode().(ReturnNodeExt).getKind() } +} + +private predicate inBarrier(NodeEx node, Configuration config) { + exists(Node n | + node.asNode() = n and + config.isBarrierIn(n) + | + config.isSource(n) or config.isSource(n, _) + ) +} + +private predicate outBarrier(NodeEx node, Configuration config) { + exists(Node n | + node.asNode() = n and + config.isBarrierOut(n) + | + config.isSink(n) or config.isSink(n, _) + ) +} + +/** A bridge class to access the deprecated `isBarrierGuard`. */ +private class BarrierGuardGuardedNodeBridge extends Unit { + abstract predicate guardedNode(Node n, Configuration config); + + abstract predicate guardedNode(Node n, FlowState state, Configuration config); +} + +private class BarrierGuardGuardedNode extends BarrierGuardGuardedNodeBridge { + deprecated override predicate guardedNode(Node n, Configuration config) { + exists(BarrierGuard g | + config.isBarrierGuard(g) and + n = g.getAGuardedNode() + ) + } + + deprecated override predicate guardedNode(Node n, FlowState state, Configuration config) { + exists(BarrierGuard g | + config.isBarrierGuard(g, state) and + n = g.getAGuardedNode() + ) + } +} + +pragma[nomagic] +private predicate fullBarrier(NodeEx node, Configuration config) { + exists(Node n | node.asNode() = n | + config.isBarrier(n) + or + config.isBarrierIn(n) and + not config.isSource(n) and + not config.isSource(n, _) + or + config.isBarrierOut(n) and + not config.isSink(n) and + not config.isSink(n, _) + or + any(BarrierGuardGuardedNodeBridge b).guardedNode(n, config) + ) +} + +pragma[nomagic] +private predicate stateBarrier(NodeEx node, FlowState state, Configuration config) { + exists(Node n | node.asNode() = n | + config.isBarrier(n, state) + or + any(BarrierGuardGuardedNodeBridge b).guardedNode(n, state, config) + ) +} + +pragma[nomagic] +private predicate sourceNode(NodeEx node, FlowState state, Configuration config) { + ( + config.isSource(node.asNode()) and state instanceof FlowStateEmpty + or + config.isSource(node.asNode(), state) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) +} + +pragma[nomagic] +private predicate sinkNode(NodeEx node, FlowState state, Configuration config) { + ( + config.isSink(node.asNode()) and state instanceof FlowStateEmpty + or + config.isSink(node.asNode(), state) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) +} + +/** Provides the relevant barriers for a step from `node1` to `node2`. */ +pragma[inline] +private predicate stepFilter(NodeEx node1, NodeEx node2, Configuration config) { + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if data can flow in one local step from `node1` to `node2`. + */ +private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + simpleLocalFlowStepExt(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + stepFilter(node1, node2, config) + ) + or + exists(Node n | + config.allowImplicitRead(n, _) and + node1.asNode() = n and + node2.isImplicitReadNode(n, false) and + not fullBarrier(node1, config) + ) +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. + */ +private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) + ) + or + exists(Node n | + config.allowImplicitRead(n, _) and + node1.isImplicitReadNode(n, true) and + node2.asNode() = n and + not fullBarrier(node2, config) + ) +} + +private predicate additionalLocalStateStep( + NodeEx node1, FlowState s1, NodeEx node2, FlowState s2, Configuration config +) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), s1, pragma[only_bind_into](n2), s2) and + getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not stateBarrier(node1, s1, config) and + not stateBarrier(node2, s2, config) + ) +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. + */ +private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + jumpStepCached(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + stepFilter(node1, node2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. + */ +private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +private predicate additionalJumpStateStep( + NodeEx node1, FlowState s1, NodeEx node2, FlowState s2, Configuration config +) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), s1, pragma[only_bind_into](n2), s2) and + getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not stateBarrier(node1, s1, config) and + not stateBarrier(node2, s2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +pragma[nomagic] +private predicate readSet(NodeEx node1, ContentSet c, NodeEx node2, Configuration config) { + readSet(pragma[only_bind_into](node1.asNode()), c, pragma[only_bind_into](node2.asNode())) and + stepFilter(node1, node2, config) + or + exists(Node n | + node2.isImplicitReadNode(n, true) and + node1.isImplicitReadNode(n, _) and + config.allowImplicitRead(n, c) + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(ContentSet cs | + readSet(node1, cs, node2, config) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate clearsContentEx(NodeEx n, Content c) { + exists(ContentSet cs | + clearsContentCached(n.asNode(), cs) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate expectsContentEx(NodeEx n, Content c) { + exists(ContentSet cs | + expectsContentCached(n.asNode(), cs) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +pragma[nomagic] +private predicate notExpectsContent(NodeEx n) { not expectsContentCached(n.asNode(), _) } + +pragma[nomagic] +private predicate store( + NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config +) { + store(pragma[only_bind_into](node1.asNode()), tc, pragma[only_bind_into](node2.asNode()), + contentType) and + read(_, tc.getContent(), _, config) and + stepFilter(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutEx(DataFlowCall call, ReturnPosition pos, NodeEx out) { + viableReturnPosOut(call, pos, out.asNode()) +} + +pragma[nomagic] +private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx arg) { + viableParamArg(call, p.asNode(), arg.asNode()) +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +private predicate hasSourceCallCtx(Configuration config) { + exists(FlowFeature feature | feature = config.getAFeature() | + feature instanceof FeatureHasSourceCallContext or + feature instanceof FeatureEqualSourceSinkCallContext + ) +} + +private predicate hasSinkCallCtx(Configuration config) { + exists(FlowFeature feature | feature = config.getAFeature() | + feature instanceof FeatureHasSinkCallContext or + feature instanceof FeatureEqualSourceSinkCallContext + ) +} + +private module Stage1 implements StageSig { + class ApApprox = Unit; + + class Ap = Unit; + + class ApOption = Unit; + + class Cc = boolean; + + /* Begin: Stage 1 logic. */ + /** + * Holds if `node` is reachable from a source in the configuration `config`. + * + * The Boolean `cc` records whether the node is reached through an + * argument in a call. + */ + predicate fwdFlow(NodeEx node, Cc cc, Configuration config) { + sourceNode(node, _, config) and + if hasSourceCallCtx(config) then cc = true else cc = false + or + exists(NodeEx mid | fwdFlow(mid, cc, config) | + localFlowStep(mid, node, config) or + additionalLocalFlowStep(mid, node, config) or + additionalLocalStateStep(mid, _, node, _, config) + ) + or + exists(NodeEx mid | fwdFlow(mid, _, config) and cc = false | + jumpStep(mid, node, config) or + additionalJumpStep(mid, node, config) or + additionalJumpStateStep(mid, _, node, _, config) + ) + or + // store + exists(NodeEx mid | + useFieldFlow(config) and + fwdFlow(mid, cc, config) and + store(mid, _, node, _, config) + ) + or + // read + exists(ContentSet c | + fwdFlowReadSet(c, node, cc, config) and + fwdFlowConsCandSet(c, _, config) + ) + or + // flow into a callable + exists(NodeEx arg | + fwdFlow(arg, _, config) and + viableParamArgEx(_, node, arg) and + cc = true and + not fullBarrier(node, config) + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, false, config) and + cc = false + or + fwdFlowOutFromArg(call, node, config) and + fwdFlowIsEntered(call, cc, config) + ) + } + + private predicate fwdFlow(NodeEx node, Configuration config) { fwdFlow(node, _, config) } + + pragma[nomagic] + private predicate fwdFlowReadSet(ContentSet c, NodeEx node, Cc cc, Configuration config) { + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + readSet(mid, c, node, config) + ) + } + + /** + * Holds if `c` is the target of a store in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Content c, Configuration config) { + exists(NodeEx mid, NodeEx node, TypedContent tc | + not fullBarrier(node, config) and + useFieldFlow(config) and + fwdFlow(mid, _, config) and + store(mid, tc, node, _, config) and + c = tc.getContent() + ) + } + + /** + * Holds if `cs` may be interpreted in a read as the target of some store + * into `c`, in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowConsCandSet(ContentSet cs, Content c, Configuration config) { + fwdFlowConsCand(c, config) and + c = cs.getAReadContent() + } + + pragma[nomagic] + private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) { + exists(RetNodeEx ret | + fwdFlow(ret, cc, config) and + ret.getReturnPosition() = pos + ) + } + + pragma[nomagic] + private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) { + exists(ReturnPosition pos | + fwdFlowReturnPosition(pos, cc, config) and + viableReturnPosOutEx(call, pos, out) and + not fullBarrier(out, config) + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg(DataFlowCall call, NodeEx out, Configuration config) { + fwdFlowOut(call, out, true, config) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) { + exists(ArgNodeEx arg | + fwdFlow(arg, cc, config) and + viableParamArgEx(call, _, arg) + ) + } + + private predicate stateStepFwd(FlowState state1, FlowState state2, Configuration config) { + exists(NodeEx node1 | + additionalLocalStateStep(node1, state1, _, state2, config) or + additionalJumpStateStep(node1, state1, _, state2, config) + | + fwdFlow(node1, config) + ) + } + + private predicate fwdFlowState(FlowState state, Configuration config) { + sourceNode(_, state, config) + or + exists(FlowState state0 | + fwdFlowState(state0, config) and + stateStepFwd(state0, state, config) + ) + } + + /** + * Holds if `node` is part of a path from a source to a sink in the + * configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink. + */ + pragma[nomagic] + predicate revFlow(NodeEx node, boolean toReturn, Configuration config) { + revFlow0(node, toReturn, config) and + fwdFlow(node, config) + } + + pragma[nomagic] + private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) { + exists(FlowState state | + fwdFlow(node, pragma[only_bind_into](config)) and + sinkNode(node, state, config) and + fwdFlowState(state, pragma[only_bind_into](config)) and + if hasSinkCallCtx(config) then toReturn = true else toReturn = false + ) + or + exists(NodeEx mid | revFlow(mid, toReturn, config) | + localFlowStep(node, mid, config) or + additionalLocalFlowStep(node, mid, config) or + additionalLocalStateStep(node, _, mid, _, config) + ) + or + exists(NodeEx mid | revFlow(mid, _, config) and toReturn = false | + jumpStep(node, mid, config) or + additionalJumpStep(node, mid, config) or + additionalJumpStateStep(node, _, mid, _, config) + ) + or + // store + exists(Content c | + revFlowStore(c, node, toReturn, config) and + revFlowConsCand(c, config) + ) + or + // read + exists(NodeEx mid, ContentSet c | + readSet(node, c, mid, config) and + fwdFlowConsCandSet(c, _, pragma[only_bind_into](config)) and + revFlow(mid, toReturn, pragma[only_bind_into](config)) + ) + or + // flow into a callable + exists(DataFlowCall call | + revFlowIn(call, node, false, config) and + toReturn = false + or + revFlowInToReturn(call, node, config) and + revFlowIsReturned(call, toReturn, config) + ) + or + // flow out of a callable + exists(ReturnPosition pos | + revFlowOut(pos, config) and + node.(RetNodeEx).getReturnPosition() = pos and + toReturn = true + ) + } + + /** + * Holds if `c` is the target of a read in the flow covered by `revFlow`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Content c, Configuration config) { + exists(NodeEx mid, NodeEx node, ContentSet cs | + fwdFlow(node, pragma[only_bind_into](config)) and + readSet(node, cs, mid, config) and + fwdFlowConsCandSet(cs, c, pragma[only_bind_into](config)) and + revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + private predicate revFlowStore(Content c, NodeEx node, boolean toReturn, Configuration config) { + exists(NodeEx mid, TypedContent tc | + revFlow(mid, toReturn, pragma[only_bind_into](config)) and + fwdFlowConsCand(c, pragma[only_bind_into](config)) and + store(node, tc, mid, _, config) and + c = tc.getContent() + ) + } + + /** + * Holds if `c` is the target of both a read and a store in the flow covered + * by `revFlow`. + */ + pragma[nomagic] + predicate revFlowIsReadAndStored(Content c, Configuration conf) { + revFlowConsCand(c, conf) and + revFlowStore(c, _, _, conf) + } + + pragma[nomagic] + predicate viableReturnPosOutNodeCandFwd1( + DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config + ) { + fwdFlowReturnPosition(pos, _, config) and + viableReturnPosOutEx(call, pos, out) + } + + pragma[nomagic] + private predicate revFlowOut(ReturnPosition pos, Configuration config) { + exists(DataFlowCall call, NodeEx out | + revFlow(out, _, config) and + viableReturnPosOutNodeCandFwd1(call, pos, out, config) + ) + } + + pragma[nomagic] + predicate viableParamArgNodeCandFwd1( + DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config + ) { + viableParamArgEx(call, p, arg) and + fwdFlow(arg, config) + } + + pragma[nomagic] + private predicate revFlowIn( + DataFlowCall call, ArgNodeEx arg, boolean toReturn, Configuration config + ) { + exists(ParamNodeEx p | + revFlow(p, toReturn, config) and + viableParamArgNodeCandFwd1(call, p, arg, config) + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn(DataFlowCall call, ArgNodeEx arg, Configuration config) { + revFlowIn(call, arg, true, config) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) { + exists(NodeEx out | + revFlow(out, toReturn, config) and + fwdFlowOutFromArg(call, out, config) + ) + } + + private predicate stateStepRev(FlowState state1, FlowState state2, Configuration config) { + exists(NodeEx node1, NodeEx node2 | + additionalLocalStateStep(node1, state1, node2, state2, config) or + additionalJumpStateStep(node1, state1, node2, state2, config) + | + revFlow(node1, _, pragma[only_bind_into](config)) and + revFlow(node2, _, pragma[only_bind_into](config)) and + fwdFlowState(state1, pragma[only_bind_into](config)) and + fwdFlowState(state2, pragma[only_bind_into](config)) + ) + } + + predicate revFlowState(FlowState state, Configuration config) { + exists(NodeEx node | + sinkNode(node, state, config) and + revFlow(node, _, pragma[only_bind_into](config)) and + fwdFlowState(state, pragma[only_bind_into](config)) + ) + or + exists(FlowState state0 | + revFlowState(state0, config) and + stateStepRev(state, state0, config) + ) + } + + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Content c | + revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + revFlow(node2, pragma[only_bind_into](config)) and + store(node1, tc, node2, contentType, config) and + c = tc.getContent() and + exists(ap1) + ) + } + + pragma[nomagic] + predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) { + revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + read(n1, c, n2, pragma[only_bind_into](config)) and + revFlow(n2, pragma[only_bind_into](config)) + } + + pragma[nomagic] + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, config) } + + bindingset[node, state, config] + predicate revFlow(NodeEx node, FlowState state, Ap ap, Configuration config) { + revFlow(node, _, pragma[only_bind_into](config)) and + exists(state) and + exists(ap) + } + + private predicate throughFlowNodeCand(NodeEx node, Configuration config) { + revFlow(node, true, config) and + fwdFlow(node, true, config) and + not inBarrier(node, config) and + not outBarrier(node, config) + } + + /** Holds if flow may return from `callable`. */ + pragma[nomagic] + private predicate returnFlowCallableNodeCand( + DataFlowCallable callable, ReturnKindExt kind, Configuration config + ) { + exists(RetNodeEx ret | + throughFlowNodeCand(ret, config) and + callable = ret.getEnclosingCallable() and + kind = ret.getKind() + ) + } + + /** + * Holds if flow may enter through `p` and reach a return node making `p` a + * candidate for the origin of a summary. + */ + pragma[nomagic] + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(ReturnKindExt kind | + throughFlowNodeCand(p, config) and + returnFlowCallableNodeCand(c, kind, config) and + p.getEnclosingCallable() = c and + exists(ap) and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition() + or + p.allowParameterReturnInSelf() + ) + ) + } + + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists(ArgNodeEx arg, boolean toReturn | + revFlow(arg, toReturn, config) and + revFlowInToReturn(call, arg, config) and + revFlowIsReturned(call, toReturn, config) + ) + } + + predicate stats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples, Configuration config + ) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, config)) and + fields = count(Content f0 | fwdFlowConsCand(f0, config)) and + conscand = -1 and + states = count(FlowState state | fwdFlowState(state, config)) and + tuples = count(NodeEx n, boolean b | fwdFlow(n, b, config)) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, config)) and + fields = count(Content f0 | revFlowConsCand(f0, config)) and + conscand = -1 and + states = count(FlowState state | revFlowState(state, config)) and + tuples = count(NodeEx n, boolean b | revFlow(n, b, config)) + } + /* End: Stage 1 logic. */ +} + +pragma[noinline] +private predicate localFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) { + Stage1::revFlow(node2, config) and + localFlowStep(node1, node2, config) +} + +pragma[noinline] +private predicate additionalLocalFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) { + Stage1::revFlow(node2, config) and + additionalLocalFlowStep(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutNodeCand1( + DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config +) { + Stage1::revFlow(out, config) and + Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, RetNodeEx ret, NodeEx out, Configuration config +) { + viableReturnPosOutNodeCand1(call, ret.getReturnPosition(), out, config) and + Stage1::revFlow(ret, config) and + not outBarrier(ret, config) and + not inBarrier(out, config) +} + +pragma[nomagic] +private predicate viableParamArgNodeCand1( + DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config +) { + Stage1::viableParamArgNodeCandFwd1(call, p, arg, config) and + Stage1::revFlow(arg, config) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, Configuration config +) { + viableParamArgNodeCand1(call, p, arg, config) and + Stage1::revFlow(p, config) and + not outBarrier(arg, config) and + not inBarrier(p, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(NodeEx n1, Configuration conf) { + result = + strictcount(NodeEx n | + flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf) + ) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(NodeEx n2, Configuration conf) { + result = + strictcount(NodeEx n | + flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf) + ) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, ret, out, config) and + exists(int b, int j | + b = branch(ret, config) and + j = join(out, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config +) { + flowIntoCallNodeCand1(call, arg, p, config) and + exists(int b, int j | + b = branch(arg, config) and + j = join(p, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +private signature module StageSig { + class Ap; + + predicate revFlow(NodeEx node, Configuration config); + + bindingset[node, state, config] + predicate revFlow(NodeEx node, FlowState state, Ap ap, Configuration config); + + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config); + + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config); + + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ); + + predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config); +} + +private module MkStage { + class ApApprox = PrevStage::Ap; + + signature module StageParam { + class Ap; + + class ApNil extends Ap; + + bindingset[result, ap] + ApApprox getApprox(Ap ap); + + ApNil getApNil(NodeEx node); + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail); + + Content getHeadContent(Ap ap); + + class ApOption; + + ApOption apNone(); + + ApOption apSome(Ap ap); + + class Cc; + + class CcCall extends Cc; + + // TODO: member predicate on CcCall + predicate matchesCall(CcCall cc, DataFlowCall call); + + class CcNoCall extends Cc; + + Cc ccNone(); + + CcCall ccSomeCall(); + + class LocalCc; + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc); + + bindingset[call, c, innercc] + CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc); + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc); + + bindingset[node1, state1, config] + bindingset[node2, state2, config] + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ); + + predicate flowOutOfCall( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + ); + + predicate flowIntoCall( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ); + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config); + + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType); + } + + module Stage implements StageSig { + import Param + + /* Begin: Stage logic. */ + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + pragma[only_bind_out](apa) = pragma[only_bind_out](result) + } + + pragma[nomagic] + private predicate flowThroughOutOfCall( + DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, + Configuration config + ) { + flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and + PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, + pragma[only_bind_into](config)) and + matchesCall(ccc, call) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. + */ + pragma[nomagic] + predicate fwdFlow( + NodeEx node, FlowState state, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + fwdFlow0(node, state, cc, argAp, ap, config) and + PrevStage::revFlow(node, state, unbindApa(getApprox(ap)), config) and + filter(node, state, ap, config) + } + + pragma[nomagic] + private predicate fwdFlow0( + NodeEx node, FlowState state, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + sourceNode(node, state, config) and + (if hasSourceCallCtx(config) then cc = ccSomeCall() else cc = ccNone()) and + argAp = apNone() and + ap = getApNil(node) + or + exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc | + fwdFlow(mid, state0, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc) + | + localStep(mid, state0, node, state, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, state0, node, state, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(NodeEx mid | + fwdFlow(mid, pragma[only_bind_into](state), _, _, ap, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(mid, state, _, _, nil, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(mid, state0, _, _, nil, pragma[only_bind_into](config)) and + additionalJumpStateStep(mid, state0, node, state, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, state, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap ap0, Content c | + fwdFlowRead(ap0, c, _, node, state, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, state, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + fwdFlowOutNotFromArg(node, state, cc, argAp, ap, config) + or + exists(DataFlowCall call, Ap argAp0 | + fwdFlowOutFromArg(call, node, state, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, argAp0, config) + ) + } + + pragma[nomagic] + private predicate fwdFlowStore( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, FlowState state, Cc cc, ApOption argAp, + Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, state, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, NodeEx node1, NodeEx node2, FlowState state, Cc cc, ApOption argAp, + Configuration config + ) { + fwdFlow(node1, state, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNodeEx p, FlowState state, Cc outercc, Cc innercc, ApOption argAp, + Ap ap, Configuration config + ) { + exists(ArgNodeEx arg, boolean allowsFieldFlow | + fwdFlow(arg, state, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate fwdFlowOutNotFromArg( + NodeEx out, FlowState state, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists( + DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc, + DataFlowCallable inner + | + fwdFlow(ret, state, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = ret.getEnclosingCallable() and + ccOut = getCallContextReturn(inner, call, innercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, NodeEx out, FlowState state, Ap argAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | + fwdFlow(ret, state, ccc, apSome(argAp), ap, config) and + flowThroughOutOfCall(call, ccc, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p | + fwdFlowIn(call, p, _, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) + ) + } + + pragma[nomagic] + private predicate storeStepFwd( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, _, config) + } + + private predicate readStepFwd( + NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config + ) { + fwdFlowRead(ap1, c, n1, n2, _, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + + pragma[nomagic] + private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) { + exists(Ap argAp0, NodeEx out, FlowState state, Cc cc, ApOption argAp, Ap ap | + fwdFlow(out, state, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, + pragma[only_bind_into](config)) and + fwdFlowOutFromArg(call, out, state, argAp0, ap, config) and + fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), + pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0), + pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + private predicate flowThroughIntoCall( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ) { + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + fwdFlow(arg, _, _, _, _, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and + callMayFlowThroughFwd(call, pragma[only_bind_into](config)) + } + + pragma[nomagic] + private predicate returnNodeMayFlowThrough( + RetNodeEx ret, FlowState state, Ap ap, Configuration config + ) { + fwdFlow(ret, state, any(CcCall ccc), apSome(_), ap, config) + } + + /** + * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + revFlow0(node, state, toReturn, returnAp, ap, config) and + fwdFlow(node, state, _, _, ap, config) + } + + pragma[nomagic] + private predicate revFlow0( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, state, _, _, ap, config) and + sinkNode(node, state, config) and + (if hasSinkCallCtx(config) then toReturn = true else toReturn = false) and + returnAp = apNone() and + ap instanceof ApNil + or + exists(NodeEx mid, FlowState state0 | + localStep(node, state, mid, state0, true, _, config, _) and + revFlow(mid, state0, toReturn, returnAp, ap, config) + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(node, pragma[only_bind_into](state), _, _, ap, pragma[only_bind_into](config)) and + localStep(node, pragma[only_bind_into](state), mid, state0, false, _, config, _) and + revFlow(mid, state0, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + exists(NodeEx mid | + jumpStep(node, mid, config) and + revFlow(mid, state, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), state, _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(node, _, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStateStep(node, state, mid, state0, config) and + revFlow(pragma[only_bind_into](mid), pragma[only_bind_into](state0), _, _, nil, + pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, state, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(NodeEx mid, Ap ap0 | + revFlow(mid, state, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + revFlowInNotToReturn(node, state, returnAp, ap, config) and + toReturn = false + or + exists(DataFlowCall call, Ap returnAp0 | + revFlowInToReturn(call, node, state, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + or + // flow out of a callable + revFlowOut(_, node, state, _, _, ap, config) and + toReturn = true and + if returnNodeMayFlowThrough(node, state, ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, NodeEx node, FlowState state, TypedContent tc, NodeEx mid, + boolean toReturn, ApOption returnAp, Configuration config + ) { + revFlow(mid, state, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(NodeEx mid, Ap tail0 | + revFlow(mid, _, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, RetNodeEx ret, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(NodeEx out, boolean allowsFieldFlow | + revFlow(out, state, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate revFlowInNotToReturn( + ArgNodeEx arg, FlowState state, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, state, false, returnAp, ap, config) and + flowIntoCall(_, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNodeEx arg, FlowState state, Ap returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, state, true, apSome(returnAp), ap, config) and + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, FlowState state, CcCall ccc | + revFlowOut(call, ret, state, toReturn, returnAp, ap, config) and + fwdFlow(ret, state, ccc, apSome(_), ap, config) and + matchesCall(ccc, call) + ) + } + + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Ap ap2, Content c | + PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and + revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + + predicate revFlow(NodeEx node, FlowState state, Configuration config) { + revFlow(node, state, _, _, _, config) + } + + predicate revFlow(NodeEx node, FlowState state, Ap ap, Configuration config) { + revFlow(node, state, _, _, ap, config) + } + + pragma[nomagic] + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, _, config) } + + // use an alias as a workaround for bad functionality-induced joins + pragma[nomagic] + predicate revFlowAlias(NodeEx node, Configuration config) { revFlow(node, _, _, _, _, config) } + + // use an alias as a workaround for bad functionality-induced joins + pragma[nomagic] + predicate revFlowAlias(NodeEx node, FlowState state, Ap ap, Configuration config) { + revFlow(node, state, ap, config) + } + + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + private predicate validAp(Ap ap, Configuration config) { + revFlow(_, _, _, _, ap, config) and ap instanceof ApNil + or + exists(TypedContent head, Ap tail | + consCand(head, tail, config) and + ap = apCons(head, tail) + ) + } + + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + revConsCand(tc, ap, config) and + validAp(ap, config) + } + + pragma[noinline] + private predicate parameterFlow( + ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, _, true, apSome(ap0), ap, config) and + c = p.getEnclosingCallable() + } + + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(RetNodeEx ret, FlowState state, Ap ap0, ReturnKindExt kind, ParameterPosition pos | + parameterFlow(p, ap, ap0, c, config) and + c = ret.getEnclosingCallable() and + revFlow(pragma[only_bind_into](ret), pragma[only_bind_into](state), true, apSome(_), + pragma[only_bind_into](ap0), pragma[only_bind_into](config)) and + fwdFlow(ret, state, any(CcCall ccc), apSome(ap), ap0, config) and + kind = ret.getKind() and + p.getPosition() = pos and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + p.allowParameterReturnInSelf() + ) + ) + } + + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists( + Ap returnAp0, ArgNodeEx arg, FlowState state, boolean toReturn, ApOption returnAp, Ap ap + | + revFlow(arg, state, toReturn, returnAp, ap, config) and + revFlowInToReturn(call, arg, state, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + } + + predicate stats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples, Configuration config + ) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, _, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + states = count(FlowState state | fwdFlow(_, state, _, _, _, config)) and + tuples = + count(NodeEx n, FlowState state, Cc cc, ApOption argAp, Ap ap | + fwdFlow(n, state, cc, argAp, ap, config) + ) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + states = count(FlowState state | revFlow(_, state, _, _, _, config)) and + tuples = + count(NodeEx n, FlowState state, boolean b, ApOption retAp, Ap ap | + revFlow(n, state, b, retAp, ap, config) + ) + } + /* End: Stage logic. */ + } +} + +private module BooleanCallContext { + class Cc extends boolean { + Cc() { this in [true, false] } + } + + class CcCall extends Cc { + CcCall() { this = true } + } + + /** Holds if the call context may be `call`. */ + predicate matchesCall(CcCall cc, DataFlowCall call) { any() } + + class CcNoCall extends Cc { + CcNoCall() { this = false } + } + + Cc ccNone() { result = false } + + CcCall ccSomeCall() { result = true } + + class LocalCc = Unit; + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc) { any() } + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } + + bindingset[call, c, innercc] + CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() } +} + +private module Level1CallContext { + class Cc = CallContext; + + class CcCall = CallContextCall; + + pragma[inline] + predicate matchesCall(CcCall cc, DataFlowCall call) { cc.matchesCall(call) } + + class CcNoCall = CallContextNoCall; + + Cc ccNone() { result instanceof CallContextAny } + + CcCall ccSomeCall() { result instanceof CallContextSomeCall } + + module NoLocalCallContext { + class LocalCc = Unit; + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc) { any() } + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + checkCallContextCall(outercc, call, c) and + if recordDataFlowCallSiteDispatch(call, c) + then result = TSpecificCall(call) + else result = TSomeCall() + } + } + + module LocalCallContext { + class LocalCc = LocalCallContext; + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc) { + result = + getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)), + node.getEnclosingCallable()) + } + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + checkCallContextCall(outercc, call, c) and + if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall() + } + } + + bindingset[call, c, innercc] + CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { + checkCallContextReturn(innercc, c, call) and + if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone() + } +} + +private module Stage2Param implements MkStage::StageParam { + private module PrevStage = Stage1; + + class Ap extends boolean { + Ap() { this in [true, false] } + } + + class ApNil extends Ap { + ApNil() { this = false } + } + + bindingset[result, ap] + PrevStage::Ap getApprox(Ap ap) { any() } + + ApNil getApNil(NodeEx node) { Stage1::revFlow(node, _) and exists(result) } + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) } + + pragma[inline] + Content getHeadContent(Ap ap) { exists(result) and ap = true } + + class ApOption = BooleanOption; + + ApOption apNone() { result = TBooleanNone() } + + ApOption apSome(Ap ap) { result = TBooleanSome(ap) } + + import Level1CallContext + import NoLocalCallContext + + bindingset[node1, state1, config] + bindingset[node2, state2, config] + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + ( + preservesValue = true and + localFlowStepNodeCand1(node1, node2, config) and + state1 = state2 + or + preservesValue = false and + additionalLocalFlowStepNodeCand1(node1, node2, config) and + state1 = state2 + or + preservesValue = false and + additionalLocalStateStep(node1, state1, node2, state2, config) + ) and + exists(ap) and + exists(lcc) + } + + predicate flowOutOfCall = flowOutOfCallNodeCand1/5; + + predicate flowIntoCall = flowIntoCallNodeCand1/5; + + pragma[nomagic] + private predicate expectsContentCand(NodeEx node, Configuration config) { + exists(Content c | + PrevStage::revFlow(node, pragma[only_bind_into](config)) and + PrevStage::revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + expectsContentEx(node, c) + ) + } + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { + PrevStage::revFlowState(state, pragma[only_bind_into](config)) and + exists(ap) and + not stateBarrier(node, state, config) and + ( + notExpectsContent(node) + or + ap = true and + expectsContentCand(node, config) + ) + } + + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } +} + +private module Stage2 implements StageSig { + import MkStage::Stage +} + +pragma[nomagic] +private predicate flowOutOfCallNodeCand2( + DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node1, pragma[only_bind_into](config)) +} + +pragma[nomagic] +private predicate flowIntoCallNodeCand2( + DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow, + Configuration config +) { + flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node1, pragma[only_bind_into](config)) +} + +private module LocalFlowBigStep { + /** + * A node where some checking is required, and hence the big-step relation + * is not allowed to step over. + */ + private class FlowCheckNode extends NodeEx { + FlowCheckNode() { + castNode(this.asNode()) or + clearsContentCached(this.asNode(), _) or + expectsContentCached(this.asNode(), _) + } + } + + /** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) { + Stage2::revFlow(node, state, config) and + ( + sourceNode(node, state, config) + or + jumpStep(_, node, config) + or + additionalJumpStep(_, node, config) + or + additionalJumpStateStep(_, _, node, state, config) + or + node instanceof ParamNodeEx + or + node.asNode() instanceof OutNodeExt + or + Stage2::storeStepCand(_, _, _, node, _, config) + or + Stage2::readStepCand(_, _, node, config) + or + node instanceof FlowCheckNode + or + exists(FlowState s | + additionalLocalStateStep(_, s, node, state, config) and + s != state + ) + ) + } + + /** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowExit(NodeEx node, FlowState state, Configuration config) { + exists(NodeEx next | Stage2::revFlow(next, state, config) | + jumpStep(node, next, config) or + additionalJumpStep(node, next, config) or + flowIntoCallNodeCand1(_, node, next, config) or + flowOutOfCallNodeCand1(_, node, next, config) or + Stage2::storeStepCand(node, _, _, next, _, config) or + Stage2::readStepCand(node, _, next, config) + ) + or + exists(NodeEx next, FlowState s | Stage2::revFlow(next, s, config) | + additionalJumpStateStep(node, state, next, s, config) + or + additionalLocalStateStep(node, state, next, s, config) and + s != state + ) + or + Stage2::revFlow(node, state, config) and + node instanceof FlowCheckNode + or + sinkNode(node, state, config) + } + + pragma[noinline] + private predicate additionalLocalFlowStepNodeCand2( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, Configuration config + ) { + additionalLocalFlowStepNodeCand1(node1, node2, config) and + state1 = state2 and + Stage2::revFlow(node1, pragma[only_bind_into](state1), false, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node2, pragma[only_bind_into](state2), false, + pragma[only_bind_into](config)) + or + additionalLocalStateStep(node1, state1, node2, state2, config) and + Stage2::revFlow(node1, state1, false, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node2, state2, false, pragma[only_bind_into](config)) + } + + /** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `[additional]localFlowStep` beginning + * at `localFlowEntry`. + */ + pragma[nomagic] + private predicate localFlowStepPlus( + NodeEx node1, FlowState state, NodeEx node2, boolean preservesValue, DataFlowType t, + Configuration config, LocalCallContext cc + ) { + not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and + ( + localFlowEntry(node1, pragma[only_bind_into](state), pragma[only_bind_into](config)) and + ( + localFlowStepNodeCand1(node1, node2, config) and + preservesValue = true and + t = node1.getDataFlowType() and // irrelevant dummy value + Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config)) + or + additionalLocalFlowStepNodeCand2(node1, state, node2, state, config) and + preservesValue = false and + t = node2.getDataFlowType() + ) and + node1 != node2 and + cc.relevantFor(node1.getEnclosingCallable()) and + not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) + or + exists(NodeEx mid | + localFlowStepPlus(node1, pragma[only_bind_into](state), mid, preservesValue, t, + pragma[only_bind_into](config), cc) and + localFlowStepNodeCand1(mid, node2, config) and + not mid instanceof FlowCheckNode and + Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config)) + ) + or + exists(NodeEx mid | + localFlowStepPlus(node1, state, mid, _, _, pragma[only_bind_into](config), cc) and + additionalLocalFlowStepNodeCand2(mid, state, node2, state, config) and + not mid instanceof FlowCheckNode and + preservesValue = false and + t = node2.getDataFlowType() + ) + ) + } + + /** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. + */ + pragma[nomagic] + predicate localFlowBigStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + AccessPathFrontNil apf, Configuration config, LocalCallContext callContext + ) { + localFlowStepPlus(node1, state1, node2, preservesValue, apf.getType(), config, callContext) and + localFlowExit(node2, state1, config) and + state1 = state2 + or + additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and + state1 != state2 and + preservesValue = false and + apf = TFrontNil(node2.getDataFlowType()) and + callContext.relevantFor(node1.getEnclosingCallable()) and + not exists(DataFlowCall call | call = callContext.(LocalCallContextSpecificCall).getCall() | + isUnreachableInCallCached(node1.asNode(), call) or + isUnreachableInCallCached(node2.asNode(), call) + ) + } +} + +private import LocalFlowBigStep + +private module Stage3Param implements MkStage::StageParam { + private module PrevStage = Stage2; + + class Ap = AccessPathFront; + + class ApNil = AccessPathFrontNil; + + PrevStage::Ap getApprox(Ap ap) { result = ap.toBoolNonEmpty() } + + ApNil getApNil(NodeEx node) { + PrevStage::revFlow(node, _) and result = TFrontNil(node.getDataFlowType()) + } + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) } + + pragma[noinline] + Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathFrontOption; + + ApOption apNone() { result = TAccessPathFrontNone() } + + ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) } + + import BooleanCallContext + + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, state1, node2, state2, preservesValue, ap, config, _) and exists(lcc) + } + + predicate flowOutOfCall = flowOutOfCallNodeCand2/5; + + predicate flowIntoCall = flowIntoCallNodeCand2/5; + + pragma[nomagic] + private predicate clearSet(NodeEx node, ContentSet c, Configuration config) { + PrevStage::revFlow(node, config) and + clearsContentCached(node.asNode(), c) + } + + pragma[nomagic] + private predicate clearContent(NodeEx node, Content c, Configuration config) { + exists(ContentSet cs | + PrevStage::readStepCand(_, pragma[only_bind_into](c), _, pragma[only_bind_into](config)) and + c = cs.getAReadContent() and + clearSet(node, cs, pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + private predicate clear(NodeEx node, Ap ap, Configuration config) { + clearContent(node, ap.getHead().getContent(), config) + } + + pragma[nomagic] + private predicate expectsContentCand(NodeEx node, Ap ap, Configuration config) { + exists(Content c | + PrevStage::revFlow(node, pragma[only_bind_into](config)) and + PrevStage::readStepCand(_, c, _, pragma[only_bind_into](config)) and + expectsContentEx(node, c) and + c = ap.getHead().getContent() + ) + } + + pragma[nomagic] + private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode } + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { + exists(state) and + exists(config) and + not clear(node, ap, config) and + (if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()) and + ( + notExpectsContent(node) + or + expectsContentCand(node, ap, config) + ) + } + + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType) { + // We need to typecheck stores here, since reverse flow through a getter + // might have a different type here compared to inside the getter. + compatibleTypes(ap.getType(), contentType) + } +} + +private module Stage3 implements StageSig { + import MkStage::Stage +} + +/** + * Holds if `argApf` is recorded as the summary context for flow reaching `node` + * and remains relevant for the following pruning stage. + */ +private predicate flowCandSummaryCtx( + NodeEx node, FlowState state, AccessPathFront argApf, Configuration config +) { + exists(AccessPathFront apf | + Stage3::revFlow(node, state, true, _, apf, config) and + Stage3::fwdFlow(node, state, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config) + ) +} + +/** + * Holds if a length 2 access path approximation with the head `tc` is expected + * to be expensive. + */ +private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) { + exists(int tails, int nodes, int apLimit, int tupleLimit | + tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and + nodes = + strictcount(NodeEx n, FlowState state | + Stage3::revFlow(n, state, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + or + flowCandSummaryCtx(n, state, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + ) and + accessPathApproxCostLimits(apLimit, tupleLimit) and + apLimit < tails and + tupleLimit < (tails - 1) * nodes and + not tc.forceHighPrecision() + ) +} + +private newtype TAccessPathApprox = + TNil(DataFlowType t) or + TConsNil(TypedContent tc, DataFlowType t) { + Stage3::consCand(tc, TFrontNil(t), _) and + not expensiveLen2unfolding(tc, _) + } or + TConsCons(TypedContent tc1, TypedContent tc2, int len) { + Stage3::consCand(tc1, TFrontHead(tc2), _) and + len in [2 .. accessPathLimit()] and + not expensiveLen2unfolding(tc1, _) + } or + TCons1(TypedContent tc, int len) { + len in [1 .. accessPathLimit()] and + expensiveLen2unfolding(tc, _) + } + +/** + * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only + * the first two elements of the list and its length are tracked. If data flows + * from a source to a given node with a given `AccessPathApprox`, this indicates + * the sequence of dereference operations needed to get from the value in the node + * to the tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPathApprox extends TAccessPathApprox { + abstract string toString(); + + abstract TypedContent getHead(); + + abstract int len(); + + abstract DataFlowType getType(); + + abstract AccessPathFront getFront(); + + /** Gets the access path obtained by popping `head` from this path, if any. */ + abstract AccessPathApprox pop(TypedContent head); +} + +private class AccessPathApproxNil extends AccessPathApprox, TNil { + private DataFlowType t; + + AccessPathApproxNil() { this = TNil(t) } + + override string toString() { result = concat(": " + ppReprType(t)) } + + override TypedContent getHead() { none() } + + override int len() { result = 0 } + + override DataFlowType getType() { result = t } + + override AccessPathFront getFront() { result = TFrontNil(t) } + + override AccessPathApprox pop(TypedContent head) { none() } +} + +abstract private class AccessPathApproxCons extends AccessPathApprox { } + +private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil { + private TypedContent tc; + private DataFlowType t; + + AccessPathApproxConsNil() { this = TConsNil(tc, t) } + + override string toString() { + // The `concat` becomes "" if `ppReprType` has no result. + result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t)) + } + + override TypedContent getHead() { result = tc } + + override int len() { result = 1 } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) } +} + +private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons { + private TypedContent tc1; + private TypedContent tc2; + private int len; + + AccessPathApproxConsCons() { this = TConsCons(tc1, tc2, len) } + + override string toString() { + if len = 2 + then result = "[" + tc1.toString() + ", " + tc2.toString() + "]" + else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc1 } + + override int len() { result = len } + + override DataFlowType getType() { result = tc1.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc1) } + + override AccessPathApprox pop(TypedContent head) { + head = tc1 and + ( + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + } +} + +private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 { + private TypedContent tc; + private int len; + + AccessPathApproxCons1() { this = TCons1(tc, len) } + + override string toString() { + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc } + + override int len() { result = len } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { + head = tc and + ( + exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) | + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + or + exists(DataFlowType t | + len = 1 and + Stage3::consCand(tc, TFrontNil(t), _) and + result = TNil(t) + ) + ) + } +} + +/** Gets the access path obtained by popping `tc` from `ap`, if any. */ +private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) } + +/** Gets the access path obtained by pushing `tc` onto `ap`. */ +private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = pop(tc, result) } + +private newtype TAccessPathApproxOption = + TAccessPathApproxNone() or + TAccessPathApproxSome(AccessPathApprox apa) + +private class AccessPathApproxOption extends TAccessPathApproxOption { + string toString() { + this = TAccessPathApproxNone() and result = "" + or + this = TAccessPathApproxSome(any(AccessPathApprox apa | result = apa.toString())) + } +} + +private module Stage4Param implements MkStage::StageParam { + private module PrevStage = Stage3; + + class Ap = AccessPathApprox; + + class ApNil = AccessPathApproxNil; + + PrevStage::Ap getApprox(Ap ap) { result = ap.getFront() } + + ApNil getApNil(NodeEx node) { + PrevStage::revFlow(node, _) and result = TNil(node.getDataFlowType()) + } + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) } + + pragma[noinline] + Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathApproxOption; + + ApOption apNone() { result = TAccessPathApproxNone() } + + ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) } + + import Level1CallContext + import LocalCallContext + + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, state1, node2, state2, preservesValue, ap.getFront(), config, lcc) + } + + pragma[nomagic] + predicate flowOutOfCall( + DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config + ) { + exists(FlowState state | + flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, pragma[only_bind_into](state), _, pragma[only_bind_into](config)) and + PrevStage::revFlowAlias(node1, pragma[only_bind_into](state), _, + pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + predicate flowIntoCall( + DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow, + Configuration config + ) { + exists(FlowState state | + flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, pragma[only_bind_into](state), _, pragma[only_bind_into](config)) and + PrevStage::revFlowAlias(node1, pragma[only_bind_into](state), _, + pragma[only_bind_into](config)) + ) + } + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { any() } + + // Type checking is not necessary here as it has already been done in stage 3. + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } +} + +private module Stage4 = MkStage::Stage; + +bindingset[conf, result] +private Configuration unbindConf(Configuration conf) { + exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c)) +} + +pragma[nomagic] +private predicate nodeMayUseSummary0( + NodeEx n, DataFlowCallable c, FlowState state, AccessPathApprox apa, Configuration config +) { + exists(AccessPathApprox apa0 | + Stage4::parameterMayFlowThrough(_, c, _, _) and + Stage4::revFlow(n, state, true, _, apa0, config) and + Stage4::fwdFlow(n, state, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and + n.getEnclosingCallable() = c + ) +} + +pragma[nomagic] +private predicate nodeMayUseSummary( + NodeEx n, FlowState state, AccessPathApprox apa, Configuration config +) { + exists(DataFlowCallable c | + Stage4::parameterMayFlowThrough(_, c, apa, config) and + nodeMayUseSummary0(n, c, state, apa, config) + ) +} + +private newtype TSummaryCtx = + TSummaryCtxNone() or + TSummaryCtxSome(ParamNodeEx p, FlowState state, AccessPath ap) { + exists(Configuration config | + Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), config) and + Stage4::revFlow(p, state, _, config) + ) + } + +/** + * A context for generating flow summaries. This represents flow entry through + * a specific parameter with an access path of a specific shape. + * + * Summaries are only created for parameters that may flow through. + */ +abstract private class SummaryCtx extends TSummaryCtx { + abstract string toString(); +} + +/** A summary context from which no flow summary can be generated. */ +private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { + override string toString() { result = "" } +} + +/** A summary context from which a flow summary can be generated. */ +private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { + private ParamNodeEx p; + private FlowState s; + private AccessPath ap; + + SummaryCtxSome() { this = TSummaryCtxSome(p, s, ap) } + + ParameterPosition getParameterPos() { p.isParameterOf(_, result) } + + ParamNodeEx getParamNode() { result = p } + + override string toString() { result = p + ": " + ap } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * Gets the number of length 2 access path approximations that correspond to `apa`. + */ +private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) { + exists(TypedContent tc, int len | + tc = apa.getHead() and + len = apa.len() and + result = + strictcount(AccessPathFront apf | + Stage4::consCand(tc, any(AccessPathApprox ap | ap.getFront() = apf and ap.len() = len - 1), + config) + ) + ) +} + +private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) { + result = + strictcount(NodeEx n, FlowState state | + Stage4::revFlow(n, state, apa, config) or nodeMayUseSummary(n, state, apa, config) + ) +} + +/** + * Holds if a length 2 access path approximation matching `apa` is expected + * to be expensive. + */ +private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) { + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = count1to2unfold(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + apLimit < aps and + tupleLimit < (aps - 1) * nodes + ) +} + +private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) { + exists(TypedContent head | + apa.pop(head) = result and + Stage4::consCand(head, result, config) + ) +} + +/** + * Holds with `unfold = false` if a precise head-tail representation of `apa` is + * expected to be expensive. Holds with `unfold = true` otherwise. + */ +private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) { + if apa.getHead().forceHighPrecision() + then unfold = true + else + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = countPotentialAps(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true + ) +} + +/** + * Gets the number of `AccessPath`s that correspond to `apa`. + */ +private int countAps(AccessPathApprox apa, Configuration config) { + evalUnfold(apa, false, config) and + result = 1 and + (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config)) + or + evalUnfold(apa, false, config) and + result = count1to2unfold(apa, config) and + not expensiveLen1to2unfolding(apa, config) + or + evalUnfold(apa, true, config) and + result = countPotentialAps(apa, config) +} + +/** + * Gets the number of `AccessPath`s that would correspond to `apa` assuming + * that it is expanded to a precise head-tail representation. + */ +language[monotonicAggregates] +private int countPotentialAps(AccessPathApprox apa, Configuration config) { + apa instanceof AccessPathApproxNil and result = 1 + or + result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config)) +} + +private newtype TAccessPath = + TAccessPathNil(DataFlowType t) or + TAccessPathCons(TypedContent head, AccessPath tail) { + exists(AccessPathApproxCons apa | + not evalUnfold(apa, false, _) and + head = apa.getHead() and + tail.getApprox() = getATail(apa, _) + ) + } or + TAccessPathCons2(TypedContent head1, TypedContent head2, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + not expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head1 = apa.getHead() and + head2 = getATail(apa, _).getHead() + ) + } or + TAccessPathCons1(TypedContent head, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head = apa.getHead() + ) + } + +private newtype TPathNode = + TPathNodeMid( + NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config + ) { + // A PathNode is introduced by a source ... + Stage4::revFlow(node, state, config) and + sourceNode(node, state, config) and + ( + if hasSourceCallCtx(config) + then cc instanceof CallContextSomeCall + else cc instanceof CallContextAny + ) and + sc instanceof SummaryCtxNone and + ap = TAccessPathNil(node.getDataFlowType()) + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + pathStep(mid, node, state, cc, sc, ap) and + pragma[only_bind_into](config) = mid.getConfiguration() and + Stage4::revFlow(node, state, ap.getApprox(), pragma[only_bind_into](config)) + ) + } or + TPathNodeSink(NodeEx node, FlowState state, Configuration config) { + exists(PathNodeMid sink | + sink.isAtSink() and + node = sink.getNodeEx() and + state = sink.getState() and + config = sink.getConfiguration() + ) + } + +/** + * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a + * source to a given node with a given `AccessPath`, this indicates the sequence + * of dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ +private class AccessPath extends TAccessPath { + /** Gets the head of this access path, if any. */ + abstract TypedContent getHead(); + + /** Gets the tail of this access path, if any. */ + abstract AccessPath getTail(); + + /** Gets the front of this access path. */ + abstract AccessPathFront getFront(); + + /** Gets the approximation of this access path. */ + abstract AccessPathApprox getApprox(); + + /** Gets the length of this access path. */ + abstract int length(); + + /** Gets a textual representation of this access path. */ + abstract string toString(); + + /** Gets the access path obtained by popping `tc` from this access path, if any. */ + final AccessPath pop(TypedContent tc) { + result = this.getTail() and + tc = this.getHead() + } + + /** Gets the access path obtained by pushing `tc` onto this access path. */ + final AccessPath push(TypedContent tc) { this = result.pop(tc) } +} + +private class AccessPathNil extends AccessPath, TAccessPathNil { + private DataFlowType t; + + AccessPathNil() { this = TAccessPathNil(t) } + + DataFlowType getType() { result = t } + + override TypedContent getHead() { none() } + + override AccessPath getTail() { none() } + + override AccessPathFrontNil getFront() { result = TFrontNil(t) } + + override AccessPathApproxNil getApprox() { result = TNil(t) } + + override int length() { result = 0 } + + override string toString() { result = concat(": " + ppReprType(t)) } +} + +private class AccessPathCons extends AccessPath, TAccessPathCons { + private TypedContent head; + private AccessPath tail; + + AccessPathCons() { this = TAccessPathCons(head, tail) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { result = tail } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { + result = TConsNil(head, tail.(AccessPathNil).getType()) + or + result = TConsCons(head, tail.getHead(), this.length()) + or + result = TCons1(head, this.length()) + } + + override int length() { result = 1 + tail.length() } + + private string toStringImpl(boolean needsSuffix) { + exists(DataFlowType t | + tail = TAccessPathNil(t) and + needsSuffix = false and + result = head.toString() + "]" + concat(" : " + ppReprType(t)) + ) + or + result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix) + or + exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) | + result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true + or + result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false + ) + or + exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) | + result = head + ", " + tc2 + ", ... (" and len > 1 and needsSuffix = true + or + result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false + ) + } + + override string toString() { + result = "[" + this.toStringImpl(true) + this.length().toString() + ")]" + or + result = "[" + this.toStringImpl(false) + } +} + +private class AccessPathCons2 extends AccessPath, TAccessPathCons2 { + private TypedContent head1; + private TypedContent head2; + private int len; + + AccessPathCons2() { this = TAccessPathCons2(head1, head2, len) } + + override TypedContent getHead() { result = head1 } + + override AccessPath getTail() { + Stage4::consCand(head1, result.getApprox(), _) and + result.getHead() = head2 and + result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head1) } + + override AccessPathApproxCons getApprox() { + result = TConsCons(head1, head2, len) or + result = TCons1(head1, len) + } + + override int length() { result = len } + + override string toString() { + if len = 2 + then result = "[" + head1.toString() + ", " + head2.toString() + "]" + else + result = "[" + head1.toString() + ", " + head2.toString() + ", ... (" + len.toString() + ")]" + } +} + +private class AccessPathCons1 extends AccessPath, TAccessPathCons1 { + private TypedContent head; + private int len; + + AccessPathCons1() { this = TAccessPathCons1(head, len) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { + Stage4::consCand(head, result.getApprox(), _) and result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { result = TCons1(head, len) } + + override int length() { result = len } + + override string toString() { + if len = 1 + then result = "[" + head.toString() + "]" + else result = "[" + head.toString() + ", ... (" + len.toString() + ")]" + } +} + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { none() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + + /** Gets the underlying `Node`. */ + final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result } + + /** Gets the `FlowState` of this node. */ + FlowState getState() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + final PathNode getASuccessor() { + result = this.(PathNodeImpl).getANonHiddenSuccessor() and + reach(this) and + reach(result) + } + + /** Holds if this node is a source. */ + predicate isSource() { none() } +} + +abstract private class PathNodeImpl extends PathNode { + abstract PathNodeImpl getASuccessorImpl(); + + private PathNodeImpl getASuccessorIfHidden() { + this.isHidden() and + result = this.getASuccessorImpl() + } + + final PathNodeImpl getANonHiddenSuccessor() { + result = this.getASuccessorImpl().getASuccessorIfHidden*() and + not this.isHidden() and + not result.isHidden() + } + + abstract NodeEx getNodeEx(); + + predicate isHidden() { + not this.getConfiguration().includeHiddenNodes() and + ( + hiddenNode(this.getNodeEx().asNode()) and + not this.isSource() and + not this instanceof PathNodeSink + or + this.getNodeEx() instanceof TNodeImplicitRead + ) + } + + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + exists(string s | s = this.(PathNodeMid).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + this instanceof PathNodeSink and result = "" + or + result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" + } + + override string toString() { result = this.getNodeEx().toString() + this.ppAp() } + + override string toStringWithContext() { + result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() + } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** Holds if `n` can reach a sink. */ +private predicate directReach(PathNodeImpl n) { + n instanceof PathNodeSink or directReach(n.getANonHiddenSuccessor()) +} + +/** Holds if `n` can reach a sink or is used in a subpath that can reach a sink. */ +private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) } + +/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNodeImpl n1, PathNode n2) { + n1.getANonHiddenSuccessor() = n2 and directReach(n2) +} + +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b } + + /** Holds if `n` is a node in the graph of data flow path explanations. */ + query predicate nodes(PathNode n, string key, string val) { + reach(n) and key = "semmle.label" and val = n.toString() + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through + * a subpath between `par` and `ret` with the connecting edges `arg -> par` and + * `ret -> out` is summarized as the edge `arg -> out`. + */ + query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) { + Subpaths::subpaths(arg, par, ret, out) and + reach(arg) and + reach(par) and + reach(ret) and + reach(out) + } +} + +/** + * An intermediate flow graph node. This is a triple consisting of a `Node`, + * a `CallContext`, and a `Configuration`. + */ +private class PathNodeMid extends PathNodeImpl, TPathNodeMid { + NodeEx node; + FlowState state; + CallContext cc; + SummaryCtx sc; + AccessPath ap; + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, state, cc, sc, ap, config) } + + override NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + CallContext getCallContext() { result = cc } + + SummaryCtx getSummaryCtx() { result = sc } + + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + private PathNodeMid getSuccMid() { + pathStep(this, result.getNodeEx(), result.getState(), result.getCallContext(), + result.getSummaryCtx(), result.getAp()) and + result.getConfiguration() = unbindConf(this.getConfiguration()) + } + + override PathNodeImpl getASuccessorImpl() { + // an intermediate step to another intermediate node + result = this.getSuccMid() + or + // a final step to a sink + result = this.getSuccMid().projectToSink() + } + + override predicate isSource() { + sourceNode(node, state, config) and + ( + if hasSourceCallCtx(config) + then cc instanceof CallContextSomeCall + else cc instanceof CallContextAny + ) and + sc instanceof SummaryCtxNone and + ap = TAccessPathNil(node.getDataFlowType()) + } + + predicate isAtSink() { + sinkNode(node, state, config) and + ap instanceof AccessPathNil and + if hasSinkCallCtx(config) + then + // For `FeatureHasSinkCallContext` the condition `cc instanceof CallContextNoCall` + // is exactly what we need to check. This also implies + // `sc instanceof SummaryCtxNone`. + // For `FeatureEqualSourceSinkCallContext` the initial call context was + // set to `CallContextSomeCall` and jumps are disallowed, so + // `cc instanceof CallContextNoCall` never holds. On the other hand, + // in this case there's never any need to enter a call except to identify + // a summary, so the condition in `pathIntoCallable` enforces this, which + // means that `sc instanceof SummaryCtxNone` holds if and only if we are + // in the call context of the source. + sc instanceof SummaryCtxNone or + cc instanceof CallContextNoCall + else any() + } + + PathNodeSink projectToSink() { + this.isAtSink() and + result.getNodeEx() = node and + result.getState() = state and + result.getConfiguration() = unbindConf(config) + } +} + +/** + * A flow graph node corresponding to a sink. This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNodeImpl, TPathNodeSink { + NodeEx node; + FlowState state; + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, state, config) } + + override NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + override Configuration getConfiguration() { result = config } + + override PathNodeImpl getASuccessorImpl() { none() } + + override predicate isSource() { sourceNode(node, state, config) } +} + +private predicate pathNode( + PathNodeMid mid, NodeEx midnode, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, + Configuration conf, LocalCallContext localCC +) { + midnode = mid.getNodeEx() and + state = mid.getState() and + conf = mid.getConfiguration() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + localCC = + getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)), + midnode.getEnclosingCallable()) and + ap = mid.getAp() +} + +/** + * Holds if data may flow from `mid` to `node`. The last step in or out of + * a callable is recorded by `cc`. + */ +pragma[nomagic] +private predicate pathStep( + PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap +) { + exists(NodeEx midnode, FlowState state0, Configuration conf, LocalCallContext localCC | + pathNode(mid, midnode, state0, cc, sc, ap, conf, localCC) and + localFlowBigStep(midnode, state0, node, state, true, _, conf, localCC) + ) + or + exists( + AccessPath ap0, NodeEx midnode, FlowState state0, Configuration conf, LocalCallContext localCC + | + pathNode(mid, midnode, state0, cc, sc, ap0, conf, localCC) and + localFlowBigStep(midnode, state0, node, state, false, ap.getFront(), conf, localCC) and + ap0 instanceof AccessPathNil + ) + or + jumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and + state = mid.getState() and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = mid.getAp() + or + additionalJumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and + state = mid.getState() and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TAccessPathNil(node.getDataFlowType()) + or + additionalJumpStateStep(mid.getNodeEx(), mid.getState(), node, state, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TAccessPathNil(node.getDataFlowType()) + or + exists(TypedContent tc | pathStoreStep(mid, node, state, ap.pop(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + exists(TypedContent tc | pathReadStep(mid, node, state, ap.push(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + pathIntoCallable(mid, node, state, _, cc, sc, _, _) and ap = mid.getAp() + or + pathOutOfCallable(mid, node, state, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone + or + pathThroughCallable(mid, node, state, cc, ap) and sc = mid.getSummaryCtx() +} + +pragma[nomagic] +private predicate pathReadStep( + PathNodeMid mid, NodeEx node, FlowState state, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + tc = ap0.getHead() and + Stage4::readStepCand(mid.getNodeEx(), tc.getContent(), node, mid.getConfiguration()) and + state = mid.getState() and + cc = mid.getCallContext() +} + +pragma[nomagic] +private predicate pathStoreStep( + PathNodeMid mid, NodeEx node, FlowState state, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + Stage4::storeStepCand(mid.getNodeEx(), _, tc, node, _, mid.getConfiguration()) and + state = mid.getState() and + cc = mid.getCallContext() +} + +private predicate pathOutOfCallable0( + PathNodeMid mid, ReturnPosition pos, FlowState state, CallContext innercc, AccessPathApprox apa, + Configuration config +) { + pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and + state = mid.getState() and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + apa = mid.getAp().getApprox() and + config = mid.getConfiguration() +} + +pragma[nomagic] +private predicate pathOutOfCallable1( + PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, FlowState state, CallContext cc, + AccessPathApprox apa, Configuration config +) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + pathOutOfCallable0(mid, pos, state, innercc, apa, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) +} + +pragma[noinline] +private NodeEx getAnOutNodeFlow( + ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config +) { + result.asNode() = kind.getAnOutNode(call) and + Stage4::revFlow(result, _, apa, config) +} + +/** + * Holds if data may flow from `mid` to `out`. The last step of this path + * is a return from a callable and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, FlowState state, CallContext cc) { + exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config | + pathOutOfCallable1(mid, call, kind, state, cc, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. + */ +pragma[noinline] +private predicate pathIntoArg( + PathNodeMid mid, ParameterPosition ppos, FlowState state, CallContext cc, DataFlowCall call, + AccessPath ap, AccessPathApprox apa, Configuration config +) { + exists(ArgNodeEx arg, ArgumentPosition apos | + pathNode(mid, arg, state, cc, _, ap, config, _) and + arg.asNode().(ArgNode).argumentOf(call, apos) and + apa = ap.getApprox() and + parameterMatch(ppos, apos) + ) +} + +pragma[nomagic] +private predicate parameterCand( + DataFlowCallable callable, ParameterPosition pos, AccessPathApprox apa, Configuration config +) { + exists(ParamNodeEx p | + Stage4::revFlow(p, _, apa, config) and + p.isParameterOf(callable, pos) + ) +} + +pragma[nomagic] +private predicate pathIntoCallable0( + PathNodeMid mid, DataFlowCallable callable, ParameterPosition pos, FlowState state, + CallContext outercc, DataFlowCall call, AccessPath ap, Configuration config +) { + exists(AccessPathApprox apa | + pathIntoArg(mid, pragma[only_bind_into](pos), state, outercc, call, ap, + pragma[only_bind_into](apa), pragma[only_bind_into](config)) and + callable = resolveCall(call, outercc) and + parameterCand(callable, pragma[only_bind_into](pos), pragma[only_bind_into](apa), + pragma[only_bind_into](config)) + ) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + */ +pragma[nomagic] +private predicate pathIntoCallable( + PathNodeMid mid, ParamNodeEx p, FlowState state, CallContext outercc, CallContextCall innercc, + SummaryCtx sc, DataFlowCall call, Configuration config +) { + exists(ParameterPosition pos, DataFlowCallable callable, AccessPath ap | + pathIntoCallable0(mid, callable, pos, state, outercc, call, ap, config) and + p.isParameterOf(callable, pos) and + ( + sc = TSummaryCtxSome(p, state, ap) + or + not exists(TSummaryCtxSome(p, state, ap)) and + sc = TSummaryCtxNone() and + // When the call contexts of source and sink needs to match then there's + // never any reason to enter a callable except to find a summary. See also + // the comment in `PathNodeMid::isAtSink`. + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) +} + +/** Holds if data may flow from a parameter given by `sc` to a return of kind `kind`. */ +pragma[nomagic] +private predicate paramFlowsThrough( + ReturnKindExt kind, FlowState state, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, + AccessPathApprox apa, Configuration config +) { + exists(PathNodeMid mid, RetNodeEx ret, ParameterPosition pos | + pathNode(mid, ret, state, cc, sc, ap, config, _) and + kind = ret.getKind() and + apa = ap.getApprox() and + pos = sc.getParameterPos() and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + sc.getParamNode().allowParameterReturnInSelf() + ) + ) +} + +pragma[nomagic] +private predicate pathThroughCallable0( + DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, FlowState state, CallContext cc, + AccessPath ap, AccessPathApprox apa, Configuration config +) { + exists(CallContext innercc, SummaryCtx sc | + pathIntoCallable(mid, _, _, cc, innercc, sc, call, config) and + paramFlowsThrough(kind, state, innercc, sc, ap, apa, config) + ) +} + +/** + * Holds if data may flow from `mid` through a callable to the node `out`. + * The context `cc` is restored to its value prior to entering the callable. + */ +pragma[noinline] +private predicate pathThroughCallable( + PathNodeMid mid, NodeEx out, FlowState state, CallContext cc, AccessPath ap +) { + exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa, Configuration config | + pathThroughCallable0(call, mid, kind, state, cc, ap, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) + ) +} + +private module Subpaths { + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by + * `kind`, `sc`, `apout`, and `innercc`. + */ + pragma[nomagic] + private predicate subpaths01( + PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, + NodeEx out, FlowState sout, AccessPath apout + ) { + exists(Configuration config | + pathThroughCallable(arg, out, pragma[only_bind_into](sout), _, pragma[only_bind_into](apout)) and + pathIntoCallable(arg, par, _, _, innercc, sc, _, config) and + paramFlowsThrough(kind, pragma[only_bind_into](sout), innercc, sc, + pragma[only_bind_into](apout), _, unbindConf(config)) and + not arg.isHidden() + ) + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by + * `kind`, `sc`, `sout`, `apout`, and `innercc`. + */ + pragma[nomagic] + private predicate subpaths02( + PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, + NodeEx out, FlowState sout, AccessPath apout + ) { + subpaths01(arg, par, sc, innercc, kind, out, sout, apout) and + out.asNode() = kind.getAnOutNode(_) + } + + pragma[nomagic] + private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple. + */ + pragma[nomagic] + private predicate subpaths03( + PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout + ) { + exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode | + subpaths02(arg, par, sc, innercc, kind, out, sout, apout) and + pathNode(ret, retnode, sout, innercc, sc, apout, unbindConf(getPathNodeConf(arg)), _) and + kind = retnode.getKind() + ) + } + + private PathNodeImpl localStepToHidden(PathNodeImpl n) { + n.getASuccessorImpl() = result and + result.isHidden() and + exists(NodeEx n1, NodeEx n2 | n1 = n.getNodeEx() and n2 = result.getNodeEx() | + localFlowBigStep(n1, _, n2, _, _, _, _, _) or + store(n1, _, n2, _, _) or + readSet(n1, _, n2, _) + ) + } + + pragma[nomagic] + private predicate hasSuccessor(PathNodeImpl pred, PathNodeMid succ, NodeEx succNode) { + succ = pred.getANonHiddenSuccessor() and + succNode = succ.getNodeEx() + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through + * a subpath between `par` and `ret` with the connecting edges `arg -> par` and + * `ret -> out` is summarized as the edge `arg -> out`. + */ + predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) { + exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 | + pragma[only_bind_into](arg).getANonHiddenSuccessor() = pragma[only_bind_into](out0) and + subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and + hasSuccessor(pragma[only_bind_into](arg), par, p) and + not ret.isHidden() and + pathNode(out0, o, sout, _, _, apout, _, _) + | + out = out0 or out = out0.projectToSink() + ) + } + + /** + * Holds if `n` can reach a return node in a summarized subpath that can reach a sink. + */ + predicate retReach(PathNodeImpl n) { + exists(PathNode out | subpaths(_, _, n, out) | directReach(out) or retReach(out)) + or + exists(PathNodeImpl mid | + retReach(mid) and + n.getANonHiddenSuccessor() = mid and + not subpaths(_, mid, _, _) + ) + } +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration +) { + flowsource.isSource() and + flowsource.getConfiguration() = configuration and + flowsource.(PathNodeImpl).getNodeEx().asNode() = source and + (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and + flowsink.getNodeEx().asNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} + +private predicate finalStats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples +) { + fwd = true and + nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0)) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and + states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state)) and + tuples = count(PathNode pn) + or + fwd = false and + nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and + states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state and reach(pn))) and + tuples = count(PathNode pn | reach(pn)) +} + +/** + * INTERNAL: Only for debugging. + * + * Calculates per-stage metrics for data flow. + */ +predicate stageStats( + int n, string stage, int nodes, int fields, int conscand, int states, int tuples, + Configuration config +) { + stage = "1 Fwd" and + n = 10 and + Stage1::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "1 Rev" and + n = 15 and + Stage1::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "2 Fwd" and + n = 20 and + Stage2::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "2 Rev" and + n = 25 and + Stage2::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "3 Fwd" and + n = 30 and + Stage3::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "3 Rev" and + n = 35 and + Stage3::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "4 Fwd" and + n = 40 and + Stage4::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "4 Rev" and + n = 45 and + Stage4::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "5 Fwd" and n = 50 and finalStats(true, nodes, fields, conscand, states, tuples) + or + stage = "5 Rev" and n = 55 and finalStats(false, nodes, fields, conscand, states, tuples) +} + +private module FlowExploration { + private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { + exists(NodeEx node1, NodeEx node2 | + jumpStep(node1, node2, config) + or + additionalJumpStep(node1, node2, config) + or + additionalJumpStateStep(node1, _, node2, _, config) + or + // flow into callable + viableParamArgEx(_, node2, node1) + or + // flow out of a callable + viableReturnPosOutEx(_, node1.(RetNodeEx).getReturnPosition(), node2) + | + c1 = node1.getEnclosingCallable() and + c2 = node2.getEnclosingCallable() and + c1 != c2 + ) + } + + private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSource(n) or config.isSource(n, _) | c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSrc(mid, config) and callableStep(mid, c, config) + ) + } + + private predicate interestingCallableSink(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSink(n) or config.isSink(n, _) | c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSink(mid, config) and callableStep(c, mid, config) + ) + } + + private newtype TCallableExt = + TCallable(DataFlowCallable c, Configuration config) { + interestingCallableSrc(c, config) or + interestingCallableSink(c, config) + } or + TCallableSrc() or + TCallableSink() + + private predicate callableExtSrc(TCallableSrc src) { any() } + + private predicate callableExtSink(TCallableSink sink) { any() } + + private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { + exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | + callableStep(c1, c2, config) and + ce1 = TCallable(c1, pragma[only_bind_into](config)) and + ce2 = TCallable(c2, pragma[only_bind_into](config)) + ) + or + exists(Node n, Configuration config | + ce1 = TCallableSrc() and + (config.isSource(n) or config.isSource(n, _)) and + ce2 = TCallable(getNodeEnclosingCallable(n), config) + ) + or + exists(Node n, Configuration config | + ce2 = TCallableSink() and + (config.isSink(n) or config.isSink(n, _)) and + ce1 = TCallable(getNodeEnclosingCallable(n), config) + ) + } + + private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) { + callableExtStepFwd(ce2, ce1) + } + + private int distSrcExt(TCallableExt c) = + shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) + + private int distSinkExt(TCallableExt c) = + shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result) + + private int distSrc(DataFlowCallable c, Configuration config) { + result = distSrcExt(TCallable(c, config)) - 1 + } + + private int distSink(DataFlowCallable c, Configuration config) { + result = distSinkExt(TCallable(c, config)) - 1 + } + + private newtype TPartialAccessPath = + TPartialNil(DataFlowType t) or + TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ + private class PartialAccessPath extends TPartialAccessPath { + abstract string toString(); + + TypedContent getHead() { this = TPartialCons(result, _) } + + int len() { + this = TPartialNil(_) and result = 0 + or + this = TPartialCons(_, result) + } + + DataFlowType getType() { + this = TPartialNil(result) + or + exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType()) + } + } + + private class PartialAccessPathNil extends PartialAccessPath, TPartialNil { + override string toString() { + exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t))) + } + } + + private class PartialAccessPathCons extends PartialAccessPath, TPartialCons { + override string toString() { + exists(TypedContent tc, int len | this = TPartialCons(tc, len) | + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private newtype TRevPartialAccessPath = + TRevPartialNil() or + TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `Content`s, but only the first + * element of the list and its length are tracked. + */ + private class RevPartialAccessPath extends TRevPartialAccessPath { + abstract string toString(); + + Content getHead() { this = TRevPartialCons(result, _) } + + int len() { + this = TRevPartialNil() and result = 0 + or + this = TRevPartialCons(_, result) + } + } + + private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil { + override string toString() { result = "" } + } + + private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons { + override string toString() { + exists(Content c, int len | this = TRevPartialCons(c, len) | + if len = 1 + then result = "[" + c.toString() + "]" + else result = "[" + c.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private predicate relevantState(FlowState state) { + sourceNode(_, state, _) or + sinkNode(_, state, _) or + additionalLocalStateStep(_, state, _, _, _) or + additionalLocalStateStep(_, _, _, state, _) or + additionalJumpStateStep(_, state, _, _, _) or + additionalJumpStateStep(_, _, _, state, _) + } + + private newtype TSummaryCtx1 = + TSummaryCtx1None() or + TSummaryCtx1Param(ParamNodeEx p) + + private newtype TSummaryCtx2 = + TSummaryCtx2None() or + TSummaryCtx2Some(FlowState s) { relevantState(s) } + + private newtype TSummaryCtx3 = + TSummaryCtx3None() or + TSummaryCtx3Some(PartialAccessPath ap) + + private newtype TRevSummaryCtx1 = + TRevSummaryCtx1None() or + TRevSummaryCtx1Some(ReturnPosition pos) + + private newtype TRevSummaryCtx2 = + TRevSummaryCtx2None() or + TRevSummaryCtx2Some(FlowState s) { relevantState(s) } + + private newtype TRevSummaryCtx3 = + TRevSummaryCtx3None() or + TRevSummaryCtx3Some(RevPartialAccessPath ap) + + private newtype TPartialPathNode = + TPartialPathNodeFwd( + NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + sourceNode(node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap = TPartialNil(node.getDataFlowType()) and + exists(config.explorationLimit()) + or + partialPathNodeMk0(node, state, cc, sc1, sc2, sc3, ap, config) and + distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit() + } or + TPartialPathNodeRev( + NodeEx node, FlowState state, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, TRevSummaryCtx3 sc3, + RevPartialAccessPath ap, Configuration config + ) { + sinkNode(node, state, config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = TRevPartialNil() and + exists(config.explorationLimit()) + or + exists(PartialPathNodeRev mid | + revPartialPathStep(mid, node, state, sc1, sc2, sc3, ap, config) and + not clearsContentEx(node, ap.getHead()) and + ( + notExpectsContent(node) or + expectsContentEx(node, ap.getHead()) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) and + distSink(node.getEnclosingCallable(), config) <= config.explorationLimit() + ) + } + + pragma[nomagic] + private predicate partialPathNodeMk0( + NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStep(mid, node, state, cc, sc1, sc2, sc3, ap, config) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) and + not clearsContentEx(node, ap.getHead().getContent()) and + ( + notExpectsContent(node) or + expectsContentEx(node, ap.getHead().getContent()) + ) and + if node.asNode() instanceof CastingNode + then compatibleTypes(node.getDataFlowType(), ap.getType()) + else any() + ) + } + + /** + * A `Node` augmented with a call context, an access path, and a configuration. + */ + class PartialPathNode extends TPartialPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = this.getNodeEx().toString() + this.ppAp() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { + result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() + } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets the underlying `Node`. */ + final Node getNode() { this.getNodeEx().projectToNode() = result } + + FlowState getState() { none() } + + private NodeEx getNodeEx() { + result = this.(PartialPathNodeFwd).getNodeEx() or + result = this.(PartialPathNodeRev).getNodeEx() + } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + PartialPathNode getASuccessor() { none() } + + /** + * Gets the approximate distance to the nearest source measured in number + * of interprocedural steps. + */ + int getSourceDistance() { + result = distSrc(this.getNodeEx().getEnclosingCallable(), this.getConfiguration()) + } + + /** + * Gets the approximate distance to the nearest sink measured in number + * of interprocedural steps. + */ + int getSinkDistance() { + result = distSink(this.getNodeEx().getEnclosingCallable(), this.getConfiguration()) + } + + private string ppAp() { + exists(string s | + s = this.(PartialPathNodeFwd).getAp().toString() or + s = this.(PartialPathNodeRev).getAp().toString() + | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">" + } + + /** Holds if this is a source in a forward-flow path. */ + predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() } + + /** Holds if this is a sink in a reverse-flow path. */ + predicate isRevSink() { this.(PartialPathNodeRev).isSink() } + } + + /** + * Provides the query predicates needed to include a graph in a path-problem query. + */ + module PartialPathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } + } + + private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd { + NodeEx node; + FlowState state; + CallContext cc; + TSummaryCtx1 sc1; + TSummaryCtx2 sc2; + TSummaryCtx3 sc3; + PartialAccessPath ap; + Configuration config; + + PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, state, cc, sc1, sc2, sc3, ap, config) } + + NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + CallContext getCallContext() { result = cc } + + TSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TSummaryCtx2 getSummaryCtx2() { result = sc2 } + + TSummaryCtx3 getSummaryCtx3() { result = sc3 } + + PartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeFwd getASuccessor() { + partialPathStep(this, result.getNodeEx(), result.getState(), result.getCallContext(), + result.getSummaryCtx1(), result.getSummaryCtx2(), result.getSummaryCtx3(), result.getAp(), + result.getConfiguration()) + } + + predicate isSource() { + sourceNode(node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap instanceof TPartialNil + } + } + + private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev { + NodeEx node; + FlowState state; + TRevSummaryCtx1 sc1; + TRevSummaryCtx2 sc2; + TRevSummaryCtx3 sc3; + RevPartialAccessPath ap; + Configuration config; + + PartialPathNodeRev() { this = TPartialPathNodeRev(node, state, sc1, sc2, sc3, ap, config) } + + NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + TRevSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TRevSummaryCtx2 getSummaryCtx2() { result = sc2 } + + TRevSummaryCtx3 getSummaryCtx3() { result = sc3 } + + RevPartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeRev getASuccessor() { + revPartialPathStep(result, this.getNodeEx(), this.getState(), this.getSummaryCtx1(), + this.getSummaryCtx2(), this.getSummaryCtx3(), this.getAp(), this.getConfiguration()) + } + + predicate isSink() { + sinkNode(node, state, config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = TRevPartialNil() + } + } + + private predicate partialPathStep( + PartialPathNodeFwd mid, NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, + TSummaryCtx2 sc2, TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + not isUnreachableInCallCached(node.asNode(), cc.(CallContextSpecificCall).getCall()) and + ( + localFlowStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + additionalLocalStateStep(mid.getNodeEx(), mid.getState(), node, state, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + ) + or + jumpStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + additionalJumpStateStep(mid.getNodeEx(), mid.getState(), node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + partialPathStoreStep(mid, _, _, node, ap) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() + or + exists(PartialAccessPath ap0, TypedContent tc | + partialPathReadStep(mid, ap0, tc, node, cc, config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + apConsFwd(ap, tc, ap0, config) + ) + or + partialPathIntoCallable(mid, node, state, _, cc, sc1, sc2, sc3, _, ap, config) + or + partialPathOutOfCallable(mid, node, state, cc, ap, config) and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() + or + partialPathThroughCallable(mid, node, state, cc, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() + } + + bindingset[result, i] + private int unbindInt(int i) { pragma[only_bind_out](i) = pragma[only_bind_out](result) } + + pragma[inline] + private predicate partialPathStoreStep( + PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, NodeEx node, + PartialAccessPath ap2 + ) { + exists(NodeEx midNode, DataFlowType contentType | + midNode = mid.getNodeEx() and + ap1 = mid.getAp() and + store(midNode, tc, node, contentType, mid.getConfiguration()) and + ap2.getHead() = tc and + ap2.len() = unbindInt(ap1.len() + 1) and + compatibleTypes(ap1.getType(), contentType) + ) + } + + pragma[nomagic] + private predicate apConsFwd( + PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStoreStep(mid, ap1, tc, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathReadStep( + PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, NodeEx node, CallContext cc, + Configuration config + ) { + exists(NodeEx midNode | + midNode = mid.getNodeEx() and + ap = mid.getAp() and + read(midNode, tc.getContent(), node, pragma[only_bind_into](config)) and + ap.getHead() = tc and + pragma[only_bind_into](config) = mid.getConfiguration() and + cc = mid.getCallContext() + ) + } + + private predicate partialPathOutOfCallable0( + PartialPathNodeFwd mid, ReturnPosition pos, FlowState state, CallContext innercc, + PartialAccessPath ap, Configuration config + ) { + pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and + state = mid.getState() and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + ap = mid.getAp() and + config = mid.getConfiguration() + } + + pragma[nomagic] + private predicate partialPathOutOfCallable1( + PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, FlowState state, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + partialPathOutOfCallable0(mid, pos, state, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) + } + + private predicate partialPathOutOfCallable( + PartialPathNodeFwd mid, NodeEx out, FlowState state, CallContext cc, PartialAccessPath ap, + Configuration config + ) { + exists(ReturnKindExt kind, DataFlowCall call | + partialPathOutOfCallable1(mid, call, kind, state, cc, ap, config) + | + out.asNode() = kind.getAnOutNode(call) + ) + } + + pragma[noinline] + private predicate partialPathIntoArg( + PartialPathNodeFwd mid, ParameterPosition ppos, FlowState state, CallContext cc, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + exists(ArgNode arg, ArgumentPosition apos | + arg = mid.getNodeEx().asNode() and + state = mid.getState() and + cc = mid.getCallContext() and + arg.argumentOf(call, apos) and + ap = mid.getAp() and + config = mid.getConfiguration() and + parameterMatch(ppos, apos) + ) + } + + pragma[nomagic] + private predicate partialPathIntoCallable0( + PartialPathNodeFwd mid, DataFlowCallable callable, ParameterPosition pos, FlowState state, + CallContext outercc, DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + partialPathIntoArg(mid, pos, state, outercc, call, ap, config) and + callable = resolveCall(call, outercc) + } + + private predicate partialPathIntoCallable( + PartialPathNodeFwd mid, ParamNodeEx p, FlowState state, CallContext outercc, + CallContextCall innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, TSummaryCtx3 sc3, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + exists(ParameterPosition pos, DataFlowCallable callable | + partialPathIntoCallable0(mid, callable, pos, state, outercc, call, ap, config) and + p.isParameterOf(callable, pos) and + sc1 = TSummaryCtx1Param(p) and + sc2 = TSummaryCtx2Some(state) and + sc3 = TSummaryCtx3Some(ap) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) + } + + pragma[nomagic] + private predicate paramFlowsThroughInPartialPath( + ReturnKindExt kind, FlowState state, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeFwd mid, RetNodeEx ret | + mid.getNodeEx() = ret and + kind = ret.getKind() and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() and + ap = mid.getAp() + ) + } + + pragma[noinline] + private predicate partialPathThroughCallable0( + DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, FlowState state, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, TSummaryCtx3 sc3 | + partialPathIntoCallable(mid, _, _, cc, innercc, sc1, sc2, sc3, call, _, config) and + paramFlowsThroughInPartialPath(kind, state, innercc, sc1, sc2, sc3, ap, config) + ) + } + + private predicate partialPathThroughCallable( + PartialPathNodeFwd mid, NodeEx out, FlowState state, CallContext cc, PartialAccessPath ap, + Configuration config + ) { + exists(DataFlowCall call, ReturnKindExt kind | + partialPathThroughCallable0(call, mid, kind, state, cc, ap, config) and + out.asNode() = kind.getAnOutNode(call) + ) + } + + pragma[nomagic] + private predicate revPartialPathStep( + PartialPathNodeRev mid, NodeEx node, FlowState state, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, + TRevSummaryCtx3 sc3, RevPartialAccessPath ap, Configuration config + ) { + localFlowStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + additionalLocalStateStep(node, state, mid.getNodeEx(), mid.getState(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + jumpStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + additionalJumpStateStep(node, state, mid.getNodeEx(), mid.getState(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + revPartialPathReadStep(mid, _, _, node, ap) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() + or + exists(RevPartialAccessPath ap0, Content c | + revPartialPathStoreStep(mid, ap0, c, node, config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + apConsRev(ap, c, ap0, config) + ) + or + exists(ParamNodeEx p | + mid.getNodeEx() = p and + viableParamArgEx(_, p, node) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + or + exists(ReturnPosition pos | + revPartialPathIntoReturn(mid, pos, state, sc1, sc2, sc3, _, ap, config) and + pos = getReturnPosition(node.asNode()) + ) + or + revPartialPathThroughCallable(mid, node, state, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() + } + + pragma[inline] + private predicate revPartialPathReadStep( + PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, NodeEx node, + RevPartialAccessPath ap2 + ) { + exists(NodeEx midNode | + midNode = mid.getNodeEx() and + ap1 = mid.getAp() and + read(node, c, midNode, mid.getConfiguration()) and + ap2.getHead() = c and + ap2.len() = unbindInt(ap1.len() + 1) + ) + } + + pragma[nomagic] + private predicate apConsRev( + RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeRev mid | + revPartialPathReadStep(mid, ap1, c, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathStoreStep( + PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, NodeEx node, Configuration config + ) { + exists(NodeEx midNode, TypedContent tc | + midNode = mid.getNodeEx() and + ap = mid.getAp() and + store(node, tc, midNode, _, config) and + ap.getHead() = c and + config = mid.getConfiguration() and + tc.getContent() = c + ) + } + + pragma[nomagic] + private predicate revPartialPathIntoReturn( + PartialPathNodeRev mid, ReturnPosition pos, FlowState state, TRevSummaryCtx1Some sc1, + TRevSummaryCtx2Some sc2, TRevSummaryCtx3Some sc3, DataFlowCall call, RevPartialAccessPath ap, + Configuration config + ) { + exists(NodeEx out | + mid.getNodeEx() = out and + mid.getState() = state and + viableReturnPosOutEx(call, pos, out) and + sc1 = TRevSummaryCtx1Some(pos) and + sc2 = TRevSummaryCtx2Some(state) and + sc3 = TRevSummaryCtx3Some(ap) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathFlowsThrough( + ArgumentPosition apos, FlowState state, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, + TRevSummaryCtx3Some sc3, RevPartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeRev mid, ParamNodeEx p, ParameterPosition ppos | + mid.getNodeEx() = p and + mid.getState() = state and + p.getPosition() = ppos and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() and + parameterMatch(ppos, apos) + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable0( + DataFlowCall call, PartialPathNodeRev mid, ArgumentPosition pos, FlowState state, + RevPartialAccessPath ap, Configuration config + ) { + exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, TRevSummaryCtx3Some sc3 | + revPartialPathIntoReturn(mid, _, _, sc1, sc2, sc3, call, _, config) and + revPartialPathFlowsThrough(pos, state, sc1, sc2, sc3, ap, config) + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable( + PartialPathNodeRev mid, ArgNodeEx node, FlowState state, RevPartialAccessPath ap, + Configuration config + ) { + exists(DataFlowCall call, ArgumentPosition pos | + revPartialPathThroughCallable0(call, mid, pos, state, ap, config) and + node.asNode().(ArgNode).argumentOf(call, pos) + ) + } +} + +import FlowExploration + +private predicate partialFlow( + PartialPathNode source, PartialPathNode node, Configuration configuration +) { + source.getConfiguration() = configuration and + source.isFwdSource() and + node = source.getASuccessor+() +} + +private predicate revPartialFlow( + PartialPathNode node, PartialPathNode sink, Configuration configuration +) { + sink.getConfiguration() = configuration and + sink.isRevSink() and + node.getASuccessor+() = sink +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll new file mode 100644 index 000000000000..468f8640a784 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImpl4.qll @@ -0,0 +1,4450 @@ +/** + * Provides an implementation of global (interprocedural) data flow. This file + * re-exports the local (intraprocedural) data flow analysis from + * `DataFlowImplSpecific::Public` and adds a global analysis, mainly exposed + * through the `Configuration` class. This file exists in several identical + * copies, allowing queries to use multiple `Configuration` classes that depend + * on each other without introducing mutual recursion among those configurations. + */ + +private import DataFlowImplCommon +private import DataFlowImplSpecific::Private +import DataFlowImplSpecific::Public +import DataFlowImplCommonPublic + +/** + * A configuration of interprocedural data flow analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the global data flow library must define its own unique extension + * of this abstract class. To create a configuration, extend this class with + * a subclass whose characteristic predicate is a unique singleton string. + * For example, write + * + * ```ql + * class MyAnalysisConfiguration extends DataFlow::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isBarrier`. + * // Optionally override `isAdditionalFlowStep`. + * } + * ``` + * Conceptually, this defines a graph where the nodes are `DataFlow::Node`s and + * the edges are those data-flow steps that preserve the value of the node + * along with any additional edges defined by `isAdditionalFlowStep`. + * Specifying nodes in `isBarrier` will remove those nodes from the graph, and + * specifying nodes in `isBarrierIn` and/or `isBarrierOut` will remove in-going + * and/or out-going edges from those nodes, respectively. + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but two classes extending + * `DataFlow::Configuration` should never depend on each other. One of them + * should instead depend on a `DataFlow2::Configuration`, a + * `DataFlow3::Configuration`, or a `DataFlow4::Configuration`. + */ +abstract class Configuration extends string { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant data flow source. + */ + predicate isSource(Node source) { none() } + + /** + * Holds if `source` is a relevant data flow source with the given initial + * `state`. + */ + predicate isSource(Node source, FlowState state) { none() } + + /** + * Holds if `sink` is a relevant data flow sink. + */ + predicate isSink(Node sink) { none() } + + /** + * Holds if `sink` is a relevant data flow sink accepting `state`. + */ + predicate isSink(Node source, FlowState state) { none() } + + /** + * Holds if data flow through `node` is prohibited. This completely removes + * `node` from the data flow graph. + */ + predicate isBarrier(Node node) { none() } + + /** + * Holds if data flow through `node` is prohibited when the flow state is + * `state`. + */ + predicate isBarrier(Node node, FlowState state) { none() } + + /** Holds if data flow into `node` is prohibited. */ + predicate isBarrierIn(Node node) { none() } + + /** Holds if data flow out of `node` is prohibited. */ + predicate isBarrierOut(Node node) { none() } + + /** + * DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead. + * + * Holds if data flow through nodes guarded by `guard` is prohibited. + */ + deprecated predicate isBarrierGuard(BarrierGuard guard) { none() } + + /** + * DEPRECATED: Use `isBarrier` and `BarrierGuard` module instead. + * + * Holds if data flow through nodes guarded by `guard` is prohibited when + * the flow state is `state` + */ + deprecated predicate isBarrierGuard(BarrierGuard guard, FlowState state) { none() } + + /** + * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps. + */ + predicate isAdditionalFlowStep(Node node1, Node node2) { none() } + + /** + * Holds if data may flow from `node1` to `node2` in addition to the normal data-flow steps. + * This step is only applicable in `state1` and updates the flow state to `state2`. + */ + predicate isAdditionalFlowStep(Node node1, FlowState state1, Node node2, FlowState state2) { + none() + } + + /** + * Holds if an arbitrary number of implicit read steps of content `c` may be + * taken at `node`. + */ + predicate allowImplicitRead(Node node, ContentSet c) { none() } + + /** + * Gets the virtual dispatch branching limit when calculating field flow. + * This can be overridden to a smaller value to improve performance (a + * value of 0 disables field flow), or a larger value to get more results. + */ + int fieldFlowBranchLimit() { result = 2 } + + /** + * Gets a data flow configuration feature to add restrictions to the set of + * valid flow paths. + * + * - `FeatureHasSourceCallContext`: + * Assume that sources have some existing call context to disallow + * conflicting return-flow directly following the source. + * - `FeatureHasSinkCallContext`: + * Assume that sinks have some existing call context to disallow + * conflicting argument-to-parameter flow directly preceding the sink. + * - `FeatureEqualSourceSinkCallContext`: + * Implies both of the above and additionally ensures that the entire flow + * path preserves the call context. + */ + FlowFeature getAFeature() { none() } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + */ + predicate hasFlow(Node source, Node sink) { flowsTo(source, sink, this) } + + /** + * Holds if data may flow from `source` to `sink` for this configuration. + * + * The corresponding paths are generated from the end-points and the graph + * included in the module `PathGraph`. + */ + predicate hasFlowPath(PathNode source, PathNode sink) { flowsTo(source, sink, _, _, this) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowTo(Node sink) { this.hasFlow(_, sink) } + + /** + * Holds if data may flow from some source to `sink` for this configuration. + */ + predicate hasFlowToExpr(DataFlowExpr sink) { this.hasFlowTo(exprNode(sink)) } + + /** + * Gets the exploration limit for `hasPartialFlow` and `hasPartialFlowRev` + * measured in approximate number of interprocedural steps. + */ + int explorationLimit() { none() } + + /** + * Holds if hidden nodes should be included in the data flow graph. + * + * This feature should only be used for debugging or when the data flow graph + * is not visualized (for example in a `path-problem` query). + */ + predicate includeHiddenNodes() { none() } + + /** + * Holds if there is a partial data flow path from `source` to `node`. The + * approximate distance between `node` and the closest source is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards sink definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sources is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + */ + final predicate hasPartialFlow(PartialPathNode source, PartialPathNode node, int dist) { + partialFlow(source, node, this) and + dist = node.getSourceDistance() + } + + /** + * Holds if there is a partial data flow path from `node` to `sink`. The + * approximate distance between `node` and the closest sink is `dist` and + * is restricted to be less than or equal to `explorationLimit()`. This + * predicate completely disregards source definitions. + * + * This predicate is intended for data-flow exploration and debugging and may + * perform poorly if the number of sinks is too big and/or the exploration + * limit is set too high without using barriers. + * + * This predicate is disabled (has no results) by default. Override + * `explorationLimit()` with a suitable number to enable this predicate. + * + * To use this in a `path-problem` query, import the module `PartialPathGraph`. + * + * Note that reverse flow has slightly lower precision than the corresponding + * forward flow, as reverse flow disregards type pruning among other features. + */ + final predicate hasPartialFlowRev(PartialPathNode node, PartialPathNode sink, int dist) { + revPartialFlow(node, sink, this) and + dist = node.getSinkDistance() + } +} + +/** + * This class exists to prevent mutual recursion between the user-overridden + * member predicates of `Configuration` and the rest of the data-flow library. + * Good performance cannot be guaranteed in the presence of such recursion, so + * it should be replaced by using more than one copy of the data flow library. + */ +abstract private class ConfigurationRecursionPrevention extends Configuration { + bindingset[this] + ConfigurationRecursionPrevention() { any() } + + override predicate hasFlow(Node source, Node sink) { + strictcount(Node n | this.isSource(n)) < 0 + or + strictcount(Node n | this.isSource(n, _)) < 0 + or + strictcount(Node n | this.isSink(n)) < 0 + or + strictcount(Node n | this.isSink(n, _)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, n2)) < 0 + or + strictcount(Node n1, Node n2 | this.isAdditionalFlowStep(n1, _, n2, _)) < 0 + or + super.hasFlow(source, sink) + } +} + +private newtype TNodeEx = + TNodeNormal(Node n) or + TNodeImplicitRead(Node n, boolean hasRead) { + any(Configuration c).allowImplicitRead(n, _) and hasRead = [false, true] + } + +private class NodeEx extends TNodeEx { + string toString() { + result = this.asNode().toString() + or + exists(Node n | this.isImplicitReadNode(n, _) | result = n.toString() + " [Ext]") + } + + Node asNode() { this = TNodeNormal(result) } + + predicate isImplicitReadNode(Node n, boolean hasRead) { this = TNodeImplicitRead(n, hasRead) } + + Node projectToNode() { this = TNodeNormal(result) or this = TNodeImplicitRead(result, _) } + + pragma[nomagic] + private DataFlowCallable getEnclosingCallable0() { + nodeEnclosingCallable(this.projectToNode(), result) + } + + pragma[inline] + DataFlowCallable getEnclosingCallable() { + pragma[only_bind_out](this).getEnclosingCallable0() = pragma[only_bind_into](result) + } + + pragma[nomagic] + private DataFlowType getDataFlowType0() { nodeDataFlowType(this.asNode(), result) } + + pragma[inline] + DataFlowType getDataFlowType() { + pragma[only_bind_out](this).getDataFlowType0() = pragma[only_bind_into](result) + } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.projectToNode().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +private class ArgNodeEx extends NodeEx { + ArgNodeEx() { this.asNode() instanceof ArgNode } +} + +private class ParamNodeEx extends NodeEx { + ParamNodeEx() { this.asNode() instanceof ParamNode } + + predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) { + this.asNode().(ParamNode).isParameterOf(c, pos) + } + + ParameterPosition getPosition() { this.isParameterOf(_, result) } + + predicate allowParameterReturnInSelf() { allowParameterReturnInSelfCached(this.asNode()) } +} + +private class RetNodeEx extends NodeEx { + RetNodeEx() { this.asNode() instanceof ReturnNodeExt } + + ReturnPosition getReturnPosition() { result = getReturnPosition(this.asNode()) } + + ReturnKindExt getKind() { result = this.asNode().(ReturnNodeExt).getKind() } +} + +private predicate inBarrier(NodeEx node, Configuration config) { + exists(Node n | + node.asNode() = n and + config.isBarrierIn(n) + | + config.isSource(n) or config.isSource(n, _) + ) +} + +private predicate outBarrier(NodeEx node, Configuration config) { + exists(Node n | + node.asNode() = n and + config.isBarrierOut(n) + | + config.isSink(n) or config.isSink(n, _) + ) +} + +/** A bridge class to access the deprecated `isBarrierGuard`. */ +private class BarrierGuardGuardedNodeBridge extends Unit { + abstract predicate guardedNode(Node n, Configuration config); + + abstract predicate guardedNode(Node n, FlowState state, Configuration config); +} + +private class BarrierGuardGuardedNode extends BarrierGuardGuardedNodeBridge { + deprecated override predicate guardedNode(Node n, Configuration config) { + exists(BarrierGuard g | + config.isBarrierGuard(g) and + n = g.getAGuardedNode() + ) + } + + deprecated override predicate guardedNode(Node n, FlowState state, Configuration config) { + exists(BarrierGuard g | + config.isBarrierGuard(g, state) and + n = g.getAGuardedNode() + ) + } +} + +pragma[nomagic] +private predicate fullBarrier(NodeEx node, Configuration config) { + exists(Node n | node.asNode() = n | + config.isBarrier(n) + or + config.isBarrierIn(n) and + not config.isSource(n) and + not config.isSource(n, _) + or + config.isBarrierOut(n) and + not config.isSink(n) and + not config.isSink(n, _) + or + any(BarrierGuardGuardedNodeBridge b).guardedNode(n, config) + ) +} + +pragma[nomagic] +private predicate stateBarrier(NodeEx node, FlowState state, Configuration config) { + exists(Node n | node.asNode() = n | + config.isBarrier(n, state) + or + any(BarrierGuardGuardedNodeBridge b).guardedNode(n, state, config) + ) +} + +pragma[nomagic] +private predicate sourceNode(NodeEx node, FlowState state, Configuration config) { + ( + config.isSource(node.asNode()) and state instanceof FlowStateEmpty + or + config.isSource(node.asNode(), state) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) +} + +pragma[nomagic] +private predicate sinkNode(NodeEx node, FlowState state, Configuration config) { + ( + config.isSink(node.asNode()) and state instanceof FlowStateEmpty + or + config.isSink(node.asNode(), state) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) +} + +/** Provides the relevant barriers for a step from `node1` to `node2`. */ +pragma[inline] +private predicate stepFilter(NodeEx node1, NodeEx node2, Configuration config) { + not outBarrier(node1, config) and + not inBarrier(node2, config) and + not fullBarrier(node1, config) and + not fullBarrier(node2, config) +} + +/** + * Holds if data can flow in one local step from `node1` to `node2`. + */ +private predicate localFlowStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + simpleLocalFlowStepExt(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + stepFilter(node1, node2, config) + ) + or + exists(Node n | + config.allowImplicitRead(n, _) and + node1.asNode() = n and + node2.isImplicitReadNode(n, false) and + not fullBarrier(node1, config) + ) +} + +/** + * Holds if the additional step from `node1` to `node2` does not jump between callables. + */ +private predicate additionalLocalFlowStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) + ) + or + exists(Node n | + config.allowImplicitRead(n, _) and + node1.isImplicitReadNode(n, true) and + node2.asNode() = n and + not fullBarrier(node2, config) + ) +} + +private predicate additionalLocalStateStep( + NodeEx node1, FlowState s1, NodeEx node2, FlowState s2, Configuration config +) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), s1, pragma[only_bind_into](n2), s2) and + getNodeEnclosingCallable(n1) = getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not stateBarrier(node1, s1, config) and + not stateBarrier(node2, s2, config) + ) +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that discards call contexts. + */ +private predicate jumpStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + jumpStepCached(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + stepFilter(node1, node2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +/** + * Holds if the additional step from `node1` to `node2` jumps between callables. + */ +private predicate additionalJumpStep(NodeEx node1, NodeEx node2, Configuration config) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), pragma[only_bind_into](n2)) and + getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +private predicate additionalJumpStateStep( + NodeEx node1, FlowState s1, NodeEx node2, FlowState s2, Configuration config +) { + exists(Node n1, Node n2 | + node1.asNode() = n1 and + node2.asNode() = n2 and + config.isAdditionalFlowStep(pragma[only_bind_into](n1), s1, pragma[only_bind_into](n2), s2) and + getNodeEnclosingCallable(n1) != getNodeEnclosingCallable(n2) and + stepFilter(node1, node2, config) and + not stateBarrier(node1, s1, config) and + not stateBarrier(node2, s2, config) and + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) +} + +pragma[nomagic] +private predicate readSet(NodeEx node1, ContentSet c, NodeEx node2, Configuration config) { + readSet(pragma[only_bind_into](node1.asNode()), c, pragma[only_bind_into](node2.asNode())) and + stepFilter(node1, node2, config) + or + exists(Node n | + node2.isImplicitReadNode(n, true) and + node1.isImplicitReadNode(n, _) and + config.allowImplicitRead(n, c) + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate read(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(ContentSet cs | + readSet(node1, cs, node2, config) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate clearsContentEx(NodeEx n, Content c) { + exists(ContentSet cs | + clearsContentCached(n.asNode(), cs) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +// inline to reduce fan-out via `getAReadContent` +bindingset[c] +private predicate expectsContentEx(NodeEx n, Content c) { + exists(ContentSet cs | + expectsContentCached(n.asNode(), cs) and + pragma[only_bind_out](c) = pragma[only_bind_into](cs).getAReadContent() + ) +} + +pragma[nomagic] +private predicate notExpectsContent(NodeEx n) { not expectsContentCached(n.asNode(), _) } + +pragma[nomagic] +private predicate store( + NodeEx node1, TypedContent tc, NodeEx node2, DataFlowType contentType, Configuration config +) { + store(pragma[only_bind_into](node1.asNode()), tc, pragma[only_bind_into](node2.asNode()), + contentType) and + read(_, tc.getContent(), _, config) and + stepFilter(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutEx(DataFlowCall call, ReturnPosition pos, NodeEx out) { + viableReturnPosOut(call, pos, out.asNode()) +} + +pragma[nomagic] +private predicate viableParamArgEx(DataFlowCall call, ParamNodeEx p, ArgNodeEx arg) { + viableParamArg(call, p.asNode(), arg.asNode()) +} + +/** + * Holds if field flow should be used for the given configuration. + */ +private predicate useFieldFlow(Configuration config) { config.fieldFlowBranchLimit() >= 1 } + +private predicate hasSourceCallCtx(Configuration config) { + exists(FlowFeature feature | feature = config.getAFeature() | + feature instanceof FeatureHasSourceCallContext or + feature instanceof FeatureEqualSourceSinkCallContext + ) +} + +private predicate hasSinkCallCtx(Configuration config) { + exists(FlowFeature feature | feature = config.getAFeature() | + feature instanceof FeatureHasSinkCallContext or + feature instanceof FeatureEqualSourceSinkCallContext + ) +} + +private module Stage1 implements StageSig { + class ApApprox = Unit; + + class Ap = Unit; + + class ApOption = Unit; + + class Cc = boolean; + + /* Begin: Stage 1 logic. */ + /** + * Holds if `node` is reachable from a source in the configuration `config`. + * + * The Boolean `cc` records whether the node is reached through an + * argument in a call. + */ + predicate fwdFlow(NodeEx node, Cc cc, Configuration config) { + sourceNode(node, _, config) and + if hasSourceCallCtx(config) then cc = true else cc = false + or + exists(NodeEx mid | fwdFlow(mid, cc, config) | + localFlowStep(mid, node, config) or + additionalLocalFlowStep(mid, node, config) or + additionalLocalStateStep(mid, _, node, _, config) + ) + or + exists(NodeEx mid | fwdFlow(mid, _, config) and cc = false | + jumpStep(mid, node, config) or + additionalJumpStep(mid, node, config) or + additionalJumpStateStep(mid, _, node, _, config) + ) + or + // store + exists(NodeEx mid | + useFieldFlow(config) and + fwdFlow(mid, cc, config) and + store(mid, _, node, _, config) + ) + or + // read + exists(ContentSet c | + fwdFlowReadSet(c, node, cc, config) and + fwdFlowConsCandSet(c, _, config) + ) + or + // flow into a callable + exists(NodeEx arg | + fwdFlow(arg, _, config) and + viableParamArgEx(_, node, arg) and + cc = true and + not fullBarrier(node, config) + ) + or + // flow out of a callable + exists(DataFlowCall call | + fwdFlowOut(call, node, false, config) and + cc = false + or + fwdFlowOutFromArg(call, node, config) and + fwdFlowIsEntered(call, cc, config) + ) + } + + private predicate fwdFlow(NodeEx node, Configuration config) { fwdFlow(node, _, config) } + + pragma[nomagic] + private predicate fwdFlowReadSet(ContentSet c, NodeEx node, Cc cc, Configuration config) { + exists(NodeEx mid | + fwdFlow(mid, cc, config) and + readSet(mid, c, node, config) + ) + } + + /** + * Holds if `c` is the target of a store in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Content c, Configuration config) { + exists(NodeEx mid, NodeEx node, TypedContent tc | + not fullBarrier(node, config) and + useFieldFlow(config) and + fwdFlow(mid, _, config) and + store(mid, tc, node, _, config) and + c = tc.getContent() + ) + } + + /** + * Holds if `cs` may be interpreted in a read as the target of some store + * into `c`, in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowConsCandSet(ContentSet cs, Content c, Configuration config) { + fwdFlowConsCand(c, config) and + c = cs.getAReadContent() + } + + pragma[nomagic] + private predicate fwdFlowReturnPosition(ReturnPosition pos, Cc cc, Configuration config) { + exists(RetNodeEx ret | + fwdFlow(ret, cc, config) and + ret.getReturnPosition() = pos + ) + } + + pragma[nomagic] + private predicate fwdFlowOut(DataFlowCall call, NodeEx out, Cc cc, Configuration config) { + exists(ReturnPosition pos | + fwdFlowReturnPosition(pos, cc, config) and + viableReturnPosOutEx(call, pos, out) and + not fullBarrier(out, config) + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg(DataFlowCall call, NodeEx out, Configuration config) { + fwdFlowOut(call, out, true, config) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered(DataFlowCall call, Cc cc, Configuration config) { + exists(ArgNodeEx arg | + fwdFlow(arg, cc, config) and + viableParamArgEx(call, _, arg) + ) + } + + private predicate stateStepFwd(FlowState state1, FlowState state2, Configuration config) { + exists(NodeEx node1 | + additionalLocalStateStep(node1, state1, _, state2, config) or + additionalJumpStateStep(node1, state1, _, state2, config) + | + fwdFlow(node1, config) + ) + } + + private predicate fwdFlowState(FlowState state, Configuration config) { + sourceNode(_, state, config) + or + exists(FlowState state0 | + fwdFlowState(state0, config) and + stateStepFwd(state0, state, config) + ) + } + + /** + * Holds if `node` is part of a path from a source to a sink in the + * configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from + * the enclosing callable in order to reach a sink. + */ + pragma[nomagic] + predicate revFlow(NodeEx node, boolean toReturn, Configuration config) { + revFlow0(node, toReturn, config) and + fwdFlow(node, config) + } + + pragma[nomagic] + private predicate revFlow0(NodeEx node, boolean toReturn, Configuration config) { + exists(FlowState state | + fwdFlow(node, pragma[only_bind_into](config)) and + sinkNode(node, state, config) and + fwdFlowState(state, pragma[only_bind_into](config)) and + if hasSinkCallCtx(config) then toReturn = true else toReturn = false + ) + or + exists(NodeEx mid | revFlow(mid, toReturn, config) | + localFlowStep(node, mid, config) or + additionalLocalFlowStep(node, mid, config) or + additionalLocalStateStep(node, _, mid, _, config) + ) + or + exists(NodeEx mid | revFlow(mid, _, config) and toReturn = false | + jumpStep(node, mid, config) or + additionalJumpStep(node, mid, config) or + additionalJumpStateStep(node, _, mid, _, config) + ) + or + // store + exists(Content c | + revFlowStore(c, node, toReturn, config) and + revFlowConsCand(c, config) + ) + or + // read + exists(NodeEx mid, ContentSet c | + readSet(node, c, mid, config) and + fwdFlowConsCandSet(c, _, pragma[only_bind_into](config)) and + revFlow(mid, toReturn, pragma[only_bind_into](config)) + ) + or + // flow into a callable + exists(DataFlowCall call | + revFlowIn(call, node, false, config) and + toReturn = false + or + revFlowInToReturn(call, node, config) and + revFlowIsReturned(call, toReturn, config) + ) + or + // flow out of a callable + exists(ReturnPosition pos | + revFlowOut(pos, config) and + node.(RetNodeEx).getReturnPosition() = pos and + toReturn = true + ) + } + + /** + * Holds if `c` is the target of a read in the flow covered by `revFlow`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Content c, Configuration config) { + exists(NodeEx mid, NodeEx node, ContentSet cs | + fwdFlow(node, pragma[only_bind_into](config)) and + readSet(node, cs, mid, config) and + fwdFlowConsCandSet(cs, c, pragma[only_bind_into](config)) and + revFlow(pragma[only_bind_into](mid), _, pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + private predicate revFlowStore(Content c, NodeEx node, boolean toReturn, Configuration config) { + exists(NodeEx mid, TypedContent tc | + revFlow(mid, toReturn, pragma[only_bind_into](config)) and + fwdFlowConsCand(c, pragma[only_bind_into](config)) and + store(node, tc, mid, _, config) and + c = tc.getContent() + ) + } + + /** + * Holds if `c` is the target of both a read and a store in the flow covered + * by `revFlow`. + */ + pragma[nomagic] + predicate revFlowIsReadAndStored(Content c, Configuration conf) { + revFlowConsCand(c, conf) and + revFlowStore(c, _, _, conf) + } + + pragma[nomagic] + predicate viableReturnPosOutNodeCandFwd1( + DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config + ) { + fwdFlowReturnPosition(pos, _, config) and + viableReturnPosOutEx(call, pos, out) + } + + pragma[nomagic] + private predicate revFlowOut(ReturnPosition pos, Configuration config) { + exists(DataFlowCall call, NodeEx out | + revFlow(out, _, config) and + viableReturnPosOutNodeCandFwd1(call, pos, out, config) + ) + } + + pragma[nomagic] + predicate viableParamArgNodeCandFwd1( + DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config + ) { + viableParamArgEx(call, p, arg) and + fwdFlow(arg, config) + } + + pragma[nomagic] + private predicate revFlowIn( + DataFlowCall call, ArgNodeEx arg, boolean toReturn, Configuration config + ) { + exists(ParamNodeEx p | + revFlow(p, toReturn, config) and + viableParamArgNodeCandFwd1(call, p, arg, config) + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn(DataFlowCall call, ArgNodeEx arg, Configuration config) { + revFlowIn(call, arg, true, config) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned(DataFlowCall call, boolean toReturn, Configuration config) { + exists(NodeEx out | + revFlow(out, toReturn, config) and + fwdFlowOutFromArg(call, out, config) + ) + } + + private predicate stateStepRev(FlowState state1, FlowState state2, Configuration config) { + exists(NodeEx node1, NodeEx node2 | + additionalLocalStateStep(node1, state1, node2, state2, config) or + additionalJumpStateStep(node1, state1, node2, state2, config) + | + revFlow(node1, _, pragma[only_bind_into](config)) and + revFlow(node2, _, pragma[only_bind_into](config)) and + fwdFlowState(state1, pragma[only_bind_into](config)) and + fwdFlowState(state2, pragma[only_bind_into](config)) + ) + } + + predicate revFlowState(FlowState state, Configuration config) { + exists(NodeEx node | + sinkNode(node, state, config) and + revFlow(node, _, pragma[only_bind_into](config)) and + fwdFlowState(state, pragma[only_bind_into](config)) + ) + or + exists(FlowState state0 | + revFlowState(state0, config) and + stateStepRev(state, state0, config) + ) + } + + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Content c | + revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + revFlow(node2, pragma[only_bind_into](config)) and + store(node1, tc, node2, contentType, config) and + c = tc.getContent() and + exists(ap1) + ) + } + + pragma[nomagic] + predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config) { + revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + read(n1, c, n2, pragma[only_bind_into](config)) and + revFlow(n2, pragma[only_bind_into](config)) + } + + pragma[nomagic] + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, config) } + + bindingset[node, state, config] + predicate revFlow(NodeEx node, FlowState state, Ap ap, Configuration config) { + revFlow(node, _, pragma[only_bind_into](config)) and + exists(state) and + exists(ap) + } + + private predicate throughFlowNodeCand(NodeEx node, Configuration config) { + revFlow(node, true, config) and + fwdFlow(node, true, config) and + not inBarrier(node, config) and + not outBarrier(node, config) + } + + /** Holds if flow may return from `callable`. */ + pragma[nomagic] + private predicate returnFlowCallableNodeCand( + DataFlowCallable callable, ReturnKindExt kind, Configuration config + ) { + exists(RetNodeEx ret | + throughFlowNodeCand(ret, config) and + callable = ret.getEnclosingCallable() and + kind = ret.getKind() + ) + } + + /** + * Holds if flow may enter through `p` and reach a return node making `p` a + * candidate for the origin of a summary. + */ + pragma[nomagic] + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(ReturnKindExt kind | + throughFlowNodeCand(p, config) and + returnFlowCallableNodeCand(c, kind, config) and + p.getEnclosingCallable() = c and + exists(ap) and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = p.getPosition() + or + p.allowParameterReturnInSelf() + ) + ) + } + + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists(ArgNodeEx arg, boolean toReturn | + revFlow(arg, toReturn, config) and + revFlowInToReturn(call, arg, config) and + revFlowIsReturned(call, toReturn, config) + ) + } + + predicate stats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples, Configuration config + ) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, config)) and + fields = count(Content f0 | fwdFlowConsCand(f0, config)) and + conscand = -1 and + states = count(FlowState state | fwdFlowState(state, config)) and + tuples = count(NodeEx n, boolean b | fwdFlow(n, b, config)) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, config)) and + fields = count(Content f0 | revFlowConsCand(f0, config)) and + conscand = -1 and + states = count(FlowState state | revFlowState(state, config)) and + tuples = count(NodeEx n, boolean b | revFlow(n, b, config)) + } + /* End: Stage 1 logic. */ +} + +pragma[noinline] +private predicate localFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) { + Stage1::revFlow(node2, config) and + localFlowStep(node1, node2, config) +} + +pragma[noinline] +private predicate additionalLocalFlowStepNodeCand1(NodeEx node1, NodeEx node2, Configuration config) { + Stage1::revFlow(node2, config) and + additionalLocalFlowStep(node1, node2, config) +} + +pragma[nomagic] +private predicate viableReturnPosOutNodeCand1( + DataFlowCall call, ReturnPosition pos, NodeEx out, Configuration config +) { + Stage1::revFlow(out, config) and + Stage1::viableReturnPosOutNodeCandFwd1(call, pos, out, config) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, RetNodeEx ret, NodeEx out, Configuration config +) { + viableReturnPosOutNodeCand1(call, ret.getReturnPosition(), out, config) and + Stage1::revFlow(ret, config) and + not outBarrier(ret, config) and + not inBarrier(out, config) +} + +pragma[nomagic] +private predicate viableParamArgNodeCand1( + DataFlowCall call, ParamNodeEx p, ArgNodeEx arg, Configuration config +) { + Stage1::viableParamArgNodeCandFwd1(call, p, arg, config) and + Stage1::revFlow(arg, config) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, Configuration config +) { + viableParamArgNodeCand1(call, p, arg, config) and + Stage1::revFlow(p, config) and + not outBarrier(arg, config) and + not inBarrier(p, config) +} + +/** + * Gets the amount of forward branching on the origin of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int branch(NodeEx n1, Configuration conf) { + result = + strictcount(NodeEx n | + flowOutOfCallNodeCand1(_, n1, n, conf) or flowIntoCallNodeCand1(_, n1, n, conf) + ) +} + +/** + * Gets the amount of backward branching on the target of a cross-call path + * edge in the graph of paths between sources and sinks that ignores call + * contexts. + */ +private int join(NodeEx n2, Configuration conf) { + result = + strictcount(NodeEx n | + flowOutOfCallNodeCand1(_, n, n2, conf) or flowIntoCallNodeCand1(_, n, n2, conf) + ) +} + +/** + * Holds if data can flow out of `call` from `ret` to `out`, either + * through a `ReturnNode` or through an argument that has been mutated, and + * that this step is part of a path from a source to a sink. The + * `allowsFieldFlow` flag indicates whether the branching is within the limit + * specified by the configuration. + */ +pragma[nomagic] +private predicate flowOutOfCallNodeCand1( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, ret, out, config) and + exists(int b, int j | + b = branch(ret, config) and + j = join(out, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +/** + * Holds if data can flow into `call` and that this step is part of a + * path from a source to a sink. The `allowsFieldFlow` flag indicates whether + * the branching is within the limit specified by the configuration. + */ +pragma[nomagic] +private predicate flowIntoCallNodeCand1( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config +) { + flowIntoCallNodeCand1(call, arg, p, config) and + exists(int b, int j | + b = branch(arg, config) and + j = join(p, config) and + if b.minimum(j) <= config.fieldFlowBranchLimit() + then allowsFieldFlow = true + else allowsFieldFlow = false + ) +} + +private signature module StageSig { + class Ap; + + predicate revFlow(NodeEx node, Configuration config); + + bindingset[node, state, config] + predicate revFlow(NodeEx node, FlowState state, Ap ap, Configuration config); + + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config); + + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config); + + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ); + + predicate readStepCand(NodeEx n1, Content c, NodeEx n2, Configuration config); +} + +private module MkStage { + class ApApprox = PrevStage::Ap; + + signature module StageParam { + class Ap; + + class ApNil extends Ap; + + bindingset[result, ap] + ApApprox getApprox(Ap ap); + + ApNil getApNil(NodeEx node); + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail); + + Content getHeadContent(Ap ap); + + class ApOption; + + ApOption apNone(); + + ApOption apSome(Ap ap); + + class Cc; + + class CcCall extends Cc; + + // TODO: member predicate on CcCall + predicate matchesCall(CcCall cc, DataFlowCall call); + + class CcNoCall extends Cc; + + Cc ccNone(); + + CcCall ccSomeCall(); + + class LocalCc; + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc); + + bindingset[call, c, innercc] + CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc); + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc); + + bindingset[node1, state1, config] + bindingset[node2, state2, config] + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ); + + predicate flowOutOfCall( + DataFlowCall call, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, Configuration config + ); + + predicate flowIntoCall( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ); + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config); + + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType); + } + + module Stage implements StageSig { + import Param + + /* Begin: Stage logic. */ + bindingset[result, apa] + private ApApprox unbindApa(ApApprox apa) { + pragma[only_bind_out](apa) = pragma[only_bind_out](result) + } + + pragma[nomagic] + private predicate flowThroughOutOfCall( + DataFlowCall call, CcCall ccc, RetNodeEx ret, NodeEx out, boolean allowsFieldFlow, + Configuration config + ) { + flowOutOfCall(call, ret, out, allowsFieldFlow, pragma[only_bind_into](config)) and + PrevStage::callMayFlowThroughRev(call, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(_, ret.getEnclosingCallable(), _, + pragma[only_bind_into](config)) and + matchesCall(ccc, call) + } + + /** + * Holds if `node` is reachable with access path `ap` from a source in the + * configuration `config`. + * + * The call context `cc` records whether the node is reached through an + * argument in a call, and if so, `argAp` records the access path of that + * argument. + */ + pragma[nomagic] + predicate fwdFlow( + NodeEx node, FlowState state, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + fwdFlow0(node, state, cc, argAp, ap, config) and + PrevStage::revFlow(node, state, unbindApa(getApprox(ap)), config) and + filter(node, state, ap, config) + } + + pragma[nomagic] + private predicate fwdFlow0( + NodeEx node, FlowState state, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + sourceNode(node, state, config) and + (if hasSourceCallCtx(config) then cc = ccSomeCall() else cc = ccNone()) and + argAp = apNone() and + ap = getApNil(node) + or + exists(NodeEx mid, FlowState state0, Ap ap0, LocalCc localCc | + fwdFlow(mid, state0, cc, argAp, ap0, config) and + localCc = getLocalCc(mid, cc) + | + localStep(mid, state0, node, state, true, _, config, localCc) and + ap = ap0 + or + localStep(mid, state0, node, state, false, ap, config, localCc) and + ap0 instanceof ApNil + ) + or + exists(NodeEx mid | + fwdFlow(mid, pragma[only_bind_into](state), _, _, ap, pragma[only_bind_into](config)) and + jumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(mid, state, _, _, nil, pragma[only_bind_into](config)) and + additionalJumpStep(mid, node, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(mid, state0, _, _, nil, pragma[only_bind_into](config)) and + additionalJumpStateStep(mid, state0, node, state, config) and + cc = ccNone() and + argAp = apNone() and + ap = getApNil(node) + ) + or + // store + exists(TypedContent tc, Ap ap0 | + fwdFlowStore(_, ap0, tc, node, state, cc, argAp, config) and + ap = apCons(tc, ap0) + ) + or + // read + exists(Ap ap0, Content c | + fwdFlowRead(ap0, c, _, node, state, cc, argAp, config) and + fwdFlowConsCand(ap0, c, ap, config) + ) + or + // flow into a callable + exists(ApApprox apa | + fwdFlowIn(_, node, state, _, cc, _, ap, config) and + apa = getApprox(ap) and + if PrevStage::parameterMayFlowThrough(node, _, apa, config) + then argAp = apSome(ap) + else argAp = apNone() + ) + or + // flow out of a callable + fwdFlowOutNotFromArg(node, state, cc, argAp, ap, config) + or + exists(DataFlowCall call, Ap argAp0 | + fwdFlowOutFromArg(call, node, state, argAp0, ap, config) and + fwdFlowIsEntered(call, cc, argAp, argAp0, config) + ) + } + + pragma[nomagic] + private predicate fwdFlowStore( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, FlowState state, Cc cc, ApOption argAp, + Configuration config + ) { + exists(DataFlowType contentType | + fwdFlow(node1, state, cc, argAp, ap1, config) and + PrevStage::storeStepCand(node1, unbindApa(getApprox(ap1)), tc, node2, contentType, config) and + typecheckStore(ap1, contentType) + ) + } + + /** + * Holds if forward flow with access path `tail` reaches a store of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate fwdFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(TypedContent tc | + fwdFlowStore(_, tail, tc, _, _, _, _, config) and + tc.getContent() = c and + cons = apCons(tc, tail) + ) + } + + pragma[nomagic] + private predicate fwdFlowRead( + Ap ap, Content c, NodeEx node1, NodeEx node2, FlowState state, Cc cc, ApOption argAp, + Configuration config + ) { + fwdFlow(node1, state, cc, argAp, ap, config) and + PrevStage::readStepCand(node1, c, node2, config) and + getHeadContent(ap) = c + } + + pragma[nomagic] + private predicate fwdFlowIn( + DataFlowCall call, ParamNodeEx p, FlowState state, Cc outercc, Cc innercc, ApOption argAp, + Ap ap, Configuration config + ) { + exists(ArgNodeEx arg, boolean allowsFieldFlow | + fwdFlow(arg, state, outercc, argAp, ap, config) and + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + innercc = getCallContextCall(call, p.getEnclosingCallable(), outercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate fwdFlowOutNotFromArg( + NodeEx out, FlowState state, Cc ccOut, ApOption argAp, Ap ap, Configuration config + ) { + exists( + DataFlowCall call, RetNodeEx ret, boolean allowsFieldFlow, CcNoCall innercc, + DataFlowCallable inner + | + fwdFlow(ret, state, innercc, argAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + inner = ret.getEnclosingCallable() and + ccOut = getCallContextReturn(inner, call, innercc) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate fwdFlowOutFromArg( + DataFlowCall call, NodeEx out, FlowState state, Ap argAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, boolean allowsFieldFlow, CcCall ccc | + fwdFlow(ret, state, ccc, apSome(argAp), ap, config) and + flowThroughOutOfCall(call, ccc, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** + * Holds if an argument to `call` is reached in the flow covered by `fwdFlow` + * and data might flow through the target callable and back out at `call`. + */ + pragma[nomagic] + private predicate fwdFlowIsEntered( + DataFlowCall call, Cc cc, ApOption argAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p | + fwdFlowIn(call, p, _, cc, _, argAp, ap, config) and + PrevStage::parameterMayFlowThrough(p, _, unbindApa(getApprox(ap)), config) + ) + } + + pragma[nomagic] + private predicate storeStepFwd( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, Ap ap2, Configuration config + ) { + fwdFlowStore(node1, ap1, tc, node2, _, _, _, config) and + ap2 = apCons(tc, ap1) and + fwdFlowRead(ap2, tc.getContent(), _, _, _, _, _, config) + } + + private predicate readStepFwd( + NodeEx n1, Ap ap1, Content c, NodeEx n2, Ap ap2, Configuration config + ) { + fwdFlowRead(ap1, c, n1, n2, _, _, _, config) and + fwdFlowConsCand(ap1, c, ap2, config) + } + + pragma[nomagic] + private predicate callMayFlowThroughFwd(DataFlowCall call, Configuration config) { + exists(Ap argAp0, NodeEx out, FlowState state, Cc cc, ApOption argAp, Ap ap | + fwdFlow(out, state, pragma[only_bind_into](cc), pragma[only_bind_into](argAp), ap, + pragma[only_bind_into](config)) and + fwdFlowOutFromArg(call, out, state, argAp0, ap, config) and + fwdFlowIsEntered(pragma[only_bind_into](call), pragma[only_bind_into](cc), + pragma[only_bind_into](argAp), pragma[only_bind_into](argAp0), + pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + private predicate flowThroughIntoCall( + DataFlowCall call, ArgNodeEx arg, ParamNodeEx p, boolean allowsFieldFlow, Configuration config + ) { + flowIntoCall(call, arg, p, allowsFieldFlow, config) and + fwdFlow(arg, _, _, _, _, pragma[only_bind_into](config)) and + PrevStage::parameterMayFlowThrough(p, _, _, pragma[only_bind_into](config)) and + callMayFlowThroughFwd(call, pragma[only_bind_into](config)) + } + + pragma[nomagic] + private predicate returnNodeMayFlowThrough( + RetNodeEx ret, FlowState state, Ap ap, Configuration config + ) { + fwdFlow(ret, state, any(CcCall ccc), apSome(_), ap, config) + } + + /** + * Holds if `node` with access path `ap` is part of a path from a source to a + * sink in the configuration `config`. + * + * The Boolean `toReturn` records whether the node must be returned from the + * enclosing callable in order to reach a sink, and if so, `returnAp` records + * the access path of the returned value. + */ + pragma[nomagic] + predicate revFlow( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + revFlow0(node, state, toReturn, returnAp, ap, config) and + fwdFlow(node, state, _, _, ap, config) + } + + pragma[nomagic] + private predicate revFlow0( + NodeEx node, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + fwdFlow(node, state, _, _, ap, config) and + sinkNode(node, state, config) and + (if hasSinkCallCtx(config) then toReturn = true else toReturn = false) and + returnAp = apNone() and + ap instanceof ApNil + or + exists(NodeEx mid, FlowState state0 | + localStep(node, state, mid, state0, true, _, config, _) and + revFlow(mid, state0, toReturn, returnAp, ap, config) + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(node, pragma[only_bind_into](state), _, _, ap, pragma[only_bind_into](config)) and + localStep(node, pragma[only_bind_into](state), mid, state0, false, _, config, _) and + revFlow(mid, state0, toReturn, returnAp, nil, pragma[only_bind_into](config)) and + ap instanceof ApNil + ) + or + exists(NodeEx mid | + jumpStep(node, mid, config) and + revFlow(mid, state, _, _, ap, config) and + toReturn = false and + returnAp = apNone() + ) + or + exists(NodeEx mid, ApNil nil | + fwdFlow(node, _, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStep(node, mid, config) and + revFlow(pragma[only_bind_into](mid), state, _, _, nil, pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + exists(NodeEx mid, FlowState state0, ApNil nil | + fwdFlow(node, _, _, _, ap, pragma[only_bind_into](config)) and + additionalJumpStateStep(node, state, mid, state0, config) and + revFlow(pragma[only_bind_into](mid), pragma[only_bind_into](state0), _, _, nil, + pragma[only_bind_into](config)) and + toReturn = false and + returnAp = apNone() and + ap instanceof ApNil + ) + or + // store + exists(Ap ap0, Content c | + revFlowStore(ap0, c, ap, node, state, _, _, toReturn, returnAp, config) and + revFlowConsCand(ap0, c, ap, config) + ) + or + // read + exists(NodeEx mid, Ap ap0 | + revFlow(mid, state, toReturn, returnAp, ap0, config) and + readStepFwd(node, ap, _, mid, ap0, config) + ) + or + // flow into a callable + revFlowInNotToReturn(node, state, returnAp, ap, config) and + toReturn = false + or + exists(DataFlowCall call, Ap returnAp0 | + revFlowInToReturn(call, node, state, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + or + // flow out of a callable + revFlowOut(_, node, state, _, _, ap, config) and + toReturn = true and + if returnNodeMayFlowThrough(node, state, ap, config) + then returnAp = apSome(ap) + else returnAp = apNone() + } + + pragma[nomagic] + private predicate revFlowStore( + Ap ap0, Content c, Ap ap, NodeEx node, FlowState state, TypedContent tc, NodeEx mid, + boolean toReturn, ApOption returnAp, Configuration config + ) { + revFlow(mid, state, toReturn, returnAp, ap0, config) and + storeStepFwd(node, ap, tc, mid, ap0, config) and + tc.getContent() = c + } + + /** + * Holds if reverse flow with access path `tail` reaches a read of `c` + * resulting in access path `cons`. + */ + pragma[nomagic] + private predicate revFlowConsCand(Ap cons, Content c, Ap tail, Configuration config) { + exists(NodeEx mid, Ap tail0 | + revFlow(mid, _, _, _, tail, config) and + tail = pragma[only_bind_into](tail0) and + readStepFwd(_, cons, c, mid, tail0, config) + ) + } + + pragma[nomagic] + private predicate revFlowOut( + DataFlowCall call, RetNodeEx ret, FlowState state, boolean toReturn, ApOption returnAp, Ap ap, + Configuration config + ) { + exists(NodeEx out, boolean allowsFieldFlow | + revFlow(out, state, toReturn, returnAp, ap, config) and + flowOutOfCall(call, ret, out, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate revFlowInNotToReturn( + ArgNodeEx arg, FlowState state, ApOption returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, state, false, returnAp, ap, config) and + flowIntoCall(_, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + pragma[nomagic] + private predicate revFlowInToReturn( + DataFlowCall call, ArgNodeEx arg, FlowState state, Ap returnAp, Ap ap, Configuration config + ) { + exists(ParamNodeEx p, boolean allowsFieldFlow | + revFlow(p, state, true, apSome(returnAp), ap, config) and + flowThroughIntoCall(call, arg, p, allowsFieldFlow, config) and + if allowsFieldFlow = false then ap instanceof ApNil else any() + ) + } + + /** + * Holds if an output from `call` is reached in the flow covered by `revFlow` + * and data might flow through the target callable resulting in reverse flow + * reaching an argument of `call`. + */ + pragma[nomagic] + private predicate revFlowIsReturned( + DataFlowCall call, boolean toReturn, ApOption returnAp, Ap ap, Configuration config + ) { + exists(RetNodeEx ret, FlowState state, CcCall ccc | + revFlowOut(call, ret, state, toReturn, returnAp, ap, config) and + fwdFlow(ret, state, ccc, apSome(_), ap, config) and + matchesCall(ccc, call) + ) + } + + pragma[nomagic] + predicate storeStepCand( + NodeEx node1, Ap ap1, TypedContent tc, NodeEx node2, DataFlowType contentType, + Configuration config + ) { + exists(Ap ap2, Content c | + PrevStage::storeStepCand(node1, _, tc, node2, contentType, config) and + revFlowStore(ap2, c, ap1, node1, _, tc, node2, _, _, config) and + revFlowConsCand(ap2, c, ap1, config) + ) + } + + predicate readStepCand(NodeEx node1, Content c, NodeEx node2, Configuration config) { + exists(Ap ap1, Ap ap2 | + revFlow(node2, _, _, _, pragma[only_bind_into](ap2), pragma[only_bind_into](config)) and + readStepFwd(node1, ap1, c, node2, ap2, config) and + revFlowStore(ap1, c, pragma[only_bind_into](ap2), _, _, _, _, _, _, + pragma[only_bind_into](config)) + ) + } + + predicate revFlow(NodeEx node, FlowState state, Configuration config) { + revFlow(node, state, _, _, _, config) + } + + predicate revFlow(NodeEx node, FlowState state, Ap ap, Configuration config) { + revFlow(node, state, _, _, ap, config) + } + + pragma[nomagic] + predicate revFlow(NodeEx node, Configuration config) { revFlow(node, _, _, _, _, config) } + + // use an alias as a workaround for bad functionality-induced joins + pragma[nomagic] + predicate revFlowAlias(NodeEx node, Configuration config) { revFlow(node, _, _, _, _, config) } + + // use an alias as a workaround for bad functionality-induced joins + pragma[nomagic] + predicate revFlowAlias(NodeEx node, FlowState state, Ap ap, Configuration config) { + revFlow(node, state, ap, config) + } + + private predicate fwdConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepFwd(_, ap, tc, _, _, config) + } + + private predicate revConsCand(TypedContent tc, Ap ap, Configuration config) { + storeStepCand(_, ap, tc, _, _, config) + } + + private predicate validAp(Ap ap, Configuration config) { + revFlow(_, _, _, _, ap, config) and ap instanceof ApNil + or + exists(TypedContent head, Ap tail | + consCand(head, tail, config) and + ap = apCons(head, tail) + ) + } + + predicate consCand(TypedContent tc, Ap ap, Configuration config) { + revConsCand(tc, ap, config) and + validAp(ap, config) + } + + pragma[noinline] + private predicate parameterFlow( + ParamNodeEx p, Ap ap, Ap ap0, DataFlowCallable c, Configuration config + ) { + revFlow(p, _, true, apSome(ap0), ap, config) and + c = p.getEnclosingCallable() + } + + predicate parameterMayFlowThrough(ParamNodeEx p, DataFlowCallable c, Ap ap, Configuration config) { + exists(RetNodeEx ret, FlowState state, Ap ap0, ReturnKindExt kind, ParameterPosition pos | + parameterFlow(p, ap, ap0, c, config) and + c = ret.getEnclosingCallable() and + revFlow(pragma[only_bind_into](ret), pragma[only_bind_into](state), true, apSome(_), + pragma[only_bind_into](ap0), pragma[only_bind_into](config)) and + fwdFlow(ret, state, any(CcCall ccc), apSome(ap), ap0, config) and + kind = ret.getKind() and + p.getPosition() = pos and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + p.allowParameterReturnInSelf() + ) + ) + } + + pragma[nomagic] + predicate callMayFlowThroughRev(DataFlowCall call, Configuration config) { + exists( + Ap returnAp0, ArgNodeEx arg, FlowState state, boolean toReturn, ApOption returnAp, Ap ap + | + revFlow(arg, state, toReturn, returnAp, ap, config) and + revFlowInToReturn(call, arg, state, returnAp0, ap, config) and + revFlowIsReturned(call, toReturn, returnAp, returnAp0, config) + ) + } + + predicate stats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples, Configuration config + ) { + fwd = true and + nodes = count(NodeEx node | fwdFlow(node, _, _, _, _, config)) and + fields = count(TypedContent f0 | fwdConsCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | fwdConsCand(f0, ap, config)) and + states = count(FlowState state | fwdFlow(_, state, _, _, _, config)) and + tuples = + count(NodeEx n, FlowState state, Cc cc, ApOption argAp, Ap ap | + fwdFlow(n, state, cc, argAp, ap, config) + ) + or + fwd = false and + nodes = count(NodeEx node | revFlow(node, _, _, _, _, config)) and + fields = count(TypedContent f0 | consCand(f0, _, config)) and + conscand = count(TypedContent f0, Ap ap | consCand(f0, ap, config)) and + states = count(FlowState state | revFlow(_, state, _, _, _, config)) and + tuples = + count(NodeEx n, FlowState state, boolean b, ApOption retAp, Ap ap | + revFlow(n, state, b, retAp, ap, config) + ) + } + /* End: Stage logic. */ + } +} + +private module BooleanCallContext { + class Cc extends boolean { + Cc() { this in [true, false] } + } + + class CcCall extends Cc { + CcCall() { this = true } + } + + /** Holds if the call context may be `call`. */ + predicate matchesCall(CcCall cc, DataFlowCall call) { any() } + + class CcNoCall extends Cc { + CcNoCall() { this = false } + } + + Cc ccNone() { result = false } + + CcCall ccSomeCall() { result = true } + + class LocalCc = Unit; + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc) { any() } + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { any() } + + bindingset[call, c, innercc] + CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { any() } +} + +private module Level1CallContext { + class Cc = CallContext; + + class CcCall = CallContextCall; + + pragma[inline] + predicate matchesCall(CcCall cc, DataFlowCall call) { cc.matchesCall(call) } + + class CcNoCall = CallContextNoCall; + + Cc ccNone() { result instanceof CallContextAny } + + CcCall ccSomeCall() { result instanceof CallContextSomeCall } + + module NoLocalCallContext { + class LocalCc = Unit; + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc) { any() } + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + checkCallContextCall(outercc, call, c) and + if recordDataFlowCallSiteDispatch(call, c) + then result = TSpecificCall(call) + else result = TSomeCall() + } + } + + module LocalCallContext { + class LocalCc = LocalCallContext; + + bindingset[node, cc] + LocalCc getLocalCc(NodeEx node, Cc cc) { + result = + getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)), + node.getEnclosingCallable()) + } + + bindingset[call, c, outercc] + CcCall getCallContextCall(DataFlowCall call, DataFlowCallable c, Cc outercc) { + checkCallContextCall(outercc, call, c) and + if recordDataFlowCallSite(call, c) then result = TSpecificCall(call) else result = TSomeCall() + } + } + + bindingset[call, c, innercc] + CcNoCall getCallContextReturn(DataFlowCallable c, DataFlowCall call, Cc innercc) { + checkCallContextReturn(innercc, c, call) and + if reducedViableImplInReturn(c, call) then result = TReturn(c, call) else result = ccNone() + } +} + +private module Stage2Param implements MkStage::StageParam { + private module PrevStage = Stage1; + + class Ap extends boolean { + Ap() { this in [true, false] } + } + + class ApNil extends Ap { + ApNil() { this = false } + } + + bindingset[result, ap] + PrevStage::Ap getApprox(Ap ap) { any() } + + ApNil getApNil(NodeEx node) { Stage1::revFlow(node, _) and exists(result) } + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail) { result = true and exists(tc) and exists(tail) } + + pragma[inline] + Content getHeadContent(Ap ap) { exists(result) and ap = true } + + class ApOption = BooleanOption; + + ApOption apNone() { result = TBooleanNone() } + + ApOption apSome(Ap ap) { result = TBooleanSome(ap) } + + import Level1CallContext + import NoLocalCallContext + + bindingset[node1, state1, config] + bindingset[node2, state2, config] + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + ( + preservesValue = true and + localFlowStepNodeCand1(node1, node2, config) and + state1 = state2 + or + preservesValue = false and + additionalLocalFlowStepNodeCand1(node1, node2, config) and + state1 = state2 + or + preservesValue = false and + additionalLocalStateStep(node1, state1, node2, state2, config) + ) and + exists(ap) and + exists(lcc) + } + + predicate flowOutOfCall = flowOutOfCallNodeCand1/5; + + predicate flowIntoCall = flowIntoCallNodeCand1/5; + + pragma[nomagic] + private predicate expectsContentCand(NodeEx node, Configuration config) { + exists(Content c | + PrevStage::revFlow(node, pragma[only_bind_into](config)) and + PrevStage::revFlowIsReadAndStored(c, pragma[only_bind_into](config)) and + expectsContentEx(node, c) + ) + } + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { + PrevStage::revFlowState(state, pragma[only_bind_into](config)) and + exists(ap) and + not stateBarrier(node, state, config) and + ( + notExpectsContent(node) + or + ap = true and + expectsContentCand(node, config) + ) + } + + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } +} + +private module Stage2 implements StageSig { + import MkStage::Stage +} + +pragma[nomagic] +private predicate flowOutOfCallNodeCand2( + DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config +) { + flowOutOfCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node1, pragma[only_bind_into](config)) +} + +pragma[nomagic] +private predicate flowIntoCallNodeCand2( + DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow, + Configuration config +) { + flowIntoCallNodeCand1(call, node1, node2, allowsFieldFlow, config) and + Stage2::revFlow(node2, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node1, pragma[only_bind_into](config)) +} + +private module LocalFlowBigStep { + /** + * A node where some checking is required, and hence the big-step relation + * is not allowed to step over. + */ + private class FlowCheckNode extends NodeEx { + FlowCheckNode() { + castNode(this.asNode()) or + clearsContentCached(this.asNode(), _) or + expectsContentCached(this.asNode(), _) + } + } + + /** + * Holds if `node` can be the first node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowEntry(NodeEx node, FlowState state, Configuration config) { + Stage2::revFlow(node, state, config) and + ( + sourceNode(node, state, config) + or + jumpStep(_, node, config) + or + additionalJumpStep(_, node, config) + or + additionalJumpStateStep(_, _, node, state, config) + or + node instanceof ParamNodeEx + or + node.asNode() instanceof OutNodeExt + or + Stage2::storeStepCand(_, _, _, node, _, config) + or + Stage2::readStepCand(_, _, node, config) + or + node instanceof FlowCheckNode + or + exists(FlowState s | + additionalLocalStateStep(_, s, node, state, config) and + s != state + ) + ) + } + + /** + * Holds if `node` can be the last node in a maximal subsequence of local + * flow steps in a dataflow path. + */ + private predicate localFlowExit(NodeEx node, FlowState state, Configuration config) { + exists(NodeEx next | Stage2::revFlow(next, state, config) | + jumpStep(node, next, config) or + additionalJumpStep(node, next, config) or + flowIntoCallNodeCand1(_, node, next, config) or + flowOutOfCallNodeCand1(_, node, next, config) or + Stage2::storeStepCand(node, _, _, next, _, config) or + Stage2::readStepCand(node, _, next, config) + ) + or + exists(NodeEx next, FlowState s | Stage2::revFlow(next, s, config) | + additionalJumpStateStep(node, state, next, s, config) + or + additionalLocalStateStep(node, state, next, s, config) and + s != state + ) + or + Stage2::revFlow(node, state, config) and + node instanceof FlowCheckNode + or + sinkNode(node, state, config) + } + + pragma[noinline] + private predicate additionalLocalFlowStepNodeCand2( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, Configuration config + ) { + additionalLocalFlowStepNodeCand1(node1, node2, config) and + state1 = state2 and + Stage2::revFlow(node1, pragma[only_bind_into](state1), false, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node2, pragma[only_bind_into](state2), false, + pragma[only_bind_into](config)) + or + additionalLocalStateStep(node1, state1, node2, state2, config) and + Stage2::revFlow(node1, state1, false, pragma[only_bind_into](config)) and + Stage2::revFlowAlias(node2, state2, false, pragma[only_bind_into](config)) + } + + /** + * Holds if the local path from `node1` to `node2` is a prefix of a maximal + * subsequence of local flow steps in a dataflow path. + * + * This is the transitive closure of `[additional]localFlowStep` beginning + * at `localFlowEntry`. + */ + pragma[nomagic] + private predicate localFlowStepPlus( + NodeEx node1, FlowState state, NodeEx node2, boolean preservesValue, DataFlowType t, + Configuration config, LocalCallContext cc + ) { + not isUnreachableInCallCached(node2.asNode(), cc.(LocalCallContextSpecificCall).getCall()) and + ( + localFlowEntry(node1, pragma[only_bind_into](state), pragma[only_bind_into](config)) and + ( + localFlowStepNodeCand1(node1, node2, config) and + preservesValue = true and + t = node1.getDataFlowType() and // irrelevant dummy value + Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config)) + or + additionalLocalFlowStepNodeCand2(node1, state, node2, state, config) and + preservesValue = false and + t = node2.getDataFlowType() + ) and + node1 != node2 and + cc.relevantFor(node1.getEnclosingCallable()) and + not isUnreachableInCallCached(node1.asNode(), cc.(LocalCallContextSpecificCall).getCall()) + or + exists(NodeEx mid | + localFlowStepPlus(node1, pragma[only_bind_into](state), mid, preservesValue, t, + pragma[only_bind_into](config), cc) and + localFlowStepNodeCand1(mid, node2, config) and + not mid instanceof FlowCheckNode and + Stage2::revFlow(node2, pragma[only_bind_into](state), pragma[only_bind_into](config)) + ) + or + exists(NodeEx mid | + localFlowStepPlus(node1, state, mid, _, _, pragma[only_bind_into](config), cc) and + additionalLocalFlowStepNodeCand2(mid, state, node2, state, config) and + not mid instanceof FlowCheckNode and + preservesValue = false and + t = node2.getDataFlowType() + ) + ) + } + + /** + * Holds if `node1` can step to `node2` in one or more local steps and this + * path can occur as a maximal subsequence of local steps in a dataflow path. + */ + pragma[nomagic] + predicate localFlowBigStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + AccessPathFrontNil apf, Configuration config, LocalCallContext callContext + ) { + localFlowStepPlus(node1, state1, node2, preservesValue, apf.getType(), config, callContext) and + localFlowExit(node2, state1, config) and + state1 = state2 + or + additionalLocalFlowStepNodeCand2(node1, state1, node2, state2, config) and + state1 != state2 and + preservesValue = false and + apf = TFrontNil(node2.getDataFlowType()) and + callContext.relevantFor(node1.getEnclosingCallable()) and + not exists(DataFlowCall call | call = callContext.(LocalCallContextSpecificCall).getCall() | + isUnreachableInCallCached(node1.asNode(), call) or + isUnreachableInCallCached(node2.asNode(), call) + ) + } +} + +private import LocalFlowBigStep + +private module Stage3Param implements MkStage::StageParam { + private module PrevStage = Stage2; + + class Ap = AccessPathFront; + + class ApNil = AccessPathFrontNil; + + PrevStage::Ap getApprox(Ap ap) { result = ap.toBoolNonEmpty() } + + ApNil getApNil(NodeEx node) { + PrevStage::revFlow(node, _) and result = TFrontNil(node.getDataFlowType()) + } + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail) { result.getHead() = tc and exists(tail) } + + pragma[noinline] + Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathFrontOption; + + ApOption apNone() { result = TAccessPathFrontNone() } + + ApOption apSome(Ap ap) { result = TAccessPathFrontSome(ap) } + + import BooleanCallContext + + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, state1, node2, state2, preservesValue, ap, config, _) and exists(lcc) + } + + predicate flowOutOfCall = flowOutOfCallNodeCand2/5; + + predicate flowIntoCall = flowIntoCallNodeCand2/5; + + pragma[nomagic] + private predicate clearSet(NodeEx node, ContentSet c, Configuration config) { + PrevStage::revFlow(node, config) and + clearsContentCached(node.asNode(), c) + } + + pragma[nomagic] + private predicate clearContent(NodeEx node, Content c, Configuration config) { + exists(ContentSet cs | + PrevStage::readStepCand(_, pragma[only_bind_into](c), _, pragma[only_bind_into](config)) and + c = cs.getAReadContent() and + clearSet(node, cs, pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + private predicate clear(NodeEx node, Ap ap, Configuration config) { + clearContent(node, ap.getHead().getContent(), config) + } + + pragma[nomagic] + private predicate expectsContentCand(NodeEx node, Ap ap, Configuration config) { + exists(Content c | + PrevStage::revFlow(node, pragma[only_bind_into](config)) and + PrevStage::readStepCand(_, c, _, pragma[only_bind_into](config)) and + expectsContentEx(node, c) and + c = ap.getHead().getContent() + ) + } + + pragma[nomagic] + private predicate castingNodeEx(NodeEx node) { node.asNode() instanceof CastingNode } + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { + exists(state) and + exists(config) and + not clear(node, ap, config) and + (if castingNodeEx(node) then compatibleTypes(node.getDataFlowType(), ap.getType()) else any()) and + ( + notExpectsContent(node) + or + expectsContentCand(node, ap, config) + ) + } + + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType) { + // We need to typecheck stores here, since reverse flow through a getter + // might have a different type here compared to inside the getter. + compatibleTypes(ap.getType(), contentType) + } +} + +private module Stage3 implements StageSig { + import MkStage::Stage +} + +/** + * Holds if `argApf` is recorded as the summary context for flow reaching `node` + * and remains relevant for the following pruning stage. + */ +private predicate flowCandSummaryCtx( + NodeEx node, FlowState state, AccessPathFront argApf, Configuration config +) { + exists(AccessPathFront apf | + Stage3::revFlow(node, state, true, _, apf, config) and + Stage3::fwdFlow(node, state, any(Stage3::CcCall ccc), TAccessPathFrontSome(argApf), apf, config) + ) +} + +/** + * Holds if a length 2 access path approximation with the head `tc` is expected + * to be expensive. + */ +private predicate expensiveLen2unfolding(TypedContent tc, Configuration config) { + exists(int tails, int nodes, int apLimit, int tupleLimit | + tails = strictcount(AccessPathFront apf | Stage3::consCand(tc, apf, config)) and + nodes = + strictcount(NodeEx n, FlowState state | + Stage3::revFlow(n, state, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + or + flowCandSummaryCtx(n, state, any(AccessPathFrontHead apf | apf.getHead() = tc), config) + ) and + accessPathApproxCostLimits(apLimit, tupleLimit) and + apLimit < tails and + tupleLimit < (tails - 1) * nodes and + not tc.forceHighPrecision() + ) +} + +private newtype TAccessPathApprox = + TNil(DataFlowType t) or + TConsNil(TypedContent tc, DataFlowType t) { + Stage3::consCand(tc, TFrontNil(t), _) and + not expensiveLen2unfolding(tc, _) + } or + TConsCons(TypedContent tc1, TypedContent tc2, int len) { + Stage3::consCand(tc1, TFrontHead(tc2), _) and + len in [2 .. accessPathLimit()] and + not expensiveLen2unfolding(tc1, _) + } or + TCons1(TypedContent tc, int len) { + len in [1 .. accessPathLimit()] and + expensiveLen2unfolding(tc, _) + } + +/** + * Conceptually a list of `TypedContent`s followed by a `DataFlowType`, but only + * the first two elements of the list and its length are tracked. If data flows + * from a source to a given node with a given `AccessPathApprox`, this indicates + * the sequence of dereference operations needed to get from the value in the node + * to the tracked object. The final type indicates the type of the tracked object. + */ +abstract private class AccessPathApprox extends TAccessPathApprox { + abstract string toString(); + + abstract TypedContent getHead(); + + abstract int len(); + + abstract DataFlowType getType(); + + abstract AccessPathFront getFront(); + + /** Gets the access path obtained by popping `head` from this path, if any. */ + abstract AccessPathApprox pop(TypedContent head); +} + +private class AccessPathApproxNil extends AccessPathApprox, TNil { + private DataFlowType t; + + AccessPathApproxNil() { this = TNil(t) } + + override string toString() { result = concat(": " + ppReprType(t)) } + + override TypedContent getHead() { none() } + + override int len() { result = 0 } + + override DataFlowType getType() { result = t } + + override AccessPathFront getFront() { result = TFrontNil(t) } + + override AccessPathApprox pop(TypedContent head) { none() } +} + +abstract private class AccessPathApproxCons extends AccessPathApprox { } + +private class AccessPathApproxConsNil extends AccessPathApproxCons, TConsNil { + private TypedContent tc; + private DataFlowType t; + + AccessPathApproxConsNil() { this = TConsNil(tc, t) } + + override string toString() { + // The `concat` becomes "" if `ppReprType` has no result. + result = "[" + tc.toString() + "]" + concat(" : " + ppReprType(t)) + } + + override TypedContent getHead() { result = tc } + + override int len() { result = 1 } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { head = tc and result = TNil(t) } +} + +private class AccessPathApproxConsCons extends AccessPathApproxCons, TConsCons { + private TypedContent tc1; + private TypedContent tc2; + private int len; + + AccessPathApproxConsCons() { this = TConsCons(tc1, tc2, len) } + + override string toString() { + if len = 2 + then result = "[" + tc1.toString() + ", " + tc2.toString() + "]" + else result = "[" + tc1.toString() + ", " + tc2.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc1 } + + override int len() { result = len } + + override DataFlowType getType() { result = tc1.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc1) } + + override AccessPathApprox pop(TypedContent head) { + head = tc1 and + ( + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + } +} + +private class AccessPathApproxCons1 extends AccessPathApproxCons, TCons1 { + private TypedContent tc; + private int len; + + AccessPathApproxCons1() { this = TCons1(tc, len) } + + override string toString() { + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + } + + override TypedContent getHead() { result = tc } + + override int len() { result = len } + + override DataFlowType getType() { result = tc.getContainerType() } + + override AccessPathFront getFront() { result = TFrontHead(tc) } + + override AccessPathApprox pop(TypedContent head) { + head = tc and + ( + exists(TypedContent tc2 | Stage3::consCand(tc, TFrontHead(tc2), _) | + result = TConsCons(tc2, _, len - 1) + or + len = 2 and + result = TConsNil(tc2, _) + or + result = TCons1(tc2, len - 1) + ) + or + exists(DataFlowType t | + len = 1 and + Stage3::consCand(tc, TFrontNil(t), _) and + result = TNil(t) + ) + ) + } +} + +/** Gets the access path obtained by popping `tc` from `ap`, if any. */ +private AccessPathApprox pop(TypedContent tc, AccessPathApprox apa) { result = apa.pop(tc) } + +/** Gets the access path obtained by pushing `tc` onto `ap`. */ +private AccessPathApprox push(TypedContent tc, AccessPathApprox apa) { apa = pop(tc, result) } + +private newtype TAccessPathApproxOption = + TAccessPathApproxNone() or + TAccessPathApproxSome(AccessPathApprox apa) + +private class AccessPathApproxOption extends TAccessPathApproxOption { + string toString() { + this = TAccessPathApproxNone() and result = "" + or + this = TAccessPathApproxSome(any(AccessPathApprox apa | result = apa.toString())) + } +} + +private module Stage4Param implements MkStage::StageParam { + private module PrevStage = Stage3; + + class Ap = AccessPathApprox; + + class ApNil = AccessPathApproxNil; + + PrevStage::Ap getApprox(Ap ap) { result = ap.getFront() } + + ApNil getApNil(NodeEx node) { + PrevStage::revFlow(node, _) and result = TNil(node.getDataFlowType()) + } + + bindingset[tc, tail] + Ap apCons(TypedContent tc, Ap tail) { result = push(tc, tail) } + + pragma[noinline] + Content getHeadContent(Ap ap) { result = ap.getHead().getContent() } + + class ApOption = AccessPathApproxOption; + + ApOption apNone() { result = TAccessPathApproxNone() } + + ApOption apSome(Ap ap) { result = TAccessPathApproxSome(ap) } + + import Level1CallContext + import LocalCallContext + + predicate localStep( + NodeEx node1, FlowState state1, NodeEx node2, FlowState state2, boolean preservesValue, + ApNil ap, Configuration config, LocalCc lcc + ) { + localFlowBigStep(node1, state1, node2, state2, preservesValue, ap.getFront(), config, lcc) + } + + pragma[nomagic] + predicate flowOutOfCall( + DataFlowCall call, RetNodeEx node1, NodeEx node2, boolean allowsFieldFlow, Configuration config + ) { + exists(FlowState state | + flowOutOfCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, pragma[only_bind_into](state), _, pragma[only_bind_into](config)) and + PrevStage::revFlowAlias(node1, pragma[only_bind_into](state), _, + pragma[only_bind_into](config)) + ) + } + + pragma[nomagic] + predicate flowIntoCall( + DataFlowCall call, ArgNodeEx node1, ParamNodeEx node2, boolean allowsFieldFlow, + Configuration config + ) { + exists(FlowState state | + flowIntoCallNodeCand2(call, node1, node2, allowsFieldFlow, config) and + PrevStage::revFlow(node2, pragma[only_bind_into](state), _, pragma[only_bind_into](config)) and + PrevStage::revFlowAlias(node1, pragma[only_bind_into](state), _, + pragma[only_bind_into](config)) + ) + } + + bindingset[node, state, ap, config] + predicate filter(NodeEx node, FlowState state, Ap ap, Configuration config) { any() } + + // Type checking is not necessary here as it has already been done in stage 3. + bindingset[ap, contentType] + predicate typecheckStore(Ap ap, DataFlowType contentType) { any() } +} + +private module Stage4 = MkStage::Stage; + +bindingset[conf, result] +private Configuration unbindConf(Configuration conf) { + exists(Configuration c | result = pragma[only_bind_into](c) and conf = pragma[only_bind_into](c)) +} + +pragma[nomagic] +private predicate nodeMayUseSummary0( + NodeEx n, DataFlowCallable c, FlowState state, AccessPathApprox apa, Configuration config +) { + exists(AccessPathApprox apa0 | + Stage4::parameterMayFlowThrough(_, c, _, _) and + Stage4::revFlow(n, state, true, _, apa0, config) and + Stage4::fwdFlow(n, state, any(CallContextCall ccc), TAccessPathApproxSome(apa), apa0, config) and + n.getEnclosingCallable() = c + ) +} + +pragma[nomagic] +private predicate nodeMayUseSummary( + NodeEx n, FlowState state, AccessPathApprox apa, Configuration config +) { + exists(DataFlowCallable c | + Stage4::parameterMayFlowThrough(_, c, apa, config) and + nodeMayUseSummary0(n, c, state, apa, config) + ) +} + +private newtype TSummaryCtx = + TSummaryCtxNone() or + TSummaryCtxSome(ParamNodeEx p, FlowState state, AccessPath ap) { + exists(Configuration config | + Stage4::parameterMayFlowThrough(p, _, ap.getApprox(), config) and + Stage4::revFlow(p, state, _, config) + ) + } + +/** + * A context for generating flow summaries. This represents flow entry through + * a specific parameter with an access path of a specific shape. + * + * Summaries are only created for parameters that may flow through. + */ +abstract private class SummaryCtx extends TSummaryCtx { + abstract string toString(); +} + +/** A summary context from which no flow summary can be generated. */ +private class SummaryCtxNone extends SummaryCtx, TSummaryCtxNone { + override string toString() { result = "" } +} + +/** A summary context from which a flow summary can be generated. */ +private class SummaryCtxSome extends SummaryCtx, TSummaryCtxSome { + private ParamNodeEx p; + private FlowState s; + private AccessPath ap; + + SummaryCtxSome() { this = TSummaryCtxSome(p, s, ap) } + + ParameterPosition getParameterPos() { p.isParameterOf(_, result) } + + ParamNodeEx getParamNode() { result = p } + + override string toString() { result = p + ": " + ap } + + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + p.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** + * Gets the number of length 2 access path approximations that correspond to `apa`. + */ +private int count1to2unfold(AccessPathApproxCons1 apa, Configuration config) { + exists(TypedContent tc, int len | + tc = apa.getHead() and + len = apa.len() and + result = + strictcount(AccessPathFront apf | + Stage4::consCand(tc, any(AccessPathApprox ap | ap.getFront() = apf and ap.len() = len - 1), + config) + ) + ) +} + +private int countNodesUsingAccessPath(AccessPathApprox apa, Configuration config) { + result = + strictcount(NodeEx n, FlowState state | + Stage4::revFlow(n, state, apa, config) or nodeMayUseSummary(n, state, apa, config) + ) +} + +/** + * Holds if a length 2 access path approximation matching `apa` is expected + * to be expensive. + */ +private predicate expensiveLen1to2unfolding(AccessPathApproxCons1 apa, Configuration config) { + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = count1to2unfold(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + apLimit < aps and + tupleLimit < (aps - 1) * nodes + ) +} + +private AccessPathApprox getATail(AccessPathApprox apa, Configuration config) { + exists(TypedContent head | + apa.pop(head) = result and + Stage4::consCand(head, result, config) + ) +} + +/** + * Holds with `unfold = false` if a precise head-tail representation of `apa` is + * expected to be expensive. Holds with `unfold = true` otherwise. + */ +private predicate evalUnfold(AccessPathApprox apa, boolean unfold, Configuration config) { + if apa.getHead().forceHighPrecision() + then unfold = true + else + exists(int aps, int nodes, int apLimit, int tupleLimit | + aps = countPotentialAps(apa, config) and + nodes = countNodesUsingAccessPath(apa, config) and + accessPathCostLimits(apLimit, tupleLimit) and + if apLimit < aps and tupleLimit < (aps - 1) * nodes then unfold = false else unfold = true + ) +} + +/** + * Gets the number of `AccessPath`s that correspond to `apa`. + */ +private int countAps(AccessPathApprox apa, Configuration config) { + evalUnfold(apa, false, config) and + result = 1 and + (not apa instanceof AccessPathApproxCons1 or expensiveLen1to2unfolding(apa, config)) + or + evalUnfold(apa, false, config) and + result = count1to2unfold(apa, config) and + not expensiveLen1to2unfolding(apa, config) + or + evalUnfold(apa, true, config) and + result = countPotentialAps(apa, config) +} + +/** + * Gets the number of `AccessPath`s that would correspond to `apa` assuming + * that it is expanded to a precise head-tail representation. + */ +language[monotonicAggregates] +private int countPotentialAps(AccessPathApprox apa, Configuration config) { + apa instanceof AccessPathApproxNil and result = 1 + or + result = strictsum(AccessPathApprox tail | tail = getATail(apa, config) | countAps(tail, config)) +} + +private newtype TAccessPath = + TAccessPathNil(DataFlowType t) or + TAccessPathCons(TypedContent head, AccessPath tail) { + exists(AccessPathApproxCons apa | + not evalUnfold(apa, false, _) and + head = apa.getHead() and + tail.getApprox() = getATail(apa, _) + ) + } or + TAccessPathCons2(TypedContent head1, TypedContent head2, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + not expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head1 = apa.getHead() and + head2 = getATail(apa, _).getHead() + ) + } or + TAccessPathCons1(TypedContent head, int len) { + exists(AccessPathApproxCons apa | + evalUnfold(apa, false, _) and + expensiveLen1to2unfolding(apa, _) and + apa.len() = len and + head = apa.getHead() + ) + } + +private newtype TPathNode = + TPathNodeMid( + NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, Configuration config + ) { + // A PathNode is introduced by a source ... + Stage4::revFlow(node, state, config) and + sourceNode(node, state, config) and + ( + if hasSourceCallCtx(config) + then cc instanceof CallContextSomeCall + else cc instanceof CallContextAny + ) and + sc instanceof SummaryCtxNone and + ap = TAccessPathNil(node.getDataFlowType()) + or + // ... or a step from an existing PathNode to another node. + exists(PathNodeMid mid | + pathStep(mid, node, state, cc, sc, ap) and + pragma[only_bind_into](config) = mid.getConfiguration() and + Stage4::revFlow(node, state, ap.getApprox(), pragma[only_bind_into](config)) + ) + } or + TPathNodeSink(NodeEx node, FlowState state, Configuration config) { + exists(PathNodeMid sink | + sink.isAtSink() and + node = sink.getNodeEx() and + state = sink.getState() and + config = sink.getConfiguration() + ) + } + +/** + * A list of `TypedContent`s followed by a `DataFlowType`. If data flows from a + * source to a given node with a given `AccessPath`, this indicates the sequence + * of dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ +private class AccessPath extends TAccessPath { + /** Gets the head of this access path, if any. */ + abstract TypedContent getHead(); + + /** Gets the tail of this access path, if any. */ + abstract AccessPath getTail(); + + /** Gets the front of this access path. */ + abstract AccessPathFront getFront(); + + /** Gets the approximation of this access path. */ + abstract AccessPathApprox getApprox(); + + /** Gets the length of this access path. */ + abstract int length(); + + /** Gets a textual representation of this access path. */ + abstract string toString(); + + /** Gets the access path obtained by popping `tc` from this access path, if any. */ + final AccessPath pop(TypedContent tc) { + result = this.getTail() and + tc = this.getHead() + } + + /** Gets the access path obtained by pushing `tc` onto this access path. */ + final AccessPath push(TypedContent tc) { this = result.pop(tc) } +} + +private class AccessPathNil extends AccessPath, TAccessPathNil { + private DataFlowType t; + + AccessPathNil() { this = TAccessPathNil(t) } + + DataFlowType getType() { result = t } + + override TypedContent getHead() { none() } + + override AccessPath getTail() { none() } + + override AccessPathFrontNil getFront() { result = TFrontNil(t) } + + override AccessPathApproxNil getApprox() { result = TNil(t) } + + override int length() { result = 0 } + + override string toString() { result = concat(": " + ppReprType(t)) } +} + +private class AccessPathCons extends AccessPath, TAccessPathCons { + private TypedContent head; + private AccessPath tail; + + AccessPathCons() { this = TAccessPathCons(head, tail) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { result = tail } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { + result = TConsNil(head, tail.(AccessPathNil).getType()) + or + result = TConsCons(head, tail.getHead(), this.length()) + or + result = TCons1(head, this.length()) + } + + override int length() { result = 1 + tail.length() } + + private string toStringImpl(boolean needsSuffix) { + exists(DataFlowType t | + tail = TAccessPathNil(t) and + needsSuffix = false and + result = head.toString() + "]" + concat(" : " + ppReprType(t)) + ) + or + result = head + ", " + tail.(AccessPathCons).toStringImpl(needsSuffix) + or + exists(TypedContent tc2, TypedContent tc3, int len | tail = TAccessPathCons2(tc2, tc3, len) | + result = head + ", " + tc2 + ", " + tc3 + ", ... (" and len > 2 and needsSuffix = true + or + result = head + ", " + tc2 + ", " + tc3 + "]" and len = 2 and needsSuffix = false + ) + or + exists(TypedContent tc2, int len | tail = TAccessPathCons1(tc2, len) | + result = head + ", " + tc2 + ", ... (" and len > 1 and needsSuffix = true + or + result = head + ", " + tc2 + "]" and len = 1 and needsSuffix = false + ) + } + + override string toString() { + result = "[" + this.toStringImpl(true) + this.length().toString() + ")]" + or + result = "[" + this.toStringImpl(false) + } +} + +private class AccessPathCons2 extends AccessPath, TAccessPathCons2 { + private TypedContent head1; + private TypedContent head2; + private int len; + + AccessPathCons2() { this = TAccessPathCons2(head1, head2, len) } + + override TypedContent getHead() { result = head1 } + + override AccessPath getTail() { + Stage4::consCand(head1, result.getApprox(), _) and + result.getHead() = head2 and + result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head1) } + + override AccessPathApproxCons getApprox() { + result = TConsCons(head1, head2, len) or + result = TCons1(head1, len) + } + + override int length() { result = len } + + override string toString() { + if len = 2 + then result = "[" + head1.toString() + ", " + head2.toString() + "]" + else + result = "[" + head1.toString() + ", " + head2.toString() + ", ... (" + len.toString() + ")]" + } +} + +private class AccessPathCons1 extends AccessPath, TAccessPathCons1 { + private TypedContent head; + private int len; + + AccessPathCons1() { this = TAccessPathCons1(head, len) } + + override TypedContent getHead() { result = head } + + override AccessPath getTail() { + Stage4::consCand(head, result.getApprox(), _) and result.length() = len - 1 + } + + override AccessPathFrontHead getFront() { result = TFrontHead(head) } + + override AccessPathApproxCons getApprox() { result = TCons1(head, len) } + + override int length() { result = len } + + override string toString() { + if len = 1 + then result = "[" + head.toString() + "]" + else result = "[" + head.toString() + ", ... (" + len.toString() + ")]" + } +} + +/** + * A `Node` augmented with a call context (except for sinks), an access path, and a configuration. + * Only those `PathNode`s that are reachable from a source are generated. + */ +class PathNode extends TPathNode { + /** Gets a textual representation of this element. */ + string toString() { none() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { none() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + none() + } + + /** Gets the underlying `Node`. */ + final Node getNode() { this.(PathNodeImpl).getNodeEx().projectToNode() = result } + + /** Gets the `FlowState` of this node. */ + FlowState getState() { none() } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + final PathNode getASuccessor() { + result = this.(PathNodeImpl).getANonHiddenSuccessor() and + reach(this) and + reach(result) + } + + /** Holds if this node is a source. */ + predicate isSource() { none() } +} + +abstract private class PathNodeImpl extends PathNode { + abstract PathNodeImpl getASuccessorImpl(); + + private PathNodeImpl getASuccessorIfHidden() { + this.isHidden() and + result = this.getASuccessorImpl() + } + + final PathNodeImpl getANonHiddenSuccessor() { + result = this.getASuccessorImpl().getASuccessorIfHidden*() and + not this.isHidden() and + not result.isHidden() + } + + abstract NodeEx getNodeEx(); + + predicate isHidden() { + not this.getConfiguration().includeHiddenNodes() and + ( + hiddenNode(this.getNodeEx().asNode()) and + not this.isSource() and + not this instanceof PathNodeSink + or + this.getNodeEx() instanceof TNodeImplicitRead + ) + } + + private string ppAp() { + this instanceof PathNodeSink and result = "" + or + exists(string s | s = this.(PathNodeMid).getAp().toString() | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + this instanceof PathNodeSink and result = "" + or + result = " <" + this.(PathNodeMid).getCallContext().toString() + ">" + } + + override string toString() { result = this.getNodeEx().toString() + this.ppAp() } + + override string toStringWithContext() { + result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() + } + + override predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } +} + +/** Holds if `n` can reach a sink. */ +private predicate directReach(PathNodeImpl n) { + n instanceof PathNodeSink or directReach(n.getANonHiddenSuccessor()) +} + +/** Holds if `n` can reach a sink or is used in a subpath that can reach a sink. */ +private predicate reach(PathNode n) { directReach(n) or Subpaths::retReach(n) } + +/** Holds if `n1.getASuccessor() = n2` and `n2` can reach a sink. */ +private predicate pathSucc(PathNodeImpl n1, PathNode n2) { + n1.getANonHiddenSuccessor() = n2 and directReach(n2) +} + +private predicate pathSuccPlus(PathNode n1, PathNode n2) = fastTC(pathSucc/2)(n1, n2) + +/** + * Provides the query predicates needed to include a graph in a path-problem query. + */ +module PathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PathNode a, PathNode b) { a.getASuccessor() = b } + + /** Holds if `n` is a node in the graph of data flow path explanations. */ + query predicate nodes(PathNode n, string key, string val) { + reach(n) and key = "semmle.label" and val = n.toString() + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through + * a subpath between `par` and `ret` with the connecting edges `arg -> par` and + * `ret -> out` is summarized as the edge `arg -> out`. + */ + query predicate subpaths(PathNode arg, PathNode par, PathNode ret, PathNode out) { + Subpaths::subpaths(arg, par, ret, out) and + reach(arg) and + reach(par) and + reach(ret) and + reach(out) + } +} + +/** + * An intermediate flow graph node. This is a triple consisting of a `Node`, + * a `CallContext`, and a `Configuration`. + */ +private class PathNodeMid extends PathNodeImpl, TPathNodeMid { + NodeEx node; + FlowState state; + CallContext cc; + SummaryCtx sc; + AccessPath ap; + Configuration config; + + PathNodeMid() { this = TPathNodeMid(node, state, cc, sc, ap, config) } + + override NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + CallContext getCallContext() { result = cc } + + SummaryCtx getSummaryCtx() { result = sc } + + AccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + private PathNodeMid getSuccMid() { + pathStep(this, result.getNodeEx(), result.getState(), result.getCallContext(), + result.getSummaryCtx(), result.getAp()) and + result.getConfiguration() = unbindConf(this.getConfiguration()) + } + + override PathNodeImpl getASuccessorImpl() { + // an intermediate step to another intermediate node + result = this.getSuccMid() + or + // a final step to a sink + result = this.getSuccMid().projectToSink() + } + + override predicate isSource() { + sourceNode(node, state, config) and + ( + if hasSourceCallCtx(config) + then cc instanceof CallContextSomeCall + else cc instanceof CallContextAny + ) and + sc instanceof SummaryCtxNone and + ap = TAccessPathNil(node.getDataFlowType()) + } + + predicate isAtSink() { + sinkNode(node, state, config) and + ap instanceof AccessPathNil and + if hasSinkCallCtx(config) + then + // For `FeatureHasSinkCallContext` the condition `cc instanceof CallContextNoCall` + // is exactly what we need to check. This also implies + // `sc instanceof SummaryCtxNone`. + // For `FeatureEqualSourceSinkCallContext` the initial call context was + // set to `CallContextSomeCall` and jumps are disallowed, so + // `cc instanceof CallContextNoCall` never holds. On the other hand, + // in this case there's never any need to enter a call except to identify + // a summary, so the condition in `pathIntoCallable` enforces this, which + // means that `sc instanceof SummaryCtxNone` holds if and only if we are + // in the call context of the source. + sc instanceof SummaryCtxNone or + cc instanceof CallContextNoCall + else any() + } + + PathNodeSink projectToSink() { + this.isAtSink() and + result.getNodeEx() = node and + result.getState() = state and + result.getConfiguration() = unbindConf(config) + } +} + +/** + * A flow graph node corresponding to a sink. This is disjoint from the + * intermediate nodes in order to uniquely correspond to a given sink by + * excluding the `CallContext`. + */ +private class PathNodeSink extends PathNodeImpl, TPathNodeSink { + NodeEx node; + FlowState state; + Configuration config; + + PathNodeSink() { this = TPathNodeSink(node, state, config) } + + override NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + override Configuration getConfiguration() { result = config } + + override PathNodeImpl getASuccessorImpl() { none() } + + override predicate isSource() { sourceNode(node, state, config) } +} + +private predicate pathNode( + PathNodeMid mid, NodeEx midnode, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap, + Configuration conf, LocalCallContext localCC +) { + midnode = mid.getNodeEx() and + state = mid.getState() and + conf = mid.getConfiguration() and + cc = mid.getCallContext() and + sc = mid.getSummaryCtx() and + localCC = + getLocalCallContext(pragma[only_bind_into](pragma[only_bind_out](cc)), + midnode.getEnclosingCallable()) and + ap = mid.getAp() +} + +/** + * Holds if data may flow from `mid` to `node`. The last step in or out of + * a callable is recorded by `cc`. + */ +pragma[nomagic] +private predicate pathStep( + PathNodeMid mid, NodeEx node, FlowState state, CallContext cc, SummaryCtx sc, AccessPath ap +) { + exists(NodeEx midnode, FlowState state0, Configuration conf, LocalCallContext localCC | + pathNode(mid, midnode, state0, cc, sc, ap, conf, localCC) and + localFlowBigStep(midnode, state0, node, state, true, _, conf, localCC) + ) + or + exists( + AccessPath ap0, NodeEx midnode, FlowState state0, Configuration conf, LocalCallContext localCC + | + pathNode(mid, midnode, state0, cc, sc, ap0, conf, localCC) and + localFlowBigStep(midnode, state0, node, state, false, ap.getFront(), conf, localCC) and + ap0 instanceof AccessPathNil + ) + or + jumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and + state = mid.getState() and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + ap = mid.getAp() + or + additionalJumpStep(mid.getNodeEx(), node, mid.getConfiguration()) and + state = mid.getState() and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TAccessPathNil(node.getDataFlowType()) + or + additionalJumpStateStep(mid.getNodeEx(), mid.getState(), node, state, mid.getConfiguration()) and + cc instanceof CallContextAny and + sc instanceof SummaryCtxNone and + mid.getAp() instanceof AccessPathNil and + ap = TAccessPathNil(node.getDataFlowType()) + or + exists(TypedContent tc | pathStoreStep(mid, node, state, ap.pop(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + exists(TypedContent tc | pathReadStep(mid, node, state, ap.push(tc), tc, cc)) and + sc = mid.getSummaryCtx() + or + pathIntoCallable(mid, node, state, _, cc, sc, _, _) and ap = mid.getAp() + or + pathOutOfCallable(mid, node, state, cc) and ap = mid.getAp() and sc instanceof SummaryCtxNone + or + pathThroughCallable(mid, node, state, cc, ap) and sc = mid.getSummaryCtx() +} + +pragma[nomagic] +private predicate pathReadStep( + PathNodeMid mid, NodeEx node, FlowState state, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + tc = ap0.getHead() and + Stage4::readStepCand(mid.getNodeEx(), tc.getContent(), node, mid.getConfiguration()) and + state = mid.getState() and + cc = mid.getCallContext() +} + +pragma[nomagic] +private predicate pathStoreStep( + PathNodeMid mid, NodeEx node, FlowState state, AccessPath ap0, TypedContent tc, CallContext cc +) { + ap0 = mid.getAp() and + Stage4::storeStepCand(mid.getNodeEx(), _, tc, node, _, mid.getConfiguration()) and + state = mid.getState() and + cc = mid.getCallContext() +} + +private predicate pathOutOfCallable0( + PathNodeMid mid, ReturnPosition pos, FlowState state, CallContext innercc, AccessPathApprox apa, + Configuration config +) { + pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and + state = mid.getState() and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + apa = mid.getAp().getApprox() and + config = mid.getConfiguration() +} + +pragma[nomagic] +private predicate pathOutOfCallable1( + PathNodeMid mid, DataFlowCall call, ReturnKindExt kind, FlowState state, CallContext cc, + AccessPathApprox apa, Configuration config +) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + pathOutOfCallable0(mid, pos, state, innercc, apa, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) +} + +pragma[noinline] +private NodeEx getAnOutNodeFlow( + ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config +) { + result.asNode() = kind.getAnOutNode(call) and + Stage4::revFlow(result, _, apa, config) +} + +/** + * Holds if data may flow from `mid` to `out`. The last step of this path + * is a return from a callable and is recorded by `cc`, if needed. + */ +pragma[noinline] +private predicate pathOutOfCallable(PathNodeMid mid, NodeEx out, FlowState state, CallContext cc) { + exists(ReturnKindExt kind, DataFlowCall call, AccessPathApprox apa, Configuration config | + pathOutOfCallable1(mid, call, kind, state, cc, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) + ) +} + +/** + * Holds if data may flow from `mid` to the `i`th argument of `call` in `cc`. + */ +pragma[noinline] +private predicate pathIntoArg( + PathNodeMid mid, ParameterPosition ppos, FlowState state, CallContext cc, DataFlowCall call, + AccessPath ap, AccessPathApprox apa, Configuration config +) { + exists(ArgNodeEx arg, ArgumentPosition apos | + pathNode(mid, arg, state, cc, _, ap, config, _) and + arg.asNode().(ArgNode).argumentOf(call, apos) and + apa = ap.getApprox() and + parameterMatch(ppos, apos) + ) +} + +pragma[nomagic] +private predicate parameterCand( + DataFlowCallable callable, ParameterPosition pos, AccessPathApprox apa, Configuration config +) { + exists(ParamNodeEx p | + Stage4::revFlow(p, _, apa, config) and + p.isParameterOf(callable, pos) + ) +} + +pragma[nomagic] +private predicate pathIntoCallable0( + PathNodeMid mid, DataFlowCallable callable, ParameterPosition pos, FlowState state, + CallContext outercc, DataFlowCall call, AccessPath ap, Configuration config +) { + exists(AccessPathApprox apa | + pathIntoArg(mid, pragma[only_bind_into](pos), state, outercc, call, ap, + pragma[only_bind_into](apa), pragma[only_bind_into](config)) and + callable = resolveCall(call, outercc) and + parameterCand(callable, pragma[only_bind_into](pos), pragma[only_bind_into](apa), + pragma[only_bind_into](config)) + ) +} + +/** + * Holds if data may flow from `mid` to `p` through `call`. The contexts + * before and after entering the callable are `outercc` and `innercc`, + * respectively. + */ +pragma[nomagic] +private predicate pathIntoCallable( + PathNodeMid mid, ParamNodeEx p, FlowState state, CallContext outercc, CallContextCall innercc, + SummaryCtx sc, DataFlowCall call, Configuration config +) { + exists(ParameterPosition pos, DataFlowCallable callable, AccessPath ap | + pathIntoCallable0(mid, callable, pos, state, outercc, call, ap, config) and + p.isParameterOf(callable, pos) and + ( + sc = TSummaryCtxSome(p, state, ap) + or + not exists(TSummaryCtxSome(p, state, ap)) and + sc = TSummaryCtxNone() and + // When the call contexts of source and sink needs to match then there's + // never any reason to enter a callable except to find a summary. See also + // the comment in `PathNodeMid::isAtSink`. + not config.getAFeature() instanceof FeatureEqualSourceSinkCallContext + ) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) +} + +/** Holds if data may flow from a parameter given by `sc` to a return of kind `kind`. */ +pragma[nomagic] +private predicate paramFlowsThrough( + ReturnKindExt kind, FlowState state, CallContextCall cc, SummaryCtxSome sc, AccessPath ap, + AccessPathApprox apa, Configuration config +) { + exists(PathNodeMid mid, RetNodeEx ret, ParameterPosition pos | + pathNode(mid, ret, state, cc, sc, ap, config, _) and + kind = ret.getKind() and + apa = ap.getApprox() and + pos = sc.getParameterPos() and + // we don't expect a parameter to return stored in itself, unless explicitly allowed + ( + not kind.(ParamUpdateReturnKind).getPosition() = pos + or + sc.getParamNode().allowParameterReturnInSelf() + ) + ) +} + +pragma[nomagic] +private predicate pathThroughCallable0( + DataFlowCall call, PathNodeMid mid, ReturnKindExt kind, FlowState state, CallContext cc, + AccessPath ap, AccessPathApprox apa, Configuration config +) { + exists(CallContext innercc, SummaryCtx sc | + pathIntoCallable(mid, _, _, cc, innercc, sc, call, config) and + paramFlowsThrough(kind, state, innercc, sc, ap, apa, config) + ) +} + +/** + * Holds if data may flow from `mid` through a callable to the node `out`. + * The context `cc` is restored to its value prior to entering the callable. + */ +pragma[noinline] +private predicate pathThroughCallable( + PathNodeMid mid, NodeEx out, FlowState state, CallContext cc, AccessPath ap +) { + exists(DataFlowCall call, ReturnKindExt kind, AccessPathApprox apa, Configuration config | + pathThroughCallable0(call, mid, kind, state, cc, ap, apa, config) and + out = getAnOutNodeFlow(kind, call, apa, config) + ) +} + +private module Subpaths { + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by + * `kind`, `sc`, `apout`, and `innercc`. + */ + pragma[nomagic] + private predicate subpaths01( + PathNodeImpl arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, + NodeEx out, FlowState sout, AccessPath apout + ) { + exists(Configuration config | + pathThroughCallable(arg, out, pragma[only_bind_into](sout), _, pragma[only_bind_into](apout)) and + pathIntoCallable(arg, par, _, _, innercc, sc, _, config) and + paramFlowsThrough(kind, pragma[only_bind_into](sout), innercc, sc, + pragma[only_bind_into](apout), _, unbindConf(config)) and + not arg.isHidden() + ) + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple and `ret` is determined by + * `kind`, `sc`, `sout`, `apout`, and `innercc`. + */ + pragma[nomagic] + private predicate subpaths02( + PathNode arg, ParamNodeEx par, SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, + NodeEx out, FlowState sout, AccessPath apout + ) { + subpaths01(arg, par, sc, innercc, kind, out, sout, apout) and + out.asNode() = kind.getAnOutNode(_) + } + + pragma[nomagic] + private Configuration getPathNodeConf(PathNode n) { result = n.getConfiguration() } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple. + */ + pragma[nomagic] + private predicate subpaths03( + PathNode arg, ParamNodeEx par, PathNodeMid ret, NodeEx out, FlowState sout, AccessPath apout + ) { + exists(SummaryCtxSome sc, CallContext innercc, ReturnKindExt kind, RetNodeEx retnode | + subpaths02(arg, par, sc, innercc, kind, out, sout, apout) and + pathNode(ret, retnode, sout, innercc, sc, apout, unbindConf(getPathNodeConf(arg)), _) and + kind = retnode.getKind() + ) + } + + private PathNodeImpl localStepToHidden(PathNodeImpl n) { + n.getASuccessorImpl() = result and + result.isHidden() and + exists(NodeEx n1, NodeEx n2 | n1 = n.getNodeEx() and n2 = result.getNodeEx() | + localFlowBigStep(n1, _, n2, _, _, _, _, _) or + store(n1, _, n2, _, _) or + readSet(n1, _, n2, _) + ) + } + + pragma[nomagic] + private predicate hasSuccessor(PathNodeImpl pred, PathNodeMid succ, NodeEx succNode) { + succ = pred.getANonHiddenSuccessor() and + succNode = succ.getNodeEx() + } + + /** + * Holds if `(arg, par, ret, out)` forms a subpath-tuple, that is, flow through + * a subpath between `par` and `ret` with the connecting edges `arg -> par` and + * `ret -> out` is summarized as the edge `arg -> out`. + */ + predicate subpaths(PathNodeImpl arg, PathNodeImpl par, PathNodeImpl ret, PathNode out) { + exists(ParamNodeEx p, NodeEx o, FlowState sout, AccessPath apout, PathNodeMid out0 | + pragma[only_bind_into](arg).getANonHiddenSuccessor() = pragma[only_bind_into](out0) and + subpaths03(pragma[only_bind_into](arg), p, localStepToHidden*(ret), o, sout, apout) and + hasSuccessor(pragma[only_bind_into](arg), par, p) and + not ret.isHidden() and + pathNode(out0, o, sout, _, _, apout, _, _) + | + out = out0 or out = out0.projectToSink() + ) + } + + /** + * Holds if `n` can reach a return node in a summarized subpath that can reach a sink. + */ + predicate retReach(PathNodeImpl n) { + exists(PathNode out | subpaths(_, _, n, out) | directReach(out) or retReach(out)) + or + exists(PathNodeImpl mid | + retReach(mid) and + n.getANonHiddenSuccessor() = mid and + not subpaths(_, mid, _, _) + ) + } +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +private predicate flowsTo( + PathNode flowsource, PathNodeSink flowsink, Node source, Node sink, Configuration configuration +) { + flowsource.isSource() and + flowsource.getConfiguration() = configuration and + flowsource.(PathNodeImpl).getNodeEx().asNode() = source and + (flowsource = flowsink or pathSuccPlus(flowsource, flowsink)) and + flowsink.getNodeEx().asNode() = sink +} + +/** + * Holds if data can flow (inter-procedurally) from `source` to `sink`. + * + * Will only have results if `configuration` has non-empty sources and + * sinks. + */ +predicate flowsTo(Node source, Node sink, Configuration configuration) { + flowsTo(_, _, source, sink, configuration) +} + +private predicate finalStats( + boolean fwd, int nodes, int fields, int conscand, int states, int tuples +) { + fwd = true and + nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0)) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0)) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap)) and + states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state)) and + tuples = count(PathNode pn) + or + fwd = false and + nodes = count(NodeEx n0 | exists(PathNodeImpl pn | pn.getNodeEx() = n0 and reach(pn))) and + fields = count(TypedContent f0 | exists(PathNodeMid pn | pn.getAp().getHead() = f0 and reach(pn))) and + conscand = count(AccessPath ap | exists(PathNodeMid pn | pn.getAp() = ap and reach(pn))) and + states = count(FlowState state | exists(PathNodeMid pn | pn.getState() = state and reach(pn))) and + tuples = count(PathNode pn | reach(pn)) +} + +/** + * INTERNAL: Only for debugging. + * + * Calculates per-stage metrics for data flow. + */ +predicate stageStats( + int n, string stage, int nodes, int fields, int conscand, int states, int tuples, + Configuration config +) { + stage = "1 Fwd" and + n = 10 and + Stage1::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "1 Rev" and + n = 15 and + Stage1::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "2 Fwd" and + n = 20 and + Stage2::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "2 Rev" and + n = 25 and + Stage2::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "3 Fwd" and + n = 30 and + Stage3::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "3 Rev" and + n = 35 and + Stage3::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "4 Fwd" and + n = 40 and + Stage4::stats(true, nodes, fields, conscand, states, tuples, config) + or + stage = "4 Rev" and + n = 45 and + Stage4::stats(false, nodes, fields, conscand, states, tuples, config) + or + stage = "5 Fwd" and n = 50 and finalStats(true, nodes, fields, conscand, states, tuples) + or + stage = "5 Rev" and n = 55 and finalStats(false, nodes, fields, conscand, states, tuples) +} + +private module FlowExploration { + private predicate callableStep(DataFlowCallable c1, DataFlowCallable c2, Configuration config) { + exists(NodeEx node1, NodeEx node2 | + jumpStep(node1, node2, config) + or + additionalJumpStep(node1, node2, config) + or + additionalJumpStateStep(node1, _, node2, _, config) + or + // flow into callable + viableParamArgEx(_, node2, node1) + or + // flow out of a callable + viableReturnPosOutEx(_, node1.(RetNodeEx).getReturnPosition(), node2) + | + c1 = node1.getEnclosingCallable() and + c2 = node2.getEnclosingCallable() and + c1 != c2 + ) + } + + private predicate interestingCallableSrc(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSource(n) or config.isSource(n, _) | c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSrc(mid, config) and callableStep(mid, c, config) + ) + } + + private predicate interestingCallableSink(DataFlowCallable c, Configuration config) { + exists(Node n | config.isSink(n) or config.isSink(n, _) | c = getNodeEnclosingCallable(n)) + or + exists(DataFlowCallable mid | + interestingCallableSink(mid, config) and callableStep(c, mid, config) + ) + } + + private newtype TCallableExt = + TCallable(DataFlowCallable c, Configuration config) { + interestingCallableSrc(c, config) or + interestingCallableSink(c, config) + } or + TCallableSrc() or + TCallableSink() + + private predicate callableExtSrc(TCallableSrc src) { any() } + + private predicate callableExtSink(TCallableSink sink) { any() } + + private predicate callableExtStepFwd(TCallableExt ce1, TCallableExt ce2) { + exists(DataFlowCallable c1, DataFlowCallable c2, Configuration config | + callableStep(c1, c2, config) and + ce1 = TCallable(c1, pragma[only_bind_into](config)) and + ce2 = TCallable(c2, pragma[only_bind_into](config)) + ) + or + exists(Node n, Configuration config | + ce1 = TCallableSrc() and + (config.isSource(n) or config.isSource(n, _)) and + ce2 = TCallable(getNodeEnclosingCallable(n), config) + ) + or + exists(Node n, Configuration config | + ce2 = TCallableSink() and + (config.isSink(n) or config.isSink(n, _)) and + ce1 = TCallable(getNodeEnclosingCallable(n), config) + ) + } + + private predicate callableExtStepRev(TCallableExt ce1, TCallableExt ce2) { + callableExtStepFwd(ce2, ce1) + } + + private int distSrcExt(TCallableExt c) = + shortestDistances(callableExtSrc/1, callableExtStepFwd/2)(_, c, result) + + private int distSinkExt(TCallableExt c) = + shortestDistances(callableExtSink/1, callableExtStepRev/2)(_, c, result) + + private int distSrc(DataFlowCallable c, Configuration config) { + result = distSrcExt(TCallable(c, config)) - 1 + } + + private int distSink(DataFlowCallable c, Configuration config) { + result = distSinkExt(TCallable(c, config)) - 1 + } + + private newtype TPartialAccessPath = + TPartialNil(DataFlowType t) or + TPartialCons(TypedContent tc, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `TypedContent`s followed by a `Type`, but only the first + * element of the list and its length are tracked. If data flows from a source to + * a given node with a given `AccessPath`, this indicates the sequence of + * dereference operations needed to get from the value in the node to the + * tracked object. The final type indicates the type of the tracked object. + */ + private class PartialAccessPath extends TPartialAccessPath { + abstract string toString(); + + TypedContent getHead() { this = TPartialCons(result, _) } + + int len() { + this = TPartialNil(_) and result = 0 + or + this = TPartialCons(_, result) + } + + DataFlowType getType() { + this = TPartialNil(result) + or + exists(TypedContent head | this = TPartialCons(head, _) | result = head.getContainerType()) + } + } + + private class PartialAccessPathNil extends PartialAccessPath, TPartialNil { + override string toString() { + exists(DataFlowType t | this = TPartialNil(t) | result = concat(": " + ppReprType(t))) + } + } + + private class PartialAccessPathCons extends PartialAccessPath, TPartialCons { + override string toString() { + exists(TypedContent tc, int len | this = TPartialCons(tc, len) | + if len = 1 + then result = "[" + tc.toString() + "]" + else result = "[" + tc.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private newtype TRevPartialAccessPath = + TRevPartialNil() or + TRevPartialCons(Content c, int len) { len in [1 .. accessPathLimit()] } + + /** + * Conceptually a list of `Content`s, but only the first + * element of the list and its length are tracked. + */ + private class RevPartialAccessPath extends TRevPartialAccessPath { + abstract string toString(); + + Content getHead() { this = TRevPartialCons(result, _) } + + int len() { + this = TRevPartialNil() and result = 0 + or + this = TRevPartialCons(_, result) + } + } + + private class RevPartialAccessPathNil extends RevPartialAccessPath, TRevPartialNil { + override string toString() { result = "" } + } + + private class RevPartialAccessPathCons extends RevPartialAccessPath, TRevPartialCons { + override string toString() { + exists(Content c, int len | this = TRevPartialCons(c, len) | + if len = 1 + then result = "[" + c.toString() + "]" + else result = "[" + c.toString() + ", ... (" + len.toString() + ")]" + ) + } + } + + private predicate relevantState(FlowState state) { + sourceNode(_, state, _) or + sinkNode(_, state, _) or + additionalLocalStateStep(_, state, _, _, _) or + additionalLocalStateStep(_, _, _, state, _) or + additionalJumpStateStep(_, state, _, _, _) or + additionalJumpStateStep(_, _, _, state, _) + } + + private newtype TSummaryCtx1 = + TSummaryCtx1None() or + TSummaryCtx1Param(ParamNodeEx p) + + private newtype TSummaryCtx2 = + TSummaryCtx2None() or + TSummaryCtx2Some(FlowState s) { relevantState(s) } + + private newtype TSummaryCtx3 = + TSummaryCtx3None() or + TSummaryCtx3Some(PartialAccessPath ap) + + private newtype TRevSummaryCtx1 = + TRevSummaryCtx1None() or + TRevSummaryCtx1Some(ReturnPosition pos) + + private newtype TRevSummaryCtx2 = + TRevSummaryCtx2None() or + TRevSummaryCtx2Some(FlowState s) { relevantState(s) } + + private newtype TRevSummaryCtx3 = + TRevSummaryCtx3None() or + TRevSummaryCtx3Some(RevPartialAccessPath ap) + + private newtype TPartialPathNode = + TPartialPathNodeFwd( + NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + sourceNode(node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap = TPartialNil(node.getDataFlowType()) and + exists(config.explorationLimit()) + or + partialPathNodeMk0(node, state, cc, sc1, sc2, sc3, ap, config) and + distSrc(node.getEnclosingCallable(), config) <= config.explorationLimit() + } or + TPartialPathNodeRev( + NodeEx node, FlowState state, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, TRevSummaryCtx3 sc3, + RevPartialAccessPath ap, Configuration config + ) { + sinkNode(node, state, config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = TRevPartialNil() and + exists(config.explorationLimit()) + or + exists(PartialPathNodeRev mid | + revPartialPathStep(mid, node, state, sc1, sc2, sc3, ap, config) and + not clearsContentEx(node, ap.getHead()) and + ( + notExpectsContent(node) or + expectsContentEx(node, ap.getHead()) + ) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) and + distSink(node.getEnclosingCallable(), config) <= config.explorationLimit() + ) + } + + pragma[nomagic] + private predicate partialPathNodeMk0( + NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStep(mid, node, state, cc, sc1, sc2, sc3, ap, config) and + not fullBarrier(node, config) and + not stateBarrier(node, state, config) and + not clearsContentEx(node, ap.getHead().getContent()) and + ( + notExpectsContent(node) or + expectsContentEx(node, ap.getHead().getContent()) + ) and + if node.asNode() instanceof CastingNode + then compatibleTypes(node.getDataFlowType(), ap.getType()) + else any() + ) + } + + /** + * A `Node` augmented with a call context, an access path, and a configuration. + */ + class PartialPathNode extends TPartialPathNode { + /** Gets a textual representation of this element. */ + string toString() { result = this.getNodeEx().toString() + this.ppAp() } + + /** + * Gets a textual representation of this element, including a textual + * representation of the call context. + */ + string toStringWithContext() { + result = this.getNodeEx().toString() + this.ppAp() + this.ppCtx() + } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getNodeEx().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets the underlying `Node`. */ + final Node getNode() { this.getNodeEx().projectToNode() = result } + + FlowState getState() { none() } + + private NodeEx getNodeEx() { + result = this.(PartialPathNodeFwd).getNodeEx() or + result = this.(PartialPathNodeRev).getNodeEx() + } + + /** Gets the associated configuration. */ + Configuration getConfiguration() { none() } + + /** Gets a successor of this node, if any. */ + PartialPathNode getASuccessor() { none() } + + /** + * Gets the approximate distance to the nearest source measured in number + * of interprocedural steps. + */ + int getSourceDistance() { + result = distSrc(this.getNodeEx().getEnclosingCallable(), this.getConfiguration()) + } + + /** + * Gets the approximate distance to the nearest sink measured in number + * of interprocedural steps. + */ + int getSinkDistance() { + result = distSink(this.getNodeEx().getEnclosingCallable(), this.getConfiguration()) + } + + private string ppAp() { + exists(string s | + s = this.(PartialPathNodeFwd).getAp().toString() or + s = this.(PartialPathNodeRev).getAp().toString() + | + if s = "" then result = "" else result = " " + s + ) + } + + private string ppCtx() { + result = " <" + this.(PartialPathNodeFwd).getCallContext().toString() + ">" + } + + /** Holds if this is a source in a forward-flow path. */ + predicate isFwdSource() { this.(PartialPathNodeFwd).isSource() } + + /** Holds if this is a sink in a reverse-flow path. */ + predicate isRevSink() { this.(PartialPathNodeRev).isSink() } + } + + /** + * Provides the query predicates needed to include a graph in a path-problem query. + */ + module PartialPathGraph { + /** Holds if `(a,b)` is an edge in the graph of data flow path explanations. */ + query predicate edges(PartialPathNode a, PartialPathNode b) { a.getASuccessor() = b } + } + + private class PartialPathNodeFwd extends PartialPathNode, TPartialPathNodeFwd { + NodeEx node; + FlowState state; + CallContext cc; + TSummaryCtx1 sc1; + TSummaryCtx2 sc2; + TSummaryCtx3 sc3; + PartialAccessPath ap; + Configuration config; + + PartialPathNodeFwd() { this = TPartialPathNodeFwd(node, state, cc, sc1, sc2, sc3, ap, config) } + + NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + CallContext getCallContext() { result = cc } + + TSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TSummaryCtx2 getSummaryCtx2() { result = sc2 } + + TSummaryCtx3 getSummaryCtx3() { result = sc3 } + + PartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeFwd getASuccessor() { + partialPathStep(this, result.getNodeEx(), result.getState(), result.getCallContext(), + result.getSummaryCtx1(), result.getSummaryCtx2(), result.getSummaryCtx3(), result.getAp(), + result.getConfiguration()) + } + + predicate isSource() { + sourceNode(node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap instanceof TPartialNil + } + } + + private class PartialPathNodeRev extends PartialPathNode, TPartialPathNodeRev { + NodeEx node; + FlowState state; + TRevSummaryCtx1 sc1; + TRevSummaryCtx2 sc2; + TRevSummaryCtx3 sc3; + RevPartialAccessPath ap; + Configuration config; + + PartialPathNodeRev() { this = TPartialPathNodeRev(node, state, sc1, sc2, sc3, ap, config) } + + NodeEx getNodeEx() { result = node } + + override FlowState getState() { result = state } + + TRevSummaryCtx1 getSummaryCtx1() { result = sc1 } + + TRevSummaryCtx2 getSummaryCtx2() { result = sc2 } + + TRevSummaryCtx3 getSummaryCtx3() { result = sc3 } + + RevPartialAccessPath getAp() { result = ap } + + override Configuration getConfiguration() { result = config } + + override PartialPathNodeRev getASuccessor() { + revPartialPathStep(result, this.getNodeEx(), this.getState(), this.getSummaryCtx1(), + this.getSummaryCtx2(), this.getSummaryCtx3(), this.getAp(), this.getConfiguration()) + } + + predicate isSink() { + sinkNode(node, state, config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = TRevPartialNil() + } + } + + private predicate partialPathStep( + PartialPathNodeFwd mid, NodeEx node, FlowState state, CallContext cc, TSummaryCtx1 sc1, + TSummaryCtx2 sc2, TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + not isUnreachableInCallCached(node.asNode(), cc.(CallContextSpecificCall).getCall()) and + ( + localFlowStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + additionalLocalStateStep(mid.getNodeEx(), mid.getState(), node, state, config) and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + ) + or + jumpStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(mid.getNodeEx(), node, config) and + state = mid.getState() and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + additionalJumpStateStep(mid.getNodeEx(), mid.getState(), node, state, config) and + cc instanceof CallContextAny and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() and + mid.getAp() instanceof PartialAccessPathNil and + ap = TPartialNil(node.getDataFlowType()) and + config = mid.getConfiguration() + or + partialPathStoreStep(mid, _, _, node, ap) and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() + or + exists(PartialAccessPath ap0, TypedContent tc | + partialPathReadStep(mid, ap0, tc, node, cc, config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + apConsFwd(ap, tc, ap0, config) + ) + or + partialPathIntoCallable(mid, node, state, _, cc, sc1, sc2, sc3, _, ap, config) + or + partialPathOutOfCallable(mid, node, state, cc, ap, config) and + sc1 = TSummaryCtx1None() and + sc2 = TSummaryCtx2None() and + sc3 = TSummaryCtx3None() + or + partialPathThroughCallable(mid, node, state, cc, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() + } + + bindingset[result, i] + private int unbindInt(int i) { pragma[only_bind_out](i) = pragma[only_bind_out](result) } + + pragma[inline] + private predicate partialPathStoreStep( + PartialPathNodeFwd mid, PartialAccessPath ap1, TypedContent tc, NodeEx node, + PartialAccessPath ap2 + ) { + exists(NodeEx midNode, DataFlowType contentType | + midNode = mid.getNodeEx() and + ap1 = mid.getAp() and + store(midNode, tc, node, contentType, mid.getConfiguration()) and + ap2.getHead() = tc and + ap2.len() = unbindInt(ap1.len() + 1) and + compatibleTypes(ap1.getType(), contentType) + ) + } + + pragma[nomagic] + private predicate apConsFwd( + PartialAccessPath ap1, TypedContent tc, PartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeFwd mid | + partialPathStoreStep(mid, ap1, tc, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate partialPathReadStep( + PartialPathNodeFwd mid, PartialAccessPath ap, TypedContent tc, NodeEx node, CallContext cc, + Configuration config + ) { + exists(NodeEx midNode | + midNode = mid.getNodeEx() and + ap = mid.getAp() and + read(midNode, tc.getContent(), node, pragma[only_bind_into](config)) and + ap.getHead() = tc and + pragma[only_bind_into](config) = mid.getConfiguration() and + cc = mid.getCallContext() + ) + } + + private predicate partialPathOutOfCallable0( + PartialPathNodeFwd mid, ReturnPosition pos, FlowState state, CallContext innercc, + PartialAccessPath ap, Configuration config + ) { + pos = mid.getNodeEx().(RetNodeEx).getReturnPosition() and + state = mid.getState() and + innercc = mid.getCallContext() and + innercc instanceof CallContextNoCall and + ap = mid.getAp() and + config = mid.getConfiguration() + } + + pragma[nomagic] + private predicate partialPathOutOfCallable1( + PartialPathNodeFwd mid, DataFlowCall call, ReturnKindExt kind, FlowState state, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(ReturnPosition pos, DataFlowCallable c, CallContext innercc | + partialPathOutOfCallable0(mid, pos, state, innercc, ap, config) and + c = pos.getCallable() and + kind = pos.getKind() and + resolveReturn(innercc, c, call) + | + if reducedViableImplInReturn(c, call) then cc = TReturn(c, call) else cc = TAnyCallContext() + ) + } + + private predicate partialPathOutOfCallable( + PartialPathNodeFwd mid, NodeEx out, FlowState state, CallContext cc, PartialAccessPath ap, + Configuration config + ) { + exists(ReturnKindExt kind, DataFlowCall call | + partialPathOutOfCallable1(mid, call, kind, state, cc, ap, config) + | + out.asNode() = kind.getAnOutNode(call) + ) + } + + pragma[noinline] + private predicate partialPathIntoArg( + PartialPathNodeFwd mid, ParameterPosition ppos, FlowState state, CallContext cc, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + exists(ArgNode arg, ArgumentPosition apos | + arg = mid.getNodeEx().asNode() and + state = mid.getState() and + cc = mid.getCallContext() and + arg.argumentOf(call, apos) and + ap = mid.getAp() and + config = mid.getConfiguration() and + parameterMatch(ppos, apos) + ) + } + + pragma[nomagic] + private predicate partialPathIntoCallable0( + PartialPathNodeFwd mid, DataFlowCallable callable, ParameterPosition pos, FlowState state, + CallContext outercc, DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + partialPathIntoArg(mid, pos, state, outercc, call, ap, config) and + callable = resolveCall(call, outercc) + } + + private predicate partialPathIntoCallable( + PartialPathNodeFwd mid, ParamNodeEx p, FlowState state, CallContext outercc, + CallContextCall innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, TSummaryCtx3 sc3, + DataFlowCall call, PartialAccessPath ap, Configuration config + ) { + exists(ParameterPosition pos, DataFlowCallable callable | + partialPathIntoCallable0(mid, callable, pos, state, outercc, call, ap, config) and + p.isParameterOf(callable, pos) and + sc1 = TSummaryCtx1Param(p) and + sc2 = TSummaryCtx2Some(state) and + sc3 = TSummaryCtx3Some(ap) + | + if recordDataFlowCallSite(call, callable) + then innercc = TSpecificCall(call) + else innercc = TSomeCall() + ) + } + + pragma[nomagic] + private predicate paramFlowsThroughInPartialPath( + ReturnKindExt kind, FlowState state, CallContextCall cc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, + TSummaryCtx3 sc3, PartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeFwd mid, RetNodeEx ret | + mid.getNodeEx() = ret and + kind = ret.getKind() and + state = mid.getState() and + cc = mid.getCallContext() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() and + ap = mid.getAp() + ) + } + + pragma[noinline] + private predicate partialPathThroughCallable0( + DataFlowCall call, PartialPathNodeFwd mid, ReturnKindExt kind, FlowState state, CallContext cc, + PartialAccessPath ap, Configuration config + ) { + exists(CallContext innercc, TSummaryCtx1 sc1, TSummaryCtx2 sc2, TSummaryCtx3 sc3 | + partialPathIntoCallable(mid, _, _, cc, innercc, sc1, sc2, sc3, call, _, config) and + paramFlowsThroughInPartialPath(kind, state, innercc, sc1, sc2, sc3, ap, config) + ) + } + + private predicate partialPathThroughCallable( + PartialPathNodeFwd mid, NodeEx out, FlowState state, CallContext cc, PartialAccessPath ap, + Configuration config + ) { + exists(DataFlowCall call, ReturnKindExt kind | + partialPathThroughCallable0(call, mid, kind, state, cc, ap, config) and + out.asNode() = kind.getAnOutNode(call) + ) + } + + pragma[nomagic] + private predicate revPartialPathStep( + PartialPathNodeRev mid, NodeEx node, FlowState state, TRevSummaryCtx1 sc1, TRevSummaryCtx2 sc2, + TRevSummaryCtx3 sc3, RevPartialAccessPath ap, Configuration config + ) { + localFlowStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalLocalFlowStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + additionalLocalStateStep(node, state, mid.getNodeEx(), mid.getState(), config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + jumpStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + or + additionalJumpStep(node, mid.getNodeEx(), config) and + state = mid.getState() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + additionalJumpStateStep(node, state, mid.getNodeEx(), mid.getState(), config) and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + mid.getAp() instanceof RevPartialAccessPathNil and + ap = TRevPartialNil() and + config = mid.getConfiguration() + or + revPartialPathReadStep(mid, _, _, node, ap) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + config = mid.getConfiguration() + or + exists(RevPartialAccessPath ap0, Content c | + revPartialPathStoreStep(mid, ap0, c, node, config) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + apConsRev(ap, c, ap0, config) + ) + or + exists(ParamNodeEx p | + mid.getNodeEx() = p and + viableParamArgEx(_, p, node) and + state = mid.getState() and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + sc1 = TRevSummaryCtx1None() and + sc2 = TRevSummaryCtx2None() and + sc3 = TRevSummaryCtx3None() and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + or + exists(ReturnPosition pos | + revPartialPathIntoReturn(mid, pos, state, sc1, sc2, sc3, _, ap, config) and + pos = getReturnPosition(node.asNode()) + ) + or + revPartialPathThroughCallable(mid, node, state, ap, config) and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() + } + + pragma[inline] + private predicate revPartialPathReadStep( + PartialPathNodeRev mid, RevPartialAccessPath ap1, Content c, NodeEx node, + RevPartialAccessPath ap2 + ) { + exists(NodeEx midNode | + midNode = mid.getNodeEx() and + ap1 = mid.getAp() and + read(node, c, midNode, mid.getConfiguration()) and + ap2.getHead() = c and + ap2.len() = unbindInt(ap1.len() + 1) + ) + } + + pragma[nomagic] + private predicate apConsRev( + RevPartialAccessPath ap1, Content c, RevPartialAccessPath ap2, Configuration config + ) { + exists(PartialPathNodeRev mid | + revPartialPathReadStep(mid, ap1, c, _, ap2) and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathStoreStep( + PartialPathNodeRev mid, RevPartialAccessPath ap, Content c, NodeEx node, Configuration config + ) { + exists(NodeEx midNode, TypedContent tc | + midNode = mid.getNodeEx() and + ap = mid.getAp() and + store(node, tc, midNode, _, config) and + ap.getHead() = c and + config = mid.getConfiguration() and + tc.getContent() = c + ) + } + + pragma[nomagic] + private predicate revPartialPathIntoReturn( + PartialPathNodeRev mid, ReturnPosition pos, FlowState state, TRevSummaryCtx1Some sc1, + TRevSummaryCtx2Some sc2, TRevSummaryCtx3Some sc3, DataFlowCall call, RevPartialAccessPath ap, + Configuration config + ) { + exists(NodeEx out | + mid.getNodeEx() = out and + mid.getState() = state and + viableReturnPosOutEx(call, pos, out) and + sc1 = TRevSummaryCtx1Some(pos) and + sc2 = TRevSummaryCtx2Some(state) and + sc3 = TRevSummaryCtx3Some(ap) and + ap = mid.getAp() and + config = mid.getConfiguration() + ) + } + + pragma[nomagic] + private predicate revPartialPathFlowsThrough( + ArgumentPosition apos, FlowState state, TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, + TRevSummaryCtx3Some sc3, RevPartialAccessPath ap, Configuration config + ) { + exists(PartialPathNodeRev mid, ParamNodeEx p, ParameterPosition ppos | + mid.getNodeEx() = p and + mid.getState() = state and + p.getPosition() = ppos and + sc1 = mid.getSummaryCtx1() and + sc2 = mid.getSummaryCtx2() and + sc3 = mid.getSummaryCtx3() and + ap = mid.getAp() and + config = mid.getConfiguration() and + parameterMatch(ppos, apos) + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable0( + DataFlowCall call, PartialPathNodeRev mid, ArgumentPosition pos, FlowState state, + RevPartialAccessPath ap, Configuration config + ) { + exists(TRevSummaryCtx1Some sc1, TRevSummaryCtx2Some sc2, TRevSummaryCtx3Some sc3 | + revPartialPathIntoReturn(mid, _, _, sc1, sc2, sc3, call, _, config) and + revPartialPathFlowsThrough(pos, state, sc1, sc2, sc3, ap, config) + ) + } + + pragma[nomagic] + private predicate revPartialPathThroughCallable( + PartialPathNodeRev mid, ArgNodeEx node, FlowState state, RevPartialAccessPath ap, + Configuration config + ) { + exists(DataFlowCall call, ArgumentPosition pos | + revPartialPathThroughCallable0(call, mid, pos, state, ap, config) and + node.asNode().(ArgNode).argumentOf(call, pos) + ) + } +} + +import FlowExploration + +private predicate partialFlow( + PartialPathNode source, PartialPathNode node, Configuration configuration +) { + source.getConfiguration() = configuration and + source.isFwdSource() and + node = source.getASuccessor+() +} + +private predicate revPartialFlow( + PartialPathNode node, PartialPathNode sink, Configuration configuration +) { + sink.getConfiguration() = configuration and + sink.isRevSink() and + node.getASuccessor+() = sink +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll new file mode 100644 index 000000000000..95b34f15dad6 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplCommon.qll @@ -0,0 +1,1374 @@ +private import DataFlowImplSpecific::Private +private import DataFlowImplSpecific::Public +import Cached + +module DataFlowImplCommonPublic { + /** A state value to track during data flow. */ + class FlowState = string; + + /** + * The default state, which is used when the state is unspecified for a source + * or a sink. + */ + class FlowStateEmpty extends FlowState { + FlowStateEmpty() { this = "" } + } + + private newtype TFlowFeature = + TFeatureHasSourceCallContext() or + TFeatureHasSinkCallContext() or + TFeatureEqualSourceSinkCallContext() + + /** A flow configuration feature for use in `Configuration::getAFeature()`. */ + class FlowFeature extends TFlowFeature { + string toString() { none() } + } + + /** + * A flow configuration feature that implies that sources have some existing + * call context. + */ + class FeatureHasSourceCallContext extends FlowFeature, TFeatureHasSourceCallContext { + override string toString() { result = "FeatureHasSourceCallContext" } + } + + /** + * A flow configuration feature that implies that sinks have some existing + * call context. + */ + class FeatureHasSinkCallContext extends FlowFeature, TFeatureHasSinkCallContext { + override string toString() { result = "FeatureHasSinkCallContext" } + } + + /** + * A flow configuration feature that implies that source-sink pairs have some + * shared existing call context. + */ + class FeatureEqualSourceSinkCallContext extends FlowFeature, TFeatureEqualSourceSinkCallContext { + override string toString() { result = "FeatureEqualSourceSinkCallContext" } + } +} + +/** + * The cost limits for the `AccessPathFront` to `AccessPathApprox` expansion. + * + * `apLimit` bounds the acceptable fan-out, and `tupleLimit` bounds the + * estimated per-`AccessPathFront` tuple cost. Access paths exceeding both of + * these limits are represented with lower precision during pruning. + */ +predicate accessPathApproxCostLimits(int apLimit, int tupleLimit) { + apLimit = 10 and + tupleLimit = 10000 +} + +/** + * The cost limits for the `AccessPathApprox` to `AccessPath` expansion. + * + * `apLimit` bounds the acceptable fan-out, and `tupleLimit` bounds the + * estimated per-`AccessPathApprox` tuple cost. Access paths exceeding both of + * these limits are represented with lower precision. + */ +predicate accessPathCostLimits(int apLimit, int tupleLimit) { + apLimit = 5 and + tupleLimit = 1000 +} + +/** + * Holds if `arg` is an argument of `call` with an argument position that matches + * parameter position `ppos`. + */ +pragma[noinline] +predicate argumentPositionMatch(DataFlowCall call, ArgNode arg, ParameterPosition ppos) { + exists(ArgumentPosition apos | + arg.argumentOf(call, apos) and + parameterMatch(ppos, apos) + ) +} + +/** + * Provides a simple data-flow analysis for resolving lambda calls. The analysis + * currently excludes read-steps, store-steps, and flow-through. + * + * The analysis uses non-linear recursion: When computing a flow path in or out + * of a call, we use the results of the analysis recursively to resolve lambda + * calls. For this reason, we cannot reuse the code from `DataFlowImpl.qll` directly. + */ +private module LambdaFlow { + pragma[noinline] + private predicate viableParamNonLambda(DataFlowCall call, ParameterPosition ppos, ParamNode p) { + p.isParameterOf(viableCallable(call), ppos) + } + + pragma[noinline] + private predicate viableParamLambda(DataFlowCall call, ParameterPosition ppos, ParamNode p) { + p.isParameterOf(viableCallableLambda(call, _), ppos) + } + + private predicate viableParamArgNonLambda(DataFlowCall call, ParamNode p, ArgNode arg) { + exists(ParameterPosition ppos | + viableParamNonLambda(call, ppos, p) and + argumentPositionMatch(call, arg, ppos) + ) + } + + private predicate viableParamArgLambda(DataFlowCall call, ParamNode p, ArgNode arg) { + exists(ParameterPosition ppos | + viableParamLambda(call, ppos, p) and + argumentPositionMatch(call, arg, ppos) + ) + } + + private newtype TReturnPositionSimple = + TReturnPositionSimple0(DataFlowCallable c, ReturnKind kind) { + exists(ReturnNode ret | + c = getNodeEnclosingCallable(ret) and + kind = ret.getKind() + ) + } + + pragma[noinline] + private TReturnPositionSimple getReturnPositionSimple(ReturnNode ret, ReturnKind kind) { + result = TReturnPositionSimple0(getNodeEnclosingCallable(ret), kind) + } + + pragma[nomagic] + private TReturnPositionSimple viableReturnPosNonLambda(DataFlowCall call, ReturnKind kind) { + result = TReturnPositionSimple0(viableCallable(call), kind) + } + + pragma[nomagic] + private TReturnPositionSimple viableReturnPosLambda( + DataFlowCall call, DataFlowCallOption lastCall, ReturnKind kind + ) { + result = TReturnPositionSimple0(viableCallableLambda(call, lastCall), kind) + } + + private predicate viableReturnPosOutNonLambda( + DataFlowCall call, TReturnPositionSimple pos, OutNode out + ) { + exists(ReturnKind kind | + pos = viableReturnPosNonLambda(call, kind) and + out = getAnOutNode(call, kind) + ) + } + + private predicate viableReturnPosOutLambda( + DataFlowCall call, DataFlowCallOption lastCall, TReturnPositionSimple pos, OutNode out + ) { + exists(ReturnKind kind | + pos = viableReturnPosLambda(call, lastCall, kind) and + out = getAnOutNode(call, kind) + ) + } + + /** + * Holds if data can flow (inter-procedurally) from `node` (of type `t`) to + * the lambda call `lambdaCall`. + * + * The parameter `toReturn` indicates whether the path from `node` to + * `lambdaCall` goes through a return, and `toJump` whether the path goes + * through a jump step. + * + * The call context `lastCall` records the last call on the path from `node` + * to `lambdaCall`, if any. That is, `lastCall` is able to target the enclosing + * callable of `lambdaCall`. + */ + pragma[nomagic] + predicate revLambdaFlow( + DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn, + boolean toJump, DataFlowCallOption lastCall + ) { + revLambdaFlow0(lambdaCall, kind, node, t, toReturn, toJump, lastCall) and + if castNode(node) or node instanceof ArgNode or node instanceof ReturnNode + then compatibleTypes(t, getNodeDataFlowType(node)) + else any() + } + + pragma[nomagic] + predicate revLambdaFlow0( + DataFlowCall lambdaCall, LambdaCallKind kind, Node node, DataFlowType t, boolean toReturn, + boolean toJump, DataFlowCallOption lastCall + ) { + lambdaCall(lambdaCall, kind, node) and + t = getNodeDataFlowType(node) and + toReturn = false and + toJump = false and + lastCall = TDataFlowCallNone() + or + // local flow + exists(Node mid, DataFlowType t0 | + revLambdaFlow(lambdaCall, kind, mid, t0, toReturn, toJump, lastCall) + | + simpleLocalFlowStep(node, mid) and + t = t0 + or + exists(boolean preservesValue | + additionalLambdaFlowStep(node, mid, preservesValue) and + getNodeEnclosingCallable(node) = getNodeEnclosingCallable(mid) + | + preservesValue = false and + t = getNodeDataFlowType(node) + or + preservesValue = true and + t = t0 + ) + ) + or + // jump step + exists(Node mid, DataFlowType t0 | + revLambdaFlow(lambdaCall, kind, mid, t0, _, _, lastCall) and + toReturn = false and + toJump = true + | + jumpStepCached(node, mid) and + t = t0 + or + exists(boolean preservesValue | + additionalLambdaFlowStep(node, mid, preservesValue) and + getNodeEnclosingCallable(node) != getNodeEnclosingCallable(mid) + | + preservesValue = false and + t = getNodeDataFlowType(node) + or + preservesValue = true and + t = t0 + ) + ) + or + // flow into a callable + exists(ParamNode p, DataFlowCallOption lastCall0, DataFlowCall call | + revLambdaFlowIn(lambdaCall, kind, p, t, toJump, lastCall0) and + ( + if lastCall0 = TDataFlowCallNone() and toJump = false + then lastCall = TDataFlowCallSome(call) + else lastCall = lastCall0 + ) and + toReturn = false + | + viableParamArgNonLambda(call, p, node) + or + viableParamArgLambda(call, p, node) // non-linear recursion + ) + or + // flow out of a callable + exists(TReturnPositionSimple pos | + revLambdaFlowOut(lambdaCall, kind, pos, t, toJump, lastCall) and + getReturnPositionSimple(node, node.(ReturnNode).getKind()) = pos and + toReturn = true + ) + } + + pragma[nomagic] + predicate revLambdaFlowOutLambdaCall( + DataFlowCall lambdaCall, LambdaCallKind kind, OutNode out, DataFlowType t, boolean toJump, + DataFlowCall call, DataFlowCallOption lastCall + ) { + revLambdaFlow(lambdaCall, kind, out, t, _, toJump, lastCall) and + exists(ReturnKindExt rk | + out = rk.getAnOutNode(call) and + lambdaCall(call, _, _) + ) + } + + pragma[nomagic] + predicate revLambdaFlowOut( + DataFlowCall lambdaCall, LambdaCallKind kind, TReturnPositionSimple pos, DataFlowType t, + boolean toJump, DataFlowCallOption lastCall + ) { + exists(DataFlowCall call, OutNode out | + revLambdaFlow(lambdaCall, kind, out, t, _, toJump, lastCall) and + viableReturnPosOutNonLambda(call, pos, out) + or + // non-linear recursion + revLambdaFlowOutLambdaCall(lambdaCall, kind, out, t, toJump, call, lastCall) and + viableReturnPosOutLambda(call, _, pos, out) + ) + } + + pragma[nomagic] + predicate revLambdaFlowIn( + DataFlowCall lambdaCall, LambdaCallKind kind, ParamNode p, DataFlowType t, boolean toJump, + DataFlowCallOption lastCall + ) { + revLambdaFlow(lambdaCall, kind, p, t, false, toJump, lastCall) + } +} + +private DataFlowCallable viableCallableExt(DataFlowCall call) { + result = viableCallable(call) + or + result = viableCallableLambda(call, _) +} + +cached +private module Cached { + /** + * If needed, call this predicate from `DataFlowImplSpecific.qll` in order to + * force a stage-dependency on the `DataFlowImplCommon.qll` stage and thereby + * collapsing the two stages. + */ + cached + predicate forceCachingInSameStage() { any() } + + cached + predicate nodeEnclosingCallable(Node n, DataFlowCallable c) { c = nodeGetEnclosingCallable(n) } + + cached + predicate callEnclosingCallable(DataFlowCall call, DataFlowCallable c) { + c = call.getEnclosingCallable() + } + + cached + predicate nodeDataFlowType(Node n, DataFlowType t) { t = getNodeType(n) } + + cached + predicate jumpStepCached(Node node1, Node node2) { jumpStep(node1, node2) } + + cached + predicate clearsContentCached(Node n, ContentSet c) { clearsContent(n, c) } + + cached + predicate expectsContentCached(Node n, ContentSet c) { expectsContent(n, c) } + + cached + predicate isUnreachableInCallCached(Node n, DataFlowCall call) { isUnreachableInCall(n, call) } + + cached + predicate outNodeExt(Node n) { + n instanceof OutNode + or + n.(PostUpdateNode).getPreUpdateNode() instanceof ArgNode + } + + cached + predicate hiddenNode(Node n) { nodeIsHidden(n) } + + cached + OutNodeExt getAnOutNodeExt(DataFlowCall call, ReturnKindExt k) { + result = getAnOutNode(call, k.(ValueReturnKind).getKind()) + or + exists(ArgNode arg | + result.(PostUpdateNode).getPreUpdateNode() = arg and + arg.argumentOf(call, k.(ParamUpdateReturnKind).getAMatchingArgumentPosition()) + ) + } + + cached + predicate returnNodeExt(Node n, ReturnKindExt k) { + k = TValueReturn(n.(ReturnNode).getKind()) + or + exists(ParamNode p, ParameterPosition pos | + parameterValueFlowsToPreUpdate(p, n) and + p.isParameterOf(_, pos) and + k = TParamUpdate(pos) + ) + } + + cached + predicate castNode(Node n) { n instanceof CastNode } + + cached + predicate castingNode(Node n) { + castNode(n) or + n instanceof ParamNode or + n instanceof OutNodeExt or + // For reads, `x.f`, we want to check that the tracked type after the read (which + // is obtained by popping the head of the access path stack) is compatible with + // the type of `x.f`. + readSet(_, _, n) + } + + cached + predicate parameterNode(Node p, DataFlowCallable c, ParameterPosition pos) { + isParameterNode(p, c, pos) + } + + cached + predicate argumentNode(Node n, DataFlowCall call, ArgumentPosition pos) { + isArgumentNode(n, call, pos) + } + + /** + * Gets a viable target for the lambda call `call`. + * + * `lastCall` records the call required to reach `call` in order for the result + * to be a viable target, if any. + */ + cached + DataFlowCallable viableCallableLambda(DataFlowCall call, DataFlowCallOption lastCall) { + exists(Node creation, LambdaCallKind kind | + LambdaFlow::revLambdaFlow(call, kind, creation, _, _, _, lastCall) and + lambdaCreation(creation, kind, result) + ) + } + + /** + * Holds if `p` is the parameter of a viable dispatch target of `call`, + * and `p` has position `ppos`. + */ + pragma[nomagic] + private predicate viableParam(DataFlowCall call, ParameterPosition ppos, ParamNode p) { + p.isParameterOf(viableCallableExt(call), ppos) + } + + /** + * Holds if `arg` is a possible argument to `p` in `call`, taking virtual + * dispatch into account. + */ + cached + predicate viableParamArg(DataFlowCall call, ParamNode p, ArgNode arg) { + exists(ParameterPosition ppos | + viableParam(call, ppos, p) and + argumentPositionMatch(call, arg, ppos) and + compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(p)) + ) + } + + pragma[nomagic] + private ReturnPosition viableReturnPos(DataFlowCall call, ReturnKindExt kind) { + viableCallableExt(call) = result.getCallable() and + kind = result.getKind() + } + + /** + * Holds if a value at return position `pos` can be returned to `out` via `call`, + * taking virtual dispatch into account. + */ + cached + predicate viableReturnPosOut(DataFlowCall call, ReturnPosition pos, Node out) { + exists(ReturnKindExt kind | + pos = viableReturnPos(call, kind) and + out = kind.getAnOutNode(call) + ) + } + + /** Provides predicates for calculating flow-through summaries. */ + private module FlowThrough { + /** + * The first flow-through approximation: + * + * - Input access paths are abstracted with a Boolean parameter + * that indicates (non-)emptiness. + */ + private module Cand { + /** + * Holds if `p` can flow to `node` in the same callable using only + * value-preserving steps. + * + * `read` indicates whether it is contents of `p` that can flow to `node`. + */ + pragma[nomagic] + private predicate parameterValueFlowCand(ParamNode p, Node node, boolean read) { + p = node and + read = false + or + // local flow + exists(Node mid | + parameterValueFlowCand(p, mid, read) and + simpleLocalFlowStep(mid, node) + ) + or + // read + exists(Node mid | + parameterValueFlowCand(p, mid, false) and + readSet(mid, _, node) and + read = true + ) + or + // flow through: no prior read + exists(ArgNode arg | + parameterValueFlowArgCand(p, arg, false) and + argumentValueFlowsThroughCand(arg, node, read) + ) + or + // flow through: no read inside method + exists(ArgNode arg | + parameterValueFlowArgCand(p, arg, read) and + argumentValueFlowsThroughCand(arg, node, false) + ) + } + + pragma[nomagic] + private predicate parameterValueFlowArgCand(ParamNode p, ArgNode arg, boolean read) { + parameterValueFlowCand(p, arg, read) + } + + pragma[nomagic] + predicate parameterValueFlowsToPreUpdateCand(ParamNode p, PostUpdateNode n) { + parameterValueFlowCand(p, n.getPreUpdateNode(), false) + } + + /** + * Holds if `p` can flow to a return node of kind `kind` in the same + * callable using only value-preserving steps, not taking call contexts + * into account. + * + * `read` indicates whether it is contents of `p` that can flow to the return + * node. + */ + predicate parameterValueFlowReturnCand(ParamNode p, ReturnKind kind, boolean read) { + exists(ReturnNode ret | + parameterValueFlowCand(p, ret, read) and + kind = ret.getKind() + ) + } + + pragma[nomagic] + private predicate argumentValueFlowsThroughCand0( + DataFlowCall call, ArgNode arg, ReturnKind kind, boolean read + ) { + exists(ParamNode param | viableParamArg(call, param, arg) | + parameterValueFlowReturnCand(param, kind, read) + ) + } + + /** + * Holds if `arg` flows to `out` through a call using only value-preserving steps, + * not taking call contexts into account. + * + * `read` indicates whether it is contents of `arg` that can flow to `out`. + */ + predicate argumentValueFlowsThroughCand(ArgNode arg, Node out, boolean read) { + exists(DataFlowCall call, ReturnKind kind | + argumentValueFlowsThroughCand0(call, arg, kind, read) and + out = getAnOutNode(call, kind) + ) + } + + predicate cand(ParamNode p, Node n) { + parameterValueFlowCand(p, n, _) and + ( + parameterValueFlowReturnCand(p, _, _) + or + parameterValueFlowsToPreUpdateCand(p, _) + ) + } + } + + /** + * The final flow-through calculation: + * + * - Calculated flow is either value-preserving (`read = TReadStepTypesNone()`) + * or summarized as a single read step with before and after types recorded + * in the `ReadStepTypesOption` parameter. + * - Types are checked using the `compatibleTypes()` relation. + */ + private module Final { + /** + * Holds if `p` can flow to `node` in the same callable using only + * value-preserving steps and possibly a single read step, not taking + * call contexts into account. + * + * If a read step was taken, then `read` captures the `Content`, the + * container type, and the content type. + */ + predicate parameterValueFlow(ParamNode p, Node node, ReadStepTypesOption read) { + parameterValueFlow0(p, node, read) and + if node instanceof CastingNode + then + // normal flow through + read = TReadStepTypesNone() and + compatibleTypes(getNodeDataFlowType(p), getNodeDataFlowType(node)) + or + // getter + compatibleTypes(read.getContentType(), getNodeDataFlowType(node)) + else any() + } + + pragma[nomagic] + private predicate parameterValueFlow0(ParamNode p, Node node, ReadStepTypesOption read) { + p = node and + Cand::cand(p, _) and + read = TReadStepTypesNone() + or + // local flow + exists(Node mid | + parameterValueFlow(p, mid, read) and + simpleLocalFlowStep(mid, node) + ) + or + // read + exists(Node mid | + parameterValueFlow(p, mid, TReadStepTypesNone()) and + readStepWithTypes(mid, read.getContainerType(), read.getContent(), node, + read.getContentType()) and + Cand::parameterValueFlowReturnCand(p, _, true) and + compatibleTypes(getNodeDataFlowType(p), read.getContainerType()) + ) + or + parameterValueFlow0_0(TReadStepTypesNone(), p, node, read) + } + + pragma[nomagic] + private predicate parameterValueFlow0_0( + ReadStepTypesOption mustBeNone, ParamNode p, Node node, ReadStepTypesOption read + ) { + // flow through: no prior read + exists(ArgNode arg | + parameterValueFlowArg(p, arg, mustBeNone) and + argumentValueFlowsThrough(arg, read, node) + ) + or + // flow through: no read inside method + exists(ArgNode arg | + parameterValueFlowArg(p, arg, read) and + argumentValueFlowsThrough(arg, mustBeNone, node) + ) + } + + pragma[nomagic] + private predicate parameterValueFlowArg(ParamNode p, ArgNode arg, ReadStepTypesOption read) { + parameterValueFlow(p, arg, read) and + Cand::argumentValueFlowsThroughCand(arg, _, _) + } + + pragma[nomagic] + private predicate argumentValueFlowsThrough0( + DataFlowCall call, ArgNode arg, ReturnKind kind, ReadStepTypesOption read + ) { + exists(ParamNode param | viableParamArg(call, param, arg) | + parameterValueFlowReturn(param, kind, read) + ) + } + + /** + * Holds if `arg` flows to `out` through a call using only + * value-preserving steps and possibly a single read step, not taking + * call contexts into account. + * + * If a read step was taken, then `read` captures the `Content`, the + * container type, and the content type. + */ + pragma[nomagic] + predicate argumentValueFlowsThrough(ArgNode arg, ReadStepTypesOption read, Node out) { + exists(DataFlowCall call, ReturnKind kind | + argumentValueFlowsThrough0(call, arg, kind, read) and + out = getAnOutNode(call, kind) + | + // normal flow through + read = TReadStepTypesNone() and + compatibleTypes(getNodeDataFlowType(arg), getNodeDataFlowType(out)) + or + // getter + compatibleTypes(getNodeDataFlowType(arg), read.getContainerType()) and + compatibleTypes(read.getContentType(), getNodeDataFlowType(out)) + ) + } + + /** + * Holds if `arg` flows to `out` through a call using only + * value-preserving steps and a single read step, not taking call + * contexts into account, thus representing a getter-step. + * + * This predicate is exposed for testing only. + */ + predicate getterStep(ArgNode arg, ContentSet c, Node out) { + argumentValueFlowsThrough(arg, TReadStepTypesSome(_, c, _), out) + } + + /** + * Holds if `p` can flow to a return node of kind `kind` in the same + * callable using only value-preserving steps and possibly a single read + * step. + * + * If a read step was taken, then `read` captures the `Content`, the + * container type, and the content type. + */ + private predicate parameterValueFlowReturn( + ParamNode p, ReturnKind kind, ReadStepTypesOption read + ) { + exists(ReturnNode ret | + parameterValueFlow(p, ret, read) and + kind = ret.getKind() + ) + } + } + + import Final + } + + import FlowThrough + + cached + private module DispatchWithCallContext { + /** + * Holds if the set of viable implementations that can be called by `call` + * might be improved by knowing the call context. + */ + pragma[nomagic] + private predicate mayBenefitFromCallContextExt(DataFlowCall call, DataFlowCallable callable) { + mayBenefitFromCallContext(call, callable) + or + callEnclosingCallable(call, callable) and + exists(viableCallableLambda(call, TDataFlowCallSome(_))) + } + + /** + * Gets a viable dispatch target of `call` in the context `ctx`. This is + * restricted to those `call`s for which a context might make a difference. + */ + pragma[nomagic] + private DataFlowCallable viableImplInCallContextExt(DataFlowCall call, DataFlowCall ctx) { + result = viableImplInCallContext(call, ctx) + or + result = viableCallableLambda(call, TDataFlowCallSome(ctx)) + or + exists(DataFlowCallable enclosing | + mayBenefitFromCallContextExt(call, enclosing) and + enclosing = viableCallableExt(ctx) and + result = viableCallableLambda(call, TDataFlowCallNone()) + ) + } + + /** + * Holds if the call context `ctx` reduces the set of viable run-time + * dispatch targets of call `call` in `c`. + */ + cached + predicate reducedViableImplInCallContext(DataFlowCall call, DataFlowCallable c, DataFlowCall ctx) { + exists(int tgts, int ctxtgts | + mayBenefitFromCallContextExt(call, c) and + c = viableCallableExt(ctx) and + ctxtgts = count(viableImplInCallContextExt(call, ctx)) and + tgts = strictcount(viableCallableExt(call)) and + ctxtgts < tgts + ) + } + + /** + * Gets a viable run-time dispatch target for the call `call` in the + * context `ctx`. This is restricted to those calls for which a context + * makes a difference. + */ + cached + DataFlowCallable prunedViableImplInCallContext(DataFlowCall call, DataFlowCall ctx) { + result = viableImplInCallContextExt(call, ctx) and + reducedViableImplInCallContext(call, _, ctx) + } + + /** + * Holds if flow returning from callable `c` to call `call` might return + * further and if this path restricts the set of call sites that can be + * returned to. + */ + cached + predicate reducedViableImplInReturn(DataFlowCallable c, DataFlowCall call) { + exists(int tgts, int ctxtgts | + mayBenefitFromCallContextExt(call, _) and + c = viableCallableExt(call) and + ctxtgts = count(DataFlowCall ctx | c = viableImplInCallContextExt(call, ctx)) and + tgts = strictcount(DataFlowCall ctx | callEnclosingCallable(call, viableCallableExt(ctx))) and + ctxtgts < tgts + ) + } + + /** + * Gets a viable run-time dispatch target for the call `call` in the + * context `ctx`. This is restricted to those calls and results for which + * the return flow from the result to `call` restricts the possible context + * `ctx`. + */ + cached + DataFlowCallable prunedViableImplInCallContextReverse(DataFlowCall call, DataFlowCall ctx) { + result = viableImplInCallContextExt(call, ctx) and + reducedViableImplInReturn(result, call) + } + } + + import DispatchWithCallContext + + /** + * Holds if `p` can flow to the pre-update node associated with post-update + * node `n`, in the same callable, using only value-preserving steps. + */ + private predicate parameterValueFlowsToPreUpdate(ParamNode p, PostUpdateNode n) { + parameterValueFlow(p, n.getPreUpdateNode(), TReadStepTypesNone()) + } + + cached + predicate readSet(Node node1, ContentSet c, Node node2) { readStep(node1, c, node2) } + + cached + predicate storeSet( + Node node1, ContentSet c, Node node2, DataFlowType contentType, DataFlowType containerType + ) { + storeStep(node1, c, node2) and + contentType = getNodeDataFlowType(node1) and + containerType = getNodeDataFlowType(node2) + or + exists(Node n1, Node n2 | + n1 = node1.(PostUpdateNode).getPreUpdateNode() and + n2 = node2.(PostUpdateNode).getPreUpdateNode() + | + argumentValueFlowsThrough(n2, TReadStepTypesSome(containerType, c, contentType), n1) + or + readSet(n2, c, n1) and + contentType = getNodeDataFlowType(n1) and + containerType = getNodeDataFlowType(n2) + ) + } + + private predicate store( + Node node1, Content c, Node node2, DataFlowType contentType, DataFlowType containerType + ) { + exists(ContentSet cs | + c = cs.getAStoreContent() and storeSet(node1, cs, node2, contentType, containerType) + ) + } + + /** + * Holds if data can flow from `node1` to `node2` via a direct assignment to + * `f`. + * + * This includes reverse steps through reads when the result of the read has + * been stored into, in order to handle cases like `x.f1.f2 = y`. + */ + cached + predicate store(Node node1, TypedContent tc, Node node2, DataFlowType contentType) { + store(node1, tc.getContent(), node2, contentType, tc.getContainerType()) + } + + /** + * Holds if data can flow from `fromNode` to `toNode` because they are the post-update + * nodes of some function output and input respectively, where the output and input + * are aliases. A typical example is a function returning `this`, implementing a fluent + * interface. + */ + private predicate reverseStepThroughInputOutputAlias( + PostUpdateNode fromNode, PostUpdateNode toNode + ) { + exists(Node fromPre, Node toPre | + fromPre = fromNode.getPreUpdateNode() and + toPre = toNode.getPreUpdateNode() + | + exists(DataFlowCall c | + // Does the language-specific simpleLocalFlowStep already model flow + // from function input to output? + fromPre = getAnOutNode(c, _) and + toPre.(ArgNode).argumentOf(c, _) and + simpleLocalFlowStep(toPre.(ArgNode), fromPre) + ) + or + argumentValueFlowsThrough(toPre, TReadStepTypesNone(), fromPre) + ) + } + + cached + predicate simpleLocalFlowStepExt(Node node1, Node node2) { + simpleLocalFlowStep(node1, node2) or + reverseStepThroughInputOutputAlias(node1, node2) + } + + /** + * Holds if the call context `call` improves virtual dispatch in `callable`. + */ + cached + predicate recordDataFlowCallSiteDispatch(DataFlowCall call, DataFlowCallable callable) { + reducedViableImplInCallContext(_, callable, call) + } + + /** + * Holds if the call context `call` allows us to prune unreachable nodes in `callable`. + */ + cached + predicate recordDataFlowCallSiteUnreachable(DataFlowCall call, DataFlowCallable callable) { + exists(Node n | getNodeEnclosingCallable(n) = callable | isUnreachableInCallCached(n, call)) + } + + cached + predicate allowParameterReturnInSelfCached(ParamNode p) { allowParameterReturnInSelf(p) } + + cached + newtype TCallContext = + TAnyCallContext() or + TSpecificCall(DataFlowCall call) { recordDataFlowCallSite(call, _) } or + TSomeCall() or + TReturn(DataFlowCallable c, DataFlowCall call) { reducedViableImplInReturn(c, call) } + + cached + newtype TReturnPosition = + TReturnPosition0(DataFlowCallable c, ReturnKindExt kind) { + exists(ReturnNodeExt ret | + c = returnNodeGetEnclosingCallable(ret) and + kind = ret.getKind() + ) + } + + cached + newtype TLocalFlowCallContext = + TAnyLocalCall() or + TSpecificLocalCall(DataFlowCall call) { isUnreachableInCallCached(_, call) } + + cached + newtype TReturnKindExt = + TValueReturn(ReturnKind kind) or + TParamUpdate(ParameterPosition pos) { exists(ParamNode p | p.isParameterOf(_, pos)) } + + cached + newtype TBooleanOption = + TBooleanNone() or + TBooleanSome(boolean b) { b = true or b = false } + + cached + newtype TDataFlowCallOption = + TDataFlowCallNone() or + TDataFlowCallSome(DataFlowCall call) + + cached + newtype TTypedContent = MkTypedContent(Content c, DataFlowType t) { store(_, c, _, _, t) } + + cached + newtype TAccessPathFront = + TFrontNil(DataFlowType t) or + TFrontHead(TypedContent tc) + + cached + newtype TAccessPathFrontOption = + TAccessPathFrontNone() or + TAccessPathFrontSome(AccessPathFront apf) +} + +/** + * Holds if the call context `call` either improves virtual dispatch in + * `callable` or if it allows us to prune unreachable nodes in `callable`. + */ +predicate recordDataFlowCallSite(DataFlowCall call, DataFlowCallable callable) { + recordDataFlowCallSiteDispatch(call, callable) or + recordDataFlowCallSiteUnreachable(call, callable) +} + +/** + * A `Node` at which a cast can occur such that the type should be checked. + */ +class CastingNode extends Node { + CastingNode() { castingNode(this) } +} + +private predicate readStepWithTypes( + Node n1, DataFlowType container, ContentSet c, Node n2, DataFlowType content +) { + readSet(n1, c, n2) and + container = getNodeDataFlowType(n1) and + content = getNodeDataFlowType(n2) +} + +private newtype TReadStepTypesOption = + TReadStepTypesNone() or + TReadStepTypesSome(DataFlowType container, ContentSet c, DataFlowType content) { + readStepWithTypes(_, container, c, _, content) + } + +private class ReadStepTypesOption extends TReadStepTypesOption { + predicate isSome() { this instanceof TReadStepTypesSome } + + DataFlowType getContainerType() { this = TReadStepTypesSome(result, _, _) } + + ContentSet getContent() { this = TReadStepTypesSome(_, result, _) } + + DataFlowType getContentType() { this = TReadStepTypesSome(_, _, result) } + + string toString() { if this.isSome() then result = "Some(..)" else result = "None()" } +} + +/** + * A call context to restrict the targets of virtual dispatch, prune local flow, + * and match the call sites of flow into a method with flow out of a method. + * + * There are four cases: + * - `TAnyCallContext()` : No restrictions on method flow. + * - `TSpecificCall(DataFlowCall call)` : Flow entered through the + * given `call`. This call improves the set of viable + * dispatch targets for at least one method call in the current callable + * or helps prune unreachable nodes in the current callable. + * - `TSomeCall()` : Flow entered through a parameter. The + * originating call does not improve the set of dispatch targets for any + * method call in the current callable and was therefore not recorded. + * - `TReturn(Callable c, DataFlowCall call)` : Flow reached `call` from `c` and + * this dispatch target of `call` implies a reduced set of dispatch origins + * to which data may flow if it should reach a `return` statement. + */ +abstract class CallContext extends TCallContext { + abstract string toString(); + + /** Holds if this call context is relevant for `callable`. */ + abstract predicate relevantFor(DataFlowCallable callable); +} + +abstract class CallContextNoCall extends CallContext { } + +class CallContextAny extends CallContextNoCall, TAnyCallContext { + override string toString() { result = "CcAny" } + + override predicate relevantFor(DataFlowCallable callable) { any() } +} + +abstract class CallContextCall extends CallContext { + /** Holds if this call context may be `call`. */ + bindingset[call] + abstract predicate matchesCall(DataFlowCall call); +} + +class CallContextSpecificCall extends CallContextCall, TSpecificCall { + override string toString() { + exists(DataFlowCall call | this = TSpecificCall(call) | result = "CcCall(" + call + ")") + } + + override predicate relevantFor(DataFlowCallable callable) { + recordDataFlowCallSite(this.getCall(), callable) + } + + override predicate matchesCall(DataFlowCall call) { call = this.getCall() } + + DataFlowCall getCall() { this = TSpecificCall(result) } +} + +class CallContextSomeCall extends CallContextCall, TSomeCall { + override string toString() { result = "CcSomeCall" } + + override predicate relevantFor(DataFlowCallable callable) { + exists(ParamNode p | getNodeEnclosingCallable(p) = callable) + } + + override predicate matchesCall(DataFlowCall call) { any() } +} + +class CallContextReturn extends CallContextNoCall, TReturn { + override string toString() { + exists(DataFlowCall call | this = TReturn(_, call) | result = "CcReturn(" + call + ")") + } + + override predicate relevantFor(DataFlowCallable callable) { + exists(DataFlowCall call | this = TReturn(_, call) and callEnclosingCallable(call, callable)) + } +} + +/** + * A call context that is relevant for pruning local flow. + */ +abstract class LocalCallContext extends TLocalFlowCallContext { + abstract string toString(); + + /** Holds if this call context is relevant for `callable`. */ + abstract predicate relevantFor(DataFlowCallable callable); +} + +class LocalCallContextAny extends LocalCallContext, TAnyLocalCall { + override string toString() { result = "LocalCcAny" } + + override predicate relevantFor(DataFlowCallable callable) { any() } +} + +class LocalCallContextSpecificCall extends LocalCallContext, TSpecificLocalCall { + LocalCallContextSpecificCall() { this = TSpecificLocalCall(call) } + + DataFlowCall call; + + DataFlowCall getCall() { result = call } + + override string toString() { result = "LocalCcCall(" + call + ")" } + + override predicate relevantFor(DataFlowCallable callable) { relevantLocalCCtx(call, callable) } +} + +private predicate relevantLocalCCtx(DataFlowCall call, DataFlowCallable callable) { + exists(Node n | getNodeEnclosingCallable(n) = callable and isUnreachableInCallCached(n, call)) +} + +/** + * Gets the local call context given the call context and the callable that + * the contexts apply to. + */ +LocalCallContext getLocalCallContext(CallContext ctx, DataFlowCallable callable) { + ctx.relevantFor(callable) and + if relevantLocalCCtx(ctx.(CallContextSpecificCall).getCall(), callable) + then result.(LocalCallContextSpecificCall).getCall() = ctx.(CallContextSpecificCall).getCall() + else result instanceof LocalCallContextAny +} + +/** + * The value of a parameter at function entry, viewed as a node in a data + * flow graph. + */ +class ParamNode extends Node { + ParamNode() { parameterNode(this, _, _) } + + /** + * Holds if this node is the parameter of callable `c` at the specified + * position. + */ + predicate isParameterOf(DataFlowCallable c, ParameterPosition pos) { parameterNode(this, c, pos) } +} + +/** A data-flow node that represents a call argument. */ +class ArgNode extends Node { + ArgNode() { argumentNode(this, _, _) } + + /** Holds if this argument occurs at the given position in the given call. */ + final predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { + argumentNode(this, call, pos) + } +} + +/** + * A node from which flow can return to the caller. This is either a regular + * `ReturnNode` or a `PostUpdateNode` corresponding to the value of a parameter. + */ +class ReturnNodeExt extends Node { + ReturnNodeExt() { returnNodeExt(this, _) } + + /** Gets the kind of this returned value. */ + ReturnKindExt getKind() { returnNodeExt(this, result) } +} + +/** + * A node to which data can flow from a call. Either an ordinary out node + * or a post-update node associated with a call argument. + */ +class OutNodeExt extends Node { + OutNodeExt() { outNodeExt(this) } +} + +/** + * An extended return kind. A return kind describes how data can be returned + * from a callable. This can either be through a returned value or an updated + * parameter. + */ +abstract class ReturnKindExt extends TReturnKindExt { + /** Gets a textual representation of this return kind. */ + abstract string toString(); + + /** Gets a node corresponding to data flow out of `call`. */ + final OutNodeExt getAnOutNode(DataFlowCall call) { result = getAnOutNodeExt(call, this) } +} + +class ValueReturnKind extends ReturnKindExt, TValueReturn { + private ReturnKind kind; + + ValueReturnKind() { this = TValueReturn(kind) } + + ReturnKind getKind() { result = kind } + + override string toString() { result = kind.toString() } +} + +class ParamUpdateReturnKind extends ReturnKindExt, TParamUpdate { + private ParameterPosition pos; + + ParamUpdateReturnKind() { this = TParamUpdate(pos) } + + ParameterPosition getPosition() { result = pos } + + pragma[nomagic] + ArgumentPosition getAMatchingArgumentPosition() { parameterMatch(pos, result) } + + override string toString() { result = "param update " + pos } +} + +/** A callable tagged with a relevant return kind. */ +class ReturnPosition extends TReturnPosition0 { + private DataFlowCallable c; + private ReturnKindExt kind; + + ReturnPosition() { this = TReturnPosition0(c, kind) } + + /** Gets the callable. */ + DataFlowCallable getCallable() { result = c } + + /** Gets the return kind. */ + ReturnKindExt getKind() { result = kind } + + /** Gets a textual representation of this return position. */ + string toString() { result = "[" + kind + "] " + c } +} + +/** + * Gets the enclosing callable of `n`. Unlike `n.getEnclosingCallable()`, this + * predicate ensures that joins go from `n` to the result instead of the other + * way around. + */ +pragma[inline] +DataFlowCallable getNodeEnclosingCallable(Node n) { + nodeEnclosingCallable(pragma[only_bind_out](n), pragma[only_bind_into](result)) +} + +/** Gets the type of `n` used for type pruning. */ +pragma[inline] +DataFlowType getNodeDataFlowType(Node n) { + nodeDataFlowType(pragma[only_bind_out](n), pragma[only_bind_into](result)) +} + +pragma[noinline] +private DataFlowCallable returnNodeGetEnclosingCallable(ReturnNodeExt ret) { + result = getNodeEnclosingCallable(ret) +} + +pragma[noinline] +private ReturnPosition getReturnPosition0(ReturnNodeExt ret, ReturnKindExt kind) { + result.getCallable() = returnNodeGetEnclosingCallable(ret) and + kind = result.getKind() +} + +pragma[noinline] +ReturnPosition getReturnPosition(ReturnNodeExt ret) { + result = getReturnPosition0(ret, ret.getKind()) +} + +/** + * Checks whether `inner` can return to `call` in the call context `innercc`. + * Assumes a context of `inner = viableCallableExt(call)`. + */ +bindingset[innercc, inner, call] +predicate checkCallContextReturn(CallContext innercc, DataFlowCallable inner, DataFlowCall call) { + innercc instanceof CallContextAny + or + exists(DataFlowCallable c0, DataFlowCall call0 | + callEnclosingCallable(call0, inner) and + innercc = TReturn(c0, call0) and + c0 = prunedViableImplInCallContextReverse(call0, call) + ) +} + +/** + * Checks whether `call` can resolve to `calltarget` in the call context `cc`. + * Assumes a context of `calltarget = viableCallableExt(call)`. + */ +bindingset[cc, call, calltarget] +predicate checkCallContextCall(CallContext cc, DataFlowCall call, DataFlowCallable calltarget) { + exists(DataFlowCall ctx | cc = TSpecificCall(ctx) | + if reducedViableImplInCallContext(call, _, ctx) + then calltarget = prunedViableImplInCallContext(call, ctx) + else any() + ) + or + cc instanceof CallContextSomeCall + or + cc instanceof CallContextAny + or + cc instanceof CallContextReturn +} + +/** + * Resolves a return from `callable` in `cc` to `call`. This is equivalent to + * `callable = viableCallableExt(call) and checkCallContextReturn(cc, callable, call)`. + */ +bindingset[cc, callable] +predicate resolveReturn(CallContext cc, DataFlowCallable callable, DataFlowCall call) { + cc instanceof CallContextAny and callable = viableCallableExt(call) + or + exists(DataFlowCallable c0, DataFlowCall call0 | + callEnclosingCallable(call0, callable) and + cc = TReturn(c0, call0) and + c0 = prunedViableImplInCallContextReverse(call0, call) + ) +} + +/** + * Resolves a call from `call` in `cc` to `result`. This is equivalent to + * `result = viableCallableExt(call) and checkCallContextCall(cc, call, result)`. + */ +bindingset[call, cc] +DataFlowCallable resolveCall(DataFlowCall call, CallContext cc) { + exists(DataFlowCall ctx | cc = TSpecificCall(ctx) | + if reducedViableImplInCallContext(call, _, ctx) + then result = prunedViableImplInCallContext(call, ctx) + else result = viableCallableExt(call) + ) + or + result = viableCallableExt(call) and cc instanceof CallContextSomeCall + or + result = viableCallableExt(call) and cc instanceof CallContextAny + or + result = viableCallableExt(call) and cc instanceof CallContextReturn +} + +/** An optional Boolean value. */ +class BooleanOption extends TBooleanOption { + string toString() { + this = TBooleanNone() and result = "" + or + this = TBooleanSome(any(boolean b | result = b.toString())) + } +} + +/** An optional `DataFlowCall`. */ +class DataFlowCallOption extends TDataFlowCallOption { + string toString() { + this = TDataFlowCallNone() and + result = "(none)" + or + exists(DataFlowCall call | + this = TDataFlowCallSome(call) and + result = call.toString() + ) + } +} + +/** A `Content` tagged with the type of a containing object. */ +class TypedContent extends MkTypedContent { + private Content c; + private DataFlowType t; + + TypedContent() { this = MkTypedContent(c, t) } + + /** Gets the content. */ + Content getContent() { result = c } + + /** Gets the container type. */ + DataFlowType getContainerType() { result = t } + + /** Gets a textual representation of this content. */ + string toString() { result = c.toString() } + + /** + * Holds if access paths with this `TypedContent` at their head always should + * be tracked at high precision. This disables adaptive access path precision + * for such access paths. + */ + predicate forceHighPrecision() { forceHighPrecision(c) } +} + +/** + * The front of an access path. This is either a head or a nil. + */ +abstract class AccessPathFront extends TAccessPathFront { + abstract string toString(); + + abstract DataFlowType getType(); + + abstract boolean toBoolNonEmpty(); + + TypedContent getHead() { this = TFrontHead(result) } +} + +class AccessPathFrontNil extends AccessPathFront, TFrontNil { + private DataFlowType t; + + AccessPathFrontNil() { this = TFrontNil(t) } + + override string toString() { result = ppReprType(t) } + + override DataFlowType getType() { result = t } + + override boolean toBoolNonEmpty() { result = false } +} + +class AccessPathFrontHead extends AccessPathFront, TFrontHead { + private TypedContent tc; + + AccessPathFrontHead() { this = TFrontHead(tc) } + + override string toString() { result = tc.toString() } + + override DataFlowType getType() { result = tc.getContainerType() } + + override boolean toBoolNonEmpty() { result = true } +} + +/** An optional access path front. */ +class AccessPathFrontOption extends TAccessPathFrontOption { + string toString() { + this = TAccessPathFrontNone() and result = "" + or + this = TAccessPathFrontSome(any(AccessPathFront apf | result = apf.toString())) + } +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll new file mode 100644 index 000000000000..d6c1bad2744d --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplConsistency.qll @@ -0,0 +1,212 @@ +/** + * Provides consistency queries for checking invariants in the language-specific + * data-flow classes and predicates. + */ + +private import DataFlowImplSpecific::Private +private import DataFlowImplSpecific::Public +private import tainttracking1.TaintTrackingParameter::Private +private import tainttracking1.TaintTrackingParameter::Public + +module Consistency { + private newtype TConsistencyConfiguration = MkConsistencyConfiguration() + + /** A class for configuring the consistency queries. */ + class ConsistencyConfiguration extends TConsistencyConfiguration { + string toString() { none() } + + /** Holds if `n` should be excluded from the consistency test `uniqueEnclosingCallable`. */ + predicate uniqueEnclosingCallableExclude(Node n) { none() } + + /** Holds if `n` should be excluded from the consistency test `uniqueNodeLocation`. */ + predicate uniqueNodeLocationExclude(Node n) { none() } + + /** Holds if `n` should be excluded from the consistency test `missingLocation`. */ + predicate missingLocationExclude(Node n) { none() } + + /** Holds if `n` should be excluded from the consistency test `postWithInFlow`. */ + predicate postWithInFlowExclude(Node n) { none() } + + /** Holds if `n` should be excluded from the consistency test `argHasPostUpdate`. */ + predicate argHasPostUpdateExclude(ArgumentNode n) { none() } + + /** Holds if `n` should be excluded from the consistency test `reverseRead`. */ + predicate reverseReadExclude(Node n) { none() } + } + + private class RelevantNode extends Node { + RelevantNode() { + this instanceof ArgumentNode or + this instanceof ParameterNode or + this instanceof ReturnNode or + this = getAnOutNode(_, _) or + simpleLocalFlowStep(this, _) or + simpleLocalFlowStep(_, this) or + jumpStep(this, _) or + jumpStep(_, this) or + storeStep(this, _, _) or + storeStep(_, _, this) or + readStep(this, _, _) or + readStep(_, _, this) or + defaultAdditionalTaintStep(this, _) or + defaultAdditionalTaintStep(_, this) + } + } + + query predicate uniqueEnclosingCallable(Node n, string msg) { + exists(int c | + n instanceof RelevantNode and + c = count(nodeGetEnclosingCallable(n)) and + c != 1 and + not any(ConsistencyConfiguration conf).uniqueEnclosingCallableExclude(n) and + msg = "Node should have one enclosing callable but has " + c + "." + ) + } + + query predicate uniqueType(Node n, string msg) { + exists(int c | + n instanceof RelevantNode and + c = count(getNodeType(n)) and + c != 1 and + msg = "Node should have one type but has " + c + "." + ) + } + + query predicate uniqueNodeLocation(Node n, string msg) { + exists(int c | + c = + count(string filepath, int startline, int startcolumn, int endline, int endcolumn | + n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + ) and + c != 1 and + not any(ConsistencyConfiguration conf).uniqueNodeLocationExclude(n) and + msg = "Node should have one location but has " + c + "." + ) + } + + query predicate missingLocation(string msg) { + exists(int c | + c = + strictcount(Node n | + not exists(string filepath, int startline, int startcolumn, int endline, int endcolumn | + n.hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + ) and + not any(ConsistencyConfiguration conf).missingLocationExclude(n) + ) and + msg = "Nodes without location: " + c + ) + } + + query predicate uniqueNodeToString(Node n, string msg) { + exists(int c | + c = count(n.toString()) and + c != 1 and + msg = "Node should have one toString but has " + c + "." + ) + } + + query predicate missingToString(string msg) { + exists(int c | + c = strictcount(Node n | not exists(n.toString())) and + msg = "Nodes without toString: " + c + ) + } + + query predicate parameterCallable(ParameterNode p, string msg) { + exists(DataFlowCallable c | isParameterNode(p, c, _) and c != nodeGetEnclosingCallable(p)) and + msg = "Callable mismatch for parameter." + } + + query predicate localFlowIsLocal(Node n1, Node n2, string msg) { + simpleLocalFlowStep(n1, n2) and + nodeGetEnclosingCallable(n1) != nodeGetEnclosingCallable(n2) and + msg = "Local flow step does not preserve enclosing callable." + } + + private DataFlowType typeRepr() { result = getNodeType(_) } + + query predicate compatibleTypesReflexive(DataFlowType t, string msg) { + t = typeRepr() and + not compatibleTypes(t, t) and + msg = "Type compatibility predicate is not reflexive." + } + + query predicate unreachableNodeCCtx(Node n, DataFlowCall call, string msg) { + isUnreachableInCall(n, call) and + exists(DataFlowCallable c | + c = nodeGetEnclosingCallable(n) and + not viableCallable(call) = c + ) and + msg = "Call context for isUnreachableInCall is inconsistent with call graph." + } + + query predicate localCallNodes(DataFlowCall call, Node n, string msg) { + ( + n = getAnOutNode(call, _) and + msg = "OutNode and call does not share enclosing callable." + or + n.(ArgumentNode).argumentOf(call, _) and + msg = "ArgumentNode and call does not share enclosing callable." + ) and + nodeGetEnclosingCallable(n) != call.getEnclosingCallable() + } + + // This predicate helps the compiler forget that in some languages + // it is impossible for a result of `getPreUpdateNode` to be an + // instance of `PostUpdateNode`. + private Node getPre(PostUpdateNode n) { + result = n.getPreUpdateNode() + or + none() + } + + query predicate postIsNotPre(PostUpdateNode n, string msg) { + getPre(n) = n and + msg = "PostUpdateNode should not equal its pre-update node." + } + + query predicate postHasUniquePre(PostUpdateNode n, string msg) { + exists(int c | + c = count(n.getPreUpdateNode()) and + c != 1 and + msg = "PostUpdateNode should have one pre-update node but has " + c + "." + ) + } + + query predicate uniquePostUpdate(Node n, string msg) { + 1 < strictcount(PostUpdateNode post | post.getPreUpdateNode() = n) and + msg = "Node has multiple PostUpdateNodes." + } + + query predicate postIsInSameCallable(PostUpdateNode n, string msg) { + nodeGetEnclosingCallable(n) != nodeGetEnclosingCallable(n.getPreUpdateNode()) and + msg = "PostUpdateNode does not share callable with its pre-update node." + } + + private predicate hasPost(Node n) { exists(PostUpdateNode post | post.getPreUpdateNode() = n) } + + query predicate reverseRead(Node n, string msg) { + exists(Node n2 | readStep(n, _, n2) and hasPost(n2) and not hasPost(n)) and + not any(ConsistencyConfiguration conf).reverseReadExclude(n) and + msg = "Origin of readStep is missing a PostUpdateNode." + } + + query predicate argHasPostUpdate(ArgumentNode n, string msg) { + not hasPost(n) and + not any(ConsistencyConfiguration c).argHasPostUpdateExclude(n) and + msg = "ArgumentNode is missing PostUpdateNode." + } + + // This predicate helps the compiler forget that in some languages + // it is impossible for a `PostUpdateNode` to be the target of + // `simpleLocalFlowStep`. + private predicate isPostUpdateNode(Node n) { n instanceof PostUpdateNode or none() } + + query predicate postWithInFlow(Node n, string msg) { + isPostUpdateNode(n) and + not clearsContent(n, _) and + simpleLocalFlowStep(_, n) and + not any(ConsistencyConfiguration c).postWithInFlowExclude(n) and + msg = "PostUpdateNode should not be the target of local flow." + } +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll new file mode 100644 index 000000000000..4ea383b20a11 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowImplSpecific.qll @@ -0,0 +1,11 @@ +/** + * Provides IR-specific definitions for use in the data flow library. + */ +module Private { + import DataFlowPrivate + import DataFlowDispatch +} + +module Public { + import DataFlowUtil +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll new file mode 100644 index 000000000000..d6b2d455dd26 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowPrivate.qll @@ -0,0 +1,560 @@ +private import cpp as Cpp +private import DataFlowUtil +private import semmle.code.cpp.ir.IR +private import DataFlowDispatch +private import DataFlowImplConsistency +private import semmle.code.cpp.ir.internal.IRCppLanguage +private import SsaInternals as Ssa + +/** Gets the callable in which this node occurs. */ +DataFlowCallable nodeGetEnclosingCallable(Node n) { result = n.getEnclosingCallable() } + +/** Holds if `p` is a `ParameterNode` of `c` with position `pos`. */ +predicate isParameterNode(ParameterNode p, DataFlowCallable c, ParameterPosition pos) { + p.isParameterOf(c, pos) +} + +/** Holds if `arg` is an `ArgumentNode` of `c` with position `pos`. */ +predicate isArgumentNode(ArgumentNode arg, DataFlowCall c, ArgumentPosition pos) { + arg.argumentOf(c, pos) +} + +/** + * A data flow node that occurs as the argument of a call and is passed as-is + * to the callable. Instance arguments (`this` pointer) and read side effects + * on parameters are also included. + */ +abstract class ArgumentNode extends Node { + /** + * Holds if this argument occurs at the given position in the given call. + * The instance argument is considered to have index `-1`. + */ + abstract predicate argumentOf(DataFlowCall call, ArgumentPosition pos); + + /** Gets the call in which this node is an argument. */ + DataFlowCall getCall() { this.argumentOf(result, _) } +} + +/** + * A data flow node that occurs as the argument to a call, or an + * implicit `this` pointer argument. + */ +private class PrimaryArgumentNode extends ArgumentNode, OperandNode { + override ArgumentOperand op; + + PrimaryArgumentNode() { exists(CallInstruction call | op = call.getAnArgumentOperand()) } + + override predicate argumentOf(DataFlowCall call, ArgumentPosition pos) { + op = call.getArgumentOperand(pos.(DirectPosition).getIndex()) + } + + override string toStringImpl() { result = argumentOperandToString(op) } +} + +private string argumentOperandToString(ArgumentOperand op) { + exists(Expr unconverted | + unconverted = op.getDef().getUnconvertedResultExpression() and + result = unconverted.toString() + ) + or + // Certain instructions don't map to an unconverted result expression. For these cases + // we fall back to a simpler naming scheme. This can happen in IR-generated constructors. + not exists(op.getDef().getUnconvertedResultExpression()) and + ( + result = "Argument " + op.(PositionalArgumentOperand).getIndex() + or + op instanceof ThisArgumentOperand and result = "Argument this" + ) +} + +private class SideEffectArgumentNode extends ArgumentNode, SideEffectOperandNode { + override predicate argumentOf(DataFlowCall dfCall, ArgumentPosition pos) { + this.getCallInstruction() = dfCall and + pos.(IndirectionPosition).getArgumentIndex() = this.getArgumentIndex() and + pos.(IndirectionPosition).getIndirectionIndex() = super.getIndirectionIndex() + } + + override string toStringImpl() { + result = argumentOperandToString(this.getAddressOperand()) + " indirection" + } +} + +/** A parameter position represented by an integer. */ +class ParameterPosition = Position; + +/** An argument position represented by an integer. */ +class ArgumentPosition = Position; + +class Position extends TPosition { + abstract string toString(); +} + +class DirectPosition extends Position, TDirectPosition { + int index; + + DirectPosition() { this = TDirectPosition(index) } + + override string toString() { if index = -1 then result = "this" else result = index.toString() } + + int getIndex() { result = index } +} + +class IndirectionPosition extends Position, TIndirectionPosition { + int argumentIndex; + int indirectionIndex; + + IndirectionPosition() { this = TIndirectionPosition(argumentIndex, indirectionIndex) } + + override string toString() { + if argumentIndex = -1 + then if indirectionIndex > 0 then result = "this indirection" else result = "this" + else + if indirectionIndex > 0 + then result = argumentIndex.toString() + " indirection" + else result = argumentIndex.toString() + } + + int getArgumentIndex() { result = argumentIndex } + + int getIndirectionIndex() { result = indirectionIndex } +} + +newtype TPosition = + TDirectPosition(int index) { exists(any(CallInstruction c).getArgument(index)) } or + TIndirectionPosition(int argumentIndex, int indirectionIndex) { + hasOperandAndIndex(_, any(CallInstruction call).getArgumentOperand(argumentIndex), + indirectionIndex) + } + +private newtype TReturnKind = + TNormalReturnKind(int index) { + exists(IndirectReturnNode return | + return.getAddressOperand() = any(ReturnValueInstruction r).getReturnAddressOperand() and + index = return.getIndirectionIndex() - 1 // We subtract one because the return loads the value. + ) + } or + TIndirectReturnKind(int argumentIndex, int indirectionIndex) { + exists(IndirectReturnNode return, ReturnIndirectionInstruction returnInd | + returnInd.hasIndex(argumentIndex) and + return.getAddressOperand() = returnInd.getSourceAddressOperand() and + indirectionIndex = return.getIndirectionIndex() - 1 // We subtract one because the return loads the value. + ) + } + +/** + * A return kind. A return kind describes how a value can be returned + * from a callable. For C++, this is simply a function return. + */ +class ReturnKind extends TReturnKind { + /** Gets a textual representation of this return kind. */ + abstract string toString(); +} + +private class NormalReturnKind extends ReturnKind, TNormalReturnKind { + int index; + + NormalReturnKind() { this = TNormalReturnKind(index) } + + override string toString() { result = "indirect return" } +} + +private class IndirectReturnKind extends ReturnKind, TIndirectReturnKind { + int argumentIndex; + int indirectionIndex; + + IndirectReturnKind() { this = TIndirectReturnKind(argumentIndex, indirectionIndex) } + + override string toString() { result = "indirect outparam[" + argumentIndex.toString() + "]" } +} + +/** A data flow node that occurs as the result of a `ReturnStmt`. */ +class ReturnNode extends Node instanceof IndirectReturnNode { + /** Gets the kind of this returned value. */ + abstract ReturnKind getKind(); +} + +/** + * This predicate represents an annoying hack that we have to do. We use the + * `ReturnIndirectionInstruction` to determine which variables need flow back + * out of a function. However, the IR will unconditionally create those for a + * variable passed to a function even though the variable was never updated by + * the function. And if a function has too many `ReturnNode`s the dataflow + * library lowers its precision for that function by disabling field flow. + * + * So we those eliminate `ReturnNode`s that would have otherwise been created + * by this unconditional `ReturnIndirectionInstruction` by requiring that there + * must exist an SSA definition of the IR variable in the function. + */ +private predicate hasNonInitializeParameterDef(IRVariable v) { + exists(Ssa::Def def | + not def.getDefiningInstruction() instanceof InitializeParameterInstruction and + v = def.getSourceVariable().getBaseVariable().(Ssa::BaseIRVariable).getIRVariable() + ) +} + +class ReturnIndirectionNode extends IndirectReturnNode, ReturnNode { + override ReturnKind getKind() { + exists(int argumentIndex, ReturnIndirectionInstruction returnInd | + returnInd.hasIndex(argumentIndex) and + this.getAddressOperand() = returnInd.getSourceAddressOperand() and + result = TIndirectReturnKind(argumentIndex, this.getIndirectionIndex() - 1) and + hasNonInitializeParameterDef(returnInd.getIRVariable()) + ) + or + this.getAddressOperand() = any(ReturnValueInstruction r).getReturnAddressOperand() and + result = TNormalReturnKind(this.getIndirectionIndex() - 1) + } +} + +private Operand fullyConvertedCallStep(Operand op) { + not exists(getANonConversionUse(op)) and + exists(Instruction instr | + conversionFlow(op, instr, _) and + result = getAUse(instr) + ) +} + +/** + * Gets the instruction that uses this operand, if the instruction is not + * ignored for dataflow purposes. + */ +private Instruction getUse(Operand op) { + result = op.getUse() and + not Ssa::ignoreOperand(op) +} + +/** Gets a use of the instruction `instr` that is not ignored for dataflow purposes. */ +Operand getAUse(Instruction instr) { + result = instr.getAUse() and + not Ssa::ignoreOperand(result) +} + +/** + * Gets a use of `operand` that is: + * - not ignored for dataflow purposes, and + * - not a conversion-like instruction. + */ +private Instruction getANonConversionUse(Operand operand) { + result = getUse(operand) and + not conversionFlow(_, result, _) +} + +/** + * Gets the operand that represents the first use of the value of `call` following + * a sequnce of conversion-like instructions. + */ +predicate operandForfullyConvertedCall(Operand operand, CallInstruction call) { + exists(getANonConversionUse(operand)) and + ( + operand = getAUse(call) + or + operand = fullyConvertedCallStep*(getAUse(call)) + ) +} + +/** + * Gets the instruction that represents the first use of the value of `call` following + * a sequnce of conversion-like instructions. + * + * This predicate only holds if there is no suitable operand (i.e., no operand of a non- + * conversion instruction) to use to represent the value of `call` after conversions. + */ +predicate instructionForfullyConvertedCall(Instruction instr, CallInstruction call) { + not operandForfullyConvertedCall(_, call) and + ( + // If there is no use of the call then we pick the call instruction + not exists(getAUse(call)) and + instr = call + or + // Otherwise, flow to the first non-conversion use. + exists(Operand operand | operand = fullyConvertedCallStep*(getAUse(call)) | + instr = getANonConversionUse(operand) + ) + ) +} + +/** Holds if `node` represents the output node for `call`. */ +private predicate simpleOutNode(Node node, CallInstruction call) { + operandForfullyConvertedCall(node.asOperand(), call) + or + instructionForfullyConvertedCall(node.asInstruction(), call) +} + +/** A data flow node that represents the output of a call. */ +class OutNode extends Node { + OutNode() { + // Return values not hidden behind indirections + simpleOutNode(this, _) + or + // Return values hidden behind indirections + this instanceof IndirectReturnOutNode + or + // Modified arguments hidden behind indirections + this instanceof IndirectArgumentOutNode + } + + /** Gets the underlying call. */ + abstract DataFlowCall getCall(); + + abstract ReturnKind getReturnKind(); +} + +private class DirectCallOutNode extends OutNode { + CallInstruction call; + + DirectCallOutNode() { simpleOutNode(this, call) } + + override DataFlowCall getCall() { result = call } + + override ReturnKind getReturnKind() { result = TNormalReturnKind(0) } +} + +private class IndirectCallOutNode extends OutNode, IndirectReturnOutNode { + override DataFlowCall getCall() { result = this.getCallInstruction() } + + override ReturnKind getReturnKind() { result = TNormalReturnKind(this.getIndirectionIndex()) } +} + +private class SideEffectOutNode extends OutNode, IndirectArgumentOutNode { + override DataFlowCall getCall() { result = this.getCallInstruction() } + + override ReturnKind getReturnKind() { + result = TIndirectReturnKind(this.getArgumentIndex(), this.getIndirectionIndex()) + } +} + +/** + * Gets a node that can read the value returned from `call` with return kind + * `kind`. + */ +OutNode getAnOutNode(DataFlowCall call, ReturnKind kind) { + result.getCall() = call and + result.getReturnKind() = kind +} + +/** + * Holds if data can flow from `node1` to `node2` in a way that loses the + * calling context. For example, this would happen with flow through a + * global or static variable. + */ +predicate jumpStep(Node n1, Node n2) { + exists(Cpp::GlobalOrNamespaceVariable v | + v = + n1.asInstruction() + .(StoreInstruction) + .getResultAddress() + .(VariableAddressInstruction) + .getAstVariable() and + v = n2.asVariable() + or + v = + n2.asInstruction() + .(LoadInstruction) + .getSourceAddress() + .(VariableAddressInstruction) + .getAstVariable() and + v = n1.asVariable() + ) +} + +/** + * Holds if data can flow from `node1` to `node2` via an assignment to `f`. + * Thus, `node2` references an object with a field `f` that contains the + * value of `node1`. + */ +predicate storeStep(Node node1, Content c, PostFieldUpdateNode node2) { + exists(int indirectionIndex1, int numberOfLoads, StoreInstruction store | + nodeHasInstruction(node1, store, pragma[only_bind_into](indirectionIndex1)) and + node2.getIndirectionIndex() = 0 and + numberOfLoadsFromOperand(node2.getFieldAddress(), store.getDestinationAddressOperand(), + numberOfLoads) + | + exists(FieldContent fc | fc = c | + fc.getField() = node2.getUpdatedField() and + fc.getIndirectionIndex() = 1 + indirectionIndex1 + numberOfLoads + ) + or + exists(UnionContent uc | uc = c | + uc.getAField() = node2.getUpdatedField() and + uc.getIndirectionIndex() = 1 + indirectionIndex1 + numberOfLoads + ) + ) +} + +/** + * Holds if `operandFrom` flows to `operandTo` using a sequence of conversion-like + * operations and exactly `n` `LoadInstruction` operations. + */ +private predicate numberOfLoadsFromOperandRec(Operand operandFrom, Operand operandTo, int ind) { + exists(LoadInstruction load | load.getSourceAddressOperand() = operandFrom | + operandTo = operandFrom and ind = 0 + or + numberOfLoadsFromOperand(load.getAUse(), operandTo, ind - 1) + ) + or + exists(Operand op, Instruction instr | + instr = op.getDef() and + conversionFlow(operandFrom, instr, _) and + numberOfLoadsFromOperand(op, operandTo, ind) + ) +} + +/** + * Holds if `operandFrom` flows to `operandTo` using a sequence of conversion-like + * operations and exactly `n` `LoadInstruction` operations. + */ +private predicate numberOfLoadsFromOperand(Operand operandFrom, Operand operandTo, int n) { + numberOfLoadsFromOperandRec(operandFrom, operandTo, n) + or + not any(LoadInstruction load).getSourceAddressOperand() = operandFrom and + not conversionFlow(operandFrom, _, _) and + operandFrom = operandTo and + n = 0 +} + +// Needed to join on both an operand and an index at the same time. +pragma[noinline] +predicate nodeHasOperand(Node node, Operand operand, int indirectionIndex) { + node.asOperand() = operand and indirectionIndex = 0 + or + hasOperandAndIndex(node, operand, indirectionIndex) +} + +// Needed to join on both an instruction and an index at the same time. +pragma[noinline] +predicate nodeHasInstruction(Node node, Instruction instr, int indirectionIndex) { + node.asInstruction() = instr and indirectionIndex = 0 + or + hasInstructionAndIndex(node, instr, indirectionIndex) +} + +/** + * Holds if data can flow from `node1` to `node2` via a read of `f`. + * Thus, `node1` references an object with a field `f` whose value ends up in + * `node2`. + */ +predicate readStep(Node node1, Content c, Node node2) { + exists(FieldAddress fa1, Operand operand, int numberOfLoads, int indirectionIndex2 | + nodeHasOperand(node2, operand, indirectionIndex2) and + nodeHasOperand(node1, fa1.getObjectAddressOperand(), _) and + numberOfLoadsFromOperand(fa1, operand, numberOfLoads) + | + exists(FieldContent fc | fc = c | + fc.getField() = fa1.getField() and + fc.getIndirectionIndex() = indirectionIndex2 + numberOfLoads + ) + or + exists(UnionContent uc | uc = c | + uc.getAField() = fa1.getField() and + uc.getIndirectionIndex() = indirectionIndex2 + numberOfLoads + ) + ) +} + +/** + * Holds if values stored inside content `c` are cleared at node `n`. + */ +predicate clearsContent(Node n, Content c) { + none() // stub implementation +} + +/** + * Holds if the value that is being tracked is expected to be stored inside content `c` + * at node `n`. + */ +predicate expectsContent(Node n, ContentSet c) { none() } + +/** Gets the type of `n` used for type pruning. */ +IRType getNodeType(Node n) { + suppressUnusedNode(n) and + result instanceof IRVoidType // stub implementation +} + +/** Gets a string representation of a type returned by `getNodeType`. */ +string ppReprType(IRType t) { none() } // stub implementation + +/** + * Holds if `t1` and `t2` are compatible, that is, whether data can flow from + * a node of type `t1` to a node of type `t2`. + */ +pragma[inline] +predicate compatibleTypes(IRType t1, IRType t2) { + any() // stub implementation +} + +private predicate suppressUnusedNode(Node n) { any() } + +////////////////////////////////////////////////////////////////////////////// +// Java QL library compatibility wrappers +////////////////////////////////////////////////////////////////////////////// +/** A node that performs a type cast. */ +class CastNode extends Node { + CastNode() { none() } // stub implementation +} + +/** + * A function that may contain code or a variable that may contain itself. When + * flow crosses from one _enclosing callable_ to another, the interprocedural + * data-flow library discards call contexts and inserts a node in the big-step + * relation used for human-readable path explanations. + */ +class DataFlowCallable = Cpp::Declaration; + +class DataFlowExpr = Expr; + +class DataFlowType = IRType; + +/** A function call relevant for data flow. */ +class DataFlowCall extends CallInstruction { + Function getEnclosingCallable() { result = this.getEnclosingFunction() } +} + +predicate isUnreachableInCall(Node n, DataFlowCall call) { none() } // stub implementation + +int accessPathLimit() { result = 5 } + +/** + * Holds if access paths with `c` at their head always should be tracked at high + * precision. This disables adaptive access path precision for such access paths. + */ +predicate forceHighPrecision(Content c) { none() } + +/** The unit type. */ +private newtype TUnit = TMkUnit() + +/** The trivial type with a single element. */ +class Unit extends TUnit { + /** Gets a textual representation of this element. */ + string toString() { result = "unit" } +} + +/** Holds if `n` should be hidden from path explanations. */ +predicate nodeIsHidden(Node n) { n instanceof OperandNode and not n instanceof ArgumentNode } + +class LambdaCallKind = Unit; + +/** Holds if `creation` is an expression that creates a lambda of kind `kind` for `c`. */ +predicate lambdaCreation(Node creation, LambdaCallKind kind, DataFlowCallable c) { none() } + +/** Holds if `call` is a lambda call of kind `kind` where `receiver` is the lambda expression. */ +predicate lambdaCall(DataFlowCall call, LambdaCallKind kind, Node receiver) { none() } + +/** Extra data-flow steps needed for lambda flow analysis. */ +predicate additionalLambdaFlowStep(Node nodeFrom, Node nodeTo, boolean preservesValue) { none() } + +/** + * Holds if flow is allowed to pass from parameter `p` and back to itself as a + * side-effect, resulting in a summary from `p` to itself. + * + * One example would be to allow flow like `p.foo = p.bar;`, which is disallowed + * by default as a heuristic. + */ +predicate allowParameterReturnInSelf(ParameterNode p) { none() } + +private class MyConsistencyConfiguration extends Consistency::ConsistencyConfiguration { + override predicate argHasPostUpdateExclude(ArgumentNode n) { + // The rules for whether an IR argument gets a post-update node are too + // complex to model here. + any() + } +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll new file mode 100644 index 000000000000..b3b055bc9a19 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -0,0 +1,1346 @@ +/** + * Provides C++-specific definitions for use in the data flow library. + */ + +private import cpp +// The `ValueNumbering` library has to be imported right after `cpp` to ensure +// that the cached IR gets the same checksum here as it does in queries that use +// `ValueNumbering` without `DataFlow`. +private import semmle.code.cpp.ir.ValueNumbering +private import semmle.code.cpp.ir.IR +private import semmle.code.cpp.controlflow.IRGuards +private import semmle.code.cpp.models.interfaces.DataFlow +private import DataFlowPrivate +private import ModelUtil +private import SsaInternals as Ssa + +cached +private module Cached { + /** + * The IR dataflow graph consists of the following nodes: + * - `InstructionNode`, which injects most instructions directly into the dataflow graph. + * - `OperandNode`, which similarly injects most operands directly into the dataflow graph. + * - `VariableNode`, which is used to model flow through global variables. + * - `PostFieldUpdateNode`, which is used to model the state of a field after a value has been stored + * into an address after a number of loads. + * - `SsaPhiNode`, which represents phi nodes as computed by the shared SSA library. + * - `IndirectArgumentOutNode`, which represents the value of an argument (and its indirections) after + * it leaves a function call. + * - `IndirectOperand`, which represents the value of `operand` after loading the address a number + * of times. + * - `IndirectInstruction`, which represents the value of `instr` after loading the address a number + * of times. + */ + cached + newtype TIRDataFlowNode = + TInstructionNode(Instruction i) { not Ssa::ignoreInstruction(i) } or + TOperandNode(Operand op) { not Ssa::ignoreOperand(op) } or + TVariableNode(Variable var) or + TPostFieldUpdateNode(FieldAddress operand, int indirectionIndex) { + indirectionIndex = + [0 .. Ssa::countIndirectionsForCppType(operand.getObjectAddress().getResultLanguageType()) - + 1] + } or + TSsaPhiNode(Ssa::PhiNode phi) or + TIndirectArgumentOutNode(ArgumentOperand operand, int indirectionIndex) { + Ssa::isModifiableByCall(operand) and + indirectionIndex = [0 .. Ssa::countIndirectionsForCppType(operand.getLanguageType()) - 1] + } or + TIndirectOperand(Operand op, int indirectionIndex) { + Ssa::hasIndirectOperand(op, indirectionIndex) + } or + TIndirectInstruction(Instruction instr, int indirectionIndex) { + Ssa::hasIndirectInstruction(instr, indirectionIndex) + } +} + +/** + * An operand that is defined by a `FieldAddressInstruction`. + */ +class FieldAddress extends Operand { + FieldAddressInstruction fai; + + FieldAddress() { fai = this.getDef() } + + /** Gets the field associated with this instruction. */ + Field getField() { result = fai.getField() } + + /** Gets the instruction whose result provides the address of the object containing the field. */ + Instruction getObjectAddress() { result = fai.getObjectAddress() } + + /** Gets the operand that provides the address of the object containing the field. */ + Operand getObjectAddressOperand() { result = fai.getObjectAddressOperand() } +} + +/** + * Holds if `opFrom` is an operand whose value flows to the result of `instrTo`. + * + * `isPointerArith` is `true` if `instrTo` is a `PointerArithmeticInstruction` and `opFrom` + * is the left operand. + */ +predicate conversionFlow(Operand opFrom, Instruction instrTo, boolean isPointerArith) { + isPointerArith = false and + ( + instrTo.(CopyValueInstruction).getSourceValueOperand() = opFrom + or + instrTo.(ConvertInstruction).getUnaryOperand() = opFrom + or + instrTo.(CheckedConvertOrNullInstruction).getUnaryOperand() = opFrom + or + instrTo.(InheritanceConversionInstruction).getUnaryOperand() = opFrom + ) + or + isPointerArith = true and + instrTo.(PointerArithmeticInstruction).getLeftOperand() = opFrom +} + +private import Cached + +/** + * A node in a data flow graph. + * + * A node can be either an expression, a parameter, or an uninitialized local + * variable. Such nodes are created with `DataFlow::exprNode`, + * `DataFlow::parameterNode`, and `DataFlow::uninitializedNode` respectively. + */ +class Node extends TIRDataFlowNode { + /** + * INTERNAL: Do not use. + */ + Declaration getEnclosingCallable() { none() } // overridden in subclasses + + /** Gets the function to which this node belongs, if any. */ + Declaration getFunction() { none() } // overridden in subclasses + + /** Gets the type of this node. */ + IRType getType() { none() } // overridden in subclasses + + /** Gets the instruction corresponding to this node, if any. */ + Instruction asInstruction() { result = this.(InstructionNode).getInstruction() } + + /** Gets the operands corresponding to this node, if any. */ + Operand asOperand() { result = this.(OperandNode).getOperand() } + + /** + * Gets the non-conversion expression corresponding to this node, if any. + * This predicate only has a result on nodes that represent the value of + * evaluating the expression. For data flowing _out of_ an expression, like + * when an argument is passed by reference, use `asDefiningArgument` instead + * of `asExpr`. + * + * If this node strictly (in the sense of `asConvertedExpr`) corresponds to + * a `Conversion`, then the result is the underlying non-`Conversion` base + * expression. + */ + Expr asExpr() { result = this.(ExprNode).getExpr() } + + /** + * Gets the non-conversion expression that's indirectly tracked by this node + * under `index` number of indirections. + */ + Expr asIndirectExpr(int index) { + exists(Operand operand | hasOperandAndIndex(this, operand, index) | + result = operand.getDef().getUnconvertedResultExpression() + ) + } + + /** + * Gets the non-conversion expression that's indirectly tracked by this node + * behind a number of indirections. + */ + Expr asIndirectExpr() { result = this.asIndirectExpr(_) } + + /** + * Gets the expression corresponding to this node, if any. The returned + * expression may be a `Conversion`. + */ + Expr asConvertedExpr() { result = this.(ExprNode).getConvertedExpr() } + + /** + * Gets the expression that's indirectly tracked by this node + * behind `index` number of indirections. + */ + Expr asIndirectConvertedExpr(int index) { + exists(Operand operand | hasOperandAndIndex(this, operand, index) | + result = operand.getDef().getConvertedResultExpression() + ) + } + + /** + * Gets the expression that's indirectly tracked by this node behind a + * number of indirections. + */ + Expr asIndirectConvertedExpr() { result = this.asIndirectConvertedExpr(_) } + + /** + * Gets the argument that defines this `DefinitionByReferenceNode`, if any. + * This predicate should be used instead of `asExpr` when referring to the + * value of a reference argument _after_ the call has returned. For example, + * in `f(&x)`, this predicate will have `&x` as its result for the `Node` + * that represents the new value of `x`. + */ + Expr asDefiningArgument() { result = this.asDefiningArgument(_) } + + /** + * Gets the argument that defines this `DefinitionByReferenceNode`, if any. + * + * Unlike `Node::asDefiningArgument/0`, this predicate gets the node representing + * the value of the `index`'th indirection after leaving a function. For example, + * in: + * ```cpp + * void f(int**); + * ... + * int** x = ...; + * f(x); + * ``` + * The node `n` such that `n.asDefiningArgument(1)` is the argument `x` will + * contain the value of `*x` after `f` has returned, and the node `n` such that + * `n.asDefiningArgument(2)` is the argument `x` will contain the value of `**x` + * after the `f` has returned. + */ + Expr asDefiningArgument(int index) { + // Subtract one because `DefinitionByReferenceNode` is defined to be in + // the range `[0 ... n - 1]` for some `n` instead of `[1 ... n]`. + this.(DefinitionByReferenceNode).getIndirectionIndex() = index - 1 and + result = this.(DefinitionByReferenceNode).getArgument() + } + + /** + * Gets the the argument going into a function for a node that represents + * the indirect value of the argument after `index` loads. For example, in: + * ```cpp + * void f(int**); + * ... + * int** x = ...; + * f(x); + * ``` + * The node `n` such that `n.asIndirectArgument(1)` represents the value of + * `*x` going into `f`, and the node `n` such that `n.asIndirectArgument(2)` + * represents the value of `**x` going into `f`. + */ + Expr asIndirectArgument(int index) { + this.(SideEffectOperandNode).getIndirectionIndex() = index and + result = this.(SideEffectOperandNode).getArgument() + } + + /** + * Gets the the argument going into a function for a node that represents + * the indirect value of the argument after any non-zero number of loads. + */ + Expr asIndirectArgument() { result = this.asIndirectArgument(_) } + + /** Gets the positional parameter corresponding to this node, if any. */ + Parameter asParameter() { result = asParameter(0) } + + /** + * Gets the positional parameter corresponding to the node that represents + * the value of the parameter after `index` number of loads, if any. For + * example, in: + * ```cpp + * void f(int** x) { ... } + * ``` + * - The node `n` such that `n.asParameter(0)` is the parameter `x` represents + * the value of `x`. + * - The node `n` such that `n.asParameter(1)` is the parameter `x` represents + * the value of `*x`. + * The node `n` such that `n.asParameter(2)` is the parameter `x` represents + * the value of `**x`. + */ + Parameter asParameter(int index) { + index = 0 and + result = this.(ExplicitParameterNode).getParameter() + or + this.(IndirectParameterNode).getIndirectionIndex() = index and + result = this.(IndirectParameterNode).getParameter() + } + + /** + * Gets the variable corresponding to this node, if any. This can be used for + * modeling flow in and out of global variables. + */ + Variable asVariable() { result = this.(VariableNode).getVariable() } + + /** + * Gets the expression that is partially defined by this node, if any. + * + * Partial definitions are created for field stores (`x.y = taint();` is a partial + * definition of `x`), and for calls that may change the value of an object (so + * `x.set(taint())` is a partial definition of `x`, and `transfer(&x, taint())` is + * a partial definition of `&x`). + */ + Expr asPartialDefinition() { result = this.(PartialDefinitionNode).getDefinedExpr() } + + /** + * Gets an upper bound on the type of this node. + */ + IRType getTypeBound() { result = this.getType() } + + /** Gets the location of this element. */ + cached + final Location getLocation() { result = this.getLocationImpl() } + + /** INTERNAL: Do not use. */ + Location getLocationImpl() { + none() // overridden by subclasses + } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo( + string filepath, int startline, int startcolumn, int endline, int endcolumn + ) { + this.getLocation().hasLocationInfo(filepath, startline, startcolumn, endline, endcolumn) + } + + /** Gets a textual representation of this element. */ + cached + final string toString() { result = this.toStringImpl() } + + /** INTERNAL: Do not use. */ + string toStringImpl() { + none() // overridden by subclasses + } +} + +/** + * An instruction, viewed as a node in a data flow graph. + */ +class InstructionNode extends Node, TInstructionNode { + Instruction instr; + + InstructionNode() { this = TInstructionNode(instr) } + + /** Gets the instruction corresponding to this node. */ + Instruction getInstruction() { result = instr } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override Declaration getFunction() { result = instr.getEnclosingFunction() } + + override IRType getType() { result = instr.getResultIRType() } + + final override Location getLocationImpl() { result = instr.getLocation() } + + override string toStringImpl() { + // This predicate is overridden in subclasses. This default implementation + // does not use `Instruction.toString` because that's expensive to compute. + result = this.getInstruction().getOpcode().toString() + } +} + +/** + * An operand, viewed as a node in a data flow graph. + */ +class OperandNode extends Node, TOperandNode { + Operand op; + + OperandNode() { this = TOperandNode(op) } + + /** Gets the operand corresponding to this node. */ + Operand getOperand() { result = op } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override Declaration getFunction() { result = op.getUse().getEnclosingFunction() } + + override IRType getType() { result = op.getIRType() } + + final override Location getLocationImpl() { result = op.getLocation() } + + override string toStringImpl() { result = this.getOperand().toString() } +} + +/** + * INTERNAL: do not use. + * + * The node representing the value of a field after it has been updated. + */ +class PostFieldUpdateNode extends TPostFieldUpdateNode, PartialDefinitionNode { + int indirectionIndex; + FieldAddress fieldAddress; + + PostFieldUpdateNode() { this = TPostFieldUpdateNode(fieldAddress, indirectionIndex) } + + override Function getFunction() { result = fieldAddress.getUse().getEnclosingFunction() } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override IRType getType() { result = fieldAddress.getIRType() } + + FieldAddress getFieldAddress() { result = fieldAddress } + + Field getUpdatedField() { result = fieldAddress.getField() } + + int getIndirectionIndex() { result = indirectionIndex } + + override Node getPreUpdateNode() { + // + 1 because we're storing into an lvalue, and the original node should be the rvalue of + // the same address. + hasOperandAndIndex(result, pragma[only_bind_into](fieldAddress).getObjectAddressOperand(), + indirectionIndex + 1) + } + + override Expr getDefinedExpr() { + result = fieldAddress.getObjectAddress().getUnconvertedResultExpression() + } + + override Location getLocationImpl() { result = fieldAddress.getLocation() } + + override string toStringImpl() { result = this.getPreUpdateNode() + " [post update]" } +} + +/** + * INTERNAL: do not use. + * + * A phi node produced by the shared SSA library, viewed as a node in a data flow graph. + */ +class SsaPhiNode extends Node, TSsaPhiNode { + Ssa::PhiNode phi; + + SsaPhiNode() { this = TSsaPhiNode(phi) } + + /** Gets the phi node associated with this node. */ + Ssa::PhiNode getPhiNode() { result = phi } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override Declaration getFunction() { result = phi.getBasicBlock().getEnclosingFunction() } + + override IRType getType() { result instanceof IRVoidType } + + final override Location getLocationImpl() { result = phi.getBasicBlock().getLocation() } + + override string toStringImpl() { result = "Phi" } +} + +/** + * INTERNAL: do not use. + * + * A node representing a value after leaving a function. + */ +class SideEffectOperandNode extends Node, IndirectOperand { + CallInstruction call; + int argumentIndex; + + SideEffectOperandNode() { operand = call.getArgumentOperand(argumentIndex) } + + CallInstruction getCallInstruction() { result = call } + + Operand getAddressOperand() { result = operand } + + int getArgumentIndex() { result = argumentIndex } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override Function getFunction() { result = call.getEnclosingFunction() } + + override IRType getType() { result instanceof IRVoidType } + + Expr getArgument() { result = call.getArgument(argumentIndex).getUnconvertedResultExpression() } +} + +/** + * INTERNAL: do not use. + * + * A node representing an indirection of a parameter. + */ +class IndirectParameterNode extends Node, IndirectInstruction { + InitializeParameterInstruction init; + + IndirectParameterNode() { this.getInstruction() = init } + + int getArgumentIndex() { init.hasIndex(result) } + + /** Gets the parameter whose indirection is initialized. */ + Parameter getParameter() { result = init.getParameter() } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override Function getFunction() { result = this.getInstruction().getEnclosingFunction() } + + override IRType getType() { result instanceof IRVoidType } + + override string toStringImpl() { + result = this.getParameter().toString() + " indirection" + or + not exists(this.getParameter()) and + result = "this indirection" + } +} + +/** + * INTERNAL: do not use. + * + * A node representing the indirection of a value that is + * about to be returned from a function. + */ +class IndirectReturnNode extends IndirectOperand { + IndirectReturnNode() { + this.getOperand() = any(ReturnIndirectionInstruction ret).getSourceAddressOperand() + or + this.getOperand() = any(ReturnValueInstruction ret).getReturnAddressOperand() + } + + Operand getAddressOperand() { result = operand } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override IRType getType() { result instanceof IRVoidType } +} + +/** + * INTERNAL: do not use. + * + * A node representing the indirection of a value after it + * has been returned from a function. + */ +class IndirectArgumentOutNode extends Node, TIndirectArgumentOutNode, PostUpdateNode { + ArgumentOperand operand; + int indirectionIndex; + + IndirectArgumentOutNode() { this = TIndirectArgumentOutNode(operand, indirectionIndex) } + + int getIndirectionIndex() { result = indirectionIndex } + + int getArgumentIndex() { + exists(CallInstruction call | call.getArgumentOperand(result) = operand) + } + + Operand getAddressOperand() { result = operand } + + CallInstruction getCallInstruction() { result.getAnArgumentOperand() = operand } + + Function getStaticCallTarget() { result = this.getCallInstruction().getStaticCallTarget() } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override Function getFunction() { result = this.getCallInstruction().getEnclosingFunction() } + + override IRType getType() { result instanceof IRVoidType } + + override Node getPreUpdateNode() { hasOperandAndIndex(result, operand, indirectionIndex + 1) } + + override string toStringImpl() { + // This string should be unique enough to be helpful but common enough to + // avoid storing too many different strings. + result = this.getStaticCallTarget().getName() + " output argument" + or + not exists(this.getStaticCallTarget()) and + result = "output argument" + } + + override Location getLocationImpl() { result = operand.getLocation() } +} + +pragma[nomagic] +predicate indirectReturnOutNodeOperand0(CallInstruction call, Operand operand, int indirectionIndex) { + Ssa::hasIndirectInstruction(call, indirectionIndex) and + operandForfullyConvertedCall(operand, call) +} + +pragma[nomagic] +predicate indirectReturnOutNodeInstruction0( + CallInstruction call, Instruction instr, int indirectionIndex +) { + Ssa::hasIndirectInstruction(call, indirectionIndex) and + instructionForfullyConvertedCall(instr, call) +} + +/** + * INTERNAL: do not use. + * + * A node representing the value of a function call. + */ +class IndirectReturnOutNode extends Node { + CallInstruction call; + int indirectionIndex; + + IndirectReturnOutNode() { + // Annoyingly, we need to pick the fully converted value as the output of the function to + // make flow through in the shared dataflow library work correctly. + exists(Operand operand | + indirectReturnOutNodeOperand0(call, operand, indirectionIndex) and + hasOperandAndIndex(this, operand, indirectionIndex) + ) + or + exists(Instruction instr | + indirectReturnOutNodeInstruction0(call, instr, indirectionIndex) and + hasInstructionAndIndex(this, instr, indirectionIndex) + ) + } + + CallInstruction getCallInstruction() { result = call } + + int getIndirectionIndex() { result = indirectionIndex } +} + +/** + * INTERNAL: Do not use. + * + * A node that represents the indirect value of an operand in the IR + * after `index` number of loads. + */ +class IndirectOperand extends Node, TIndirectOperand { + Operand operand; + int indirectionIndex; + + IndirectOperand() { this = TIndirectOperand(operand, indirectionIndex) } + + /** Gets the underlying instruction. */ + Operand getOperand() { result = operand } + + int getIndirectionIndex() { result = indirectionIndex } + + override Function getFunction() { result = this.getOperand().getDef().getEnclosingFunction() } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override IRType getType() { result = this.getOperand().getIRType() } + + final override Location getLocationImpl() { result = this.getOperand().getLocation() } + + override string toStringImpl() { + result = instructionNode(this.getOperand().getDef()).toStringImpl() + " indirection" + } +} + +/** + * INTERNAL: Do not use. + * + * A node that represents the indirect value of an instruction in the IR + * after `index` number of loads. + */ +class IndirectInstruction extends Node, TIndirectInstruction { + Instruction instr; + int indirectionIndex; + + IndirectInstruction() { this = TIndirectInstruction(instr, indirectionIndex) } + + /** Gets the underlying instruction. */ + Instruction getInstruction() { result = instr } + + int getIndirectionIndex() { result = indirectionIndex } + + override Function getFunction() { result = this.getInstruction().getEnclosingFunction() } + + override Declaration getEnclosingCallable() { result = this.getFunction() } + + override IRType getType() { result = this.getInstruction().getResultIRType() } + + final override Location getLocationImpl() { result = this.getInstruction().getLocation() } + + override string toStringImpl() { + result = instructionNode(this.getInstruction()).toStringImpl() + " indirection" + } +} + +private predicate isFullyConvertedArgument(Expr e) { + e = any(Call call).getAnArgument().getFullyConverted() +} + +private predicate isFullyConvertedCall(Expr e) { e = any(Call call).getFullyConverted() } + +/** Holds if `Node::asExpr` should map an some operand node to `e`. */ +private predicate convertedExprMustBeOperand(Expr e) { + isFullyConvertedArgument(e) + or + isFullyConvertedCall(e) +} + +/** Holds if `node` is an `OperandNode` that should map `node.asExpr()` to `e`. */ +predicate exprNodeShouldBeOperand(Node node, Expr e) { + e = node.asOperand().getDef().getConvertedResultExpression() and + convertedExprMustBeOperand(e) +} + +/** + * Holds if `load` is a `LoadInstruction` that is the result of evaluating `e` + * and `node` is an `IndirctOperandNode` that should map `node.asExpr()` to `e`. + * + * We map `e` to `node.asExpr()` when `node` semantically represents the + * same value as `load`. A subsequent flow step will flow `node` to + * the `LoadInstruction`. + */ +private predicate exprNodeShouldBeIndirectOperand(IndirectOperand node, Expr e, LoadInstruction load) { + node.getIndirectionIndex() = 1 and + e = load.getConvertedResultExpression() and + load.getSourceAddressOperand() = node.getOperand() and + not convertedExprMustBeOperand(e) +} + +/** Holds if `node` should be an instruction node that maps `node.asExpr()` to `e`. */ +predicate exprNodeShouldBeInstruction(Node node, Expr e) { + e = node.asInstruction().getConvertedResultExpression() and + not exprNodeShouldBeOperand(_, e) and + not exprNodeShouldBeIndirectOperand(_, e, _) +} + +private class ExprNodeBase extends Node { + Expr getConvertedExpr() { none() } // overridden by subclasses + + Expr getExpr() { none() } // overridden by subclasses +} + +private class InstructionExprNode extends ExprNodeBase, InstructionNode { + InstructionExprNode() { exprNodeShouldBeInstruction(this, _) } + + final override Expr getConvertedExpr() { exprNodeShouldBeInstruction(this, result) } + + final override Expr getExpr() { result = this.getInstruction().getUnconvertedResultExpression() } + + final override string toStringImpl() { result = this.getConvertedExpr().toString() } +} + +private class OperandExprNode extends ExprNodeBase, OperandNode { + OperandExprNode() { exprNodeShouldBeOperand(this, _) } + + final override Expr getConvertedExpr() { exprNodeShouldBeOperand(this, result) } + + final override Expr getExpr() { + result = this.getOperand().getDef().getUnconvertedResultExpression() + } + + final override string toStringImpl() { + // Avoid generating multiple `toString` results for `ArgumentNode`s + // since they have a better `toString`. + result = this.(ArgumentNode).toStringImpl() + or + not this instanceof ArgumentNode and + result = this.getConvertedExpr().toString() + } +} + +private class IndirectOperandExprNode extends ExprNodeBase, IndirectOperand { + LoadInstruction load; + + IndirectOperandExprNode() { exprNodeShouldBeIndirectOperand(this, _, load) } + + final override Expr getConvertedExpr() { exprNodeShouldBeIndirectOperand(this, result, load) } + + final override Expr getExpr() { result = load.getUnconvertedResultExpression() } + + final override string toStringImpl() { result = this.getConvertedExpr().toString() } +} + +/** + * An expression, viewed as a node in a data flow graph. + */ +class ExprNode extends Node instanceof ExprNodeBase { + /** + * Gets the non-conversion expression corresponding to this node, if any. If + * this node strictly (in the sense of `getConvertedExpr`) corresponds to a + * `Conversion`, then the result is that `Conversion`'s non-`Conversion` base + * expression. + */ + Expr getExpr() { result = super.getExpr() } + + /** + * Gets the expression corresponding to this node, if any. The returned + * expression may be a `Conversion`. + */ + Expr getConvertedExpr() { result = super.getConvertedExpr() } +} + +/** + * The value of a parameter at function entry, viewed as a node in a data + * flow graph. This includes both explicit parameters such as `x` in `f(x)` + * and implicit parameters such as `this` in `x.f()`. + * + * To match a specific kind of parameter, consider using one of the subclasses + * `ExplicitParameterNode`, `ThisParameterNode`, or + * `ParameterIndirectionNode`. + */ +class ParameterNode extends Node { + ParameterNode() { + // To avoid making this class abstract, we enumerate its values here + this.asInstruction() instanceof InitializeParameterInstruction + or + this instanceof IndirectParameterNode + } + + /** + * Holds if this node is the parameter of `f` at the specified position. The + * implicit `this` parameter is considered to have position `-1`, and + * pointer-indirection parameters are at further negative positions. + */ + predicate isParameterOf(Function f, ParameterPosition pos) { none() } // overridden by subclasses +} + +/** An explicit positional parameter, not including `this` or `...`. */ +private class ExplicitParameterNode extends ParameterNode, InstructionNode { + override InitializeParameterInstruction instr; + + ExplicitParameterNode() { exists(instr.getParameter()) } + + override predicate isParameterOf(Function f, ParameterPosition pos) { + f.getParameter(pos.(DirectPosition).getIndex()) = instr.getParameter() + } + + /** Gets the `Parameter` associated with this node. */ + Parameter getParameter() { result = instr.getParameter() } + + override string toStringImpl() { result = instr.getParameter().toString() } +} + +/** An implicit `this` parameter. */ +class ThisParameterNode extends ParameterNode, InstructionNode { + override InitializeParameterInstruction instr; + + ThisParameterNode() { instr.getIRVariable() instanceof IRThisVariable } + + override predicate isParameterOf(Function f, ParameterPosition pos) { + pos.(DirectPosition).getIndex() = -1 and instr.getEnclosingFunction() = f + } + + override string toStringImpl() { result = "this" } +} + +pragma[noinline] +private predicate indirectPostionHasArgumentIndexAndIndex( + IndirectionPosition pos, int argumentIndex, int indirectionIndex +) { + pos.getArgumentIndex() = argumentIndex and + pos.getIndirectionIndex() = indirectionIndex +} + +pragma[noinline] +private predicate indirectParameterNodeHasArgumentIndexAndIndex( + IndirectParameterNode node, int argumentIndex, int indirectionIndex +) { + node.getArgumentIndex() = argumentIndex and + node.getIndirectionIndex() = indirectionIndex +} + +/** A synthetic parameter to model the pointed-to object of a pointer parameter. */ +class ParameterIndirectionNode extends ParameterNode instanceof IndirectParameterNode { + override predicate isParameterOf(Function f, ParameterPosition pos) { + IndirectParameterNode.super.getEnclosingCallable() = f and + exists(int argumentIndex, int indirectionIndex | + indirectPostionHasArgumentIndexAndIndex(pos, argumentIndex, indirectionIndex) and + indirectParameterNodeHasArgumentIndexAndIndex(this, argumentIndex, indirectionIndex) + ) + } +} + +/** + * A node associated with an object after an operation that might have + * changed its state. + * + * This can be either the argument to a callable after the callable returns + * (which might have mutated the argument), or the qualifier of a field after + * an update to the field. + * + * Nodes corresponding to AST elements, for example `ExprNode`, usually refer + * to the value before the update with the exception of `ClassInstanceExpr`, + * which represents the value after the constructor has run. + */ +abstract class PostUpdateNode extends Node { + /** + * Gets the node before the state update. + */ + abstract Node getPreUpdateNode(); +} + +/** + * The base class for nodes that perform "partial definitions". + * + * In contrast to a normal "definition", which provides a new value for + * something, a partial definition is an expression that may affect a + * value, but does not necessarily replace it entirely. For example: + * ``` + * x.y = 1; // a partial definition of the object `x`. + * x.y.z = 1; // a partial definition of the object `x.y` and `x`. + * x.setY(1); // a partial definition of the object `x`. + * setY(&x); // a partial definition of the object `x`. + * ``` + */ +abstract private class PartialDefinitionNode extends PostUpdateNode { + abstract Expr getDefinedExpr(); +} + +/** + * A node that represents the value of a variable after a function call that + * may have changed the variable because it's passed by reference. + * + * A typical example would be a call `f(&x)`. Firstly, there will be flow into + * `x` from previous definitions of `x`. Secondly, there will be a + * `DefinitionByReferenceNode` to represent the value of `x` after the call has + * returned. This node will have its `getArgument()` equal to `&x` and its + * `getVariableAccess()` equal to `x`. + */ +class DefinitionByReferenceNode extends IndirectArgumentOutNode { + /** Gets the unconverted argument corresponding to this node. */ + Expr getArgument() { result = this.getAddressOperand().getDef().getUnconvertedResultExpression() } + + /** Gets the parameter through which this value is assigned. */ + Parameter getParameter() { + result = this.getCallInstruction().getStaticCallTarget().getParameter(this.getArgumentIndex()) + } +} + +/** + * A `Node` corresponding to a variable in the program, as opposed to the + * value of that variable at some particular point. This can be used for + * modeling flow in and out of global variables. + */ +class VariableNode extends Node, TVariableNode { + Variable v; + + VariableNode() { this = TVariableNode(v) } + + /** Gets the variable corresponding to this node. */ + Variable getVariable() { result = v } + + override Declaration getFunction() { none() } + + override Declaration getEnclosingCallable() { + // When flow crosses from one _enclosing callable_ to another, the + // interprocedural data-flow library discards call contexts and inserts a + // node in the big-step relation used for human-readable path explanations. + // Therefore we want a distinct enclosing callable for each `VariableNode`, + // and that can be the `Variable` itself. + result = v + } + + override IRType getType() { result.getCanonicalLanguageType().hasUnspecifiedType(v.getType(), _) } + + final override Location getLocationImpl() { result = v.getLocation() } + + override string toStringImpl() { result = v.toString() } +} + +/** + * Gets the node corresponding to `instr`. + */ +InstructionNode instructionNode(Instruction instr) { result.getInstruction() = instr } + +/** + * Gets the node corresponding to `operand`. + */ +OperandNode operandNode(Operand operand) { result.getOperand() = operand } + +/** + * Gets the `Node` corresponding to the value of evaluating `e` or any of its + * conversions. There is no result if `e` is a `Conversion`. For data flowing + * _out of_ an expression, like when an argument is passed by reference, use + * `definitionByReferenceNodeFromArgument` instead. + */ +ExprNode exprNode(Expr e) { result.getExpr() = e } + +/** + * Gets the `Node` corresponding to the value of evaluating `e`. Here, `e` may + * be a `Conversion`. For data flowing _out of_ an expression, like when an + * argument is passed by reference, use + * `definitionByReferenceNodeFromArgument` instead. + */ +ExprNode convertedExprNode(Expr e) { result.getConvertedExpr() = e } + +/** + * Gets the `Node` corresponding to the value of `p` at function entry. + */ +ExplicitParameterNode parameterNode(Parameter p) { result.getParameter() = p } + +/** + * Gets the `Node` corresponding to a definition by reference of the variable + * that is passed as unconverted `argument` of a call. + */ +DefinitionByReferenceNode definitionByReferenceNodeFromArgument(Expr argument) { + result.getArgument() = argument +} + +/** Gets the `VariableNode` corresponding to the variable `v`. */ +VariableNode variableNode(Variable v) { result.getVariable() = v } + +/** + * DEPRECATED: See UninitializedNode. + * + * Gets the `Node` corresponding to the value of an uninitialized local + * variable `v`. + */ +Node uninitializedNode(LocalVariable v) { none() } + +/** + * Holds if data flows from `nodeFrom` to `nodeTo` in exactly one local + * (intra-procedural) step. + */ +predicate localFlowStep = simpleLocalFlowStep/2; + +private predicate indirectionOperandFlow(IndirectOperand nodeFrom, Node nodeTo) { + // Reduce the indirection count by 1 if we're passing through a `LoadInstruction`. + exists(int ind, LoadInstruction load | + hasOperandAndIndex(nodeFrom, load.getSourceAddressOperand(), ind) and + nodeHasInstruction(nodeTo, load, ind - 1) + ) + or + // If an operand flows to an instruction, then the indirection of + // the operand also flows to the indirction of the instruction. + exists(Operand operand, Instruction instr, int indirectionIndex | + simpleInstructionLocalFlowStep(operand, instr) and + hasOperandAndIndex(nodeFrom, operand, indirectionIndex) and + hasInstructionAndIndex(nodeTo, instr, indirectionIndex) + ) + or + // If there's indirect flow to an operand, then there's also indirect + // flow to the operand after applying some pointer arithmetic. + exists(PointerArithmeticInstruction pointerArith, int indirectionIndex | + hasOperandAndIndex(nodeFrom, pointerArith.getAnOperand(), indirectionIndex) and + hasInstructionAndIndex(nodeTo, pointerArith, indirectionIndex) + ) +} + +pragma[noinline] +predicate hasOperandAndIndex(IndirectOperand indirectOperand, Operand operand, int indirectionIndex) { + indirectOperand.getOperand() = operand and + indirectOperand.getIndirectionIndex() = indirectionIndex +} + +pragma[noinline] +predicate hasInstructionAndIndex( + IndirectInstruction indirectInstr, Instruction instr, int indirectionIndex +) { + indirectInstr.getInstruction() = instr and + indirectInstr.getIndirectionIndex() = indirectionIndex +} + +private predicate indirectionInstructionFlow(IndirectInstruction nodeFrom, IndirectOperand nodeTo) { + // If there's flow from an instruction to an operand, then there's also flow from the + // indirect instruction to the indirect operand. + exists(Operand operand, Instruction instr, int indirectionIndex | + simpleOperandLocalFlowStep(pragma[only_bind_into](instr), pragma[only_bind_into](operand)) + | + hasOperandAndIndex(nodeTo, operand, indirectionIndex) and + hasInstructionAndIndex(nodeFrom, instr, indirectionIndex) + ) +} + +/** + * INTERNAL: do not use. + * + * This is the local flow predicate that's used as a building block in global + * data flow. It may have less flow than the `localFlowStep` predicate. + */ +predicate simpleLocalFlowStep(Node nodeFrom, Node nodeTo) { + // Post update node -> Node flow + Ssa::ssaFlow(nodeFrom.(PostUpdateNode).getPreUpdateNode(), nodeTo) + or + // Def-use/Use-use flow + Ssa::ssaFlow(nodeFrom, nodeTo) + or + // Operand -> Instruction flow + simpleInstructionLocalFlowStep(nodeFrom.asOperand(), nodeTo.asInstruction()) + or + // Instruction -> Operand flow + simpleOperandLocalFlowStep(nodeFrom.asInstruction(), nodeTo.asOperand()) + or + // Phi node -> Node flow + Ssa::fromPhiNode(nodeFrom, nodeTo) + or + // Indirect operand -> (indirect) instruction flow + indirectionOperandFlow(nodeFrom, nodeTo) + or + // Indirect instruction -> indirect operand flow + indirectionInstructionFlow(nodeFrom, nodeTo) + or + // Flow through modeled functions + modelFlow(nodeFrom, nodeTo) + or + // Reverse flow: data that flows from the definition node back into the indirection returned + // by a function. This allows data to flow 'in' through references returned by a modeled + // function such as `operator[]`. + exists(Operand address, int indirectionIndex | + nodeHasOperand(nodeTo.(IndirectReturnOutNode), address, indirectionIndex) + | + exists(StoreInstruction store | + nodeHasInstruction(nodeFrom, store, indirectionIndex - 1) and + store.getDestinationAddressOperand() = address + ) + or + Ssa::outNodeHasAddressAndIndex(nodeFrom, address, indirectionIndex - 1) + ) +} + +pragma[noinline] +private predicate getAddressType(LoadInstruction load, Type t) { + exists(Instruction address | + address = load.getSourceAddress() and + t = address.getResultType() + ) +} + +/** + * Like the AST dataflow library, we want to conflate the address and value of a reference. This class + * represents the `LoadInstruction` that is generated from a reference dereference. + */ +private class ReferenceDereferenceInstruction extends LoadInstruction { + ReferenceDereferenceInstruction() { + exists(ReferenceType ref | + getAddressType(this, ref) and + this.getResultType() = ref.getBaseType() + ) + } +} + +private predicate simpleInstructionLocalFlowStep(Operand opFrom, Instruction iTo) { + // Treat all conversions as flow, even conversions between different numeric types. + conversionFlow(opFrom, iTo, false) + or + iTo.(CopyInstruction).getSourceValueOperand() = opFrom + or + // Conflate references and values like in AST dataflow. + iTo.(ReferenceDereferenceInstruction).getSourceAddressOperand() = opFrom +} + +private predicate simpleOperandLocalFlowStep(Instruction iFrom, Operand opTo) { + not opTo instanceof MemoryOperand and + opTo.getDef() = iFrom +} + +private predicate modelFlow(Node nodeFrom, Node nodeTo) { + exists( + CallInstruction call, DataFlowFunction func, FunctionInput modelIn, FunctionOutput modelOut + | + call.getStaticCallTarget() = func and + func.hasDataFlow(modelIn, modelOut) + | + nodeFrom = callInput(call, modelIn) and + nodeTo = callOutput(call, modelOut) + or + exists(int d | + nodeFrom = callInput(call, modelIn, d) and + nodeTo = callOutput(call, modelOut, d) + ) + ) +} + +/** + * Holds if data flows from `source` to `sink` in zero or more local + * (intra-procedural) steps. + */ +pragma[inline] +predicate localFlow(Node source, Node sink) { localFlowStep*(source, sink) } + +/** + * Holds if data can flow from `i1` to `i2` in zero or more + * local (intra-procedural) steps. + */ +pragma[inline] +predicate localInstructionFlow(Instruction e1, Instruction e2) { + localFlow(instructionNode(e1), instructionNode(e2)) +} + +/** + * Holds if data can flow from `e1` to `e2` in zero or more + * local (intra-procedural) steps. + */ +pragma[inline] +predicate localExprFlow(Expr e1, Expr e2) { localFlow(exprNode(e1), exprNode(e2)) } + +cached +private newtype TContent = + TFieldContent(Field f, int indirectionIndex) { + indirectionIndex = [1 .. Ssa::getMaxIndirectionsForType(f.getUnspecifiedType())] and + // Reads and writes of union fields are tracked using `UnionContent`. + not f.getDeclaringType() instanceof Union + } or + TUnionContent(Union u, int indirectionIndex) { + // We key `UnionContent` by the union instead of its fields since a write to one + // field can be read by any read of the union's fields. + indirectionIndex = + [1 .. max(Ssa::getMaxIndirectionsForType(u.getAField().getUnspecifiedType()))] + } or + TCollectionContent() or // Not used in C/C++ + TArrayContent() // Not used in C/C++. + +/** + * A description of the way data may be stored inside an object. Examples + * include instance fields, the contents of a collection object, or the contents + * of an array. + */ +class Content extends TContent { + /** Gets a textual representation of this element. */ + abstract string toString(); + + predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) { + path = "" and sl = 0 and sc = 0 and el = 0 and ec = 0 + } +} + +/** A reference through a non-union instance field. */ +class FieldContent extends Content, TFieldContent { + Field f; + int indirectionIndex; + + FieldContent() { this = TFieldContent(f, indirectionIndex) } + + override string toString() { + indirectionIndex = 1 and result = f.toString() + or + indirectionIndex > 1 and result = f.toString() + " indirection" + } + + Field getField() { result = f } + + pragma[inline] + int getIndirectionIndex() { + pragma[only_bind_into](result) = pragma[only_bind_out](indirectionIndex) + } +} + +/** A reference through an instance field of a union. */ +class UnionContent extends Content, TUnionContent { + Union u; + int indirectionIndex; + + UnionContent() { this = TUnionContent(u, indirectionIndex) } + + override string toString() { + indirectionIndex = 1 and result = u.toString() + or + indirectionIndex > 1 and result = u.toString() + " indirection" + } + + Field getAField() { result = u.getAField() } + + pragma[inline] + int getIndirectionIndex() { + pragma[only_bind_into](result) = pragma[only_bind_out](indirectionIndex) + } +} + +/** A reference through an array. */ +class ArrayContent extends Content, TArrayContent { + override string toString() { result = "[]" } +} + +/** A reference through the contents of some collection-like container. */ +private class CollectionContent extends Content, TCollectionContent { + override string toString() { result = "" } +} + +/** + * An entity that represents a set of `Content`s. + * + * The set may be interpreted differently depending on whether it is + * stored into (`getAStoreContent`) or read from (`getAReadContent`). + */ +class ContentSet instanceof Content { + /** Gets a content that may be stored into when storing into this set. */ + Content getAStoreContent() { result = this } + + /** Gets a content that may be read from when reading from this set. */ + Content getAReadContent() { result = this } + + /** Gets a textual representation of this content set. */ + string toString() { result = super.toString() } + + /** + * Holds if this element is at the specified location. + * The location spans column `startcolumn` of line `startline` to + * column `endcolumn` of line `endline` in file `filepath`. + * For more information, see + * [Locations](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). + */ + predicate hasLocationInfo(string path, int sl, int sc, int el, int ec) { + super.hasLocationInfo(path, sl, sc, el, ec) + } +} + +private IRBlock getBasicBlock(Node node) { + node.asInstruction().getBlock() = result + or + node.asOperand().getUse().getBlock() = result + or + node.(SsaPhiNode).getPhiNode().getBasicBlock() = result + or + node.(IndirectOperand).getOperand().getUse().getBlock() = result + or + node.(IndirectInstruction).getInstruction().getBlock() = result + or + result = getBasicBlock(node.(PostUpdateNode).getPreUpdateNode()) +} + +/** + * Holds if the guard `g` validates the expression `e` upon evaluating to `branch`. + * + * The expression `e` is expected to be a syntactic part of the guard `g`. + * For example, the guard `g` might be a call `isSafe(x)` and the expression `e` + * the argument `x`. + */ +signature predicate guardChecksSig(IRGuardCondition g, Expr e, boolean branch); + +/** + * Provides a set of barrier nodes for a guard that validates an expression. + * + * This is expected to be used in `isBarrier`/`isSanitizer` definitions + * in data flow and taint tracking. + */ +module BarrierGuard { + /** Gets a node that is safely guarded by the given guard check. */ + ExprNode getABarrierNode() { + exists(IRGuardCondition g, Expr e, ValueNumber value, boolean edge | + e = value.getAnInstruction().getConvertedResultExpression() and + result.getConvertedExpr() = e and + guardChecks(g, value.getAnInstruction().getConvertedResultExpression(), edge) and + g.controls(getBasicBlock(result), edge) + ) + } +} + +/** + * Holds if the guard `g` validates the instruction `instr` upon evaluating to `branch`. + */ +signature predicate instructionGuardChecksSig(IRGuardCondition g, Instruction instr, boolean branch); + +/** + * Provides a set of barrier nodes for a guard that validates an instruction. + * + * This is expected to be used in `isBarrier`/`isSanitizer` definitions + * in data flow and taint tracking. + */ +module InstructionBarrierGuard { + /** Gets a node that is safely guarded by the given guard check. */ + ExprNode getABarrierNode() { + exists(IRGuardCondition g, ValueNumber value, boolean edge | + instructionGuardChecks(g, value.getAnInstruction(), edge) and + result.asInstruction() = value.getAnInstruction() and + g.controls(result.asInstruction().getBlock(), edge) + ) + } +} + +/** + * DEPRECATED: Use `BarrierGuard` module instead. + * + * A guard that validates some instruction. + * + * To use this in a configuration, extend the class and provide a + * characteristic predicate precisely specifying the guard, and override + * `checks` to specify what is being validated and in which branch. + * + * It is important that all extending classes in scope are disjoint. + */ +deprecated class BarrierGuard extends IRGuardCondition { + /** Override this predicate to hold if this guard validates `instr` upon evaluating to `b`. */ + predicate checksInstr(Instruction instr, boolean b) { none() } + + /** Override this predicate to hold if this guard validates `expr` upon evaluating to `b`. */ + predicate checks(Expr e, boolean b) { none() } + + /** Gets a node guarded by this guard. */ + final Node getAGuardedNode() { + exists(ValueNumber value, boolean edge | + ( + this.checksInstr(value.getAnInstruction(), edge) + or + this.checks(value.getAnInstruction().getConvertedResultExpression(), edge) + ) and + result.asInstruction() = value.getAnInstruction() and + this.controls(result.asInstruction().getBlock(), edge) + ) + } +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll new file mode 100644 index 000000000000..c302c6ef878f --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/ModelUtil.qll @@ -0,0 +1,93 @@ +/** + * Provides predicates for mapping the `FunctionInput` and `FunctionOutput` + * classes used in function models to the corresponding instructions. + */ + +private import semmle.code.cpp.ir.IR +private import experimental.semmle.code.cpp.ir.dataflow.DataFlow +private import experimental.semmle.code.cpp.ir.dataflow.internal.DataFlowUtil +private import SsaInternals as Ssa + +/** + * Gets the instruction that goes into `input` for `call`. + */ +DataFlow::Node callInput(CallInstruction call, FunctionInput input) { + // An argument or qualifier + exists(int index | + result.asOperand() = call.getArgumentOperand(index) and + input.isParameterOrQualifierAddress(index) + ) + or + // A value pointed to by an argument or qualifier + exists(int index, int indirectionIndex | + hasOperandAndIndex(result, call.getArgumentOperand(index), indirectionIndex) and + input.isParameterDerefOrQualifierObject(index, indirectionIndex) + ) + or + exists(int ind | + result = getIndirectReturnOutNode(call, ind) and + input.isReturnValueDeref(ind) + ) +} + +/** + * Gets the instruction that holds the `output` for `call`. + */ +Node callOutput(CallInstruction call, FunctionOutput output) { + // The return value + result.asInstruction() = call and + output.isReturnValue() + or + // The side effect of a call on the value pointed to by an argument or qualifier + exists(int index, int indirectionIndex | + result.(IndirectArgumentOutNode).getArgumentIndex() = index and + result.(IndirectArgumentOutNode).getIndirectionIndex() + 1 = indirectionIndex and + result.(IndirectArgumentOutNode).getCallInstruction() = call and + output.isParameterDerefOrQualifierObject(index, indirectionIndex) + ) + or + exists(int ind | + result = getIndirectReturnOutNode(call, ind) and + output.isReturnValueDeref(ind) + ) +} + +DataFlow::Node callInput(CallInstruction call, FunctionInput input, int d) { + exists(DataFlow::Node n | n = callInput(call, input) and d > 0 | + // An argument or qualifier + hasOperandAndIndex(result, n.asOperand(), d) + or + exists(Operand operand, int indirectionIndex | + // A value pointed to by an argument or qualifier + hasOperandAndIndex(n, operand, indirectionIndex) and + hasOperandAndIndex(result, operand, indirectionIndex + d) + ) + ) +} + +private IndirectReturnOutNode getIndirectReturnOutNode(CallInstruction call, int d) { + result.getCallInstruction() = call and + result.getIndirectionIndex() = d +} + +/** + * Gets the instruction that holds the `output` for `call`. + */ +bindingset[d] +Node callOutput(CallInstruction call, FunctionOutput output, int d) { + exists(DataFlow::Node n | n = callOutput(call, output) and d > 0 | + // The return value + result = getIndirectReturnOutNode(n.asInstruction(), d) + or + // If there isn't an indirect out node for the call with indirection `d` then + // we conflate this with the underlying `CallInstruction`. + not exists(getIndirectReturnOutNode(call, d)) and + n.asInstruction() = result.asInstruction() + or + // The side effect of a call on the value pointed to by an argument or qualifier + exists(Operand operand, int indirectionIndex | + Ssa::outNodeHasAddressAndIndex(n, operand, indirectionIndex) and + Ssa::outNodeHasAddressAndIndex(result, operand, indirectionIndex + d) + ) + ) +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/PrintIRLocalFlow.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/PrintIRLocalFlow.qll new file mode 100644 index 000000000000..7359656e5a4b --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/PrintIRLocalFlow.qll @@ -0,0 +1,136 @@ +private import cpp +// The `ValueNumbering` library has to be imported right after `cpp` to ensure +// that the cached IR gets the same checksum here as it does in queries that use +// `ValueNumbering` without `DataFlow`. +private import semmle.code.cpp.ir.ValueNumbering +private import semmle.code.cpp.ir.IR +private import semmle.code.cpp.ir.dataflow.DataFlow +private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil +private import PrintIRUtilities + +/** + * Gets the local dataflow from other nodes in the same function to this node. + */ +private string getFromFlow(DataFlow::Node useNode, int order1, int order2) { + exists(DataFlow::Node defNode, string prefix | + ( + simpleLocalFlowStep(defNode, useNode) and prefix = "" + or + any(DataFlow::Configuration cfg).isAdditionalFlowStep(defNode, useNode) and + defNode.getEnclosingCallable() = useNode.getEnclosingCallable() and + prefix = "+" + ) and + if defNode.asInstruction() = useNode.asOperand().getAnyDef() + then + // Shorthand for flow from the def of this operand. + result = prefix + "def" and + order1 = -1 and + order2 = 0 + else + if defNode.asOperand().getUse() = useNode.asInstruction() + then + // Shorthand for flow from an operand of this instruction + result = prefix + defNode.asOperand().getDumpId() and + order1 = -1 and + order2 = defNode.asOperand().getDumpSortOrder() + else result = prefix + nodeId(defNode, order1, order2) + ) +} + +/** + * Gets the local dataflow from this node to other nodes in the same function. + */ +private string getToFlow(DataFlow::Node defNode, int order1, int order2) { + exists(DataFlow::Node useNode, string prefix | + ( + simpleLocalFlowStep(defNode, useNode) and prefix = "" + or + any(DataFlow::Configuration cfg).isAdditionalFlowStep(defNode, useNode) and + defNode.getEnclosingCallable() = useNode.getEnclosingCallable() and + prefix = "+" + ) and + if useNode.asInstruction() = defNode.asOperand().getUse() + then + // Shorthand for flow to this operand's instruction. + result = prefix + "result" and + order1 = -1 and + order2 = 0 + else result = prefix + nodeId(useNode, order1, order2) + ) +} + +/** + * Gets the properties of the dataflow node `node`. + */ +private string getNodeProperty(DataFlow::Node node, string key) { + // List dataflow into and out of this node. Flow into this node is printed as `src->@`, and flow + // out of this node is printed as `@->dest`. + key = "flow" and + result = + strictconcat(string flow, boolean to, int order1, int order2 | + flow = getFromFlow(node, order1, order2) + "->@" and to = false + or + flow = "@->" + getToFlow(node, order1, order2) and to = true + | + flow, ", " order by to, order1, order2, flow + ) + or + // Is this node a dataflow sink? + key = "sink" and + any(DataFlow::Configuration cfg).isSink(node) and + result = "true" + or + // Is this node a dataflow source? + key = "source" and + any(DataFlow::Configuration cfg).isSource(node) and + result = "true" + or + // Is this node a dataflow barrier, and if so, what kind? + key = "barrier" and + result = + strictconcat(string kind | + any(DataFlow::Configuration cfg).isBarrier(node) and kind = "full" + or + any(DataFlow::Configuration cfg).isBarrierIn(node) and kind = "in" + or + any(DataFlow::Configuration cfg).isBarrierOut(node) and kind = "out" + | + kind, ", " + ) + or + // Is there partial flow from a source to this node? + // This property will only be emitted if partial flow is enabled by overriding + // `DataFlow::Configration::explorationLimit()`. + key = "pflow" and + result = + strictconcat(DataFlow::PartialPathNode sourceNode, DataFlow::PartialPathNode destNode, int dist, + int order1, int order2 | + any(DataFlow::Configuration cfg).hasPartialFlow(sourceNode, destNode, dist) and + destNode.getNode() = node and + // Only print flow from a source in the same function. + sourceNode.getNode().getEnclosingCallable() = node.getEnclosingCallable() + | + nodeId(sourceNode.getNode(), order1, order2) + "+" + dist.toString(), ", " + order by + order1, order2, dist desc + ) +} + +/** + * Property provider for local IR dataflow. + */ +class LocalFlowPropertyProvider extends IRPropertyProvider { + override string getOperandProperty(Operand operand, string key) { + exists(DataFlow::Node node | + operand = node.asOperand() and + result = getNodeProperty(node, key) + ) + } + + override string getInstructionProperty(Instruction instruction, string key) { + exists(DataFlow::Node node | + instruction = node.asInstruction() and + result = getNodeProperty(node, key) + ) + } +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/PrintIRStoreSteps.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/PrintIRStoreSteps.qll new file mode 100644 index 000000000000..8c3182162170 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/PrintIRStoreSteps.qll @@ -0,0 +1,33 @@ +/** + * Print the dataflow local store steps in IR dumps. + */ + +private import cpp +// The `ValueNumbering` library has to be imported right after `cpp` to ensure +// that the cached IR gets the same checksum here as it does in queries that use +// `ValueNumbering` without `DataFlow`. +private import semmle.code.cpp.ir.ValueNumbering +private import semmle.code.cpp.ir.IR +private import semmle.code.cpp.ir.dataflow.DataFlow +private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil +private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate +private import PrintIRUtilities + +/** + * Property provider for local IR dataflow store steps. + */ +class LocalFlowPropertyProvider extends IRPropertyProvider { + override string getInstructionProperty(Instruction instruction, string key) { + exists(DataFlow::Node objectNode, Content content | + key = "content[" + content.toString() + "]" and + instruction = objectNode.asInstruction() and + result = + strictconcat(string element, DataFlow::Node fieldNode | + storeStep(fieldNode, content, objectNode) and + element = nodeId(fieldNode, _, _) + | + element, ", " + ) + ) + } +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/PrintIRUtilities.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/PrintIRUtilities.qll new file mode 100644 index 000000000000..5fc15cf986c7 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/PrintIRUtilities.qll @@ -0,0 +1,39 @@ +/** + * Shared utilities used when printing dataflow annotations in IR dumps. + */ + +private import cpp +// The `ValueNumbering` library has to be imported right after `cpp` to ensure +// that the cached IR gets the same checksum here as it does in queries that use +// `ValueNumbering` without `DataFlow`. +private import semmle.code.cpp.ir.ValueNumbering +private import semmle.code.cpp.ir.IR +private import semmle.code.cpp.ir.dataflow.DataFlow + +/** + * Gets a short ID for an IR dataflow node. + * - For `Instruction`s, this is just the result ID of the instruction (e.g. `m128`). + * - For `Operand`s, this is the label of the operand, prefixed with the result ID of the + * instruction and a dot (e.g. `m128.left`). + * - For `Variable`s, this is the qualified name of the variable. + */ +string nodeId(DataFlow::Node node, int order1, int order2) { + exists(Instruction instruction | instruction = node.asInstruction() | + result = instruction.getResultId() and + order1 = instruction.getBlock().getDisplayIndex() and + order2 = instruction.getDisplayIndexInBlock() + ) + or + exists(Operand operand, Instruction instruction | + operand = node.asOperand() and + instruction = operand.getUse() + | + result = instruction.getResultId() + "." + operand.getDumpId() and + order1 = instruction.getBlock().getDisplayIndex() and + order2 = instruction.getDisplayIndexInBlock() + ) + or + result = "var(" + node.asVariable().getQualifiedName() + ")" and + order1 = 1000000 and + order2 = 0 +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll new file mode 100644 index 000000000000..61ff5707d77f --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/SsaInternals.qll @@ -0,0 +1,547 @@ +private import codeql.ssa.Ssa as SsaImplCommon +private import semmle.code.cpp.ir.IR +private import DataFlowUtil +private import DataFlowImplCommon as DataFlowImplCommon +private import semmle.code.cpp.models.interfaces.Allocation as Alloc +private import semmle.code.cpp.models.interfaces.DataFlow as DataFlow +private import semmle.code.cpp.ir.internal.IRCppLanguage +private import DataFlowPrivate +private import ssa0.SsaInternals as SsaInternals0 +import SsaInternalsCommon + +private module SourceVariables { + int getMaxIndirectionForIRVariable(IRVariable var) { + exists(Type type, boolean isGLValue | + var.getLanguageType().hasType(type, isGLValue) and + if isGLValue = true + then result = 1 + getMaxIndirectionsForType(type) + else result = getMaxIndirectionsForType(type) + ) + } + + class BaseSourceVariable = SsaInternals0::BaseSourceVariable; + + class BaseIRVariable = SsaInternals0::BaseIRVariable; + + class BaseCallVariable = SsaInternals0::BaseCallVariable; + + cached + private newtype TSourceVariable = + TSourceIRVariable(BaseIRVariable baseVar, int ind) { + ind = [0 .. getMaxIndirectionForIRVariable(baseVar.getIRVariable())] + } or + TCallVariable(AllocationInstruction call, int ind) { + ind = [0 .. countIndirectionsForCppType(getResultLanguageType(call))] + } + + abstract class SourceVariable extends TSourceVariable { + int ind; + + bindingset[ind] + SourceVariable() { any() } + + abstract string toString(); + + int getIndirection() { result = ind } + + abstract BaseSourceVariable getBaseVariable(); + } + + class SourceIRVariable extends SourceVariable, TSourceIRVariable { + BaseIRVariable var; + + SourceIRVariable() { this = TSourceIRVariable(var, ind) } + + IRVariable getIRVariable() { result = var.getIRVariable() } + + override BaseIRVariable getBaseVariable() { result.getIRVariable() = this.getIRVariable() } + + override string toString() { + ind = 0 and + result = this.getIRVariable().toString() + or + ind > 0 and + result = this.getIRVariable().toString() + " indirection" + } + } + + class CallVariable extends SourceVariable, TCallVariable { + AllocationInstruction call; + + CallVariable() { this = TCallVariable(call, ind) } + + AllocationInstruction getCall() { result = call } + + override BaseCallVariable getBaseVariable() { result.getCallInstruction() = call } + + override string toString() { + ind = 0 and + result = "Call" + or + ind > 0 and + result = "Call indirection" + } + } +} + +import SourceVariables + +predicate hasIndirectOperand(Operand op, int indirectionIndex) { + exists(CppType type, int m | + not ignoreOperand(op) and + type = getLanguageType(op) and + m = countIndirectionsForCppType(type) and + indirectionIndex = [1 .. m] + ) +} + +predicate hasIndirectInstruction(Instruction instr, int indirectionIndex) { + exists(CppType type, int m | + not ignoreInstruction(instr) and + type = getResultLanguageType(instr) and + m = countIndirectionsForCppType(type) and + indirectionIndex = [1 .. m] + ) +} + +cached +private newtype TDefOrUseImpl = + TDefImpl(Operand address, int indirectionIndex) { + isDef(_, _, address, _, _, indirectionIndex) and + // We only include the definition if the SSA pruning stage + // concluded that the definition is live after the write. + any(SsaInternals0::Def def).getAddressOperand() = address + } or + TUseImpl(Operand operand, int indirectionIndex) { + isUse(_, operand, _, _, indirectionIndex) and + not isDef(_, _, operand, _, _, _) + } + +abstract private class DefOrUseImpl extends TDefOrUseImpl { + /** Gets a textual representation of this element. */ + abstract string toString(); + + /** Gets the block of this definition or use. */ + abstract IRBlock getBlock(); + + /** Holds if this definition or use has index `index` in block `block`. */ + abstract predicate hasIndexInBlock(IRBlock block, int index); + + final predicate hasIndexInBlock(IRBlock block, int index, SourceVariable sv) { + this.hasIndexInBlock(block, index) and + sv = this.getSourceVariable() + } + + /** Gets the location of this element. */ + abstract Cpp::Location getLocation(); + + /** + * Gets the index (i.e., the number of loads required) of this + * definition or use. + * + * Note that this is _not_ the definition's (or use's) index in + * the enclosing basic block. To obtain this index, use + * `DefOrUseImpl::hasIndexInBlock/2` or `DefOrUseImpl::hasIndexInBlock/3`. + */ + abstract int getIndirectionIndex(); + + /** + * Gets the instruction that computes the base of this definition or use. + * This is always a `VariableAddressInstruction` or an `AllocationInstruction`. + */ + abstract Instruction getBase(); + + final BaseSourceVariable getBaseSourceVariable() { + exists(IRVariable var | + result.(BaseIRVariable).getIRVariable() = var and + instructionHasIRVariable(this.getBase(), var) + ) + or + result.(BaseCallVariable).getCallInstruction() = this.getBase() + } + + /** Gets the variable that is defined or used. */ + final SourceVariable getSourceVariable() { + exists(BaseSourceVariable v, int ind | + sourceVariableHasBaseAndIndex(result, v, ind) and + defOrUseHasSourceVariable(this, v, ind) + ) + } +} + +pragma[noinline] +private predicate instructionHasIRVariable(VariableAddressInstruction vai, IRVariable var) { + vai.getIRVariable() = var +} + +private predicate defOrUseHasSourceVariable(DefOrUseImpl defOrUse, BaseSourceVariable bv, int ind) { + defHasSourceVariable(defOrUse, bv, ind) + or + useHasSourceVariable(defOrUse, bv, ind) +} + +pragma[noinline] +private predicate defHasSourceVariable(DefImpl def, BaseSourceVariable bv, int ind) { + bv = def.getBaseSourceVariable() and + ind = def.getIndirection() +} + +pragma[noinline] +private predicate useHasSourceVariable(UseImpl use, BaseSourceVariable bv, int ind) { + bv = use.getBaseSourceVariable() and + ind = use.getIndirection() +} + +pragma[noinline] +private predicate sourceVariableHasBaseAndIndex(SourceVariable v, BaseSourceVariable bv, int ind) { + v.getBaseVariable() = bv and + v.getIndirection() = ind +} + +class DefImpl extends DefOrUseImpl, TDefImpl { + Operand address; + int ind; + + DefImpl() { this = TDefImpl(address, ind) } + + override Instruction getBase() { isDef(_, _, address, result, _, _) } + + Operand getAddressOperand() { result = address } + + int getIndirection() { isDef(_, _, address, _, result, ind) } + + override int getIndirectionIndex() { result = ind } + + Instruction getDefiningInstruction() { isDef(_, result, address, _, _, _) } + + override string toString() { result = "DefImpl" } + + override IRBlock getBlock() { result = this.getDefiningInstruction().getBlock() } + + override Cpp::Location getLocation() { result = this.getDefiningInstruction().getLocation() } + + final override predicate hasIndexInBlock(IRBlock block, int index) { + this.getDefiningInstruction() = block.getInstruction(index) + } + + predicate isCertain() { isDef(true, _, address, _, _, ind) } +} + +class UseImpl extends DefOrUseImpl, TUseImpl { + Operand operand; + int ind; + + UseImpl() { this = TUseImpl(operand, ind) } + + Operand getOperand() { result = operand } + + override string toString() { result = "UseImpl" } + + final override predicate hasIndexInBlock(IRBlock block, int index) { + operand.getUse() = block.getInstruction(index) + } + + final override IRBlock getBlock() { result = operand.getUse().getBlock() } + + final override Cpp::Location getLocation() { result = operand.getLocation() } + + final int getIndirection() { isUse(_, operand, _, result, ind) } + + override int getIndirectionIndex() { result = ind } + + override Instruction getBase() { isUse(_, operand, result, _, ind) } + + predicate isCertain() { isUse(true, operand, _, _, ind) } +} + +/** + * Holds if `defOrUse1` is a definition which is first read by `use`, + * or if `defOrUse1` is a use and `use` is a next subsequent use. + * + * In both cases, `use` can either be an explicit use written in the + * source file, or it can be a phi node as computed by the SSA library. + */ +predicate adjacentDefRead(DefOrUse defOrUse1, UseOrPhi use) { + exists(IRBlock bb1, int i1, SourceVariable v | + defOrUse1.asDefOrUse().hasIndexInBlock(bb1, i1, v) + | + exists(IRBlock bb2, int i2 | + adjacentDefRead(_, pragma[only_bind_into](bb1), pragma[only_bind_into](i1), + pragma[only_bind_into](bb2), pragma[only_bind_into](i2)) + | + use.asDefOrUse().(UseImpl).hasIndexInBlock(bb2, i2, v) + ) + or + exists(PhiNode phi | + lastRefRedef(_, bb1, i1, phi) and + use.asPhi() = phi and + phi.getSourceVariable() = pragma[only_bind_into](v) + ) + ) +} + +private predicate useToNode(UseOrPhi use, Node nodeTo) { + exists(UseImpl useImpl | + useImpl = use.asDefOrUse() and + nodeHasOperand(nodeTo, useImpl.getOperand(), useImpl.getIndirectionIndex()) + ) + or + nodeTo.(SsaPhiNode).getPhiNode() = use.asPhi() +} + +pragma[noinline] +predicate outNodeHasAddressAndIndex( + IndirectArgumentOutNode out, Operand address, int indirectionIndex +) { + out.getAddressOperand() = address and + out.getIndirectionIndex() = indirectionIndex +} + +private predicate defToNode(Node nodeFrom, Def def) { + nodeHasInstruction(nodeFrom, def.getDefiningInstruction(), def.getIndirectionIndex()) +} + +private predicate nodeToDefOrUse(Node nodeFrom, SsaDefOrUse defOrUse) { + // Node -> Def + defToNode(nodeFrom, defOrUse) + or + // Node -> Use + useToNode(defOrUse, nodeFrom) +} + +/** + * Perform a single conversion-like step from `nFrom` to `nTo`. This relation + * only holds when there is no use-use relation out of `nTo`. + */ +private predicate indirectConversionFlowStep(Node nFrom, Node nTo) { + not exists(UseOrPhi defOrUse | + nodeToDefOrUse(nTo, defOrUse) and + adjacentDefRead(defOrUse, _) + ) and + exists(Operand op1, Operand op2, int indirectionIndex, Instruction instr | + hasOperandAndIndex(nFrom, op1, pragma[only_bind_into](indirectionIndex)) and + hasOperandAndIndex(nTo, op2, pragma[only_bind_into](indirectionIndex)) and + instr = op2.getDef() and + conversionFlow(op1, instr, _) + ) +} + +/** + * The reason for this predicate is a bit annoying: + * We cannot mark a `PointerArithmeticInstruction` that computes an offset based on some SSA + * variable `x` as a use of `x` since this creates taint-flow in the following example: + * ```c + * int x = array[source] + * sink(*array) + * ``` + * This is because `source` would flow from the operand of `PointerArithmeticInstruction` to the + * result of the instruction, and into the `IndirectOperand` that represents the value of `*array`. + * Then, via use-use flow, flow will arrive at `*array` in `sink(*array)`. + * + * So this predicate recurses back along conversions and `PointerArithmeticInstruction`s to find the + * first use that has provides use-use flow, and uses that target as the target of the `nodeFrom`. + */ +private predicate adjustForPointerArith(Node nodeFrom, UseOrPhi use) { + nodeFrom = any(PostUpdateNode pun).getPreUpdateNode() and + exists(DefOrUse defOrUse, Node adjusted | + indirectConversionFlowStep*(adjusted, nodeFrom) and + nodeToDefOrUse(adjusted, defOrUse) and + adjacentDefRead(defOrUse, use) + ) +} + +/** Holds if there is def-use or use-use flow from `nodeFrom` to `nodeTo`. */ +predicate ssaFlow(Node nodeFrom, Node nodeTo) { + // `nodeFrom = any(PostUpdateNode pun).getPreUpdateNode()` is implied by adjustedForPointerArith. + exists(UseOrPhi use | + adjustForPointerArith(nodeFrom, use) and + useToNode(use, nodeTo) + ) + or + not nodeFrom = any(PostUpdateNode pun).getPreUpdateNode() and + exists(DefOrUse defOrUse1, UseOrPhi use | + nodeToDefOrUse(nodeFrom, defOrUse1) and + adjacentDefRead(defOrUse1, use) and + useToNode(use, nodeTo) + ) +} + +/** Holds if `nodeTo` receives flow from the phi node `nodeFrom`. */ +predicate fromPhiNode(SsaPhiNode nodeFrom, Node nodeTo) { + exists(PhiNode phi, SourceVariable sv, IRBlock bb1, int i1, UseOrPhi use | + phi = nodeFrom.getPhiNode() and + phi.definesAt(sv, bb1, i1) and + useToNode(use, nodeTo) + | + exists(IRBlock bb2, int i2 | + use.asDefOrUse().hasIndexInBlock(bb2, i2, sv) and + adjacentDefRead(phi, bb1, i1, bb2, i2) + ) + or + exists(PhiNode phiTo | + lastRefRedef(phi, _, _, phiTo) and + nodeTo.(SsaPhiNode).getPhiNode() = phiTo + ) + ) +} + +private SsaInternals0::SourceVariable getOldSourceVariable(SourceVariable v) { + v.getBaseVariable().(BaseIRVariable).getIRVariable() = + result.getBaseVariable().(SsaInternals0::BaseIRVariable).getIRVariable() + or + v.getBaseVariable().(BaseCallVariable).getCallInstruction() = + result.getBaseVariable().(SsaInternals0::BaseCallVariable).getCallInstruction() +} + +/** + * Holds if there is a write at index `i` in basic block `bb` to variable `v` that's + * subsequently read (as determined by the SSA pruning stage). + */ +private predicate variableWriteCand(IRBlock bb, int i, SourceVariable v) { + exists(SsaInternals0::Def def, SsaInternals0::SourceVariable v0 | + def.asDefOrUse().hasIndexInBlock(bb, i, v0) and + v0 = getOldSourceVariable(v) + ) +} + +private module SsaInput implements SsaImplCommon::InputSig { + import InputSigCommon + import SourceVariables + + /** + * Holds if the `i`'th write in block `bb` writes to the variable `v`. + * `certain` is `true` if the write is guaranteed to overwrite the entire variable. + */ + predicate variableWrite(IRBlock bb, int i, SourceVariable v, boolean certain) { + DataFlowImplCommon::forceCachingInSameStage() and + variableWriteCand(bb, i, v) and + exists(DefImpl def | def.hasIndexInBlock(bb, i, v) | + if def.isCertain() then certain = true else certain = false + ) + } + + /** + * Holds if the `i`'th read in block `bb` reads to the variable `v`. + * `certain` is `true` if the read is guaranteed. For C++, this is always the case. + */ + predicate variableRead(IRBlock bb, int i, SourceVariable v, boolean certain) { + exists(UseImpl use | use.hasIndexInBlock(bb, i, v) | + if use.isCertain() then certain = true else certain = false + ) + } +} + +/** + * The final SSA predicates used for dataflow purposes. + */ +cached +module SsaCached { + /** + * Holds if `def` is accessed at index `i1` in basic block `bb1` (either a read + * or a write), `def` is read at index `i2` in basic block `bb2`, and there is a + * path between them without any read of `def`. + */ + cached + predicate adjacentDefRead(Definition def, IRBlock bb1, int i1, IRBlock bb2, int i2) { + SsaImpl::adjacentDefRead(def, bb1, i1, bb2, i2) + } + + /** + * Holds if the node at index `i` in `bb` is a last reference to SSA definition + * `def`. The reference is last because it can reach another write `next`, + * without passing through another read or write. + */ + cached + predicate lastRefRedef(Definition def, IRBlock bb, int i, Definition next) { + SsaImpl::lastRefRedef(def, bb, i, next) + } +} + +cached +private newtype TSsaDefOrUse = + TDefOrUse(DefOrUseImpl defOrUse) { + defOrUse instanceof UseImpl + or + // Like in the pruning stage, we only include definition that's live after the + // write as the final definitions computed by SSA. + exists(Definition def, SourceVariable sv, IRBlock bb, int i | + def.definesAt(sv, bb, i) and + defOrUse.(DefImpl).hasIndexInBlock(bb, i, sv) + ) + } or + TPhi(PhiNode phi) + +abstract private class SsaDefOrUse extends TSsaDefOrUse { + string toString() { none() } + + DefOrUseImpl asDefOrUse() { none() } + + PhiNode asPhi() { none() } + + abstract Location getLocation(); +} + +class DefOrUse extends TDefOrUse, SsaDefOrUse { + DefOrUseImpl defOrUse; + + DefOrUse() { this = TDefOrUse(defOrUse) } + + final override DefOrUseImpl asDefOrUse() { result = defOrUse } + + final override Location getLocation() { result = defOrUse.getLocation() } + + final SourceVariable getSourceVariable() { result = defOrUse.getSourceVariable() } + + override string toString() { result = defOrUse.toString() } +} + +class Phi extends TPhi, SsaDefOrUse { + PhiNode phi; + + Phi() { this = TPhi(phi) } + + final override PhiNode asPhi() { result = phi } + + final override Location getLocation() { result = phi.getBasicBlock().getLocation() } + + override string toString() { result = "Phi" } +} + +class UseOrPhi extends SsaDefOrUse { + UseOrPhi() { + this.asDefOrUse() instanceof UseImpl + or + this instanceof Phi + } + + final override Location getLocation() { + result = this.asDefOrUse().getLocation() or result = this.(Phi).getLocation() + } +} + +class Def extends DefOrUse { + override DefImpl defOrUse; + + Operand getAddressOperand() { result = defOrUse.getAddressOperand() } + + Instruction getAddress() { result = this.getAddressOperand().getDef() } + + /** + * This predicate ensures that joins go from `defOrUse` to the result + * instead of the other way around. + */ + pragma[inline] + int getIndirectionIndex() { + pragma[only_bind_into](result) = pragma[only_bind_out](defOrUse).getIndirectionIndex() + } + + Instruction getDefiningInstruction() { result = defOrUse.getDefiningInstruction() } +} + +private module SsaImpl = SsaImplCommon::Make; + +class PhiNode = SsaImpl::PhiNode; + +class Definition = SsaImpl::Definition; + +import SsaCached diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll new file mode 100644 index 000000000000..36ab036c4e53 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/SsaInternalsCommon.qll @@ -0,0 +1,268 @@ +import cpp as Cpp +import semmle.code.cpp.ir.IR +import semmle.code.cpp.ir.internal.IRCppLanguage +private import semmle.code.cpp.ir.implementation.raw.internal.SideEffects as SideEffects +private import DataFlowImplCommon as DataFlowImplCommon +private import DataFlowUtil + +/** + * Holds if `operand` is an operand that is not used by the dataflow library. + * Ignored operands are not recognizd as uses by SSA, and they don't have a + * corresponding `(Indirect)OperandNode`. + */ +predicate ignoreOperand(Operand operand) { + operand = any(Instruction instr | ignoreInstruction(instr)).getAnOperand() +} + +/** + * Holds if `instr` is an instruction that is not used by the dataflow library. + * Ignored instructions are not recognized as reads/writes by SSA, and they + * don't have a corresponding `(Indirect)InstructionNode`. + */ +predicate ignoreInstruction(Instruction instr) { + DataFlowImplCommon::forceCachingInSameStage() and + ( + instr instanceof WriteSideEffectInstruction or + instr instanceof PhiInstruction or + instr instanceof ReadSideEffectInstruction or + instr instanceof ChiInstruction or + instr instanceof InitializeIndirectionInstruction + ) +} + +/** + * Gets the C++ type of `this` in the member function `f`. + * The result is a glvalue if `isGLValue` is true, and + * a prvalue if `isGLValue` is false. + */ +bindingset[isGLValue] +private CppType getThisType(Cpp::MemberFunction f, boolean isGLValue) { + result.hasType(f.getTypeOfThis(), isGLValue) +} + +/** + * Gets the C++ type of the instruction `i`. + * + * This is equivalent to `i.getResultLanguageType()` with the exception + * of instructions that directly references a `this` IRVariable. In this + * case, `i.getResultLanguageType()` gives an unknown type, whereas the + * predicate gives the expected type (i.e., a potentially cv-qualified + * type `A*` where `A` is the declaring type of the member function that + * contains `i`). + */ +cached +CppType getResultLanguageType(Instruction i) { + if i.(VariableAddressInstruction).getIRVariable() instanceof IRThisVariable + then + if i.isGLValue() + then result = getThisType(i.getEnclosingFunction(), true) + else result = getThisType(i.getEnclosingFunction(), false) + else result = i.getResultLanguageType() +} + +/** + * Gets the C++ type of the operand `operand`. + * This is equivalent to the type of the operand's defining instruction. + * + * See `getResultLanguageType` for a description of this behavior. + */ +CppType getLanguageType(Operand operand) { result = getResultLanguageType(operand.getDef()) } + +/** + * Gets the maximum number of indirections a glvalue of type `type` can have. + * For example: + * - If `type = int`, the result is 1 + * - If `type = MyStruct`, the result is 1 + * - If `type = char*`, the result is 2 + */ +int getMaxIndirectionsForType(Type type) { + result = countIndirectionsForCppType(getTypeForGLValue(type)) +} + +/** + * Gets the maximum number of indirections a value of type `type` can have. + * + * Note that this predicate is intended to be called on unspecified types + * (i.e., `countIndirections(e.getUnspecifiedType())`). + */ +private int countIndirections(Type t) { + result = + 1 + + countIndirections([t.(Cpp::PointerType).getBaseType(), t.(Cpp::ReferenceType).getBaseType()]) + or + not t instanceof Cpp::PointerType and + not t instanceof Cpp::ReferenceType and + result = 0 +} + +/** + * Gets the maximum number of indirections a value of C++ + * type `langType` can have. + */ +int countIndirectionsForCppType(LanguageType langType) { + exists(Type type | langType.hasType(type, true) | + result = 1 + countIndirections(type.getUnspecifiedType()) + ) + or + exists(Type type | langType.hasType(type, false) | + result = countIndirections(type.getUnspecifiedType()) + ) +} + +/** + * A `CallInstruction` that calls an allocation function such + * as `malloc` or `operator new`. + */ +class AllocationInstruction extends CallInstruction { + AllocationInstruction() { this.getStaticCallTarget() instanceof Cpp::AllocationFunction } +} + +/** + * Holds if `i` is a base instruction that starts a sequence of uses + * of some variable that SSA can handle. + * + * This is either when `i` is a `VariableAddressInstruction` or when + * `i` is a fresh allocation produced by an `AllocationInstruction`. + */ +private predicate isSourceVariableBase(Instruction i) { + i instanceof VariableAddressInstruction or i instanceof AllocationInstruction +} + +/** + * Holds if the value pointed to by `operand` can potentially be + * modified be the caller. + */ +predicate isModifiableByCall(ArgumentOperand operand) { + exists(CallInstruction call, int index, CppType type | + type = getLanguageType(operand) and + call.getArgumentOperand(index) = operand and + if index = -1 + then not call.getStaticCallTarget() instanceof Cpp::ConstMemberFunction + else not SideEffects::isConstPointerLike(any(Type t | type.hasType(t, _))) + ) +} + +cached +private module Cached { + /** + * Holds if `op` is a use of an SSA variable rooted at `base` with `ind` number + * of indirections. + * + * `certain` is `true` if the operand is guaranteed to read the variable, and + * `indirectionIndex` specifies the number of loads required to read the variable. + */ + cached + predicate isUse(boolean certain, Operand op, Instruction base, int ind, int indirectionIndex) { + not ignoreOperand(op) and + certain = true and + exists(LanguageType type, int m, int ind0 | + type = getLanguageType(op) and + m = countIndirectionsForCppType(type) and + isUseImpl(op, base, ind0) and + ind = ind0 + [0 .. m] and + indirectionIndex = ind - ind0 + ) + } + + /** + * Holds if `operand` is a use of an SSA variable rooted at `base`, and the + * path from `base` to `operand` passes through `ind` load-like instructions. + */ + private predicate isUseImpl(Operand operand, Instruction base, int ind) { + DataFlowImplCommon::forceCachingInSameStage() and + ind = 0 and + operand.getDef() = base and + isSourceVariableBase(base) + or + exists(Operand mid, Instruction instr | + isUseImpl(mid, base, ind) and + instr = operand.getDef() and + conversionFlow(mid, instr, false) + ) + or + exists(int ind0 | + isUseImpl(operand.getDef().(LoadInstruction).getSourceAddressOperand(), base, ind0) + or + isUseImpl(operand.getDef().(InitializeParameterInstruction).getAnOperand(), base, ind0) + | + ind0 = ind - 1 + ) + } + + /** + * Holds if `address` is an address of an SSA variable rooted at `base`, + * and `instr` is a definition of the SSA variable with `ind` number of indirections. + * + * `certain` is `true` if `instr` is guaranteed to write to the variable, and + * `indirectionIndex` specifies the number of loads required to read the variable + * after the write operation. + */ + cached + predicate isDef( + boolean certain, Instruction instr, Operand address, Instruction base, int ind, + int indirectionIndex + ) { + certain = true and + exists(int ind0, CppType type, int m | + address = + [ + instr.(StoreInstruction).getDestinationAddressOperand(), + instr.(InitializeParameterInstruction).getAnOperand(), + instr.(InitializeDynamicAllocationInstruction).getAllocationAddressOperand(), + instr.(UninitializedInstruction).getAnOperand() + ] + | + isDefImpl(address, base, ind0) and + type = getLanguageType(address) and + m = countIndirectionsForCppType(type) and + ind = ind0 + [1 .. m] and + indirectionIndex = ind - (ind0 + 1) + ) + } + + /** + * Holds if `address` is a use of an SSA variable rooted at `base`, and the + * path from `base` to `address` passes through `ind` load-like instructions. + * + * Note: Unlike `isUseImpl`, this predicate recurses through pointer-arithmetic + * instructions. + */ + private predicate isDefImpl(Operand address, Instruction base, int ind) { + DataFlowImplCommon::forceCachingInSameStage() and + ind = 0 and + address.getDef() = base and + isSourceVariableBase(base) + or + exists(Operand mid, Instruction instr | + isDefImpl(mid, base, ind) and + instr = address.getDef() and + conversionFlow(mid, instr, _) + ) + or + exists(int ind0 | + isDefImpl(address.getDef().(LoadInstruction).getSourceAddressOperand(), base, ind0) + or + isDefImpl(address.getDef().(InitializeParameterInstruction).getAnOperand(), base, ind0) + | + ind0 = ind - 1 + ) + } +} + +import Cached + +/** + * Inputs to the shared SSA library's parameterized module that is shared + * between the SSA pruning stage, and the final SSA stage. + */ +module InputSigCommon { + class BasicBlock = IRBlock; + + BasicBlock getImmediateBasicBlockDominator(BasicBlock bb) { result.immediatelyDominates(bb) } + + BasicBlock getABasicBlockSuccessor(BasicBlock bb) { result = bb.getASuccessor() } + + class ExitBasicBlock extends IRBlock { + ExitBasicBlock() { this.getLastInstruction() instanceof ExitFunctionInstruction } + } +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll new file mode 100644 index 000000000000..1ab232923bfa --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/TaintTrackingUtil.qll @@ -0,0 +1,208 @@ +private import semmle.code.cpp.ir.IR +private import experimental.semmle.code.cpp.ir.dataflow.DataFlow +private import ModelUtil +private import semmle.code.cpp.models.interfaces.DataFlow +private import semmle.code.cpp.models.interfaces.SideEffect +private import DataFlowUtil +private import DataFlowPrivate +private import semmle.code.cpp.models.Models + +/** + * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local + * (intra-procedural) step. + */ +predicate localTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + DataFlow::localFlowStep(nodeFrom, nodeTo) + or + localAdditionalTaintStep(nodeFrom, nodeTo) +} + +/** + * Holds if taint can flow in one local step from `nodeFrom` to `nodeTo` excluding + * local data flow steps. That is, `nodeFrom` and `nodeTo` are likely to represent + * different objects. + */ +cached +predicate localAdditionalTaintStep(DataFlow::Node nodeFrom, DataFlow::Node nodeTo) { + operandToInstructionTaintStep(nodeFrom.asOperand(), nodeTo.asInstruction()) + or + modeledTaintStep(nodeFrom, nodeTo) + or + // Flow from `op` to `*op`. + exists(Operand operand, int indirectionIndex | + nodeHasOperand(nodeFrom, operand, indirectionIndex) and + nodeHasOperand(nodeTo, operand, indirectionIndex - 1) + ) + or + // Flow from `instr` to `*instr`. + exists(Instruction instr, int indirectionIndex | + nodeHasInstruction(nodeFrom, instr, indirectionIndex) and + nodeHasInstruction(nodeTo, instr, indirectionIndex - 1) + ) + or + // Flow from (the indirection of) an operand of a pointer arithmetic instruction to the + // indirection of the pointer arithmetic instruction. This provides flow from `source` + // in `x[source]` to the result of the associated load instruction. + exists(PointerArithmeticInstruction pai, int indirectionIndex | + nodeHasOperand(nodeFrom, pai.getAnOperand(), pragma[only_bind_into](indirectionIndex)) and + hasInstructionAndIndex(nodeTo, pai, indirectionIndex + 1) + ) +} + +/** + * Holds if taint propagates from `nodeFrom` to `nodeTo` in exactly one local + * (intra-procedural) step. + */ +private predicate operandToInstructionTaintStep(Operand opFrom, Instruction instrTo) { + // Taint can flow through expressions that alter the value but preserve + // more than one bit of it _or_ expressions that follow data through + // pointer indirections. + instrTo.getAnOperand() = opFrom and + ( + instrTo instanceof ArithmeticInstruction + or + instrTo instanceof BitwiseInstruction + or + instrTo instanceof PointerArithmeticInstruction + ) + or + // The `CopyInstruction` case is also present in non-taint data flow, but + // that uses `getDef` rather than `getAnyDef`. For taint, we want flow + // from a definition of `myStruct` to a `myStruct.myField` expression. + instrTo.(LoadInstruction).getSourceAddressOperand() = opFrom + or + // Unary instructions tend to preserve enough information in practice that we + // want taint to flow through. + // The exception is `FieldAddressInstruction`. Together with the rules below for + // `LoadInstruction`s and `ChiInstruction`s, flow through `FieldAddressInstruction` + // could cause flow into one field to come out an unrelated field. + // This would happen across function boundaries, where the IR would not be able to + // match loads to stores. + instrTo.(UnaryInstruction).getUnaryOperand() = opFrom and + ( + not instrTo instanceof FieldAddressInstruction + or + instrTo.(FieldAddressInstruction).getField().getDeclaringType() instanceof Union + ) +} + +/** + * Holds if taint may propagate from `source` to `sink` in zero or more local + * (intra-procedural) steps. + */ +pragma[inline] +predicate localTaint(DataFlow::Node source, DataFlow::Node sink) { localTaintStep*(source, sink) } + +/** + * Holds if taint can flow from `i1` to `i2` in zero or more + * local (intra-procedural) steps. + */ +pragma[inline] +predicate localInstructionTaint(Instruction i1, Instruction i2) { + localTaint(DataFlow::instructionNode(i1), DataFlow::instructionNode(i2)) +} + +/** + * Holds if taint can flow from `e1` to `e2` in zero or more + * local (intra-procedural) steps. + */ +pragma[inline] +predicate localExprTaint(Expr e1, Expr e2) { + localTaint(DataFlow::exprNode(e1), DataFlow::exprNode(e2)) +} + +/** + * Holds if the additional step from `src` to `sink` should be included in all + * global taint flow configurations. + */ +predicate defaultAdditionalTaintStep(DataFlow::Node src, DataFlow::Node sink) { + localAdditionalTaintStep(src, sink) +} + +/** + * Holds if default `TaintTracking::Configuration`s should allow implicit reads + * of `c` at sinks and inputs to additional taint steps. + */ +bindingset[node] +predicate defaultImplicitTaintRead(DataFlow::Node node, DataFlow::Content c) { none() } + +/** + * Holds if `node` should be a sanitizer in all global taint flow configurations + * but not in local taint. + */ +predicate defaultTaintSanitizer(DataFlow::Node node) { none() } + +/** + * Holds if taint can flow from `instrIn` to `instrOut` through a call to a + * modeled function. + */ +predicate modeledTaintStep(DataFlow::Node nodeIn, DataFlow::Node nodeOut) { + // Normal taint steps + exists(CallInstruction call, TaintFunction func, FunctionInput modelIn, FunctionOutput modelOut | + call.getStaticCallTarget() = func and + func.hasTaintFlow(modelIn, modelOut) + | + ( + nodeIn = callInput(call, modelIn) + or + exists(int n | + modelIn.isParameterDerefOrQualifierObject(n) and + if n = -1 + then nodeIn = callInput(call, any(InQualifierAddress inQualifier)) + else nodeIn = callInput(call, any(InParameter inParam | inParam.getIndex() = n)) + ) + ) and + nodeOut = callOutput(call, modelOut) + or + exists(int d | + nodeIn = callInput(call, modelIn, d) + or + exists(int n | + d = 1 and + modelIn.isParameterDerefOrQualifierObject(n) and + if n = -1 + then nodeIn = callInput(call, any(InQualifierAddress inQualifier)) + else nodeIn = callInput(call, any(InParameter inParam | inParam.getIndex() = n)) + ) + | + call.getStaticCallTarget() = func and + func.hasTaintFlow(modelIn, modelOut) and + nodeOut = callOutput(call, modelOut, d) + ) + ) + or + // Taint flow from one argument to another and data flow from an argument to a + // return value. This happens in functions like `strcat` and `memcpy`. We + // could model this flow in two separate steps, but that would add reverse + // flow from the write side-effect to the call instruction, which may not be + // desirable. + exists( + CallInstruction call, Function func, FunctionInput modelIn, OutParameterDeref modelMidOut, + int indexMid, InParameter modelMidIn, OutReturnValue modelOut + | + nodeIn = callInput(call, modelIn) and + nodeOut = callOutput(call, modelOut) and + call.getStaticCallTarget() = func and + func.(TaintFunction).hasTaintFlow(modelIn, modelMidOut) and + func.(DataFlowFunction).hasDataFlow(modelMidIn, modelOut) and + modelMidOut.isParameterDeref(indexMid) and + modelMidIn.isParameter(indexMid) + ) + or + // Taint flow from a pointer argument to an output, when the model specifies flow from the deref + // to that output, but the deref is not modeled in the IR for the caller. + exists( + CallInstruction call, DataFlow::SideEffectOperandNode indirectArgument, Function func, + FunctionInput modelIn, FunctionOutput modelOut + | + indirectArgument = callInput(call, modelIn) and + indirectArgument.getAddressOperand() = nodeIn.asOperand() and + call.getStaticCallTarget() = func and + ( + func.(DataFlowFunction).hasDataFlow(modelIn, modelOut) + or + func.(TaintFunction).hasTaintFlow(modelIn, modelOut) + ) and + nodeOut = callOutput(call, modelOut) + ) +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll new file mode 100644 index 000000000000..3a89f1d170f3 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/ssa0/SsaInternals.qll @@ -0,0 +1,314 @@ +/** + * This module defines an initial SSA pruning stage that doesn't take + * indirections into account. + */ + +private import codeql.ssa.Ssa as SsaImplCommon +private import semmle.code.cpp.ir.IR +private import experimental.semmle.code.cpp.ir.dataflow.internal.DataFlowImplCommon as DataFlowImplCommon +private import semmle.code.cpp.models.interfaces.Allocation as Alloc +private import semmle.code.cpp.models.interfaces.DataFlow as DataFlow +private import semmle.code.cpp.ir.implementation.raw.internal.SideEffects as SideEffects +private import semmle.code.cpp.ir.internal.IRCppLanguage +private import experimental.semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate +private import experimental.semmle.code.cpp.ir.dataflow.internal.DataFlowUtil +private import experimental.semmle.code.cpp.ir.dataflow.internal.SsaInternalsCommon + +private module SourceVariables { + newtype TBaseSourceVariable = + // Each IR variable gets its own source variable + TBaseIRVariable(IRVariable var) or + // Each allocation gets its own source variable + TBaseCallVariable(AllocationInstruction call) + + abstract class BaseSourceVariable extends TBaseSourceVariable { + abstract string toString(); + + abstract DataFlowType getType(); + } + + class BaseIRVariable extends BaseSourceVariable, TBaseIRVariable { + IRVariable var; + + IRVariable getIRVariable() { result = var } + + BaseIRVariable() { this = TBaseIRVariable(var) } + + override string toString() { result = var.toString() } + + override DataFlowType getType() { result = var.getIRType() } + } + + class BaseCallVariable extends BaseSourceVariable, TBaseCallVariable { + AllocationInstruction call; + + BaseCallVariable() { this = TBaseCallVariable(call) } + + AllocationInstruction getCallInstruction() { result = call } + + override string toString() { result = call.toString() } + + override DataFlowType getType() { result = call.getResultIRType() } + } + + private newtype TSourceVariable = + TSourceIRVariable(BaseIRVariable baseVar) or + TCallVariable(AllocationInstruction call) + + abstract class SourceVariable extends TSourceVariable { + abstract string toString(); + + abstract BaseSourceVariable getBaseVariable(); + } + + class SourceIRVariable extends SourceVariable, TSourceIRVariable { + BaseIRVariable var; + + SourceIRVariable() { this = TSourceIRVariable(var) } + + IRVariable getIRVariable() { result = var.getIRVariable() } + + override BaseIRVariable getBaseVariable() { result.getIRVariable() = this.getIRVariable() } + + override string toString() { result = this.getIRVariable().toString() } + } + + class CallVariable extends SourceVariable, TCallVariable { + AllocationInstruction call; + + CallVariable() { this = TCallVariable(call) } + + AllocationInstruction getCall() { result = call } + + override BaseCallVariable getBaseVariable() { result.getCallInstruction() = call } + + override string toString() { result = "Call" } + } +} + +import SourceVariables + +private newtype TDefOrUseImpl = + TDefImpl(Operand address) { isDef(_, _, address, _, _, _) } or + TUseImpl(Operand operand) { + isUse(_, operand, _, _, _) and + not isDef(_, _, operand, _, _, _) + } + +abstract private class DefOrUseImpl extends TDefOrUseImpl { + /** Gets a textual representation of this element. */ + abstract string toString(); + + /** Gets the block of this definition or use. */ + abstract IRBlock getBlock(); + + /** Holds if this definition or use has index `index` in block `block`. */ + abstract predicate hasIndexInBlock(IRBlock block, int index); + + final predicate hasIndexInBlock(IRBlock block, int index, SourceVariable sv) { + this.hasIndexInBlock(block, index) and + sv = this.getSourceVariable() + } + + /** Gets the location of this element. */ + abstract Cpp::Location getLocation(); + + abstract Instruction getBase(); + + final BaseSourceVariable getBaseSourceVariable() { + exists(IRVariable var | + result.(BaseIRVariable).getIRVariable() = var and + instructionHasIRVariable(this.getBase(), var) + ) + or + result.(BaseCallVariable).getCallInstruction() = this.getBase() + } + + /** Gets the variable that is defined or used. */ + final SourceVariable getSourceVariable() { + exists(BaseSourceVariable v | + sourceVariableHasBaseAndIndex(result, v) and + defOrUseHasSourceVariable(this, v) + ) + } +} + +pragma[noinline] +private predicate instructionHasIRVariable(VariableAddressInstruction vai, IRVariable var) { + vai.getIRVariable() = var +} + +private predicate defOrUseHasSourceVariable(DefOrUseImpl defOrUse, BaseSourceVariable bv) { + defHasSourceVariable(defOrUse, bv) + or + useHasSourceVariable(defOrUse, bv) +} + +pragma[noinline] +private predicate defHasSourceVariable(DefImpl def, BaseSourceVariable bv) { + bv = def.getBaseSourceVariable() +} + +pragma[noinline] +private predicate useHasSourceVariable(UseImpl use, BaseSourceVariable bv) { + bv = use.getBaseSourceVariable() +} + +pragma[noinline] +private predicate sourceVariableHasBaseAndIndex(SourceVariable v, BaseSourceVariable bv) { + v.getBaseVariable() = bv +} + +class DefImpl extends DefOrUseImpl, TDefImpl { + Operand address; + + DefImpl() { this = TDefImpl(address) } + + override Instruction getBase() { isDef(_, _, address, result, _, _) } + + Operand getAddressOperand() { result = address } + + Instruction getDefiningInstruction() { isDef(_, result, address, _, _, _) } + + override string toString() { result = address.toString() } + + override IRBlock getBlock() { result = this.getDefiningInstruction().getBlock() } + + override Cpp::Location getLocation() { result = this.getDefiningInstruction().getLocation() } + + final override predicate hasIndexInBlock(IRBlock block, int index) { + this.getDefiningInstruction() = block.getInstruction(index) + } + + predicate isCertain() { isDef(true, _, address, _, _, _) } +} + +class UseImpl extends DefOrUseImpl, TUseImpl { + Operand operand; + + UseImpl() { this = TUseImpl(operand) } + + Operand getOperand() { result = operand } + + override string toString() { result = operand.toString() } + + final override predicate hasIndexInBlock(IRBlock block, int index) { + operand.getUse() = block.getInstruction(index) + } + + final override IRBlock getBlock() { result = operand.getUse().getBlock() } + + final override Cpp::Location getLocation() { result = operand.getLocation() } + + override Instruction getBase() { isUse(_, operand, result, _, _) } + + predicate isCertain() { isUse(true, operand, _, _, _) } +} + +private module SsaInput implements SsaImplCommon::InputSig { + import InputSigCommon + import SourceVariables + + /** + * Holds if the `i`'th write in block `bb` writes to the variable `v`. + * `certain` is `true` if the write is guaranteed to overwrite the entire variable. + */ + predicate variableWrite(IRBlock bb, int i, SourceVariable v, boolean certain) { + DataFlowImplCommon::forceCachingInSameStage() and + exists(DefImpl def | def.hasIndexInBlock(bb, i, v) | + if def.isCertain() then certain = true else certain = false + ) + } + + /** + * Holds if the `i`'th read in block `bb` reads to the variable `v`. + * `certain` is `true` if the read is guaranteed. + */ + predicate variableRead(IRBlock bb, int i, SourceVariable v, boolean certain) { + exists(UseImpl use | use.hasIndexInBlock(bb, i, v) | + if use.isCertain() then certain = true else certain = false + ) + } +} + +private newtype TSsaDefOrUse = + TDefOrUse(DefOrUseImpl defOrUse) { + defOrUse instanceof UseImpl + or + // If `defOrUse` is a definition we only include it if the + // SSA library concludes that it's live after the write. + exists(Definition def, SourceVariable sv, IRBlock bb, int i | + def.definesAt(sv, bb, i) and + defOrUse.(DefImpl).hasIndexInBlock(bb, i, sv) + ) + } or + TPhi(PhiNode phi) + +abstract private class SsaDefOrUse extends TSsaDefOrUse { + string toString() { result = "SsaDefOrUse" } + + DefOrUseImpl asDefOrUse() { none() } + + PhiNode asPhi() { none() } + + abstract Location getLocation(); +} + +class DefOrUse extends TDefOrUse, SsaDefOrUse { + DefOrUseImpl defOrUse; + + DefOrUse() { this = TDefOrUse(defOrUse) } + + final override DefOrUseImpl asDefOrUse() { result = defOrUse } + + final override Location getLocation() { result = defOrUse.getLocation() } + + final SourceVariable getSourceVariable() { result = defOrUse.getSourceVariable() } +} + +class Phi extends TPhi, SsaDefOrUse { + PhiNode phi; + + Phi() { this = TPhi(phi) } + + final override PhiNode asPhi() { result = phi } + + final override Location getLocation() { result = phi.getBasicBlock().getLocation() } +} + +class UseOrPhi extends SsaDefOrUse { + UseOrPhi() { + this.asDefOrUse() instanceof UseImpl + or + this instanceof Phi + } + + final override Location getLocation() { + result = this.asDefOrUse().getLocation() or result = this.(Phi).getLocation() + } + + override string toString() { + result = this.asDefOrUse().toString() + or + this instanceof Phi and + result = "Phi" + } +} + +class Def extends DefOrUse { + override DefImpl defOrUse; + + Operand getAddressOperand() { result = defOrUse.getAddressOperand() } + + Instruction getAddress() { result = this.getAddressOperand().getDef() } + + Instruction getDefiningInstruction() { result = defOrUse.getDefiningInstruction() } + + override string toString() { result = this.asDefOrUse().toString() + " (def)" } +} + +private module SsaImpl = SsaImplCommon::Make; + +class PhiNode = SsaImpl::PhiNode; + +class Definition = SsaImpl::Definition; diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingImpl.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingImpl.qll new file mode 100644 index 000000000000..e6ce1ada8d4c --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingImpl.qll @@ -0,0 +1,186 @@ +/** + * Provides an implementation of global (interprocedural) taint tracking. + * This file re-exports the local (intraprocedural) taint-tracking analysis + * from `TaintTrackingParameter::Public` and adds a global analysis, mainly + * exposed through the `Configuration` class. For some languages, this file + * exists in several identical copies, allowing queries to use multiple + * `Configuration` classes that depend on each other without introducing + * mutual recursion among those configurations. + */ + +import TaintTrackingParameter::Public +private import TaintTrackingParameter::Private + +/** + * A configuration of interprocedural taint tracking analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the taint tracking library must define its own unique extension of + * this abstract class. + * + * A taint-tracking configuration is a special data flow configuration + * (`DataFlow::Configuration`) that allows for flow through nodes that do not + * necessarily preserve values but are still relevant from a taint tracking + * perspective. (For example, string concatenation, where one of the operands + * is tainted.) + * + * To create a configuration, extend this class with a subclass whose + * characteristic predicate is a unique singleton string. For example, write + * + * ```ql + * class MyAnalysisConfiguration extends TaintTracking::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isSanitizer`. + * // Optionally override `isSanitizerIn`. + * // Optionally override `isSanitizerOut`. + * // Optionally override `isSanitizerGuard`. + * // Optionally override `isAdditionalTaintStep`. + * } + * ``` + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but it is unsupported to depend on + * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the + * overridden predicates that define sources, sinks, or additional steps. + * Instead, the dependency should go to a `TaintTracking2::Configuration` or a + * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc. + */ +abstract class Configuration extends DataFlow::Configuration { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant taint source. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + override predicate isSource(DataFlow::Node source) { none() } + + /** + * Holds if `source` is a relevant taint source with the given initial + * `state`. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() } + + /** + * Holds if `sink` is a relevant taint sink + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + override predicate isSink(DataFlow::Node sink) { none() } + + /** + * Holds if `sink` is a relevant taint sink accepting `state`. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() } + + /** Holds if the node `node` is a taint sanitizer. */ + predicate isSanitizer(DataFlow::Node node) { none() } + + final override predicate isBarrier(DataFlow::Node node) { + this.isSanitizer(node) or + defaultTaintSanitizer(node) + } + + /** + * Holds if the node `node` is a taint sanitizer when the flow state is + * `state`. + */ + predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() } + + final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) { + this.isSanitizer(node, state) + } + + /** Holds if taint propagation into `node` is prohibited. */ + predicate isSanitizerIn(DataFlow::Node node) { none() } + + final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) } + + /** Holds if taint propagation out of `node` is prohibited. */ + predicate isSanitizerOut(DataFlow::Node node) { none() } + + final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) } + + /** + * DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead. + * + * Holds if taint propagation through nodes guarded by `guard` is prohibited. + */ + deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() } + + deprecated final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { + this.isSanitizerGuard(guard) + } + + /** + * DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead. + * + * Holds if taint propagation through nodes guarded by `guard` is prohibited + * when the flow state is `state`. + */ + deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) { + none() + } + + deprecated final override predicate isBarrierGuard( + DataFlow::BarrierGuard guard, DataFlow::FlowState state + ) { + this.isSanitizerGuard(guard, state) + } + + /** + * Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps. + */ + predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() } + + final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + this.isAdditionalTaintStep(node1, node2) or + defaultAdditionalTaintStep(node1, node2) + } + + /** + * Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps. + * This step is only applicable in `state1` and updates the flow state to `state2`. + */ + predicate isAdditionalTaintStep( + DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2, + DataFlow::FlowState state2 + ) { + none() + } + + final override predicate isAdditionalFlowStep( + DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2, + DataFlow::FlowState state2 + ) { + this.isAdditionalTaintStep(node1, state1, node2, state2) + } + + override predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) { + (this.isSink(node) or this.isAdditionalTaintStep(node, _)) and + defaultImplicitTaintRead(node, c) + } + + /** + * Holds if taint may flow from `source` to `sink` for this configuration. + */ + // overridden to provide taint-tracking specific qldoc + override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) { + super.hasFlow(source, sink) + } +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingParameter.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingParameter.qll new file mode 100644 index 000000000000..07185a4ad57e --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking1/TaintTrackingParameter.qll @@ -0,0 +1,5 @@ +import experimental.semmle.code.cpp.ir.dataflow.internal.TaintTrackingUtil as Public + +module Private { + import experimental.semmle.code.cpp.ir.dataflow.DataFlow::DataFlow as DataFlow +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingImpl.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingImpl.qll new file mode 100644 index 000000000000..e6ce1ada8d4c --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingImpl.qll @@ -0,0 +1,186 @@ +/** + * Provides an implementation of global (interprocedural) taint tracking. + * This file re-exports the local (intraprocedural) taint-tracking analysis + * from `TaintTrackingParameter::Public` and adds a global analysis, mainly + * exposed through the `Configuration` class. For some languages, this file + * exists in several identical copies, allowing queries to use multiple + * `Configuration` classes that depend on each other without introducing + * mutual recursion among those configurations. + */ + +import TaintTrackingParameter::Public +private import TaintTrackingParameter::Private + +/** + * A configuration of interprocedural taint tracking analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the taint tracking library must define its own unique extension of + * this abstract class. + * + * A taint-tracking configuration is a special data flow configuration + * (`DataFlow::Configuration`) that allows for flow through nodes that do not + * necessarily preserve values but are still relevant from a taint tracking + * perspective. (For example, string concatenation, where one of the operands + * is tainted.) + * + * To create a configuration, extend this class with a subclass whose + * characteristic predicate is a unique singleton string. For example, write + * + * ```ql + * class MyAnalysisConfiguration extends TaintTracking::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isSanitizer`. + * // Optionally override `isSanitizerIn`. + * // Optionally override `isSanitizerOut`. + * // Optionally override `isSanitizerGuard`. + * // Optionally override `isAdditionalTaintStep`. + * } + * ``` + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but it is unsupported to depend on + * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the + * overridden predicates that define sources, sinks, or additional steps. + * Instead, the dependency should go to a `TaintTracking2::Configuration` or a + * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc. + */ +abstract class Configuration extends DataFlow::Configuration { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant taint source. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + override predicate isSource(DataFlow::Node source) { none() } + + /** + * Holds if `source` is a relevant taint source with the given initial + * `state`. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() } + + /** + * Holds if `sink` is a relevant taint sink + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + override predicate isSink(DataFlow::Node sink) { none() } + + /** + * Holds if `sink` is a relevant taint sink accepting `state`. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() } + + /** Holds if the node `node` is a taint sanitizer. */ + predicate isSanitizer(DataFlow::Node node) { none() } + + final override predicate isBarrier(DataFlow::Node node) { + this.isSanitizer(node) or + defaultTaintSanitizer(node) + } + + /** + * Holds if the node `node` is a taint sanitizer when the flow state is + * `state`. + */ + predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() } + + final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) { + this.isSanitizer(node, state) + } + + /** Holds if taint propagation into `node` is prohibited. */ + predicate isSanitizerIn(DataFlow::Node node) { none() } + + final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) } + + /** Holds if taint propagation out of `node` is prohibited. */ + predicate isSanitizerOut(DataFlow::Node node) { none() } + + final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) } + + /** + * DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead. + * + * Holds if taint propagation through nodes guarded by `guard` is prohibited. + */ + deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() } + + deprecated final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { + this.isSanitizerGuard(guard) + } + + /** + * DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead. + * + * Holds if taint propagation through nodes guarded by `guard` is prohibited + * when the flow state is `state`. + */ + deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) { + none() + } + + deprecated final override predicate isBarrierGuard( + DataFlow::BarrierGuard guard, DataFlow::FlowState state + ) { + this.isSanitizerGuard(guard, state) + } + + /** + * Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps. + */ + predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() } + + final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + this.isAdditionalTaintStep(node1, node2) or + defaultAdditionalTaintStep(node1, node2) + } + + /** + * Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps. + * This step is only applicable in `state1` and updates the flow state to `state2`. + */ + predicate isAdditionalTaintStep( + DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2, + DataFlow::FlowState state2 + ) { + none() + } + + final override predicate isAdditionalFlowStep( + DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2, + DataFlow::FlowState state2 + ) { + this.isAdditionalTaintStep(node1, state1, node2, state2) + } + + override predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) { + (this.isSink(node) or this.isAdditionalTaintStep(node, _)) and + defaultImplicitTaintRead(node, c) + } + + /** + * Holds if taint may flow from `source` to `sink` for this configuration. + */ + // overridden to provide taint-tracking specific qldoc + override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) { + super.hasFlow(source, sink) + } +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingParameter.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingParameter.qll new file mode 100644 index 000000000000..7d545fe5d043 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking2/TaintTrackingParameter.qll @@ -0,0 +1,5 @@ +import experimental.semmle.code.cpp.ir.dataflow.internal.TaintTrackingUtil as Public + +module Private { + import experimental.semmle.code.cpp.ir.dataflow.DataFlow2::DataFlow2 as DataFlow +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingImpl.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingImpl.qll new file mode 100644 index 000000000000..e6ce1ada8d4c --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingImpl.qll @@ -0,0 +1,186 @@ +/** + * Provides an implementation of global (interprocedural) taint tracking. + * This file re-exports the local (intraprocedural) taint-tracking analysis + * from `TaintTrackingParameter::Public` and adds a global analysis, mainly + * exposed through the `Configuration` class. For some languages, this file + * exists in several identical copies, allowing queries to use multiple + * `Configuration` classes that depend on each other without introducing + * mutual recursion among those configurations. + */ + +import TaintTrackingParameter::Public +private import TaintTrackingParameter::Private + +/** + * A configuration of interprocedural taint tracking analysis. This defines + * sources, sinks, and any other configurable aspect of the analysis. Each + * use of the taint tracking library must define its own unique extension of + * this abstract class. + * + * A taint-tracking configuration is a special data flow configuration + * (`DataFlow::Configuration`) that allows for flow through nodes that do not + * necessarily preserve values but are still relevant from a taint tracking + * perspective. (For example, string concatenation, where one of the operands + * is tainted.) + * + * To create a configuration, extend this class with a subclass whose + * characteristic predicate is a unique singleton string. For example, write + * + * ```ql + * class MyAnalysisConfiguration extends TaintTracking::Configuration { + * MyAnalysisConfiguration() { this = "MyAnalysisConfiguration" } + * // Override `isSource` and `isSink`. + * // Optionally override `isSanitizer`. + * // Optionally override `isSanitizerIn`. + * // Optionally override `isSanitizerOut`. + * // Optionally override `isSanitizerGuard`. + * // Optionally override `isAdditionalTaintStep`. + * } + * ``` + * + * Then, to query whether there is flow between some `source` and `sink`, + * write + * + * ```ql + * exists(MyAnalysisConfiguration cfg | cfg.hasFlow(source, sink)) + * ``` + * + * Multiple configurations can coexist, but it is unsupported to depend on + * another `TaintTracking::Configuration` or a `DataFlow::Configuration` in the + * overridden predicates that define sources, sinks, or additional steps. + * Instead, the dependency should go to a `TaintTracking2::Configuration` or a + * `DataFlow2::Configuration`, `DataFlow3::Configuration`, etc. + */ +abstract class Configuration extends DataFlow::Configuration { + bindingset[this] + Configuration() { any() } + + /** + * Holds if `source` is a relevant taint source. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + override predicate isSource(DataFlow::Node source) { none() } + + /** + * Holds if `source` is a relevant taint source with the given initial + * `state`. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + override predicate isSource(DataFlow::Node source, DataFlow::FlowState state) { none() } + + /** + * Holds if `sink` is a relevant taint sink + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + override predicate isSink(DataFlow::Node sink) { none() } + + /** + * Holds if `sink` is a relevant taint sink accepting `state`. + * + * The smaller this predicate is, the faster `hasFlow()` will converge. + */ + // overridden to provide taint-tracking specific qldoc + override predicate isSink(DataFlow::Node sink, DataFlow::FlowState state) { none() } + + /** Holds if the node `node` is a taint sanitizer. */ + predicate isSanitizer(DataFlow::Node node) { none() } + + final override predicate isBarrier(DataFlow::Node node) { + this.isSanitizer(node) or + defaultTaintSanitizer(node) + } + + /** + * Holds if the node `node` is a taint sanitizer when the flow state is + * `state`. + */ + predicate isSanitizer(DataFlow::Node node, DataFlow::FlowState state) { none() } + + final override predicate isBarrier(DataFlow::Node node, DataFlow::FlowState state) { + this.isSanitizer(node, state) + } + + /** Holds if taint propagation into `node` is prohibited. */ + predicate isSanitizerIn(DataFlow::Node node) { none() } + + final override predicate isBarrierIn(DataFlow::Node node) { this.isSanitizerIn(node) } + + /** Holds if taint propagation out of `node` is prohibited. */ + predicate isSanitizerOut(DataFlow::Node node) { none() } + + final override predicate isBarrierOut(DataFlow::Node node) { this.isSanitizerOut(node) } + + /** + * DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead. + * + * Holds if taint propagation through nodes guarded by `guard` is prohibited. + */ + deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard) { none() } + + deprecated final override predicate isBarrierGuard(DataFlow::BarrierGuard guard) { + this.isSanitizerGuard(guard) + } + + /** + * DEPRECATED: Use `isSanitizer` and `BarrierGuard` module instead. + * + * Holds if taint propagation through nodes guarded by `guard` is prohibited + * when the flow state is `state`. + */ + deprecated predicate isSanitizerGuard(DataFlow::BarrierGuard guard, DataFlow::FlowState state) { + none() + } + + deprecated final override predicate isBarrierGuard( + DataFlow::BarrierGuard guard, DataFlow::FlowState state + ) { + this.isSanitizerGuard(guard, state) + } + + /** + * Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps. + */ + predicate isAdditionalTaintStep(DataFlow::Node node1, DataFlow::Node node2) { none() } + + final override predicate isAdditionalFlowStep(DataFlow::Node node1, DataFlow::Node node2) { + this.isAdditionalTaintStep(node1, node2) or + defaultAdditionalTaintStep(node1, node2) + } + + /** + * Holds if taint may propagate from `node1` to `node2` in addition to the normal data-flow and taint steps. + * This step is only applicable in `state1` and updates the flow state to `state2`. + */ + predicate isAdditionalTaintStep( + DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2, + DataFlow::FlowState state2 + ) { + none() + } + + final override predicate isAdditionalFlowStep( + DataFlow::Node node1, DataFlow::FlowState state1, DataFlow::Node node2, + DataFlow::FlowState state2 + ) { + this.isAdditionalTaintStep(node1, state1, node2, state2) + } + + override predicate allowImplicitRead(DataFlow::Node node, DataFlow::ContentSet c) { + (this.isSink(node) or this.isAdditionalTaintStep(node, _)) and + defaultImplicitTaintRead(node, c) + } + + /** + * Holds if taint may flow from `source` to `sink` for this configuration. + */ + // overridden to provide taint-tracking specific qldoc + override predicate hasFlow(DataFlow::Node source, DataFlow::Node sink) { + super.hasFlow(source, sink) + } +} diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingParameter.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingParameter.qll new file mode 100644 index 000000000000..3e69abcf4a78 --- /dev/null +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/internal/tainttracking3/TaintTrackingParameter.qll @@ -0,0 +1,5 @@ +import experimental.semmle.code.cpp.ir.dataflow.internal.TaintTrackingUtil as Public + +module Private { + import experimental.semmle.code.cpp.ir.dataflow.DataFlow3::DataFlow3 as DataFlow +} From 5509562fe616af7c18ca8ac74d434a9e0059c414 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Fri, 26 Aug 2022 14:55:20 +0100 Subject: [PATCH 2/6] C++: Repair a few broken models that were incorrectly a pointer as tainted (instead of the pointee), or vice versa. Because of existing dataflow pointer/pointee conflation we never noticed that, but since this PR removes those imprecisions we now need to update these models. --- .../lib/semmle/code/cpp/models/implementations/Iterator.qll | 2 +- .../semmle/code/cpp/models/implementations/StdContainer.qll | 2 +- .../semmle/code/cpp/models/implementations/StdString.qll | 6 +++--- .../lib/semmle/code/cpp/models/implementations/Strcat.qll | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/models/implementations/Iterator.qll b/cpp/ql/lib/semmle/code/cpp/models/implementations/Iterator.qll index 2fa9803d0533..8d0e97dc2ec3 100644 --- a/cpp/ql/lib/semmle/code/cpp/models/implementations/Iterator.qll +++ b/cpp/ql/lib/semmle/code/cpp/models/implementations/Iterator.qll @@ -223,7 +223,7 @@ private class IteratorCrementMemberOperator extends MemberFunction, DataFlowFunc output.isQualifierObject() or input.isQualifierObject() and - output.isReturnValueDeref() + output.isReturnValue() } override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) { diff --git a/cpp/ql/lib/semmle/code/cpp/models/implementations/StdContainer.qll b/cpp/ql/lib/semmle/code/cpp/models/implementations/StdContainer.qll index c93a5ad147b3..8c531891bcda 100644 --- a/cpp/ql/lib/semmle/code/cpp/models/implementations/StdContainer.qll +++ b/cpp/ql/lib/semmle/code/cpp/models/implementations/StdContainer.qll @@ -176,7 +176,7 @@ private class StdSequenceContainerInsert extends TaintFunction { ) and ( output.isQualifierObject() or - output.isReturnValueDeref() + output.isReturnValue() ) } } diff --git a/cpp/ql/lib/semmle/code/cpp/models/implementations/StdString.qll b/cpp/ql/lib/semmle/code/cpp/models/implementations/StdString.qll index ae190688b701..b5c18a8e09fd 100644 --- a/cpp/ql/lib/semmle/code/cpp/models/implementations/StdString.qll +++ b/cpp/ql/lib/semmle/code/cpp/models/implementations/StdString.qll @@ -176,7 +176,7 @@ private class StdStringAppend extends TaintFunction { ) and ( output.isQualifierObject() or - output.isReturnValueDeref() + output.isReturnValue() ) or // reverse flow from returned reference to the qualifier (for writes to @@ -543,11 +543,11 @@ private class StdOStreamOutNonMember extends DataFlowFunction, TaintFunction { override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) { // flow from second parameter to first parameter - input.isParameter(1) and + input.isParameterDeref(1) and output.isParameterDeref(0) or // flow from second parameter to return value - input.isParameter(1) and + input.isParameterDeref(1) and output.isReturnValueDeref() or // reverse flow from returned reference to the first parameter diff --git a/cpp/ql/lib/semmle/code/cpp/models/implementations/Strcat.qll b/cpp/ql/lib/semmle/code/cpp/models/implementations/Strcat.qll index 1a65e7b6ca47..e729c3cb0a41 100644 --- a/cpp/ql/lib/semmle/code/cpp/models/implementations/Strcat.qll +++ b/cpp/ql/lib/semmle/code/cpp/models/implementations/Strcat.qll @@ -61,7 +61,7 @@ class StrcatFunction extends TaintFunction, DataFlowFunction, ArrayFunction, Sid input.isParameterDeref(0) and output.isParameterDeref(0) or - input.isParameter(1) and + input.isParameterDeref(1) and output.isParameterDeref(0) } From 6dcfe0348b760f3bc4cfc6cd1a19e754da720023 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Fri, 9 Sep 2022 17:06:21 +0100 Subject: [PATCH 3/6] C++: Copy over the required changes to non-experimental libraries. --- .../code/cpp/ir/dataflow/TaintTracking.qll | 2 +- .../code/cpp/ir/dataflow/TaintTracking2.qll | 2 +- .../code/cpp/ir/dataflow/TaintTracking3.qll | 2 +- .../raw/internal/SideEffects.qll | 10 +- .../interfaces/FunctionInputsAndOutputs.qll | 141 ++++++++++++++++-- 5 files changed, 141 insertions(+), 16 deletions(-) diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking.qll index 7eada0bb2443..430641164993 100644 --- a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking.qll +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking.qll @@ -19,5 +19,5 @@ import semmle.code.cpp.ir.dataflow.DataFlow import semmle.code.cpp.ir.dataflow.DataFlow2 module TaintTracking { - import semmle.code.cpp.ir.dataflow.internal.tainttracking1.TaintTrackingImpl + import experimental.semmle.code.cpp.ir.dataflow.internal.tainttracking1.TaintTrackingImpl } diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking2.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking2.qll index 3ef03a3bd2cb..aa9fd392803b 100644 --- a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking2.qll +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking2.qll @@ -11,5 +11,5 @@ * See `semmle.code.cpp.ir.dataflow.TaintTracking` for the full documentation. */ module TaintTracking2 { - import semmle.code.cpp.ir.dataflow.internal.tainttracking2.TaintTrackingImpl + import experimental.semmle.code.cpp.ir.dataflow.internal.tainttracking2.TaintTrackingImpl } diff --git a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking3.qll b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking3.qll index 98e1caebf388..bcd35ccd02f0 100644 --- a/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking3.qll +++ b/cpp/ql/lib/experimental/semmle/code/cpp/ir/dataflow/TaintTracking3.qll @@ -11,5 +11,5 @@ * See `semmle.code.cpp.ir.dataflow.TaintTracking` for the full documentation. */ module TaintTracking3 { - import semmle.code.cpp.ir.dataflow.internal.tainttracking3.TaintTrackingImpl + import experimental.semmle.code.cpp.ir.dataflow.internal.tainttracking3.TaintTrackingImpl } diff --git a/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/SideEffects.qll b/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/SideEffects.qll index 93b20865cac8..684aa4f14f24 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/SideEffects.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/implementation/raw/internal/SideEffects.qll @@ -55,7 +55,15 @@ private predicate isDeeplyConstBelow(Type t) { isDeeplyConstBelow(t.(TypedefType).getBaseType()) } -private predicate isConstPointerLike(Type t) { +/** + * INTERNAL: Do not use. + * + * Holds if `t` is a pointer-like type (i.e., a pointer, + * an array a reference, or a pointer-wrapper such as + * `std::unique_ptr`) that is constant and only contains + * constant types, excluding the type itself. + */ +predicate isConstPointerLike(Type t) { ( t instanceof PointerWrapper or diff --git a/cpp/ql/lib/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll b/cpp/ql/lib/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll index 5e899be68d45..56746c928b0b 100644 --- a/cpp/ql/lib/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll +++ b/cpp/ql/lib/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll @@ -46,6 +46,26 @@ class FunctionInput extends TFunctionInput { */ deprecated final predicate isInParameter(ParameterIndex index) { this.isParameter(index) } + /** + * Holds if this is the input value pointed to by a pointer parameter to a function, or the input + * value referred to by a reference parameter to a function, where the parameter has index + * `index`. + * + * Example: + * ``` + * void func(int n, char* p, float& r); + * ``` + * - `isParameterDeref(1, 1)` holds for the `FunctionInput` that represents the value of `*p` (with + * type `char`) on entry to the function. + * - `isParameterDeref(2, 1)` holds for the `FunctionInput` that represents the value of `r` (with type + * `float`) on entry to the function. + * - There is no `FunctionInput` for which `isParameterDeref(0, _)` holds, because `n` is neither a + * pointer nor a reference. + */ + predicate isParameterDeref(ParameterIndex index, int ind) { + ind = 1 and this.isParameterDeref(index) + } + /** * Holds if this is the input value pointed to by a pointer parameter to a function, or the input * value referred to by a reference parameter to a function, where the parameter has index @@ -62,7 +82,7 @@ class FunctionInput extends TFunctionInput { * - There is no `FunctionInput` for which `isParameterDeref(0)` holds, because `n` is neither a * pointer nor a reference. */ - predicate isParameterDeref(ParameterIndex index) { none() } + predicate isParameterDeref(ParameterIndex index) { this.isParameterDeref(index, 1) } /** * Holds if this is the input value pointed to by a pointer parameter to a function, or the input @@ -87,7 +107,22 @@ class FunctionInput extends TFunctionInput { * - `isQualifierObject()` holds for the `FunctionInput` that represents the value of `*this` * (with type `C const`) on entry to the function. */ - predicate isQualifierObject() { none() } + predicate isQualifierObject(int ind) { ind = 1 and this.isQualifierObject() } + + /** + * Holds if this is the input value pointed to by the `this` pointer of an instance member + * function. + * + * Example: + * ``` + * struct C { + * void mfunc(int n, char* p, float& r) const; + * }; + * ``` + * - `isQualifierObject()` holds for the `FunctionInput` that represents the value of `*this` + * (with type `C const`) on entry to the function. + */ + predicate isQualifierObject() { this.isQualifierObject(1) } /** * Holds if this is the input value pointed to by the `this` pointer of an instance member @@ -143,16 +178,49 @@ class FunctionInput extends TFunctionInput { * rare, but they do occur when a function returns a reference to itself, * part of itself, or one of its other inputs. */ - predicate isReturnValueDeref() { none() } + predicate isReturnValueDeref() { this.isReturnValueDeref(1) } + + /** + * Holds if this is the input value pointed to by the return value of a + * function, if the function returns a pointer, or the input value referred + * to by the return value of a function, if the function returns a reference. + * + * Example: + * ``` + * char* getPointer(); + * float& getReference(); + * int getInt(); + * ``` + * - `isReturnValueDeref(1)` holds for the `FunctionInput` that represents the + * value of `*getPointer()` (with type `char`). + * - `isReturnValueDeref(1)` holds for the `FunctionInput` that represents the + * value of `getReference()` (with type `float`). + * - There is no `FunctionInput` of `getInt()` for which + * `isReturnValueDeref(_)` holds because the return type of `getInt()` is + * neither a pointer nor a reference. + * + * Note that data flows in through function return values are relatively + * rare, but they do occur when a function returns a reference to itself, + * part of itself, or one of its other inputs. + */ + predicate isReturnValueDeref(int ind) { ind = 1 and this.isReturnValueDeref() } /** * Holds if `i >= 0` and `isParameterDeref(i)` holds for this value, or * if `i = -1` and `isQualifierObject()` holds for this value. */ - final predicate isParameterDerefOrQualifierObject(ParameterIndex i) { - i >= 0 and this.isParameterDeref(i) + final predicate isParameterDerefOrQualifierObject(ParameterIndex i, int ind) { + i >= 0 and this.isParameterDeref(i, ind) or - i = -1 and this.isQualifierObject() + i = -1 and this.isQualifierObject(ind) + } + + /** + * Holds if `i >= 0` and `isParameterDeref(i)` holds for this value, or + * if `i = -1` and `isQualifierObject()` holds for this value. + */ + final predicate isParameterDerefOrQualifierObject(ParameterIndex i) { + this.isParameterDerefOrQualifierObject(i, 1) } } @@ -308,7 +376,9 @@ class FunctionOutput extends TFunctionOutput { * - There is no `FunctionOutput` for which `isParameterDeref(0)` holds, because `n` is neither a * pointer nor a reference. */ - predicate isParameterDeref(ParameterIndex i) { none() } + predicate isParameterDeref(ParameterIndex i) { this.isParameterDeref(i, 1) } + + predicate isParameterDeref(ParameterIndex i, int ind) { ind = 1 and this.isParameterDeref(i) } /** * Holds if this is the output value pointed to by a pointer parameter to a function, or the @@ -333,7 +403,22 @@ class FunctionOutput extends TFunctionOutput { * - `isQualifierObject()` holds for the `FunctionOutput` that represents the value of `*this` * (with type `C`) on return from the function. */ - predicate isQualifierObject() { none() } + predicate isQualifierObject() { this.isQualifierObject(1) } + + /** + * Holds if this is the output value pointed to by the `this` pointer of an instance member + * function. + * + * Example: + * ``` + * struct C { + * void mfunc(int n, char* p, float& r); + * }; + * ``` + * - `isQualifierObject()` holds for the `FunctionOutput` that represents the value of `*this` + * (with type `C`) on return from the function. + */ + predicate isQualifierObject(int ind) { ind = 1 and this.isQualifierObject() } /** * Holds if this is the output value pointed to by the `this` pointer of an instance member @@ -385,7 +470,27 @@ class FunctionOutput extends TFunctionOutput { * - There is no `FunctionOutput` of `getInt()` for which `isReturnValueDeref()` holds because the * return type of `getInt()` is neither a pointer nor a reference. */ - predicate isReturnValueDeref() { none() } + predicate isReturnValueDeref() { this.isReturnValueDeref(_) } + + /** + * Holds if this is the output value pointed to by the return value of a function, if the function + * returns a pointer, or the output value referred to by the return value of a function, if the + * function returns a reference. + * + * Example: + * ``` + * char* getPointer(); + * float& getReference(); + * int getInt(); + * ``` + * - `isReturnValueDeref(1)` holds for the `FunctionOutput` that represents the value of + * `*getPointer()` (with type `char`). + * - `isReturnValueDeref(1)` holds for the `FunctionOutput` that represents the value of + * `getReference()` (with type `float`). + * - There is no `FunctionOutput` of `getInt()` for which `isReturnValueDeref(_)` holds because the + * return type of `getInt()` is neither a pointer nor a reference. + */ + predicate isReturnValueDeref(int ind) { ind = 1 and this.isReturnValueDeref() } /** * Holds if this is the output value pointed to by the return value of a function, if the function @@ -399,10 +504,18 @@ class FunctionOutput extends TFunctionOutput { * Holds if `i >= 0` and `isParameterDeref(i)` holds for this is the value, or * if `i = -1` and `isQualifierObject()` holds for this value. */ - final predicate isParameterDerefOrQualifierObject(ParameterIndex i) { - i >= 0 and this.isParameterDeref(i) + final predicate isParameterDerefOrQualifierObject(ParameterIndex i, int ind) { + i >= 0 and this.isParameterDeref(i, ind) or - i = -1 and this.isQualifierObject() + i = -1 and this.isQualifierObject(ind) + } + + /** + * Holds if `i >= 0` and `isParameterDeref(i)` holds for this is the value, or + * if `i = -1` and `isQualifierObject()` holds for this value. + */ + final predicate isParameterDerefOrQualifierObject(ParameterIndex i) { + this.isParameterDerefOrQualifierObject(i, 1) } } @@ -431,6 +544,10 @@ class OutParameterDeref extends FunctionOutput, TOutParameterDeref { ParameterIndex getIndex() { result = index } override predicate isParameterDeref(ParameterIndex i) { i = index } + + override predicate isParameterDeref(ParameterIndex i, int ind) { + this.isParameterDeref(i) and ind = 1 + } } /** From c988547e9c13f53a9dc264eb453d4eb921572528 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Sun, 11 Sep 2022 18:31:53 +0100 Subject: [PATCH 4/6] C++: Accept test changes. --- .../dataflow/taint-tests/localTaint.expected | 36 +++++++++---------- 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected b/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected index 07bf9a85cf40..7029593ba225 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected @@ -3488,7 +3488,7 @@ | standalone_iterators.cpp:41:19:41:19 | call to operator++ | standalone_iterators.cpp:41:10:41:10 | call to operator* | TAINT | | standalone_iterators.cpp:42:12:42:12 | call to operator++ | standalone_iterators.cpp:42:10:42:10 | call to operator* | TAINT | | standalone_iterators.cpp:42:14:42:20 | ref arg source1 | standalone_iterators.cpp:39:45:39:51 | source1 | | -| standalone_iterators.cpp:42:14:42:20 | source1 | standalone_iterators.cpp:42:12:42:12 | call to operator++ | | +| standalone_iterators.cpp:42:14:42:20 | source1 | standalone_iterators.cpp:42:12:42:12 | call to operator++ | TAINT | | standalone_iterators.cpp:45:39:45:45 | source1 | standalone_iterators.cpp:45:39:45:45 | source1 | | | standalone_iterators.cpp:45:39:45:45 | source1 | standalone_iterators.cpp:46:11:46:17 | source1 | | | standalone_iterators.cpp:45:39:45:45 | source1 | standalone_iterators.cpp:47:12:47:18 | source1 | | @@ -3500,7 +3500,7 @@ | standalone_iterators.cpp:47:19:47:19 | call to operator++ | standalone_iterators.cpp:47:10:47:10 | call to operator* | TAINT | | standalone_iterators.cpp:48:12:48:12 | call to operator++ | standalone_iterators.cpp:48:10:48:10 | call to operator* | TAINT | | standalone_iterators.cpp:48:14:48:20 | ref arg source1 | standalone_iterators.cpp:45:39:45:45 | source1 | | -| standalone_iterators.cpp:48:14:48:20 | source1 | standalone_iterators.cpp:48:12:48:12 | call to operator++ | | +| standalone_iterators.cpp:48:14:48:20 | source1 | standalone_iterators.cpp:48:12:48:12 | call to operator++ | TAINT | | standalone_iterators.cpp:51:37:51:43 | source1 | standalone_iterators.cpp:52:11:52:17 | source1 | | | standalone_iterators.cpp:51:37:51:43 | source1 | standalone_iterators.cpp:53:12:53:18 | source1 | | | standalone_iterators.cpp:51:37:51:43 | source1 | standalone_iterators.cpp:54:14:54:20 | source1 | | @@ -3788,7 +3788,7 @@ | string.cpp:120:16:120:24 | call to basic_string | string.cpp:125:50:125:50 | s | | | string.cpp:120:16:120:24 | call to basic_string | string.cpp:129:16:129:16 | s | | | string.cpp:121:15:121:15 | (__begin) | string.cpp:121:15:121:15 | call to operator* | TAINT | -| string.cpp:121:15:121:15 | (__begin) | string.cpp:121:15:121:15 | call to operator++ | | +| string.cpp:121:15:121:15 | (__begin) | string.cpp:121:15:121:15 | call to operator++ | TAINT | | string.cpp:121:15:121:15 | (__end) | string.cpp:121:15:121:15 | call to iterator | | | string.cpp:121:15:121:15 | (__range) | string.cpp:121:15:121:15 | call to begin | TAINT | | string.cpp:121:15:121:15 | (__range) | string.cpp:121:15:121:15 | call to end | TAINT | @@ -3816,7 +3816,7 @@ | string.cpp:125:50:125:50 | ref arg s | string.cpp:125:50:125:50 | s | | | string.cpp:125:50:125:50 | ref arg s | string.cpp:129:16:129:16 | s | | | string.cpp:125:50:125:50 | s | string.cpp:125:52:125:54 | call to end | TAINT | -| string.cpp:125:61:125:62 | it | string.cpp:125:59:125:59 | call to operator++ | | +| string.cpp:125:61:125:62 | it | string.cpp:125:59:125:59 | call to operator++ | TAINT | | string.cpp:125:61:125:62 | ref arg it | string.cpp:125:44:125:45 | it | | | string.cpp:125:61:125:62 | ref arg it | string.cpp:125:61:125:62 | it | | | string.cpp:125:61:125:62 | ref arg it | string.cpp:126:9:126:10 | it | | @@ -3825,7 +3825,7 @@ | string.cpp:126:9:126:10 | ref arg it | string.cpp:125:61:125:62 | it | | | string.cpp:126:9:126:10 | ref arg it | string.cpp:126:9:126:10 | it | | | string.cpp:129:16:129:16 | (__begin) | string.cpp:129:16:129:16 | call to operator* | TAINT | -| string.cpp:129:16:129:16 | (__begin) | string.cpp:129:16:129:16 | call to operator++ | | +| string.cpp:129:16:129:16 | (__begin) | string.cpp:129:16:129:16 | call to operator++ | TAINT | | string.cpp:129:16:129:16 | (__end) | string.cpp:129:16:129:16 | call to iterator | | | string.cpp:129:16:129:16 | (__range) | string.cpp:129:16:129:16 | call to begin | TAINT | | string.cpp:129:16:129:16 | (__range) | string.cpp:129:16:129:16 | call to end | TAINT | @@ -3847,7 +3847,7 @@ | string.cpp:133:28:133:33 | call to source | string.cpp:133:28:133:36 | call to basic_string | TAINT | | string.cpp:133:28:133:36 | call to basic_string | string.cpp:134:22:134:28 | const_s | | | string.cpp:134:22:134:22 | (__begin) | string.cpp:134:22:134:22 | call to operator* | TAINT | -| string.cpp:134:22:134:22 | (__begin) | string.cpp:134:22:134:22 | call to operator++ | | +| string.cpp:134:22:134:22 | (__begin) | string.cpp:134:22:134:22 | call to operator++ | TAINT | | string.cpp:134:22:134:22 | (__range) | string.cpp:134:22:134:22 | call to begin | TAINT | | string.cpp:134:22:134:22 | (__range) | string.cpp:134:22:134:22 | call to end | TAINT | | string.cpp:134:22:134:22 | call to begin | string.cpp:134:22:134:22 | (__begin) | | @@ -4259,12 +4259,12 @@ | string.cpp:398:8:398:9 | i2 | string.cpp:399:12:399:13 | i3 | | | string.cpp:399:10:399:10 | call to operator++ | string.cpp:399:8:399:8 | call to operator* | TAINT | | string.cpp:399:10:399:10 | ref arg call to operator++ | string.cpp:399:12:399:13 | ref arg i3 | | -| string.cpp:399:12:399:13 | i3 | string.cpp:399:10:399:10 | call to operator++ | | +| string.cpp:399:12:399:13 | i3 | string.cpp:399:10:399:10 | call to operator++ | TAINT | | string.cpp:400:8:400:9 | i2 | string.cpp:400:3:400:9 | ... = ... | | | string.cpp:400:8:400:9 | i2 | string.cpp:401:12:401:13 | i4 | | | string.cpp:401:10:401:10 | call to operator-- | string.cpp:401:8:401:8 | call to operator* | TAINT | | string.cpp:401:10:401:10 | ref arg call to operator-- | string.cpp:401:12:401:13 | ref arg i4 | | -| string.cpp:401:12:401:13 | i4 | string.cpp:401:10:401:10 | call to operator-- | | +| string.cpp:401:12:401:13 | i4 | string.cpp:401:10:401:10 | call to operator-- | TAINT | | string.cpp:402:8:402:9 | i2 | string.cpp:402:3:402:9 | ... = ... | | | string.cpp:402:8:402:9 | i2 | string.cpp:403:3:403:4 | i5 | | | string.cpp:402:8:402:9 | i2 | string.cpp:404:9:404:10 | i5 | | @@ -4293,7 +4293,7 @@ | string.cpp:413:11:413:13 | call to end | string.cpp:413:3:413:15 | ... = ... | | | string.cpp:413:11:413:13 | call to end | string.cpp:414:5:414:6 | i9 | | | string.cpp:413:11:413:13 | call to end | string.cpp:415:9:415:10 | i9 | | -| string.cpp:414:5:414:6 | i9 | string.cpp:414:3:414:3 | call to operator-- | | +| string.cpp:414:5:414:6 | i9 | string.cpp:414:3:414:3 | call to operator-- | TAINT | | string.cpp:414:5:414:6 | ref arg i9 | string.cpp:415:9:415:10 | i9 | | | string.cpp:415:9:415:10 | i9 | string.cpp:415:8:415:8 | call to operator* | TAINT | | string.cpp:417:9:417:10 | i2 | string.cpp:417:3:417:10 | ... = ... | | @@ -6579,7 +6579,7 @@ | vector.cpp:17:21:17:33 | call to vector | vector.cpp:35:1:35:1 | v | | | vector.cpp:17:26:17:32 | source1 | vector.cpp:17:21:17:33 | call to vector | TAINT | | vector.cpp:19:14:19:14 | (__begin) | vector.cpp:19:14:19:14 | call to operator* | TAINT | -| vector.cpp:19:14:19:14 | (__begin) | vector.cpp:19:14:19:14 | call to operator++ | | +| vector.cpp:19:14:19:14 | (__begin) | vector.cpp:19:14:19:14 | call to operator++ | TAINT | | vector.cpp:19:14:19:14 | (__end) | vector.cpp:19:14:19:14 | call to iterator | | | vector.cpp:19:14:19:14 | (__range) | vector.cpp:19:14:19:14 | call to begin | TAINT | | vector.cpp:19:14:19:14 | (__range) | vector.cpp:19:14:19:14 | call to end | TAINT | @@ -6609,7 +6609,7 @@ | vector.cpp:23:55:23:55 | ref arg v | vector.cpp:27:15:27:15 | v | | | vector.cpp:23:55:23:55 | ref arg v | vector.cpp:35:1:35:1 | v | | | vector.cpp:23:55:23:55 | v | vector.cpp:23:57:23:59 | call to end | TAINT | -| vector.cpp:23:66:23:67 | it | vector.cpp:23:64:23:64 | call to operator++ | | +| vector.cpp:23:66:23:67 | it | vector.cpp:23:64:23:64 | call to operator++ | TAINT | | vector.cpp:23:66:23:67 | ref arg it | vector.cpp:23:49:23:50 | it | | | vector.cpp:23:66:23:67 | ref arg it | vector.cpp:23:66:23:67 | it | | | vector.cpp:23:66:23:67 | ref arg it | vector.cpp:24:9:24:10 | it | | @@ -6618,7 +6618,7 @@ | vector.cpp:24:9:24:10 | ref arg it | vector.cpp:23:66:23:67 | it | | | vector.cpp:24:9:24:10 | ref arg it | vector.cpp:24:9:24:10 | it | | | vector.cpp:27:15:27:15 | (__begin) | vector.cpp:27:15:27:15 | call to operator* | TAINT | -| vector.cpp:27:15:27:15 | (__begin) | vector.cpp:27:15:27:15 | call to operator++ | | +| vector.cpp:27:15:27:15 | (__begin) | vector.cpp:27:15:27:15 | call to operator++ | TAINT | | vector.cpp:27:15:27:15 | (__end) | vector.cpp:27:15:27:15 | call to iterator | | | vector.cpp:27:15:27:15 | (__range) | vector.cpp:27:15:27:15 | call to begin | TAINT | | vector.cpp:27:15:27:15 | (__range) | vector.cpp:27:15:27:15 | call to end | TAINT | @@ -6641,7 +6641,7 @@ | vector.cpp:31:33:31:45 | call to vector | vector.cpp:35:1:35:1 | const_v | | | vector.cpp:31:38:31:44 | source1 | vector.cpp:31:33:31:45 | call to vector | TAINT | | vector.cpp:32:21:32:21 | (__begin) | vector.cpp:32:21:32:21 | call to operator* | TAINT | -| vector.cpp:32:21:32:21 | (__begin) | vector.cpp:32:21:32:21 | call to operator++ | | +| vector.cpp:32:21:32:21 | (__begin) | vector.cpp:32:21:32:21 | call to operator++ | TAINT | | vector.cpp:32:21:32:21 | (__range) | vector.cpp:32:21:32:21 | call to begin | TAINT | | vector.cpp:32:21:32:21 | (__range) | vector.cpp:32:21:32:21 | call to end | TAINT | | vector.cpp:32:21:32:21 | call to begin | vector.cpp:32:21:32:21 | (__begin) | | @@ -7652,7 +7652,7 @@ | vector.cpp:344:56:344:57 | ref arg v2 | vector.cpp:347:7:347:8 | v2 | | | vector.cpp:344:56:344:57 | ref arg v2 | vector.cpp:415:1:415:1 | v2 | | | vector.cpp:344:56:344:57 | v2 | vector.cpp:344:59:344:61 | call to end | TAINT | -| vector.cpp:344:68:344:69 | it | vector.cpp:344:66:344:66 | call to operator++ | | +| vector.cpp:344:68:344:69 | it | vector.cpp:344:66:344:66 | call to operator++ | TAINT | | vector.cpp:344:68:344:69 | ref arg it | vector.cpp:344:50:344:51 | it | | | vector.cpp:344:68:344:69 | ref arg it | vector.cpp:344:68:344:69 | it | | | vector.cpp:344:68:344:69 | ref arg it | vector.cpp:345:4:345:5 | it | | @@ -7669,7 +7669,7 @@ | vector.cpp:345:9:345:14 | call to source | vector.cpp:345:3:345:16 | ... = ... | | | vector.cpp:347:7:347:8 | ref arg v2 | vector.cpp:415:1:415:1 | v2 | | | vector.cpp:349:15:349:15 | (__begin) | vector.cpp:349:15:349:15 | call to operator* | TAINT | -| vector.cpp:349:15:349:15 | (__begin) | vector.cpp:349:15:349:15 | call to operator++ | | +| vector.cpp:349:15:349:15 | (__begin) | vector.cpp:349:15:349:15 | call to operator++ | TAINT | | vector.cpp:349:15:349:15 | (__end) | vector.cpp:349:15:349:15 | call to iterator | | | vector.cpp:349:15:349:15 | (__range) | vector.cpp:349:15:349:15 | call to begin | TAINT | | vector.cpp:349:15:349:15 | (__range) | vector.cpp:349:15:349:15 | call to end | TAINT | @@ -7701,7 +7701,7 @@ | vector.cpp:354:56:354:57 | ref arg v4 | vector.cpp:357:7:357:8 | v4 | | | vector.cpp:354:56:354:57 | ref arg v4 | vector.cpp:415:1:415:1 | v4 | | | vector.cpp:354:56:354:57 | v4 | vector.cpp:354:59:354:61 | call to end | TAINT | -| vector.cpp:354:68:354:69 | it | vector.cpp:354:66:354:66 | call to operator++ | | +| vector.cpp:354:68:354:69 | it | vector.cpp:354:66:354:66 | call to operator++ | TAINT | | vector.cpp:354:68:354:69 | ref arg it | vector.cpp:354:50:354:51 | it | | | vector.cpp:354:68:354:69 | ref arg it | vector.cpp:354:68:354:69 | it | | | vector.cpp:354:68:354:69 | ref arg it | vector.cpp:355:32:355:33 | it | | @@ -7961,7 +7961,7 @@ | vector.cpp:442:3:442:3 | ref arg call to operator* | vector.cpp:444:2:444:2 | out | | | vector.cpp:442:4:442:4 | call to operator++ | vector.cpp:442:3:442:3 | call to operator* | TAINT | | vector.cpp:442:4:442:4 | ref arg call to operator++ | vector.cpp:442:6:442:7 | ref arg it | | -| vector.cpp:442:6:442:7 | it | vector.cpp:442:4:442:4 | call to operator++ | | +| vector.cpp:442:6:442:7 | it | vector.cpp:442:4:442:4 | call to operator++ | TAINT | | vector.cpp:442:11:442:36 | call to basic_string | vector.cpp:442:3:442:3 | ref arg call to operator* | TAINT | | vector.cpp:442:23:442:35 | source_string | vector.cpp:442:11:442:36 | call to basic_string | TAINT | | vector.cpp:443:8:443:10 | ref arg out | vector.cpp:444:2:444:2 | out | | @@ -7976,7 +7976,7 @@ | vector.cpp:449:3:449:3 | ref arg call to operator* | vector.cpp:451:2:451:2 | out | | | vector.cpp:449:4:449:4 | call to operator++ | vector.cpp:449:3:449:3 | call to operator* | TAINT | | vector.cpp:449:4:449:4 | ref arg call to operator++ | vector.cpp:449:6:449:7 | ref arg it | | -| vector.cpp:449:6:449:7 | it | vector.cpp:449:4:449:4 | call to operator++ | | +| vector.cpp:449:6:449:7 | it | vector.cpp:449:4:449:4 | call to operator++ | TAINT | | vector.cpp:449:11:449:16 | call to source | vector.cpp:449:3:449:3 | ref arg call to operator* | TAINT | | vector.cpp:450:8:450:10 | ref arg out | vector.cpp:451:2:451:2 | out | | | vector.cpp:467:22:467:25 | call to vector | vector.cpp:471:8:471:8 | v | | From bb1c088fe08330879da7aa74dcbf8751a02674eb Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 12 Sep 2022 15:52:27 +0100 Subject: [PATCH 5/6] C++: Undo changes to iterator models. --- .../cpp/models/implementations/Iterator.qll | 2 +- .../cpp/models/implementations/StdString.qll | 2 +- .../dataflow/taint-tests/localTaint.expected | 36 +++++++++---------- 3 files changed, 20 insertions(+), 20 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/models/implementations/Iterator.qll b/cpp/ql/lib/semmle/code/cpp/models/implementations/Iterator.qll index 8d0e97dc2ec3..2fa9803d0533 100644 --- a/cpp/ql/lib/semmle/code/cpp/models/implementations/Iterator.qll +++ b/cpp/ql/lib/semmle/code/cpp/models/implementations/Iterator.qll @@ -223,7 +223,7 @@ private class IteratorCrementMemberOperator extends MemberFunction, DataFlowFunc output.isQualifierObject() or input.isQualifierObject() and - output.isReturnValue() + output.isReturnValueDeref() } override predicate hasTaintFlow(FunctionInput input, FunctionOutput output) { diff --git a/cpp/ql/lib/semmle/code/cpp/models/implementations/StdString.qll b/cpp/ql/lib/semmle/code/cpp/models/implementations/StdString.qll index b5c18a8e09fd..3d2eda597998 100644 --- a/cpp/ql/lib/semmle/code/cpp/models/implementations/StdString.qll +++ b/cpp/ql/lib/semmle/code/cpp/models/implementations/StdString.qll @@ -176,7 +176,7 @@ private class StdStringAppend extends TaintFunction { ) and ( output.isQualifierObject() or - output.isReturnValue() + output.isReturnValueDeref() ) or // reverse flow from returned reference to the qualifier (for writes to diff --git a/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected b/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected index 7029593ba225..07bf9a85cf40 100644 --- a/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected +++ b/cpp/ql/test/library-tests/dataflow/taint-tests/localTaint.expected @@ -3488,7 +3488,7 @@ | standalone_iterators.cpp:41:19:41:19 | call to operator++ | standalone_iterators.cpp:41:10:41:10 | call to operator* | TAINT | | standalone_iterators.cpp:42:12:42:12 | call to operator++ | standalone_iterators.cpp:42:10:42:10 | call to operator* | TAINT | | standalone_iterators.cpp:42:14:42:20 | ref arg source1 | standalone_iterators.cpp:39:45:39:51 | source1 | | -| standalone_iterators.cpp:42:14:42:20 | source1 | standalone_iterators.cpp:42:12:42:12 | call to operator++ | TAINT | +| standalone_iterators.cpp:42:14:42:20 | source1 | standalone_iterators.cpp:42:12:42:12 | call to operator++ | | | standalone_iterators.cpp:45:39:45:45 | source1 | standalone_iterators.cpp:45:39:45:45 | source1 | | | standalone_iterators.cpp:45:39:45:45 | source1 | standalone_iterators.cpp:46:11:46:17 | source1 | | | standalone_iterators.cpp:45:39:45:45 | source1 | standalone_iterators.cpp:47:12:47:18 | source1 | | @@ -3500,7 +3500,7 @@ | standalone_iterators.cpp:47:19:47:19 | call to operator++ | standalone_iterators.cpp:47:10:47:10 | call to operator* | TAINT | | standalone_iterators.cpp:48:12:48:12 | call to operator++ | standalone_iterators.cpp:48:10:48:10 | call to operator* | TAINT | | standalone_iterators.cpp:48:14:48:20 | ref arg source1 | standalone_iterators.cpp:45:39:45:45 | source1 | | -| standalone_iterators.cpp:48:14:48:20 | source1 | standalone_iterators.cpp:48:12:48:12 | call to operator++ | TAINT | +| standalone_iterators.cpp:48:14:48:20 | source1 | standalone_iterators.cpp:48:12:48:12 | call to operator++ | | | standalone_iterators.cpp:51:37:51:43 | source1 | standalone_iterators.cpp:52:11:52:17 | source1 | | | standalone_iterators.cpp:51:37:51:43 | source1 | standalone_iterators.cpp:53:12:53:18 | source1 | | | standalone_iterators.cpp:51:37:51:43 | source1 | standalone_iterators.cpp:54:14:54:20 | source1 | | @@ -3788,7 +3788,7 @@ | string.cpp:120:16:120:24 | call to basic_string | string.cpp:125:50:125:50 | s | | | string.cpp:120:16:120:24 | call to basic_string | string.cpp:129:16:129:16 | s | | | string.cpp:121:15:121:15 | (__begin) | string.cpp:121:15:121:15 | call to operator* | TAINT | -| string.cpp:121:15:121:15 | (__begin) | string.cpp:121:15:121:15 | call to operator++ | TAINT | +| string.cpp:121:15:121:15 | (__begin) | string.cpp:121:15:121:15 | call to operator++ | | | string.cpp:121:15:121:15 | (__end) | string.cpp:121:15:121:15 | call to iterator | | | string.cpp:121:15:121:15 | (__range) | string.cpp:121:15:121:15 | call to begin | TAINT | | string.cpp:121:15:121:15 | (__range) | string.cpp:121:15:121:15 | call to end | TAINT | @@ -3816,7 +3816,7 @@ | string.cpp:125:50:125:50 | ref arg s | string.cpp:125:50:125:50 | s | | | string.cpp:125:50:125:50 | ref arg s | string.cpp:129:16:129:16 | s | | | string.cpp:125:50:125:50 | s | string.cpp:125:52:125:54 | call to end | TAINT | -| string.cpp:125:61:125:62 | it | string.cpp:125:59:125:59 | call to operator++ | TAINT | +| string.cpp:125:61:125:62 | it | string.cpp:125:59:125:59 | call to operator++ | | | string.cpp:125:61:125:62 | ref arg it | string.cpp:125:44:125:45 | it | | | string.cpp:125:61:125:62 | ref arg it | string.cpp:125:61:125:62 | it | | | string.cpp:125:61:125:62 | ref arg it | string.cpp:126:9:126:10 | it | | @@ -3825,7 +3825,7 @@ | string.cpp:126:9:126:10 | ref arg it | string.cpp:125:61:125:62 | it | | | string.cpp:126:9:126:10 | ref arg it | string.cpp:126:9:126:10 | it | | | string.cpp:129:16:129:16 | (__begin) | string.cpp:129:16:129:16 | call to operator* | TAINT | -| string.cpp:129:16:129:16 | (__begin) | string.cpp:129:16:129:16 | call to operator++ | TAINT | +| string.cpp:129:16:129:16 | (__begin) | string.cpp:129:16:129:16 | call to operator++ | | | string.cpp:129:16:129:16 | (__end) | string.cpp:129:16:129:16 | call to iterator | | | string.cpp:129:16:129:16 | (__range) | string.cpp:129:16:129:16 | call to begin | TAINT | | string.cpp:129:16:129:16 | (__range) | string.cpp:129:16:129:16 | call to end | TAINT | @@ -3847,7 +3847,7 @@ | string.cpp:133:28:133:33 | call to source | string.cpp:133:28:133:36 | call to basic_string | TAINT | | string.cpp:133:28:133:36 | call to basic_string | string.cpp:134:22:134:28 | const_s | | | string.cpp:134:22:134:22 | (__begin) | string.cpp:134:22:134:22 | call to operator* | TAINT | -| string.cpp:134:22:134:22 | (__begin) | string.cpp:134:22:134:22 | call to operator++ | TAINT | +| string.cpp:134:22:134:22 | (__begin) | string.cpp:134:22:134:22 | call to operator++ | | | string.cpp:134:22:134:22 | (__range) | string.cpp:134:22:134:22 | call to begin | TAINT | | string.cpp:134:22:134:22 | (__range) | string.cpp:134:22:134:22 | call to end | TAINT | | string.cpp:134:22:134:22 | call to begin | string.cpp:134:22:134:22 | (__begin) | | @@ -4259,12 +4259,12 @@ | string.cpp:398:8:398:9 | i2 | string.cpp:399:12:399:13 | i3 | | | string.cpp:399:10:399:10 | call to operator++ | string.cpp:399:8:399:8 | call to operator* | TAINT | | string.cpp:399:10:399:10 | ref arg call to operator++ | string.cpp:399:12:399:13 | ref arg i3 | | -| string.cpp:399:12:399:13 | i3 | string.cpp:399:10:399:10 | call to operator++ | TAINT | +| string.cpp:399:12:399:13 | i3 | string.cpp:399:10:399:10 | call to operator++ | | | string.cpp:400:8:400:9 | i2 | string.cpp:400:3:400:9 | ... = ... | | | string.cpp:400:8:400:9 | i2 | string.cpp:401:12:401:13 | i4 | | | string.cpp:401:10:401:10 | call to operator-- | string.cpp:401:8:401:8 | call to operator* | TAINT | | string.cpp:401:10:401:10 | ref arg call to operator-- | string.cpp:401:12:401:13 | ref arg i4 | | -| string.cpp:401:12:401:13 | i4 | string.cpp:401:10:401:10 | call to operator-- | TAINT | +| string.cpp:401:12:401:13 | i4 | string.cpp:401:10:401:10 | call to operator-- | | | string.cpp:402:8:402:9 | i2 | string.cpp:402:3:402:9 | ... = ... | | | string.cpp:402:8:402:9 | i2 | string.cpp:403:3:403:4 | i5 | | | string.cpp:402:8:402:9 | i2 | string.cpp:404:9:404:10 | i5 | | @@ -4293,7 +4293,7 @@ | string.cpp:413:11:413:13 | call to end | string.cpp:413:3:413:15 | ... = ... | | | string.cpp:413:11:413:13 | call to end | string.cpp:414:5:414:6 | i9 | | | string.cpp:413:11:413:13 | call to end | string.cpp:415:9:415:10 | i9 | | -| string.cpp:414:5:414:6 | i9 | string.cpp:414:3:414:3 | call to operator-- | TAINT | +| string.cpp:414:5:414:6 | i9 | string.cpp:414:3:414:3 | call to operator-- | | | string.cpp:414:5:414:6 | ref arg i9 | string.cpp:415:9:415:10 | i9 | | | string.cpp:415:9:415:10 | i9 | string.cpp:415:8:415:8 | call to operator* | TAINT | | string.cpp:417:9:417:10 | i2 | string.cpp:417:3:417:10 | ... = ... | | @@ -6579,7 +6579,7 @@ | vector.cpp:17:21:17:33 | call to vector | vector.cpp:35:1:35:1 | v | | | vector.cpp:17:26:17:32 | source1 | vector.cpp:17:21:17:33 | call to vector | TAINT | | vector.cpp:19:14:19:14 | (__begin) | vector.cpp:19:14:19:14 | call to operator* | TAINT | -| vector.cpp:19:14:19:14 | (__begin) | vector.cpp:19:14:19:14 | call to operator++ | TAINT | +| vector.cpp:19:14:19:14 | (__begin) | vector.cpp:19:14:19:14 | call to operator++ | | | vector.cpp:19:14:19:14 | (__end) | vector.cpp:19:14:19:14 | call to iterator | | | vector.cpp:19:14:19:14 | (__range) | vector.cpp:19:14:19:14 | call to begin | TAINT | | vector.cpp:19:14:19:14 | (__range) | vector.cpp:19:14:19:14 | call to end | TAINT | @@ -6609,7 +6609,7 @@ | vector.cpp:23:55:23:55 | ref arg v | vector.cpp:27:15:27:15 | v | | | vector.cpp:23:55:23:55 | ref arg v | vector.cpp:35:1:35:1 | v | | | vector.cpp:23:55:23:55 | v | vector.cpp:23:57:23:59 | call to end | TAINT | -| vector.cpp:23:66:23:67 | it | vector.cpp:23:64:23:64 | call to operator++ | TAINT | +| vector.cpp:23:66:23:67 | it | vector.cpp:23:64:23:64 | call to operator++ | | | vector.cpp:23:66:23:67 | ref arg it | vector.cpp:23:49:23:50 | it | | | vector.cpp:23:66:23:67 | ref arg it | vector.cpp:23:66:23:67 | it | | | vector.cpp:23:66:23:67 | ref arg it | vector.cpp:24:9:24:10 | it | | @@ -6618,7 +6618,7 @@ | vector.cpp:24:9:24:10 | ref arg it | vector.cpp:23:66:23:67 | it | | | vector.cpp:24:9:24:10 | ref arg it | vector.cpp:24:9:24:10 | it | | | vector.cpp:27:15:27:15 | (__begin) | vector.cpp:27:15:27:15 | call to operator* | TAINT | -| vector.cpp:27:15:27:15 | (__begin) | vector.cpp:27:15:27:15 | call to operator++ | TAINT | +| vector.cpp:27:15:27:15 | (__begin) | vector.cpp:27:15:27:15 | call to operator++ | | | vector.cpp:27:15:27:15 | (__end) | vector.cpp:27:15:27:15 | call to iterator | | | vector.cpp:27:15:27:15 | (__range) | vector.cpp:27:15:27:15 | call to begin | TAINT | | vector.cpp:27:15:27:15 | (__range) | vector.cpp:27:15:27:15 | call to end | TAINT | @@ -6641,7 +6641,7 @@ | vector.cpp:31:33:31:45 | call to vector | vector.cpp:35:1:35:1 | const_v | | | vector.cpp:31:38:31:44 | source1 | vector.cpp:31:33:31:45 | call to vector | TAINT | | vector.cpp:32:21:32:21 | (__begin) | vector.cpp:32:21:32:21 | call to operator* | TAINT | -| vector.cpp:32:21:32:21 | (__begin) | vector.cpp:32:21:32:21 | call to operator++ | TAINT | +| vector.cpp:32:21:32:21 | (__begin) | vector.cpp:32:21:32:21 | call to operator++ | | | vector.cpp:32:21:32:21 | (__range) | vector.cpp:32:21:32:21 | call to begin | TAINT | | vector.cpp:32:21:32:21 | (__range) | vector.cpp:32:21:32:21 | call to end | TAINT | | vector.cpp:32:21:32:21 | call to begin | vector.cpp:32:21:32:21 | (__begin) | | @@ -7652,7 +7652,7 @@ | vector.cpp:344:56:344:57 | ref arg v2 | vector.cpp:347:7:347:8 | v2 | | | vector.cpp:344:56:344:57 | ref arg v2 | vector.cpp:415:1:415:1 | v2 | | | vector.cpp:344:56:344:57 | v2 | vector.cpp:344:59:344:61 | call to end | TAINT | -| vector.cpp:344:68:344:69 | it | vector.cpp:344:66:344:66 | call to operator++ | TAINT | +| vector.cpp:344:68:344:69 | it | vector.cpp:344:66:344:66 | call to operator++ | | | vector.cpp:344:68:344:69 | ref arg it | vector.cpp:344:50:344:51 | it | | | vector.cpp:344:68:344:69 | ref arg it | vector.cpp:344:68:344:69 | it | | | vector.cpp:344:68:344:69 | ref arg it | vector.cpp:345:4:345:5 | it | | @@ -7669,7 +7669,7 @@ | vector.cpp:345:9:345:14 | call to source | vector.cpp:345:3:345:16 | ... = ... | | | vector.cpp:347:7:347:8 | ref arg v2 | vector.cpp:415:1:415:1 | v2 | | | vector.cpp:349:15:349:15 | (__begin) | vector.cpp:349:15:349:15 | call to operator* | TAINT | -| vector.cpp:349:15:349:15 | (__begin) | vector.cpp:349:15:349:15 | call to operator++ | TAINT | +| vector.cpp:349:15:349:15 | (__begin) | vector.cpp:349:15:349:15 | call to operator++ | | | vector.cpp:349:15:349:15 | (__end) | vector.cpp:349:15:349:15 | call to iterator | | | vector.cpp:349:15:349:15 | (__range) | vector.cpp:349:15:349:15 | call to begin | TAINT | | vector.cpp:349:15:349:15 | (__range) | vector.cpp:349:15:349:15 | call to end | TAINT | @@ -7701,7 +7701,7 @@ | vector.cpp:354:56:354:57 | ref arg v4 | vector.cpp:357:7:357:8 | v4 | | | vector.cpp:354:56:354:57 | ref arg v4 | vector.cpp:415:1:415:1 | v4 | | | vector.cpp:354:56:354:57 | v4 | vector.cpp:354:59:354:61 | call to end | TAINT | -| vector.cpp:354:68:354:69 | it | vector.cpp:354:66:354:66 | call to operator++ | TAINT | +| vector.cpp:354:68:354:69 | it | vector.cpp:354:66:354:66 | call to operator++ | | | vector.cpp:354:68:354:69 | ref arg it | vector.cpp:354:50:354:51 | it | | | vector.cpp:354:68:354:69 | ref arg it | vector.cpp:354:68:354:69 | it | | | vector.cpp:354:68:354:69 | ref arg it | vector.cpp:355:32:355:33 | it | | @@ -7961,7 +7961,7 @@ | vector.cpp:442:3:442:3 | ref arg call to operator* | vector.cpp:444:2:444:2 | out | | | vector.cpp:442:4:442:4 | call to operator++ | vector.cpp:442:3:442:3 | call to operator* | TAINT | | vector.cpp:442:4:442:4 | ref arg call to operator++ | vector.cpp:442:6:442:7 | ref arg it | | -| vector.cpp:442:6:442:7 | it | vector.cpp:442:4:442:4 | call to operator++ | TAINT | +| vector.cpp:442:6:442:7 | it | vector.cpp:442:4:442:4 | call to operator++ | | | vector.cpp:442:11:442:36 | call to basic_string | vector.cpp:442:3:442:3 | ref arg call to operator* | TAINT | | vector.cpp:442:23:442:35 | source_string | vector.cpp:442:11:442:36 | call to basic_string | TAINT | | vector.cpp:443:8:443:10 | ref arg out | vector.cpp:444:2:444:2 | out | | @@ -7976,7 +7976,7 @@ | vector.cpp:449:3:449:3 | ref arg call to operator* | vector.cpp:451:2:451:2 | out | | | vector.cpp:449:4:449:4 | call to operator++ | vector.cpp:449:3:449:3 | call to operator* | TAINT | | vector.cpp:449:4:449:4 | ref arg call to operator++ | vector.cpp:449:6:449:7 | ref arg it | | -| vector.cpp:449:6:449:7 | it | vector.cpp:449:4:449:4 | call to operator++ | TAINT | +| vector.cpp:449:6:449:7 | it | vector.cpp:449:4:449:4 | call to operator++ | | | vector.cpp:449:11:449:16 | call to source | vector.cpp:449:3:449:3 | ref arg call to operator* | TAINT | | vector.cpp:450:8:450:10 | ref arg out | vector.cpp:451:2:451:2 | out | | | vector.cpp:467:22:467:25 | call to vector | vector.cpp:471:8:471:8 | v | | From d2b150eaf59f0f4f258ea5d8c1bc9c599d74b175 Mon Sep 17 00:00:00 2001 From: Mathias Vorreiter Pedersen Date: Mon, 12 Sep 2022 16:00:49 +0100 Subject: [PATCH 6/6] C++: Fix QLDoc on the model predicates used by the new experimental use-use code. --- .../interfaces/FunctionInputsAndOutputs.qll | 30 ++++++++++++++----- 1 file changed, 23 insertions(+), 7 deletions(-) diff --git a/cpp/ql/lib/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll b/cpp/ql/lib/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll index 56746c928b0b..de44913a39ff 100644 --- a/cpp/ql/lib/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll +++ b/cpp/ql/lib/semmle/code/cpp/models/interfaces/FunctionInputsAndOutputs.qll @@ -47,9 +47,9 @@ class FunctionInput extends TFunctionInput { deprecated final predicate isInParameter(ParameterIndex index) { this.isParameter(index) } /** - * Holds if this is the input value pointed to by a pointer parameter to a function, or the input - * value referred to by a reference parameter to a function, where the parameter has index - * `index`. + * Holds if this is the input value pointed to (through `ind` number of indirections) by a + * pointer parameter to a function, or the input value referred to by a reference parameter + * to a function, where the parameter has index `index`. * * Example: * ``` @@ -206,8 +206,8 @@ class FunctionInput extends TFunctionInput { predicate isReturnValueDeref(int ind) { ind = 1 and this.isReturnValueDeref() } /** - * Holds if `i >= 0` and `isParameterDeref(i)` holds for this value, or - * if `i = -1` and `isQualifierObject()` holds for this value. + * Holds if `i >= 0` and `isParameterDeref(i, ind)` holds for this value, or + * if `i = -1` and `isQualifierObject(ind)` holds for this value. */ final predicate isParameterDerefOrQualifierObject(ParameterIndex i, int ind) { i >= 0 and this.isParameterDeref(i, ind) @@ -378,6 +378,22 @@ class FunctionOutput extends TFunctionOutput { */ predicate isParameterDeref(ParameterIndex i) { this.isParameterDeref(i, 1) } + /** + * Holds if this is the output value pointed to by a pointer parameter (through `ind` number + * of indirections) to a function, or the output value referred to by a reference parameter to + * a function, where the parameter has index `index`. + * + * Example: + * ``` + * void func(int n, char* p, float& r); + * ``` + * - `isParameterDeref(1, 1)` holds for the `FunctionOutput` that represents the value of `*p` (with + * type `char`) on return from the function. + * - `isParameterDeref(2, 1)` holds for the `FunctionOutput` that represents the value of `r` (with + * type `float`) on return from the function. + * - There is no `FunctionOutput` for which `isParameterDeref(0, _)` holds, because `n` is neither a + * pointer nor a reference. + */ predicate isParameterDeref(ParameterIndex i, int ind) { ind = 1 and this.isParameterDeref(i) } /** @@ -501,8 +517,8 @@ class FunctionOutput extends TFunctionOutput { deprecated final predicate isOutReturnPointer() { this.isReturnValueDeref() } /** - * Holds if `i >= 0` and `isParameterDeref(i)` holds for this is the value, or - * if `i = -1` and `isQualifierObject()` holds for this value. + * Holds if `i >= 0` and `isParameterDeref(i, ind)` holds for this is the value, or + * if `i = -1` and `isQualifierObject(ind)` holds for this value. */ final predicate isParameterDerefOrQualifierObject(ParameterIndex i, int ind) { i >= 0 and this.isParameterDeref(i, ind)