[analyzer][NFCI] Allow clients of NoStateChangeFuncVisitor to check entire function calls, rather than each ExplodedNode in it

Szelethus · Szelethus · commit 7d0e62bfb773 · 2021-09-02T16:56:32.000+02:00
D105553 added NoStateChangeFuncVisitor, an abstract class to aid in creating notes such as "Returning without writing to 'x'", or "Returning without changing the ownership status of allocated memory". Its clients need to define, among other things, what a change of state is. For code like this: f() { g(); } foo() { f(); h(); } We'd have a path in the ExplodedGraph that looks like this: -- <g> --> / \ --- <f> --------> --- <h> ---> / \ / \ -------- <foo> ------ <foo> --> When we're interested in whether f neglected to change some property, NoStateChangeFuncVisitor asks these questions: ÷×~ -- <g> --> ß / \$ @&#* --- <f> --------> --- <h> ---> / \ / \ -------- <foo> ------ <foo> --> Has anything changed in between # and *? Has anything changed in between & and *? Has anything changed in between @ and *? ... Has anything changed in between $ and *? Has anything changed in between × and ~? Has anything changed in between ÷ and ~? ... Has anything changed in between ß and *? ... This is a rather thorough line of questioning, which is why in D105819, I was only interested in whether state *right before* and *right after* a function call changed, and early returned to the CallEnter location: if (!CurrN->getLocationAs<CallEnter>()) return; Except that I made a typo, and forgot to negate the condition. So, in this patch, I'm fixing that, and under the same hood allow all clients to decide to do this whole-function check instead of the thorough one. Differential Revision: https://reviews.llvm.org/D108695
diff --git a/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h b/clang/include/clang/StaticAnalyzer/Core/BugReporter/BugReporterVisitors.h
@@ -633,6 +633,9 @@ class CXXConstructorCall;
 /// Descendants can define what a "state change is", like a change of value
 /// to a memory region, liveness, etc. For function calls where the state did
 /// not change as defined, a custom note may be constructed.
+///
+/// For a minimal example, check out
+/// clang/unittests/StaticAnalyzer/NoStateChangeFuncVisitorTest.cpp.
 class NoStateChangeFuncVisitor : public BugReporterVisitor {
 private:
   /// Frames modifying the state as defined in \c wasModifiedBeforeCallExit.
@@ -643,6 +646,8 @@ class NoStateChangeFuncVisitor : public BugReporterVisitor {
   /// many times (going up the path for each node and checking whether the
   /// region was written into) we instead lazily compute the stack frames
   /// along the path.
+  // TODO: Can't we just use a map instead? This is likely not as cheap as it
+  // makes the code difficult to read.
   llvm::SmallPtrSet<const StackFrameContext *, 32> FramesModifying;
   llvm::SmallPtrSet<const StackFrameContext *, 32> FramesModifyingCalculated;
 
@@ -651,18 +656,46 @@ class NoStateChangeFuncVisitor : public BugReporterVisitor {
   /// The calculation is cached in FramesModifying.
   bool isModifiedInFrame(const ExplodedNode *CallExitBeginN);
 
+  void markFrameAsModifying(const StackFrameContext *SCtx);
+
   /// Write to \c FramesModifying all stack frames along the path in the current
   /// stack frame which modifies the state.
   void findModifyingFrames(const ExplodedNode *const CallExitBeginN);
 
 protected:
   bugreporter::TrackingKind TKind;
 
-  /// \return Whether the state was modified from the current node, \CurrN, to
-  /// the end of the stack fram, at \p CallExitBeginN.
-  virtual bool
-  wasModifiedBeforeCallExit(const ExplodedNode *CurrN,
-                            const ExplodedNode *CallExitBeginN) = 0;
+  /// \return Whether the state was modified from the current node, \p CurrN, to
+  /// the end of the stack frame, at \p CallExitBeginN. \p CurrN and
+  /// \p CallExitBeginN are always in the same stack frame.
+  /// Clients should override this callback when a state change is important
+  /// not only on the entire function call, but inside of it as well.
+  /// Example: we may want to leave a note about the lack of locking/unlocking
+  /// on a particular mutex, but not if inside the function its state was
+  /// changed, but also restored. wasModifiedInFunction() wouldn't know of this
+  /// change.
+  virtual bool wasModifiedBeforeCallExit(const ExplodedNode *CurrN,
+                                         const ExplodedNode *CallExitBeginN) {
+    return false;
+  }
+
+  /// \return Whether the state was modified in the inlined function call in
+  /// between \p CallEnterN and \p CallExitEndN. Mind that the stack frame
+  /// retrieved from a CallEnterN and CallExitEndN is the *caller's* stack
+  /// frame! The inlined function's stack should be retrieved from either the
+  /// immediate successor to \p CallEnterN or immediate predecessor to
+  /// \p CallExitEndN.
+  /// Clients should override this function if a state changes local to the
+  /// inlined function are not interesting, only the change occuring as a
+  /// result of it.
+  /// Example: we want to leave a not about a leaked resource object not being
+  /// deallocated / its ownership changed inside a function, and we don't care
+  /// if it was assigned to a local variable (its change in ownership is
+  /// inconsequential).
+  virtual bool wasModifiedInFunction(const ExplodedNode *CallEnterN,
+                                     const ExplodedNode *CallExitEndN) {
+    return false;
+  }
 
   /// Consume the information on the non-modifying stack frame in order to
   /// either emit a note or not. May suppress the report entirely.
@@ -702,7 +735,6 @@ class NoStateChangeFuncVisitor : public BugReporterVisitor {
 };
 
 } // namespace ento
-
 } // namespace clang
 
 #endif // LLVM_CLANG_STATICANALYZER_CORE_BUGREPORTER_BUGREPORTERVISITORS_H
diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp
@@ -52,6 +52,7 @@
 #include "clang/AST/Expr.h"
 #include "clang/AST/ExprCXX.h"
 #include "clang/AST/ParentMap.h"
+#include "clang/Analysis/ProgramPoint.h"
 #include "clang/Basic/LLVM.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
@@ -76,8 +77,10 @@
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Casting.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
 #include <climits>
 #include <functional>
 #include <utility>
@@ -755,6 +758,17 @@ class NoOwnershipChangeVisitor final : public NoStateChangeFuncVisitor {
         Owners.insert(Region);
       return true;
     }
+
+    LLVM_DUMP_METHOD void dump() const { dumpToStream(llvm::errs()); }
+    LLVM_DUMP_METHOD void dumpToStream(llvm::raw_ostream &out) const {
+      out << "Owners: {\n";
+      for (const MemRegion *Owner : Owners) {
+        out << "  ";
+        Owner->dumpToStream(out);
+        out << ",\n";
+      }
+      out << "}\n";
+    }
   };
 
 protected:
@@ -768,31 +782,24 @@ class NoOwnershipChangeVisitor final : public NoStateChangeFuncVisitor {
     return Ret;
   }
 
-  static const ExplodedNode *getCallExitEnd(const ExplodedNode *N) {
-    while (N && !N->getLocationAs<CallExitEnd>())
-      N = N->getFirstSucc();
-    return N;
+  LLVM_DUMP_METHOD static std::string
+  getFunctionName(const ExplodedNode *CallEnterN) {
+    if (const CallExpr *CE = llvm::dyn_cast_or_null<CallExpr>(
+            CallEnterN->getLocationAs<CallEnter>()->getCallExpr()))
+      if (const FunctionDecl *FD = CE->getDirectCallee())
+        return FD->getQualifiedNameAsString();
+    return "";
   }
 
   virtual bool
-  wasModifiedBeforeCallExit(const ExplodedNode *CurrN,
-                            const ExplodedNode *CallExitN) override {
-    if (CurrN->getLocationAs<CallEnter>())
-      return true;
-
-    // Its the state right *after* the call that is interesting. Any pointers
-    // inside the call that pointed to the allocated memory are of little
-    // consequence if their lifetime ends within the function.
-    CallExitN = getCallExitEnd(CallExitN);
-    if (!CallExitN)
-      return true;
-
-    if (CurrN->getState()->get<RegionState>(Sym) !=
-        CallExitN->getState()->get<RegionState>(Sym))
+  wasModifiedInFunction(const ExplodedNode *CallEnterN,
+                        const ExplodedNode *CallExitEndN) override {
+    if (CallEnterN->getState()->get<RegionState>(Sym) !=
+        CallExitEndN->getState()->get<RegionState>(Sym))
       return true;
 
-    OwnerSet CurrOwners = getOwnersAtNode(CurrN);
-    OwnerSet ExitOwners = getOwnersAtNode(CallExitN);
+    OwnerSet CurrOwners = getOwnersAtNode(CallEnterN);
+    OwnerSet ExitOwners = getOwnersAtNode(CallExitEndN);
 
     // Owners in the current set may be purged from the analyzer later on.
     // If a variable is dead (is not referenced directly or indirectly after
diff --git a/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp b/clang/lib/StaticAnalyzer/Core/BugReporterVisitors.cpp
@@ -355,43 +355,81 @@ bool NoStateChangeFuncVisitor::isModifiedInFrame(const ExplodedNode *N) {
   return FramesModifying.count(SCtx);
 }
 
+void NoStateChangeFuncVisitor::markFrameAsModifying(
+    const StackFrameContext *SCtx) {
+  while (SCtx) {
+    auto p = FramesModifying.insert(SCtx);
+    if (!p.second)
+      break; // Frame and all its parents already inserted.
+    SCtx = SCtx->getParent()->getStackFrame();
+  }
+}
+
+static const ExplodedNode *getMatchingCallExitEnd(const ExplodedNode *N) {
+  assert(N->getLocationAs<CallEnter>());
+  // The stackframe of the callee is only found in the nodes succeeding
+  // the CallEnter node. CallEnter's stack frame refers to the caller.
+  const StackFrameContext *OrigSCtx = N->getFirstSucc()->getStackFrame();
+
+  // Similarly, the nodes preceding CallExitEnd refer to the callee's stack
+  // frame.
+  auto IsMatchingCallExitEnd = [OrigSCtx](const ExplodedNode *N) {
+    return N->getLocationAs<CallExitEnd>() &&
+           OrigSCtx == N->getFirstPred()->getStackFrame();
+  };
+  while (N && !IsMatchingCallExitEnd(N)) {
+    assert(N->succ_size() <= 1 &&
+           "This function is to be used on the trimmed ExplodedGraph!");
+    N = N->getFirstSucc();
+  }
+  return N;
+}
+
 void NoStateChangeFuncVisitor::findModifyingFrames(
     const ExplodedNode *const CallExitBeginN) {
 
   assert(CallExitBeginN->getLocationAs<CallExitBegin>());
-  const ExplodedNode *LastReturnN = CallExitBeginN;
+
   const StackFrameContext *const OriginalSCtx =
       CallExitBeginN->getLocationContext()->getStackFrame();
 
-  const ExplodedNode *CurrN = CallExitBeginN;
-
-  do {
-    ProgramStateRef State = CurrN->getState();
-    auto CallExitLoc = CurrN->getLocationAs<CallExitBegin>();
-    if (CallExitLoc) {
-      LastReturnN = CurrN;
+  const ExplodedNode *CurrCallExitBeginN = CallExitBeginN;
+  const StackFrameContext *CurrentSCtx = OriginalSCtx;
+
+  for (const ExplodedNode *CurrN = CallExitBeginN; CurrN;
+       CurrN = CurrN->getFirstPred()) {
+    // Found a new inlined call.
+    if (CurrN->getLocationAs<CallExitBegin>()) {
+      CurrCallExitBeginN = CurrN;
+      CurrentSCtx = CurrN->getStackFrame();
+      FramesModifyingCalculated.insert(CurrentSCtx);
+      // We won't see a change in between two identical exploded nodes: skip.
+      continue;
     }
 
-    FramesModifyingCalculated.insert(
-        CurrN->getLocationContext()->getStackFrame());
-
-    if (wasModifiedBeforeCallExit(CurrN, LastReturnN)) {
-      const StackFrameContext *SCtx = CurrN->getStackFrame();
-      while (!SCtx->inTopFrame()) {
-        auto p = FramesModifying.insert(SCtx);
-        if (!p.second)
-          break; // Frame and all its parents already inserted.
-        SCtx = SCtx->getParent()->getStackFrame();
+    if (auto CE = CurrN->getLocationAs<CallEnter>()) {
+      if (const ExplodedNode *CallExitEndN = getMatchingCallExitEnd(CurrN))
+        if (wasModifiedInFunction(CurrN, CallExitEndN))
+          markFrameAsModifying(CurrentSCtx);
+
+      // We exited this inlined call, lets actualize the stack frame.
+      CurrentSCtx = CurrN->getStackFrame();
+
+      // Stop calculating at the current function, but always regard it as
+      // modifying, so we can avoid notes like this:
+      //   void f(Foo &F) {
+      //     F.field = 0; // note: 0 assigned to 'F.field'
+      //                  // note: returning without writing to 'F.field'
+      //   }
+      if (CE->getCalleeContext() == OriginalSCtx) {
+        markFrameAsModifying(CurrentSCtx);
+        break;
       }
     }
 
-    // Stop calculation at the call to the current function.
-    if (auto CE = CurrN->getLocationAs<CallEnter>())
-      if (CE->getCalleeContext() == OriginalSCtx)
-        break;
-
-    CurrN = CurrN->getFirstPred();
-  } while (CurrN);
+    if (wasModifiedBeforeCallExit(CurrN, CurrCallExitBeginN))
+      markFrameAsModifying(CurrentSCtx);
+  }
 }
 
 PathDiagnosticPieceRef NoStateChangeFuncVisitor::VisitNode(
diff --git a/clang/unittests/StaticAnalyzer/CMakeLists.txt b/clang/unittests/StaticAnalyzer/CMakeLists.txt
@@ -9,6 +9,7 @@ add_clang_unittest(StaticAnalysisTests
   CallDescriptionTest.cpp
   CallEventTest.cpp
   FalsePositiveRefutationBRVisitorTest.cpp
+  NoStateChangeFuncVisitorTest.cpp
   ParamRegionTest.cpp
   RangeSetTest.cpp
   RegisterCustomCheckersTest.cpp
diff --git a/clang/unittests/StaticAnalyzer/CallEventTest.cpp b/clang/unittests/StaticAnalyzer/CallEventTest.cpp
@@ -81,7 +81,7 @@ TEST(CXXDeallocatorCall, SimpleDestructor) {
     }
   )",
                                                          Diags));
-  EXPECT_EQ(Diags, "test.CXXDeallocator:NumArgs: 1\n");
+  EXPECT_EQ(Diags, "test.CXXDeallocator: NumArgs: 1\n");
 }
 
 } // namespace
diff --git a/clang/unittests/StaticAnalyzer/CheckerRegistration.h b/clang/unittests/StaticAnalyzer/CheckerRegistration.h
@@ -6,6 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "clang/Analysis/PathDiagnostic.h"
 #include "clang/Frontend/CompilerInstance.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
@@ -26,8 +27,23 @@ class DiagConsumer : public PathDiagnosticConsumer {
   DiagConsumer(llvm::raw_ostream &Output) : Output(Output) {}
   void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
                             FilesMade *filesMade) override {
-    for (const auto *PD : Diags)
-      Output << PD->getCheckerName() << ":" << PD->getShortDescription() << '\n';
+    for (const auto *PD : Diags) {
+      Output << PD->getCheckerName() << ": ";
+
+      for (PathDiagnosticPieceRef Piece :
+           PD->path.flatten(/*ShouldFlattenMacros*/ true)) {
+        if (Piece->getKind() != PathDiagnosticPiece::Event)
+          continue;
+        if (Piece->getString().empty())
+          continue;
+        // The last event is usually the same as the warning message, skip.
+        if (Piece->getString() == PD->getShortDescription())
+          continue;
+
+        Output << Piece->getString() << " | ";
+      }
+      Output << PD->getShortDescription() << '\n';
+    }
   }
 
   StringRef getName() const override { return "Test"; }
diff --git a/clang/unittests/StaticAnalyzer/FalsePositiveRefutationBRVisitorTest.cpp b/clang/unittests/StaticAnalyzer/FalsePositiveRefutationBRVisitorTest.cpp
@@ -128,16 +128,16 @@ TEST_F(FalsePositiveRefutationBRVisitorTestBase, UnSatInTheMiddleNoReport) {
   EXPECT_TRUE(runCheckerOnCodeWithArgs<addFalsePositiveGenerator>(
       Code, LazyAssumeAndCrossCheckArgs, Diags));
   EXPECT_EQ(Diags,
-            "test.FalsePositiveGenerator:REACHED_WITH_NO_CONTRADICTION\n");
+            "test.FalsePositiveGenerator: REACHED_WITH_NO_CONTRADICTION\n");
   // Single warning. The second report was invalidated by the visitor.
 
   // Without enabling the crosscheck-with-z3 both reports are displayed.
   std::string Diags2;
   EXPECT_TRUE(runCheckerOnCodeWithArgs<addFalsePositiveGenerator>(
       Code, LazyAssumeArgs, Diags2));
   EXPECT_EQ(Diags2,
-            "test.FalsePositiveGenerator:REACHED_WITH_NO_CONTRADICTION\n"
-            "test.FalsePositiveGenerator:REACHED_WITH_CONTRADICTION\n");
+            "test.FalsePositiveGenerator: REACHED_WITH_NO_CONTRADICTION\n"
+            "test.FalsePositiveGenerator: REACHED_WITH_CONTRADICTION\n");
 }
 
 TEST_F(FalsePositiveRefutationBRVisitorTestBase,
@@ -159,16 +159,16 @@ TEST_F(FalsePositiveRefutationBRVisitorTestBase,
   EXPECT_TRUE(runCheckerOnCodeWithArgs<addFalsePositiveGenerator>(
       Code, LazyAssumeAndCrossCheckArgs, Diags));
   EXPECT_EQ(Diags,
-            "test.FalsePositiveGenerator:REACHED_WITH_NO_CONTRADICTION\n");
+            "test.FalsePositiveGenerator: REACHED_WITH_NO_CONTRADICTION\n");
   // Single warning. The second report was invalidated by the visitor.
 
   // Without enabling the crosscheck-with-z3 both reports are displayed.
   std::string Diags2;
   EXPECT_TRUE(runCheckerOnCodeWithArgs<addFalsePositiveGenerator>(
       Code, LazyAssumeArgs, Diags2));
   EXPECT_EQ(Diags2,
-            "test.FalsePositiveGenerator:REACHED_WITH_NO_CONTRADICTION\n"
-            "test.FalsePositiveGenerator:CAN_BE_TRUE\n");
+            "test.FalsePositiveGenerator: REACHED_WITH_NO_CONTRADICTION\n"
+            "test.FalsePositiveGenerator: CAN_BE_TRUE\n");
 }
 
 TEST_F(FalsePositiveRefutationBRVisitorTestBase,
@@ -206,16 +206,16 @@ TEST_F(FalsePositiveRefutationBRVisitorTestBase,
   EXPECT_TRUE(runCheckerOnCodeWithArgs<addFalsePositiveGenerator>(
       Code, LazyAssumeAndCrossCheckArgs, Diags));
   EXPECT_EQ(Diags,
-            "test.FalsePositiveGenerator:REACHED_WITH_NO_CONTRADICTION\n");
+            "test.FalsePositiveGenerator: REACHED_WITH_NO_CONTRADICTION\n");
   // Single warning. The second report was invalidated by the visitor.
 
   // Without enabling the crosscheck-with-z3 both reports are displayed.
   std::string Diags2;
   EXPECT_TRUE(runCheckerOnCodeWithArgs<addFalsePositiveGenerator>(
       Code, LazyAssumeArgs, Diags2));
   EXPECT_EQ(Diags2,
-            "test.FalsePositiveGenerator:REACHED_WITH_NO_CONTRADICTION\n"
-            "test.FalsePositiveGenerator:CAN_BE_TRUE\n");
+            "test.FalsePositiveGenerator: REACHED_WITH_NO_CONTRADICTION\n"
+            "test.FalsePositiveGenerator: CAN_BE_TRUE\n");
 }
 
 } // namespace
diff --git a/clang/unittests/StaticAnalyzer/NoStateChangeFuncVisitorTest.cpp b/clang/unittests/StaticAnalyzer/NoStateChangeFuncVisitorTest.cpp
diff --git a/clang/unittests/StaticAnalyzer/RegisterCustomCheckersTest.cpp b/clang/unittests/StaticAnalyzer/RegisterCustomCheckersTest.cpp

Original file line number	Diff line number	Diff line change
`@@ -81,7 +81,7 @@ TEST(CXXDeallocatorCall, SimpleDestructor) {`
`81`	`81`	`}`
`82`	`82`	`)",`
`83`	`83`	`Diags));`
`84`		`- EXPECT_EQ(Diags, "test.CXXDeallocator:NumArgs: 1\n");`
	`84`	`+ EXPECT_EQ(Diags, "test.CXXDeallocator: NumArgs: 1\n");`
`85`	`85`	`}`
`86`	`86`
`87`	`87`	`} // namespace`