FunctionCognitiveComplexityCheck.cpp 19.4 KB
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542
//===--- FunctionCognitiveComplexityCheck.cpp - clang-tidy ------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "FunctionCognitiveComplexityCheck.h"
#include "../ClangTidyDiagnosticConsumer.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclBase.h"
#include "clang/AST/Expr.h"
#include "clang/AST/RecursiveASTVisitor.h"
#include "clang/AST/Stmt.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/ASTMatchers/ASTMatchers.h"
#include "clang/ASTMatchers/ASTMatchersInternal.h"
#include "clang/Basic/Diagnostic.h"
#include "clang/Basic/DiagnosticIDs.h"
#include "clang/Basic/LLVM.h"
#include "clang/Basic/SourceLocation.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include <array>
#include <cassert>
#include <stack>
#include <tuple>
#include <type_traits>
#include <utility>

using namespace clang::ast_matchers;

namespace clang {
namespace tidy {
namespace readability {
namespace {

struct CognitiveComplexity final {
  // Any increment is based on some combination of reasons.
  // For details you can look at the Specification at
  // https://www.sonarsource.com/docs/CognitiveComplexity.pdf
  // or user-facing docs at
  // http://clang.llvm.org/extra/clang-tidy/checks/readability-function-cognitive-complexity.html
  // Here are all the possible reasons:
  enum Criteria : uint8_t {
    None = 0U,

    // B1, increases cognitive complexity (by 1)
    // What causes it:
    // * if, else if, else, ConditionalOperator (not BinaryConditionalOperator)
    // * SwitchStmt
    // * ForStmt, CXXForRangeStmt
    // * WhileStmt, DoStmt
    // * CXXCatchStmt
    // * GotoStmt, IndirectGotoStmt (but not BreakStmt, ContinueStmt)
    // * sequences of binary logical operators (BinOpLAnd, BinOpLOr)
    // * each method in a recursion cycle (not implemented)
    Increment = 1U << 0,

    // B2, increases current nesting level (by 1)
    // What causes it:
    // * if, else if, else, ConditionalOperator (not BinaryConditionalOperator)
    // * SwitchStmt
    // * ForStmt, CXXForRangeStmt
    // * WhileStmt, DoStmt
    // * CXXCatchStmt
    // * nested CXXConstructor, CXXDestructor, CXXMethod (incl. C++11 Lambda)
    // * GNU Statement Expression
    // * Apple Block declaration
    IncrementNesting = 1U << 1,

    // B3, increases cognitive complexity by the current nesting level
    // Applied before IncrementNesting
    // What causes it:
    // * IfStmt, ConditionalOperator (not BinaryConditionalOperator)
    // * SwitchStmt
    // * ForStmt, CXXForRangeStmt
    // * WhileStmt, DoStmt
    // * CXXCatchStmt
    PenalizeNesting = 1U << 2,

    All = Increment | PenalizeNesting | IncrementNesting,
  };

  // The helper struct used to record one increment occurrence, with all the
  // details nessesary.
  struct Detail {
    const SourceLocation Loc;     // What caused the increment?
    const unsigned short Nesting; // How deeply nested is Loc located?
    const Criteria C;             // The criteria of the increment

    Detail(SourceLocation SLoc, unsigned short CurrentNesting, Criteria Crit)
        : Loc(SLoc), Nesting(CurrentNesting), C(Crit) {}

    // To minimize the sizeof(Detail), we only store the minimal info there.
    // This function is used to convert from the stored info into the usable
    // information - what message to output, how much of an increment did this
    // occurrence actually result in.
    std::pair<unsigned, unsigned short> process() const {
      assert(C != Criteria::None && "invalid criteria");

      unsigned MsgId;           // The id of the message to output.
      unsigned short Increment; // How much of an increment?

      if (C == Criteria::All) {
        Increment = 1 + Nesting;
        MsgId = 0;
      } else if (C == (Criteria::Increment | Criteria::IncrementNesting)) {
        Increment = 1;
        MsgId = 1;
      } else if (C == Criteria::Increment) {
        Increment = 1;
        MsgId = 2;
      } else if (C == Criteria::IncrementNesting) {
        Increment = 0; // Unused in this message.
        MsgId = 3;
      } else
        llvm_unreachable("should not get to here.");

      return std::make_pair(MsgId, Increment);
    }
  };

  // Limit of 25 is the "upstream"'s default.
  static constexpr unsigned DefaultLimit = 25U;

  // Based on the publicly-avaliable numbers for some big open-source projects
  // https://sonarcloud.io/projects?languages=c%2Ccpp&size=5   we can estimate:
  // value ~20 would result in no allocs for 98% of functions, ~12 for 96%, ~10
  // for 91%, ~8 for 88%, ~6 for 84%, ~4 for 77%, ~2 for 64%, and ~1 for 37%.
  static_assert(sizeof(Detail) <= 8,
                "Since we use SmallVector to minimize the amount of "
                "allocations, we also need to consider the price we pay for "
                "that in terms of stack usage. "
                "Thus, it is good to minimize the size of the Detail struct.");
  SmallVector<Detail, DefaultLimit> Details; // 25 elements is 200 bytes.
  // Yes, 25 is a magic number. This is the seemingly-sane default for the
  // upper limit for function cognitive complexity. Thus it would make sense
  // to avoid allocations for any function that does not violate the limit.

  // The grand total Cognitive Complexity of the function.
  unsigned Total = 0;

  // The function used to store new increment, calculate the total complexity.
  void account(SourceLocation Loc, unsigned short Nesting, Criteria C);
};

// All the possible messages that can be output. The choice of the message
// to use is based of the combination of the CognitiveComplexity::Criteria.
// It would be nice to have it in CognitiveComplexity struct, but then it is
// not static.
static const std::array<const StringRef, 4> Msgs = {{
    // B1 + B2 + B3
    "+%0, including nesting penalty of %1, nesting level increased to %2",

    // B1 + B2
    "+%0, nesting level increased to %2",

    // B1
    "+%0",

    // B2
    "nesting level increased to %2",
}};

// Criteria is a bitset, thus a few helpers are needed.
CognitiveComplexity::Criteria operator|(CognitiveComplexity::Criteria LHS,
                                        CognitiveComplexity::Criteria RHS) {
  return static_cast<CognitiveComplexity::Criteria>(
      static_cast<std::underlying_type<CognitiveComplexity::Criteria>::type>(
          LHS) |
      static_cast<std::underlying_type<CognitiveComplexity::Criteria>::type>(
          RHS));
}
CognitiveComplexity::Criteria operator&(CognitiveComplexity::Criteria LHS,
                                        CognitiveComplexity::Criteria RHS) {
  return static_cast<CognitiveComplexity::Criteria>(
      static_cast<std::underlying_type<CognitiveComplexity::Criteria>::type>(
          LHS) &
      static_cast<std::underlying_type<CognitiveComplexity::Criteria>::type>(
          RHS));
}
CognitiveComplexity::Criteria &operator|=(CognitiveComplexity::Criteria &LHS,
                                          CognitiveComplexity::Criteria RHS) {
  LHS = operator|(LHS, RHS);
  return LHS;
}
CognitiveComplexity::Criteria &operator&=(CognitiveComplexity::Criteria &LHS,
                                          CognitiveComplexity::Criteria RHS) {
  LHS = operator&(LHS, RHS);
  return LHS;
}

void CognitiveComplexity::account(SourceLocation Loc, unsigned short Nesting,
                                  Criteria C) {
  C &= Criteria::All;
  assert(C != Criteria::None && "invalid criteria");

  Details.emplace_back(Loc, Nesting, C);
  const Detail &D = Details.back();

  unsigned MsgId;
  unsigned short Increase;
  std::tie(MsgId, Increase) = D.process();

  Total += Increase;
}

class FunctionASTVisitor final
    : public RecursiveASTVisitor<FunctionASTVisitor> {
  using Base = RecursiveASTVisitor<FunctionASTVisitor>;

  // The current nesting level (increased by Criteria::IncrementNesting).
  unsigned short CurrentNestingLevel = 0;

  // Used to efficiently know the last type of the binary sequence operator
  // that was encountered. It would make sense for the function call to start
  // the new sequence, thus it is a stack.
  using OBO = Optional<BinaryOperator::Opcode>;
  std::stack<OBO, SmallVector<OBO, 4>> BinaryOperatorsStack;

public:
  bool TraverseStmtWithIncreasedNestingLevel(Stmt *Node) {
    ++CurrentNestingLevel;
    bool ShouldContinue = Base::TraverseStmt(Node);
    --CurrentNestingLevel;
    return ShouldContinue;
  }

  bool TraverseDeclWithIncreasedNestingLevel(Decl *Node) {
    ++CurrentNestingLevel;
    bool ShouldContinue = Base::TraverseDecl(Node);
    --CurrentNestingLevel;
    return ShouldContinue;
  }

  bool TraverseIfStmt(IfStmt *Node, bool InElseIf = false) {
    if (!Node)
      return Base::TraverseIfStmt(Node);

    {
      CognitiveComplexity::Criteria Reasons;

      Reasons = CognitiveComplexity::Criteria::None;

      // "If" increases cognitive complexity.
      Reasons |= CognitiveComplexity::Criteria::Increment;
      // "If" increases nesting level.
      Reasons |= CognitiveComplexity::Criteria::IncrementNesting;

      if (!InElseIf) {
        // "If" receives a nesting increment commensurate with it's nested
        // depth, if it is not part of "else if".
        Reasons |= CognitiveComplexity::Criteria::PenalizeNesting;
      }

      CC.account(Node->getIfLoc(), CurrentNestingLevel, Reasons);
    }

    // If this IfStmt is *NOT* "else if", then only the body (i.e. "Then" and
    // "Else") is traversed with increased Nesting level.
    // However if this IfStmt *IS* "else if", then Nesting level is increased
    // for the whole IfStmt (i.e. for "Init", "Cond", "Then" and "Else").

    if (!InElseIf) {
      if (!TraverseStmt(Node->getInit()))
        return false;

      if (!TraverseStmt(Node->getCond()))
        return false;
    } else {
      if (!TraverseStmtWithIncreasedNestingLevel(Node->getInit()))
        return false;

      if (!TraverseStmtWithIncreasedNestingLevel(Node->getCond()))
        return false;
    }

    // "Then" always increases nesting level.
    if (!TraverseStmtWithIncreasedNestingLevel(Node->getThen()))
      return false;

    if (!Node->getElse())
      return true;

    if (auto *E = dyn_cast<IfStmt>(Node->getElse()))
      return TraverseIfStmt(E, true);

    {
      CognitiveComplexity::Criteria Reasons;

      Reasons = CognitiveComplexity::Criteria::None;

      // "Else" increases cognitive complexity.
      Reasons |= CognitiveComplexity::Criteria::Increment;
      // "Else" increases nesting level.
      Reasons |= CognitiveComplexity::Criteria::IncrementNesting;
      // "Else" DOES NOT receive a nesting increment commensurate with it's
      // nested depth.

      CC.account(Node->getElseLoc(), CurrentNestingLevel, Reasons);
    }

    // "Else" always increases nesting level.
    return TraverseStmtWithIncreasedNestingLevel(Node->getElse());
  }

// The currently-being-processed stack entry, which is always the top.
#define CurrentBinaryOperator BinaryOperatorsStack.top()

  // In a sequence of binary logical operators, if the new operator is different
  // from the previous one, then the cognitive complexity is increased.
  bool TraverseBinaryOperator(BinaryOperator *Op) {
    if (!Op || !Op->isLogicalOp())
      return Base::TraverseBinaryOperator(Op);

    // Make sure that there is always at least one frame in the stack.
    if (BinaryOperatorsStack.empty())
      BinaryOperatorsStack.emplace();

    // If this is the first binary operator that we are processing, or the
    // previous binary operator was different, there is an increment.
    if (!CurrentBinaryOperator || Op->getOpcode() != CurrentBinaryOperator)
      CC.account(Op->getOperatorLoc(), CurrentNestingLevel,
                 CognitiveComplexity::Criteria::Increment);

    // We might encounter a function call, which starts a new sequence, thus
    // we need to save the current previous binary operator.
    const Optional<BinaryOperator::Opcode> BinOpCopy(CurrentBinaryOperator);

    // Record the operator that we are currently processing and traverse it.
    CurrentBinaryOperator = Op->getOpcode();
    bool ShouldContinue = Base::TraverseBinaryOperator(Op);

    // And restore the previous binary operator, which might be nonexistent.
    CurrentBinaryOperator = BinOpCopy;

    return ShouldContinue;
  }

  // It would make sense for the function call to start the new binary
  // operator sequence, thus let's make sure that it creates a new stack frame.
  bool TraverseCallExpr(CallExpr *Node) {
    // If we are not currently processing any binary operator sequence, then
    // no Node-handling is needed.
    if (!Node || BinaryOperatorsStack.empty() || !CurrentBinaryOperator)
      return Base::TraverseCallExpr(Node);

    // Else, do add [uninitialized] frame to the stack, and traverse call.
    BinaryOperatorsStack.emplace();
    bool ShouldContinue = Base::TraverseCallExpr(Node);
    // And remove the top frame.
    BinaryOperatorsStack.pop();

    return ShouldContinue;
  }

#undef CurrentBinaryOperator

  bool TraverseStmt(Stmt *Node) {
    if (!Node)
      return Base::TraverseStmt(Node);

    // Three following switch()'es have huge duplication, but it is better to
    // keep them separate, to simplify comparing them with the Specification.

    CognitiveComplexity::Criteria Reasons = CognitiveComplexity::Criteria::None;
    SourceLocation Location = Node->getBeginLoc();

    // B1. Increments
    // There is an increment for each of the following:
    switch (Node->getStmtClass()) {
    // if, else if, else are handled in TraverseIfStmt(),
    // FIXME: "each method in a recursion cycle" Increment is not implemented.
    case Stmt::ConditionalOperatorClass:
    case Stmt::SwitchStmtClass:
    case Stmt::ForStmtClass:
    case Stmt::CXXForRangeStmtClass:
    case Stmt::WhileStmtClass:
    case Stmt::DoStmtClass:
    case Stmt::CXXCatchStmtClass:
    case Stmt::GotoStmtClass:
    case Stmt::IndirectGotoStmtClass:
      Reasons |= CognitiveComplexity::Criteria::Increment;
      break;
    default:
      // break LABEL, continue LABEL increase cognitive complexity,
      // but they are not supported in C++ or C.
      // Regular break/continue do not increase cognitive complexity.
      break;
    }

    // B2. Nesting level
    // The following structures increment the nesting level:
    switch (Node->getStmtClass()) {
    // if, else if, else are handled in TraverseIfStmt(),
    // Nested methods and such are handled in TraverseDecl.
    case Stmt::ConditionalOperatorClass:
    case Stmt::SwitchStmtClass:
    case Stmt::ForStmtClass:
    case Stmt::CXXForRangeStmtClass:
    case Stmt::WhileStmtClass:
    case Stmt::DoStmtClass:
    case Stmt::CXXCatchStmtClass:
    case Stmt::LambdaExprClass:
    case Stmt::StmtExprClass:
      Reasons |= CognitiveComplexity::Criteria::IncrementNesting;
      break;
    default:
      break;
    }

    // B3. Nesting increments
    // The following structures receive a nesting increment
    // commensurate with their nested depth inside B2 structures:
    switch (Node->getStmtClass()) {
    // if, else if, else are handled in TraverseIfStmt().
    case Stmt::ConditionalOperatorClass:
    case Stmt::SwitchStmtClass:
    case Stmt::ForStmtClass:
    case Stmt::CXXForRangeStmtClass:
    case Stmt::WhileStmtClass:
    case Stmt::DoStmtClass:
    case Stmt::CXXCatchStmtClass:
      Reasons |= CognitiveComplexity::Criteria::PenalizeNesting;
      break;
    default:
      break;
    }

    if (Node->getStmtClass() == Stmt::ConditionalOperatorClass) {
      // A little beautification.
      // For conditional operator "cond ? true : false" point at the "?"
      // symbol.
      ConditionalOperator *COp = dyn_cast<ConditionalOperator>(Node);
      Location = COp->getQuestionLoc();
    }

    // If we have found any reasons, let's account it.
    if (Reasons & CognitiveComplexity::Criteria::All)
      CC.account(Location, CurrentNestingLevel, Reasons);

    // Did we decide that the nesting level should be increased?
    if (!(Reasons & CognitiveComplexity::Criteria::IncrementNesting))
      return Base::TraverseStmt(Node);

    return TraverseStmtWithIncreasedNestingLevel(Node);
  }

  // The parameter MainAnalyzedFunction is needed to differentiate between the
  // cases where TraverseDecl() is the entry point from
  // FunctionCognitiveComplexityCheck::check() and the cases where it was called
  // from the FunctionASTVisitor itself. Explanation: if we get a function
  // definition (e.g. constructor, destructor, method), the Cognitive Complexity
  // specification states that the Nesting level shall be increased. But if this
  // function is the entry point, then the Nesting level should not be
  // increased. Thus that parameter is there and is used to fall-through
  // directly to traversing if this is the main function that is being analyzed.
  bool TraverseDecl(Decl *Node, bool MainAnalyzedFunction = false) {
    if (!Node || MainAnalyzedFunction)
      return Base::TraverseDecl(Node);

    // B2. Nesting level
    // The following structures increment the nesting level:
    switch (Node->getKind()) {
    case Decl::Function:
    case Decl::CXXMethod:
    case Decl::CXXConstructor:
    case Decl::CXXDestructor:
    case Decl::Block:
      break;
    default:
      // If this is something else, we use early return!
      return Base::TraverseDecl(Node);
      break;
    }

    CC.account(Node->getBeginLoc(), CurrentNestingLevel,
               CognitiveComplexity::Criteria::IncrementNesting);

    return TraverseDeclWithIncreasedNestingLevel(Node);
  }

  CognitiveComplexity CC;
};

} // namespace

FunctionCognitiveComplexityCheck::FunctionCognitiveComplexityCheck(
    StringRef Name, ClangTidyContext *Context)
    : ClangTidyCheck(Name, Context),
      Threshold(Options.get("Threshold", CognitiveComplexity::DefaultLimit)) {}

void FunctionCognitiveComplexityCheck::storeOptions(
    ClangTidyOptions::OptionMap &Opts) {
  Options.store(Opts, "Threshold", Threshold);
}

void FunctionCognitiveComplexityCheck::registerMatchers(MatchFinder *Finder) {
  Finder->addMatcher(
      functionDecl(
          allOf(isDefinition(), unless(anyOf(isDefaulted(), isDeleted(),
                                             isImplicit(), isInstantiated()))))
          .bind("func"),
      this);
}

void FunctionCognitiveComplexityCheck::check(
    const MatchFinder::MatchResult &Result) {
  const auto *Func = Result.Nodes.getNodeAs<FunctionDecl>("func");
  assert(Func->hasBody() && "The matchers should only match the functions that "
                            "have user-provided body.");

  FunctionASTVisitor Visitor;
  Visitor.TraverseDecl(const_cast<FunctionDecl *>(Func), true);

  if (Visitor.CC.Total <= Threshold)
    return;

  diag(Func->getLocation(),
       "function %0 has cognitive complexity of %1 (threshold %2)")
      << Func << Visitor.CC.Total << Threshold;

  // Output all the basic increments of complexity.
  for (const auto &Detail : Visitor.CC.Details) {
    unsigned MsgId;          // The id of the message to output.
    unsigned short Increase; // How much of an increment?
    std::tie(MsgId, Increase) = Detail.process();
    assert(MsgId < Msgs.size() && "MsgId should always be valid");
    // Increase, on the other hand, can be 0.

    diag(Detail.Loc, Msgs[MsgId], DiagnosticIDs::Note)
        << (unsigned)Increase << (unsigned)Detail.Nesting << 1 + Detail.Nesting;
  }
}

} // namespace readability
} // namespace tidy
} // namespace clang