StringIntegerAssignmentCheck.cpp 6.51 KB
//===--- StringIntegerAssignmentCheck.cpp - clang-tidy---------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "StringIntegerAssignmentCheck.h"
#include "clang/AST/ASTContext.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Lex/Lexer.h"

using namespace clang::ast_matchers;

namespace clang {
namespace tidy {
namespace bugprone {

void StringIntegerAssignmentCheck::registerMatchers(MatchFinder *Finder) {
  Finder->addMatcher(
      cxxOperatorCallExpr(
          hasAnyOverloadedOperatorName("=", "+="),
          callee(cxxMethodDecl(ofClass(classTemplateSpecializationDecl(
              hasName("::std::basic_string"),
              hasTemplateArgument(0, refersToType(hasCanonicalType(
                                         qualType().bind("type")))))))),
          hasArgument(
              1,
              ignoringImpCasts(
                  expr(hasType(isInteger()), unless(hasType(isAnyCharacter())),
                       // Ignore calls to tolower/toupper (see PR27723).
                       unless(callExpr(callee(functionDecl(
                           hasAnyName("tolower", "std::tolower", "toupper",
                                      "std::toupper"))))),
                       // Do not warn if assigning e.g. `CodePoint` to
                       // `basic_string<CodePoint>`
                       unless(hasType(qualType(
                           hasCanonicalType(equalsBoundNode("type"))))))
                      .bind("expr"))),
          unless(isInTemplateInstantiation())),
      this);
}

class CharExpressionDetector {
public:
  CharExpressionDetector(QualType CharType, const ASTContext &Ctx)
      : CharType(CharType), Ctx(Ctx) {}

  bool isLikelyCharExpression(const Expr *E) const {
    if (isCharTyped(E))
      return true;

    if (const auto *BinOp = dyn_cast<BinaryOperator>(E)) {
      const auto *LHS = BinOp->getLHS()->IgnoreParenImpCasts();
      const auto *RHS = BinOp->getRHS()->IgnoreParenImpCasts();
      // Handle both directions, e.g. `'a' + (i % 26)` and `(i % 26) + 'a'`.
      if (BinOp->isAdditiveOp() || BinOp->isBitwiseOp())
        return handleBinaryOp(BinOp->getOpcode(), LHS, RHS) ||
               handleBinaryOp(BinOp->getOpcode(), RHS, LHS);
      // Except in the case of '%'.
      if (BinOp->getOpcode() == BO_Rem)
        return handleBinaryOp(BinOp->getOpcode(), LHS, RHS);
      return false;
    }

    // Ternary where at least one branch is a likely char expression, e.g.
    //    i < 265 ? i : ' '
    if (const auto *CondOp = dyn_cast<AbstractConditionalOperator>(E))
      return isLikelyCharExpression(
                 CondOp->getFalseExpr()->IgnoreParenImpCasts()) ||
             isLikelyCharExpression(
                 CondOp->getTrueExpr()->IgnoreParenImpCasts());
    return false;
  }

private:
  bool handleBinaryOp(clang::BinaryOperatorKind Opcode, const Expr *const LHS,
                      const Expr *const RHS) const {
    // <char_expr> <op> <char_expr> (c++ integer promotion rules make this an
    // int), e.g.
    //    'a' + c
    if (isCharTyped(LHS) && isCharTyped(RHS))
      return true;

    // <expr> & <char_valued_constant> or <expr> % <char_valued_constant>, e.g.
    //    i & 0xff
    if ((Opcode == BO_And || Opcode == BO_Rem) && isCharValuedConstant(RHS))
      return true;

    // <char_expr> | <char_valued_constant>, e.g.
    //    c | 0x80
    if (Opcode == BO_Or && isCharTyped(LHS) && isCharValuedConstant(RHS))
      return true;

    // <char_constant> + <likely_char_expr>, e.g.
    //    'a' + (i % 26)
    if (Opcode == BO_Add)
      return isCharConstant(LHS) && isLikelyCharExpression(RHS);

    return false;
  }

  // Returns true if `E` is an character constant.
  bool isCharConstant(const Expr *E) const {
    return isCharTyped(E) && isCharValuedConstant(E);
  };

  // Returns true if `E` is an integer constant which fits in `CharType`.
  bool isCharValuedConstant(const Expr *E) const {
    if (E->isInstantiationDependent())
      return false;
    Expr::EvalResult EvalResult;
    if (!E->EvaluateAsInt(EvalResult, Ctx, Expr::SE_AllowSideEffects))
      return false;
    return EvalResult.Val.getInt().getActiveBits() <= Ctx.getTypeSize(CharType);
  };

  // Returns true if `E` has the right character type.
  bool isCharTyped(const Expr *E) const {
    return E->getType().getCanonicalType().getTypePtr() ==
           CharType.getTypePtr();
  };

  const QualType CharType;
  const ASTContext &Ctx;
};

void StringIntegerAssignmentCheck::check(
    const MatchFinder::MatchResult &Result) {
  const auto *Argument = Result.Nodes.getNodeAs<Expr>("expr");
  const auto CharType =
      Result.Nodes.getNodeAs<QualType>("type")->getCanonicalType();
  SourceLocation Loc = Argument->getBeginLoc();

  // Try to detect a few common expressions to reduce false positives.
  if (CharExpressionDetector(CharType, *Result.Context)
          .isLikelyCharExpression(Argument))
    return;

  auto Diag =
      diag(Loc, "an integer is interpreted as a character code when assigning "
                "it to a string; if this is intended, cast the integer to the "
                "appropriate character type; if you want a string "
                "representation, use the appropriate conversion facility");

  if (Loc.isMacroID())
    return;

  bool IsWideCharType = CharType->isWideCharType();
  if (!CharType->isCharType() && !IsWideCharType)
    return;
  bool IsOneDigit = false;
  bool IsLiteral = false;
  if (const auto *Literal = dyn_cast<IntegerLiteral>(Argument)) {
    IsOneDigit = Literal->getValue().getLimitedValue() < 10;
    IsLiteral = true;
  }

  SourceLocation EndLoc = Lexer::getLocForEndOfToken(
      Argument->getEndLoc(), 0, *Result.SourceManager, getLangOpts());
  if (IsOneDigit) {
    Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "L'" : "'")
         << FixItHint::CreateInsertion(EndLoc, "'");
    return;
  }
  if (IsLiteral) {
    Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "L\"" : "\"")
         << FixItHint::CreateInsertion(EndLoc, "\"");
    return;
  }

  if (getLangOpts().CPlusPlus11) {
    Diag << FixItHint::CreateInsertion(Loc, IsWideCharType ? "std::to_wstring("
                                                           : "std::to_string(")
         << FixItHint::CreateInsertion(EndLoc, ")");
  }
}

} // namespace bugprone
} // namespace tidy
} // namespace clang