Parsing.cpp 9.55 KB
//===--- Parsing.cpp - Parsing function implementations ---------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "clang/Tooling/Transformer/Parsing.h"
#include "clang/AST/Expr.h"
#include "clang/ASTMatchers/ASTMatchFinder.h"
#include "clang/Basic/CharInfo.h"
#include "clang/Basic/SourceLocation.h"
#include "clang/Lex/Lexer.h"
#include "clang/Tooling/Transformer/RangeSelector.h"
#include "clang/Tooling/Transformer/SourceCode.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Errc.h"
#include "llvm/Support/Error.h"
#include <string>
#include <utility>
#include <vector>

using namespace clang;
using namespace transformer;

// FIXME: This implementation is entirely separate from that of the AST
// matchers. Given the similarity of the languages and uses of the two parsers,
// the two should share a common parsing infrastructure, as should other
// Transformer types. We intend to unify this implementation soon to share as
// much as possible with the AST Matchers parsing.

namespace {
using llvm::Error;
using llvm::Expected;

template <typename... Ts> using RangeSelectorOp = RangeSelector (*)(Ts...);

struct ParseState {
  // The remaining input to be processed.
  StringRef Input;
  // The original input. Not modified during parsing; only for reference in
  // error reporting.
  StringRef OriginalInput;
};

// Represents an intermediate result returned by a parsing function. Functions
// that don't generate values should use `llvm::None`
template <typename ResultType> struct ParseProgress {
  ParseState State;
  // Intermediate result generated by the Parser.
  ResultType Value;
};

template <typename T> using ExpectedProgress = llvm::Expected<ParseProgress<T>>;
template <typename T> using ParseFunction = ExpectedProgress<T> (*)(ParseState);

class ParseError : public llvm::ErrorInfo<ParseError> {
public:
  // Required field for all ErrorInfo derivatives.
  static char ID;

  ParseError(size_t Pos, std::string ErrorMsg, std::string InputExcerpt)
      : Pos(Pos), ErrorMsg(std::move(ErrorMsg)),
        Excerpt(std::move(InputExcerpt)) {}

  void log(llvm::raw_ostream &OS) const override {
    OS << "parse error at position (" << Pos << "): " << ErrorMsg
       << ": " + Excerpt;
  }

  std::error_code convertToErrorCode() const override {
    return llvm::inconvertibleErrorCode();
  }

  // Position of the error in the input string.
  size_t Pos;
  std::string ErrorMsg;
  // Excerpt of the input starting at the error position.
  std::string Excerpt;
};

char ParseError::ID;
} // namespace

static const llvm::StringMap<RangeSelectorOp<std::string>> &
getUnaryStringSelectors() {
  static const llvm::StringMap<RangeSelectorOp<std::string>> M = {
      {"name", name},
      {"node", node},
      {"statement", statement},
      {"statements", statements},
      {"member", member},
      {"callArgs", callArgs},
      {"elseBranch", elseBranch},
      {"initListElements", initListElements}};
  return M;
}

static const llvm::StringMap<RangeSelectorOp<RangeSelector>> &
getUnaryRangeSelectors() {
  static const llvm::StringMap<RangeSelectorOp<RangeSelector>> M = {
      {"before", before}, {"after", after}, {"expansion", expansion}};
  return M;
}

static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> &
getBinaryStringSelectors() {
  static const llvm::StringMap<RangeSelectorOp<std::string, std::string>> M = {
      {"encloseNodes", encloseNodes}};
  return M;
}

static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>> &
getBinaryRangeSelectors() {
  static const llvm::StringMap<RangeSelectorOp<RangeSelector, RangeSelector>>
      M = {{"enclose", enclose}, {"between", between}};
  return M;
}

template <typename Element>
llvm::Optional<Element> findOptional(const llvm::StringMap<Element> &Map,
                                     llvm::StringRef Key) {
  auto it = Map.find(Key);
  if (it == Map.end())
    return llvm::None;
  return it->second;
}

template <typename ResultType>
ParseProgress<ResultType> makeParseProgress(ParseState State,
                                            ResultType Result) {
  return ParseProgress<ResultType>{State, std::move(Result)};
}

static llvm::Error makeParseError(const ParseState &S, std::string ErrorMsg) {
  size_t Pos = S.OriginalInput.size() - S.Input.size();
  return llvm::make_error<ParseError>(Pos, std::move(ErrorMsg),
                                      S.OriginalInput.substr(Pos, 20).str());
}

// Returns a new ParseState that advances \c S by \c N characters.
static ParseState advance(ParseState S, size_t N) {
  S.Input = S.Input.drop_front(N);
  return S;
}

static StringRef consumeWhitespace(StringRef S) {
  return S.drop_while([](char c) { return isASCII(c) && isWhitespace(c); });
}

// Parses a single expected character \c c from \c State, skipping preceding
// whitespace.  Error if the expected character isn't found.
static ExpectedProgress<llvm::NoneType> parseChar(char c, ParseState State) {
  State.Input = consumeWhitespace(State.Input);
  if (State.Input.empty() || State.Input.front() != c)
    return makeParseError(State,
                          ("expected char not found: " + llvm::Twine(c)).str());
  return makeParseProgress(advance(State, 1), llvm::None);
}

// Parses an identitifer "token" -- handles preceding whitespace.
static ExpectedProgress<std::string> parseId(ParseState State) {
  State.Input = consumeWhitespace(State.Input);
  auto Id = State.Input.take_while(
      [](char c) { return isASCII(c) && isIdentifierBody(c); });
  if (Id.empty())
    return makeParseError(State, "failed to parse name");
  return makeParseProgress(advance(State, Id.size()), Id.str());
}

// For consistency with the AST matcher parser and C++ code, node ids are
// written as strings. However, we do not support escaping in the string.
static ExpectedProgress<std::string> parseStringId(ParseState State) {
  State.Input = consumeWhitespace(State.Input);
  if (State.Input.empty())
    return makeParseError(State, "unexpected end of input");
  if (!State.Input.consume_front("\""))
    return makeParseError(
        State,
        "expecting string, but encountered other character or end of input");

  StringRef Id = State.Input.take_until([](char c) { return c == '"'; });
  if (State.Input.size() == Id.size())
    return makeParseError(State, "unterminated string");
  // Advance past the trailing quote as well.
  return makeParseProgress(advance(State, Id.size() + 1), Id.str());
}

// Parses a single element surrounded by parens. `Op` is applied to the parsed
// result to create the result of this function call.
template <typename T>
ExpectedProgress<RangeSelector> parseSingle(ParseFunction<T> ParseElement,
                                            RangeSelectorOp<T> Op,
                                            ParseState State) {
  auto P = parseChar('(', State);
  if (!P)
    return P.takeError();

  auto E = ParseElement(P->State);
  if (!E)
    return E.takeError();

  P = parseChar(')', E->State);
  if (!P)
    return P.takeError();

  return makeParseProgress(P->State, Op(std::move(E->Value)));
}

// Parses a pair of elements surrounded by parens and separated by comma. `Op`
// is applied to the parsed results to create the result of this function call.
template <typename T>
ExpectedProgress<RangeSelector> parsePair(ParseFunction<T> ParseElement,
                                          RangeSelectorOp<T, T> Op,
                                          ParseState State) {
  auto P = parseChar('(', State);
  if (!P)
    return P.takeError();

  auto Left = ParseElement(P->State);
  if (!Left)
    return Left.takeError();

  P = parseChar(',', Left->State);
  if (!P)
    return P.takeError();

  auto Right = ParseElement(P->State);
  if (!Right)
    return Right.takeError();

  P = parseChar(')', Right->State);
  if (!P)
    return P.takeError();

  return makeParseProgress(P->State,
                           Op(std::move(Left->Value), std::move(Right->Value)));
}

// Parses input for a stencil operator(single arg ops like AsValue, MemberOp or
// Id operator). Returns StencilType representing the operator on success and
// error if it fails to parse input for an operator.
static ExpectedProgress<RangeSelector>
parseRangeSelectorImpl(ParseState State) {
  auto Id = parseId(State);
  if (!Id)
    return Id.takeError();

  std::string OpName = std::move(Id->Value);
  if (auto Op = findOptional(getUnaryStringSelectors(), OpName))
    return parseSingle(parseStringId, *Op, Id->State);

  if (auto Op = findOptional(getUnaryRangeSelectors(), OpName))
    return parseSingle(parseRangeSelectorImpl, *Op, Id->State);

  if (auto Op = findOptional(getBinaryStringSelectors(), OpName))
    return parsePair(parseStringId, *Op, Id->State);

  if (auto Op = findOptional(getBinaryRangeSelectors(), OpName))
    return parsePair(parseRangeSelectorImpl, *Op, Id->State);

  return makeParseError(State, "unknown selector name: " + OpName);
}

Expected<RangeSelector> transformer::parseRangeSelector(llvm::StringRef Input) {
  ParseState State = {Input, Input};
  ExpectedProgress<RangeSelector> Result = parseRangeSelectorImpl(State);
  if (!Result)
    return Result.takeError();
  State = Result->State;
  // Discard any potentially trailing whitespace.
  State.Input = consumeWhitespace(State.Input);
  if (State.Input.empty())
    return Result->Value;
  return makeParseError(State, "unexpected input after selector");
}