FileMatchTrie.cpp 7.45 KB
//===- FileMatchTrie.cpp --------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
//  This file contains the implementation of a FileMatchTrie.
//
//===----------------------------------------------------------------------===//

#include "clang/Tooling/FileMatchTrie.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/FileSystem.h"
#include "llvm/Support/Path.h"
#include "llvm/Support/raw_ostream.h"
#include <string>
#include <vector>

using namespace clang;
using namespace tooling;

namespace {

/// Default \c PathComparator using \c llvm::sys::fs::equivalent().
struct DefaultPathComparator : public PathComparator {
  bool equivalent(StringRef FileA, StringRef FileB) const override {
    return FileA == FileB || llvm::sys::fs::equivalent(FileA, FileB);
  }
};

} // namespace

namespace clang {
namespace tooling {

/// A node of the \c FileMatchTrie.
///
/// Each node has storage for up to one path and a map mapping a path segment to
/// child nodes. The trie starts with an empty root node.
class FileMatchTrieNode {
public:
  /// Inserts 'NewPath' into this trie. \c ConsumedLength denotes
  /// the number of \c NewPath's trailing characters already consumed during
  /// recursion.
  ///
  /// An insert of a path
  /// 'p'starts at the root node and does the following:
  /// - If the node is empty, insert 'p' into its storage and abort.
  /// - If the node has a path 'p2' but no children, take the last path segment
  ///   's' of 'p2', put a new child into the map at 's' an insert the rest of
  ///   'p2' there.
  /// - Insert a new child for the last segment of 'p' and insert the rest of
  ///   'p' there.
  ///
  /// An insert operation is linear in the number of a path's segments.
  void insert(StringRef NewPath, unsigned ConsumedLength = 0) {
    // We cannot put relative paths into the FileMatchTrie as then a path can be
    // a postfix of another path, violating a core assumption of the trie.
    if (llvm::sys::path::is_relative(NewPath))
      return;
    if (Path.empty()) {
      // This is an empty leaf. Store NewPath and return.
      Path = std::string(NewPath);
      return;
    }
    if (Children.empty()) {
      // This is a leaf, ignore duplicate entry if 'Path' equals 'NewPath'.
      if (NewPath == Path)
          return;
      // Make this a node and create a child-leaf with 'Path'.
      StringRef Element(llvm::sys::path::filename(
          StringRef(Path).drop_back(ConsumedLength)));
      Children[Element].Path = Path;
    }
    StringRef Element(llvm::sys::path::filename(
          StringRef(NewPath).drop_back(ConsumedLength)));
    Children[Element].insert(NewPath, ConsumedLength + Element.size() + 1);
  }

  /// Tries to find the node under this \c FileMatchTrieNode that best
  /// matches 'FileName'.
  ///
  /// If multiple paths fit 'FileName' equally well, \c IsAmbiguous is set to
  /// \c true and an empty string is returned. If no path fits 'FileName', an
  /// empty string is returned. \c ConsumedLength denotes the number of
  /// \c Filename's trailing characters already consumed during recursion.
  ///
  /// To find the best matching node for a given path 'p', the
  /// \c findEquivalent() function is called recursively for each path segment
  /// (back to front) of 'p' until a node 'n' is reached that does not ..
  /// - .. have children. In this case it is checked
  ///   whether the stored path is equivalent to 'p'. If yes, the best match is
  ///   found. Otherwise continue with the parent node as if this node did not
  ///   exist.
  /// - .. a child matching the next path segment. In this case, all children of
  ///   'n' are an equally good match for 'p'. All children are of 'n' are found
  ///   recursively and their equivalence to 'p' is determined. If none are
  ///   equivalent, continue with the parent node as if 'n' didn't exist. If one
  ///   is equivalent, the best match is found. Otherwise, report and ambigiuity
  ///   error.
  StringRef findEquivalent(const PathComparator& Comparator,
                           StringRef FileName,
                           bool &IsAmbiguous,
                           unsigned ConsumedLength = 0) const {
    // Note: we support only directory symlinks for performance reasons.
    if (Children.empty()) {
      // As far as we do not support file symlinks, compare
      // basenames here to avoid request to file system.
      if (llvm::sys::path::filename(Path) ==
              llvm::sys::path::filename(FileName) &&
          Comparator.equivalent(StringRef(Path), FileName))
        return StringRef(Path);
      return {};
    }
    StringRef Element(llvm::sys::path::filename(FileName.drop_back(
        ConsumedLength)));
    llvm::StringMap<FileMatchTrieNode>::const_iterator MatchingChild =
        Children.find(Element);
    if (MatchingChild != Children.end()) {
      StringRef Result = MatchingChild->getValue().findEquivalent(
          Comparator, FileName, IsAmbiguous,
          ConsumedLength + Element.size() + 1);
      if (!Result.empty() || IsAmbiguous)
        return Result;
    }

    // If `ConsumedLength` is zero, this is the root and we have no filename
    // match. Give up in this case, we don't try to find symlinks with
    // different names.
    if (ConsumedLength == 0)
      return {};

    std::vector<StringRef> AllChildren;
    getAll(AllChildren, MatchingChild);
    StringRef Result;
    for (const auto &Child : AllChildren) {
      if (Comparator.equivalent(Child, FileName)) {
        if (Result.empty()) {
          Result = Child;
        } else {
          IsAmbiguous = true;
          return {};
        }
      }
    }
    return Result;
  }

private:
  /// Gets all paths under this FileMatchTrieNode.
  void getAll(std::vector<StringRef> &Results,
              llvm::StringMap<FileMatchTrieNode>::const_iterator Except) const {
    if (Path.empty())
      return;
    if (Children.empty()) {
      Results.push_back(StringRef(Path));
      return;
    }
    for (llvm::StringMap<FileMatchTrieNode>::const_iterator
         It = Children.begin(), E = Children.end();
         It != E; ++It) {
      if (It == Except)
        continue;
      It->getValue().getAll(Results, Children.end());
    }
  }

  // The stored absolute path in this node. Only valid for leaf nodes, i.e.
  // nodes where Children.empty().
  std::string Path;

  // The children of this node stored in a map based on the next path segment.
  llvm::StringMap<FileMatchTrieNode> Children;
};

} // namespace tooling
} // namespace clang

FileMatchTrie::FileMatchTrie()
    : Root(new FileMatchTrieNode), Comparator(new DefaultPathComparator()) {}

FileMatchTrie::FileMatchTrie(PathComparator *Comparator)
    : Root(new FileMatchTrieNode), Comparator(Comparator) {}

FileMatchTrie::~FileMatchTrie() {
  delete Root;
}

void FileMatchTrie::insert(StringRef NewPath) {
  Root->insert(NewPath);
}

StringRef FileMatchTrie::findEquivalent(StringRef FileName,
                                        raw_ostream &Error) const {
  if (llvm::sys::path::is_relative(FileName)) {
    Error << "Cannot resolve relative paths";
    return {};
  }
  bool IsAmbiguous = false;
  StringRef Result = Root->findEquivalent(*Comparator, FileName, IsAmbiguous);
  if (IsAmbiguous)
    Error << "Path is ambiguous";
  return Result;
}