SIRemoveShortExecBranches.cpp 4.96 KB
//===-- SIRemoveShortExecBranches.cpp ------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This pass optmizes the s_cbranch_execz instructions.
/// The pass removes this skip instruction for short branches,
/// if there is no unwanted sideeffect in the fallthrough code sequence.
///
//===----------------------------------------------------------------------===//

#include "AMDGPU.h"
#include "AMDGPUSubtarget.h"
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
#include "SIInstrInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/Support/CommandLine.h"

using namespace llvm;

#define DEBUG_TYPE "si-remove-short-exec-branches"

static unsigned SkipThreshold;

static cl::opt<unsigned, true> SkipThresholdFlag(
    "amdgpu-skip-threshold", cl::Hidden,
    cl::desc(
        "Number of instructions before jumping over divergent control flow"),
    cl::location(SkipThreshold), cl::init(12));

namespace {

class SIRemoveShortExecBranches : public MachineFunctionPass {
private:
  const SIInstrInfo *TII = nullptr;
  bool getBlockDestinations(MachineBasicBlock &SrcMBB,
                            MachineBasicBlock *&TrueMBB,
                            MachineBasicBlock *&FalseMBB,
                            SmallVectorImpl<MachineOperand> &Cond);
  bool mustRetainExeczBranch(const MachineBasicBlock &From,
                             const MachineBasicBlock &To) const;
  bool removeExeczBranch(MachineInstr &MI, MachineBasicBlock &SrcMBB);

public:
  static char ID;

  SIRemoveShortExecBranches() : MachineFunctionPass(ID) {
    initializeSIRemoveShortExecBranchesPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &MF) override;
};

} // End anonymous namespace.

INITIALIZE_PASS(SIRemoveShortExecBranches, DEBUG_TYPE,
                "SI remove short exec branches", false, false)

char SIRemoveShortExecBranches::ID = 0;

char &llvm::SIRemoveShortExecBranchesID = SIRemoveShortExecBranches::ID;

bool SIRemoveShortExecBranches::getBlockDestinations(
    MachineBasicBlock &SrcMBB, MachineBasicBlock *&TrueMBB,
    MachineBasicBlock *&FalseMBB, SmallVectorImpl<MachineOperand> &Cond) {
  if (TII->analyzeBranch(SrcMBB, TrueMBB, FalseMBB, Cond))
    return false;

  if (!FalseMBB)
    FalseMBB = SrcMBB.getNextNode();

  return true;
}

bool SIRemoveShortExecBranches::mustRetainExeczBranch(
    const MachineBasicBlock &From, const MachineBasicBlock &To) const {
  unsigned NumInstr = 0;
  const MachineFunction *MF = From.getParent();

  for (MachineFunction::const_iterator MBBI(&From), ToI(&To), End = MF->end();
       MBBI != End && MBBI != ToI; ++MBBI) {
    const MachineBasicBlock &MBB = *MBBI;

    for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
         I != E; ++I) {
      // When a uniform loop is inside non-uniform control flow, the branch
      // leaving the loop might never be taken when EXEC = 0.
      // Hence we should retain cbranch out of the loop lest it become infinite.
      if (I->isConditionalBranch())
        return true;

      if (TII->hasUnwantedEffectsWhenEXECEmpty(*I))
        return true;

      if (TII->isKillTerminator(I->getOpcode()))
        return true;

      // These instructions are potentially expensive even if EXEC = 0.
      if (TII->isSMRD(*I) || TII->isVMEM(*I) || TII->isFLAT(*I) ||
          I->getOpcode() == AMDGPU::S_WAITCNT)
        return true;

      ++NumInstr;
      if (NumInstr >= SkipThreshold)
        return true;
    }
  }

  return false;
}

// Returns true if the skip branch instruction is removed.
bool SIRemoveShortExecBranches::removeExeczBranch(MachineInstr &MI,
                                                  MachineBasicBlock &SrcMBB) {
  MachineBasicBlock *TrueMBB = nullptr;
  MachineBasicBlock *FalseMBB = nullptr;
  SmallVector<MachineOperand, 1> Cond;

  if (!getBlockDestinations(SrcMBB, TrueMBB, FalseMBB, Cond))
    return false;

  // Consider only the forward branches.
  if ((SrcMBB.getNumber() >= TrueMBB->getNumber()) ||
      mustRetainExeczBranch(*FalseMBB, *TrueMBB))
    return false;

  LLVM_DEBUG(dbgs() << "Removing the execz branch: " << MI);
  MI.eraseFromParent();
  SrcMBB.removeSuccessor(TrueMBB);

  return true;
}

bool SIRemoveShortExecBranches::runOnMachineFunction(MachineFunction &MF) {
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
  TII = ST.getInstrInfo();
  MF.RenumberBlocks();
  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
    if (MBBI == MBB.end())
      continue;

    MachineInstr &MI = *MBBI;
    switch (MI.getOpcode()) {
    case AMDGPU::S_CBRANCH_EXECZ:
      Changed = removeExeczBranch(MI, MBB);
      break;
    default:
      break;
    }
  }

  return Changed;
}