28#define DEBUG_TYPE "amdgpu-late-codegenprepare"
37 WidenLoads(
"amdgpu-late-codegenprepare-widen-constant-loads",
38 cl::desc(
"Widen sub-dword constant address space loads in "
39 "AMDGPULateCodeGenPrepare"),
44class AMDGPULateCodeGenPrepare
46 public InstVisitor<AMDGPULateCodeGenPrepare, bool> {
61 return "AMDGPU IR late optimizations";
77 bool isDWORDAligned(
const Value *V)
const {
82 bool canWidenScalarExtLoad(
LoadInst &LI)
const;
88class LiveRegOptimizer {
94 Type *ConvertToScalar;
105 Type *calculateConvertType(
Type *OriginalType);
123 bool shouldReplace(
Type *ITy) {
128 auto TLI =
ST->getTargetLowering();
151bool AMDGPULateCodeGenPrepare::doInitialization(
Module &M) {
157bool AMDGPULateCodeGenPrepare::runOnFunction(
Function &
F) {
165 AC = &getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
F);
166 UA = &getAnalysis<UniformityInfoWrapperPass>().getUniformityInfo();
175 LiveRegOptimizer LRO(
Mod, &ST);
177 bool Changed =
false;
179 bool HasScalarSubwordLoads =
ST.hasScalarSubwordLoads();
183 Changed |= !HasScalarSubwordLoads && visit(
I);
184 Changed |= LRO.optimizeLiveType(&
I, DeadInsts);
191Type *LiveRegOptimizer::calculateConvertType(
Type *OriginalType) {
197 TypeSize OriginalSize =
DL->getTypeSizeInBits(VTy);
198 TypeSize ConvertScalarSize =
DL->getTypeSizeInBits(ConvertToScalar);
199 unsigned ConvertEltCount =
200 (OriginalSize + ConvertScalarSize - 1) / ConvertScalarSize;
202 if (OriginalSize <= ConvertScalarSize)
206 ConvertEltCount,
false);
212 Type *NewTy = calculateConvertType(
V->getType());
214 TypeSize OriginalSize =
DL->getTypeSizeInBits(VTy);
215 TypeSize NewSize =
DL->getTypeSizeInBits(NewTy);
220 if (OriginalSize == NewSize)
221 return Builder.CreateBitCast(V, NewTy,
V->getName() +
".bc");
224 assert(NewSize > OriginalSize);
229 for (
unsigned I = 0;
I < OriginalElementCount;
I++)
232 for (
uint64_t I = OriginalElementCount;
I < ExpandedVecElementCount;
I++)
233 ShuffleMask.
push_back(OriginalElementCount);
235 Value *ExpandedVec = Builder.CreateShuffleVector(V, ShuffleMask);
236 return Builder.CreateBitCast(ExpandedVec, NewTy,
V->getName() +
".bc");
244 TypeSize OriginalSize =
DL->getTypeSizeInBits(
V->getType());
245 TypeSize NewSize =
DL->getTypeSizeInBits(NewVTy);
249 if (OriginalSize == NewSize)
250 return Builder.CreateBitCast(V, NewVTy,
V->getName() +
".bc");
254 assert(OriginalSize > NewSize);
256 if (!
V->getType()->isVectorTy()) {
259 return cast<Instruction>(Builder.CreateBitCast(Trunc, NewVTy));
268 cast<Instruction>(Builder.CreateBitCast(V, ExpandedVT));
272 std::iota(ShuffleMask.
begin(), ShuffleMask.
end(), 0);
274 return Builder.CreateShuffleVector(Converted, ShuffleMask);
277bool LiveRegOptimizer::optimizeLiveType(
285 while (!Worklist.
empty()) {
291 if (!shouldReplace(
II->getType()))
294 if (
PHINode *Phi = dyn_cast<PHINode>(
II)) {
297 for (
Value *V :
Phi->incoming_values()) {
299 if (
PHINode *OpPhi = dyn_cast<PHINode>(V)) {
300 if (!PhiNodes.
count(OpPhi) && !Visited.
count(OpPhi))
307 if (!IncInst && !isa<ConstantAggregateZero>(V))
317 for (
User *V :
II->users()) {
319 if (
PHINode *OpPhi = dyn_cast<PHINode>(V)) {
320 if (!PhiNodes.
count(OpPhi) && !Visited.
count(OpPhi))
327 if (UseInst->
getParent() !=
II->getParent() || isa<PHINode>(
II)) {
328 Uses.insert(UseInst);
329 if (!Defs.
count(
II) && !isa<PHINode>(
II)) {
340 Value *ConvertVal = convertToOptType(
D, InsertPt);
342 ValMap[
D] = ConvertVal;
347 for (
PHINode *Phi : PhiNodes) {
349 Phi->getNumIncomingValues(),
350 Phi->getName() +
".tc",
Phi->getIterator());
354 for (
PHINode *Phi : PhiNodes) {
355 PHINode *NewPhi = cast<PHINode>(ValMap[Phi]);
356 bool MissingIncVal =
false;
357 for (
int I = 0, E =
Phi->getNumIncomingValues();
I < E;
I++) {
358 Value *IncVal =
Phi->getIncomingValue(
I);
359 if (isa<ConstantAggregateZero>(IncVal)) {
360 Type *NewType = calculateConvertType(
Phi->getType());
361 NewPhi->
addIncoming(ConstantInt::get(NewType, 0,
false),
362 Phi->getIncomingBlock(
I));
366 MissingIncVal =
true;
370 DeadInst = cast<Instruction>(ValMap[Phi]);
381 Value *NewVal =
nullptr;
382 if (BBUseValMap.
contains(
U->getParent()) &&
383 BBUseValMap[
U->getParent()].contains(ValMap[
Op]))
384 NewVal = BBUseValMap[
U->getParent()][ValMap[
Op]];
388 convertFromOptType(
Op->getType(), cast<Instruction>(ValMap[
Op]),
389 InsertPt,
U->getParent());
390 BBUseValMap[
U->getParent()][ValMap[
Op]] = NewVal;
393 U->setOperand(OpIdx, NewVal);
401bool AMDGPULateCodeGenPrepare::canWidenScalarExtLoad(
LoadInst &LI)
const {
414 unsigned TySize =
DL->getTypeStoreSize(Ty);
425bool AMDGPULateCodeGenPrepare::visitLoadInst(
LoadInst &LI) {
434 if (!canWidenScalarExtLoad(LI))
442 if (!isDWORDAligned(
Base))
445 int64_t Adjust =
Offset & 0x3;
456 unsigned LdBits =
DL->getTypeStoreSizeInBits(LI.
getType());
459 auto *NewPtr = IRB.CreateConstGEP1_64(
464 LoadInst *NewLd = IRB.CreateAlignedLoad(IRB.getInt32Ty(), NewPtr,
Align(4));
466 NewLd->
setMetadata(LLVMContext::MD_range,
nullptr);
468 unsigned ShAmt = Adjust * 8;
469 auto *NewVal = IRB.CreateBitCast(
470 IRB.CreateTrunc(IRB.CreateLShr(NewLd, ShAmt), IntNTy), LI.
getType());
478 "AMDGPU IR late optimizations",
false,
false)
485char AMDGPULateCodeGenPrepare::
ID = 0;
488 return new AMDGPULateCodeGenPrepare();
aarch64 falkor hwpf fix late
static cl::opt< bool > WidenLoads("amdgpu-late-codegenprepare-widen-constant-loads", cl::desc("Widen sub-dword constant address space loads in " "AMDGPULateCodeGenPrepare"), cl::ReallyHidden, cl::init(true))
AMDGPU IR late optimizations
The AMDGPU TargetMachine interface definition for hw codegen targets.
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
Rewrite Partial Register Uses
Legalize the Machine IR a function's Machine IR
uint64_t IntrinsicInst * II
const char LLVMTargetMachineRef TM
#define INITIALIZE_PASS_DEPENDENCY(depName)
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Target-Independent Code Generator Pass Configuration Options pass.
Represent the analysis usage information of a pass.
AnalysisUsage & addRequired()
void setPreservesAll()
Set by analyses that do not transform their input at all.
An immutable pass that tracks lazily created AssumptionCache objects.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
InstListType::iterator iterator
Instruction iterators...
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Class to represent fixed width SIMD vectors.
FunctionPass class - This class is used to implement most global optimizations.
virtual bool runOnFunction(Function &F)=0
runOnFunction - Virtual method overridden by subclasses to do the per-function processing of the pass.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Base class for instruction visitors.
void visitInstruction(Instruction &I)
RetTy visitLoadInst(LoadInst &I)
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
void setAlignment(Align Align)
Value * getPointerOperand()
Align getAlign() const
Return the alignment of the access that is being performed.
A Module instance is used to store all the information related to an LLVM module.
LLVMContext & getContext() const
Get the global data context.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
virtual void getAnalysisUsage(AnalysisUsage &) const
getAnalysisUsage - This function should be overridden by passes that need analysis information to do t...
virtual bool doInitialization(Module &)
doInitialization - Virtual method overridden by subclasses to do any necessary initialization before ...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
size_type count(ConstPtrType Ptr) const
count - Return 1 if the specified pointer is in the set, 0 otherwise.
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
reference emplace_back(ArgTypes &&... Args)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
std::pair< LegalizeTypeAction, EVT > LegalizeKind
LegalizeKind holds the legalization kind that needs to happen to EVT in order to type-legalize it.
Primary interface to the complete machine description for the target machine.
Target-Independent Code Generator Pass Configuration Options.
TMC & getTM() const
Get the right type of TargetMachine for this target.
The instances of the Type class are immutable: once they are created, they are never changed.
static IntegerType * getIntNTy(LLVMContext &C, unsigned N)
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
bool isAggregateType() const
Return true if the type is an aggregate type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
static IntegerType * getInt32Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
LLVM Value Representation.
Type * getType() const
All values are typed, get the type of this value.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
LLVMContext & getContext() const
All values hold a context through their type.
ElementCount getElementCount() const
Return an ElementCount instance to represent the (possibly scalable) number of elements in the vector...
Type * getElementType() const
constexpr ScalarTy getFixedValue() const
const ParentTy * getParent() const
@ CONSTANT_ADDRESS_32BIT
Address space for 32-bit constant memory.
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
This is an optimization pass for GlobalISel generic memory operations.
auto enumerate(FirstRange &&First, RestRanges &&...Rest)
Given two or more input ranges, returns a new range whose values are tuples (A,...
Value * GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, const DataLayout &DL, bool AllowNonInbounds=true)
Analyze the specified pointer to see if it can be expressed as a base pointer plus a constant offset.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
auto reverse(ContainerTy &&C)
void computeKnownBits(const Value *V, KnownBits &Known, const DataLayout &DL, unsigned Depth=0, AssumptionCache *AC=nullptr, const Instruction *CxtI=nullptr, const DominatorTree *DT=nullptr, bool UseInstrInfo=true)
Determine which bits of V are known to be either zero or one and return them in the KnownZero/KnownOn...
DWARFExpression::Operation Op
bool RecursivelyDeleteTriviallyDeadInstructionsPermissive(SmallVectorImpl< WeakTrackingVH > &DeadInsts, const TargetLibraryInfo *TLI=nullptr, MemorySSAUpdater *MSSAU=nullptr, std::function< void(Value *)> AboutToDeleteCallback=std::function< void(Value *)>())
Same functionality as RecursivelyDeleteTriviallyDeadInstructions, but allow instructions that are not...
FunctionPass * createAMDGPULateCodeGenPreparePass()
This struct is a compact representation of a valid (non-zero power of two) alignment.
static EVT getEVT(Type *Ty, bool HandleUnknown=false)
Return the value type corresponding to the specified type.
unsigned countMinTrailingZeros() const
Returns the minimum number of trailing zero bits.