27 #define DEBUG_TYPE "scalarizer"
36 typedef std::map<Value *, ValueVector> ScatterMap;
52 ValueVector *cachePtr =
nullptr);
55 Value *operator[](
unsigned I);
58 unsigned size()
const {
return Size; }
64 ValueVector *CachePtr;
73 FCmpSplitter(
FCmpInst &fci) : FCI(fci) {}
84 ICmpSplitter(
ICmpInst &ici) : ICI(ici) {}
94 struct BinarySplitter {
105 VectorLayout() : VecTy(
nullptr), ElemTy(
nullptr), VecAlign(0), ElemSize(0) {}
108 uint64_t getElemAlign(
unsigned I) {
109 return MinAlign(VecAlign, I * ElemSize);
135 bool doInitialization(
Module &M)
override;
136 bool runOnFunction(
Function &
F)
override;
140 bool visitInstruction(
Instruction &) {
return false; }
154 static void registerOptions() {
159 &Scalarizer::ScalarizeLoadStore>(
160 "scalarize-load-store",
161 "Allow the scalarizer pass to scalarize loads and store",
false);
167 bool canTransferMetadata(
unsigned Kind);
168 void transferMetadata(
Instruction *,
const ValueVector &);
172 template<
typename T>
bool splitBinary(
Instruction &,
const T &);
176 ScatterMap Scattered;
178 unsigned ParallelLoopAccessMDKind;
179 bool ScalarizeLoadStore;
186 "Scalarize vector operations",
false,
false)
189 ValueVector *cachePtr)
190 : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
191 Type *Ty = V->getType();
194 Ty = PtrTy->getElementType();
197 Tmp.resize(Size,
nullptr);
198 else if (CachePtr->empty())
199 CachePtr->resize(Size,
nullptr);
201 assert(Size == CachePtr->size() &&
"Inconsistent vector sizes");
205 Value *Scatterer::operator[](
unsigned I) {
206 ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
215 PtrTy->getAddressSpace());
216 CV[0] = Builder.CreateBitCast(V, Ty, V->getName() +
".i0");
219 CV[
I] = Builder.CreateConstGEP1_32(
nullptr, CV[0], I,
220 V->getName() +
".i" +
Twine(I));
244 CV[
I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
245 V->getName() +
".i" +
Twine(I));
250 bool Scalarizer::doInitialization(
Module &M) {
251 ParallelLoopAccessMDKind =
258 bool Scalarizer::runOnFunction(
Function &
F) {
261 assert(Gathered.empty() && Scattered.empty());
265 bool Done = visit(I);
277 if (
Argument *VArg = dyn_cast<Argument>(V)) {
282 return Scatterer(BB, BB->
begin(), V, &Scattered[V]);
300 void Scalarizer::gather(
Instruction *
Op,
const ValueVector &CV) {
306 transferMetadata(Op, CV);
310 ValueVector &SV = Scattered[
Op];
312 for (
unsigned I = 0,
E = SV.size(); I !=
E; ++
I) {
324 Gathered.
push_back(GatherList::value_type(Op, &SV));
329 bool Scalarizer::canTransferMetadata(
unsigned Tag) {
336 || Tag == ParallelLoopAccessMDKind);
341 void Scalarizer::transferMetadata(
Instruction *Op,
const ValueVector &CV) {
344 for (
unsigned I = 0,
E = CV.size(); I !=
E; ++
I) {
345 if (
Instruction *New = dyn_cast<Instruction>(CV[I])) {
346 for (
const auto &MD : MDs)
347 if (canTransferMetadata(MD.first))
348 New->setMetadata(MD.first, MD.second);
357 bool Scalarizer::getVectorLayout(
Type *Ty,
unsigned Alignment,
365 Layout.ElemTy = Layout.VecTy->getElementType();
371 Layout.VecAlign = Alignment;
380 template<
typename Splitter>
390 assert(Op0.size() == NumElems &&
"Mismatched binary operation");
391 assert(Op1.size() == NumElems &&
"Mismatched binary operation");
393 Res.resize(NumElems);
394 for (
unsigned Elem = 0; Elem < NumElems; ++Elem)
395 Res[Elem] =
Split(Builder, Op0[Elem], Op1[Elem],
414 bool Scalarizer::splitCall(
CallInst &CI) {
430 ValueVector ScalarOperands(NumArgs);
433 Scattered.resize(NumArgs);
437 for (
unsigned I = 0; I != NumArgs; ++
I) {
440 Scattered[
I] = scatter(&CI, OpI);
441 assert(Scattered[I].size() == NumElems &&
"mismatched call operands");
443 ScalarOperands[
I] = OpI;
447 ValueVector Res(NumElems);
448 ValueVector ScalarCallOps(NumArgs);
454 for (
unsigned Elem = 0; Elem < NumElems; ++Elem) {
455 ScalarCallOps.clear();
457 for (
unsigned J = 0; J != NumArgs; ++J) {
459 ScalarCallOps.push_back(ScalarOperands[J]);
461 ScalarCallOps.push_back(Scattered[J][Elem]);
464 Res[Elem] = Builder.CreateCall(NewIntrin, ScalarCallOps,
472 bool Scalarizer::visitSelectInst(
SelectInst &SI) {
479 Scatterer Op1 = scatter(&SI, SI.
getOperand(1));
480 Scatterer Op2 = scatter(&SI, SI.
getOperand(2));
481 assert(Op1.size() == NumElems &&
"Mismatched select");
482 assert(Op2.size() == NumElems &&
"Mismatched select");
484 Res.resize(NumElems);
487 Scatterer Op0 = scatter(&SI, SI.
getOperand(0));
488 assert(Op0.size() == NumElems &&
"Mismatched select");
489 for (
unsigned I = 0; I < NumElems; ++
I)
490 Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
494 for (
unsigned I = 0; I < NumElems; ++
I)
495 Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
502 bool Scalarizer::visitICmpInst(
ICmpInst &ICI) {
503 return splitBinary(ICI, ICmpSplitter(ICI));
506 bool Scalarizer::visitFCmpInst(
FCmpInst &FCI) {
507 return splitBinary(FCI, FCmpSplitter(FCI));
511 return splitBinary(BO, BinarySplitter(BO));
523 Scatterer Base = scatter(&GEPI, GEPI.
getOperand(0));
527 for (
unsigned I = 0; I < NumIndices; ++
I)
528 Ops[I] = scatter(&GEPI, GEPI.
getOperand(I + 1));
531 Res.resize(NumElems);
532 for (
unsigned I = 0; I < NumElems; ++
I) {
534 Indices.
resize(NumIndices);
535 for (
unsigned J = 0; J < NumIndices; ++J)
536 Indices[J] = Ops[J][I];
541 NewGEPI->setIsInBounds();
547 bool Scalarizer::visitCastInst(
CastInst &CI) {
554 Scatterer Op0 = scatter(&CI, CI.
getOperand(0));
555 assert(Op0.size() == NumElems &&
"Mismatched cast");
557 Res.resize(NumElems);
558 for (
unsigned I = 0; I < NumElems; ++
I)
565 bool Scalarizer::visitBitCastInst(
BitCastInst &BCI) {
568 if (!DstVT || !SrcVT)
572 unsigned SrcNumElems = SrcVT->getNumElements();
574 Scatterer Op0 = scatter(&BCI, BCI.
getOperand(0));
576 Res.resize(DstNumElems);
578 if (DstNumElems == SrcNumElems) {
579 for (
unsigned I = 0; I < DstNumElems; ++
I)
582 }
else if (DstNumElems > SrcNumElems) {
585 unsigned FanOut = DstNumElems / SrcNumElems;
588 for (
unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
589 Value *V = Op0[Op0I];
593 while ((VI = dyn_cast<Instruction>(V)) &&
596 V = Builder.CreateBitCast(V, MidTy, V->
getName() +
".cast");
597 Scatterer Mid = scatter(&BCI, V);
598 for (
unsigned MidI = 0; MidI < FanOut; ++MidI)
599 Res[ResI++] = Mid[MidI];
603 unsigned FanIn = SrcNumElems / DstNumElems;
606 for (
unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
608 for (
unsigned MidI = 0; MidI < FanIn; ++MidI)
609 V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
611 +
".upto" +
Twine(MidI));
626 Scatterer Op0 = scatter(&SVI, SVI.
getOperand(0));
627 Scatterer Op1 = scatter(&SVI, SVI.
getOperand(1));
629 Res.resize(NumElems);
631 for (
unsigned I = 0; I < NumElems; ++
I) {
635 else if (
unsigned(Selector) < Op0.size())
636 Res[I] = Op0[Selector];
638 Res[
I] = Op1[Selector - Op0.size()];
644 bool Scalarizer::visitPHINode(
PHINode &PHI) {
652 Res.resize(NumElems);
655 for (
unsigned I = 0; I < NumElems; ++
I)
659 for (
unsigned I = 0; I < NumOps; ++
I) {
662 for (
unsigned J = 0; J < NumElems; ++J)
663 cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
669 bool Scalarizer::visitLoadInst(
LoadInst &LI) {
670 if (!ScalarizeLoadStore)
680 unsigned NumElems = Layout.VecTy->getNumElements();
684 Res.resize(NumElems);
686 for (
unsigned I = 0; I < NumElems; ++
I)
687 Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
693 bool Scalarizer::visitStoreInst(
StoreInst &SI) {
694 if (!ScalarizeLoadStore)
705 unsigned NumElems = Layout.VecTy->getNumElements();
708 Scatterer Val = scatter(&SI, FullValue);
711 Stores.resize(NumElems);
712 for (
unsigned I = 0; I < NumElems; ++
I) {
713 unsigned Align = Layout.getElemAlign(I);
714 Stores[
I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
716 transferMetadata(&SI, Stores);
720 bool Scalarizer::visitCallInst(
CallInst &CI) {
721 return splitCall(CI);
726 bool Scalarizer::finish() {
729 if (Gathered.empty() && Scattered.empty())
731 for (
const auto &GMI : Gathered) {
733 ValueVector &CV = *GMI.second;
742 if (isa<PHINode>(Op))
744 for (
unsigned I = 0; I < Count; ++
I)
745 Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
758 return new Scalarizer();
Value * getValueOperand()
SymbolTableList< Instruction >::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
A parsed version of the target data layout string, and methods for querying it. ...
Instruction::CastOps getOpcode() const
Return the opcode of this CastInst.
Value * CreateICmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="")
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Value * CreateBinOp(Instruction::BinaryOps Opc, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
LLVM Argument representation.
Base class for instruction visitors.
Type * getSourceElementType() const
A Module instance is used to store all the information related to an LLVM module. ...
unsigned getNumOperands() const
This class represents a function call, abstracting a target machine's calling convention.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
This instruction constructs a fixed permutation of two input vectors.
const Function * getParent() const
Return the enclosing method, or null if none.
An instruction for reading from memory.
unsigned getNumIndices() const
StringRef getName() const
Return a constant reference to the value's name.
iterator begin()
Instruction iterator methods.
This class represents the LLVM 'select' instruction.
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
This is the base class for all instructions that perform data casts.
unsigned getNumArgOperands() const
Return the number of call arguments.
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
This instruction compares its operands according to the predicate given to the constructor.
This class represents a no-op cast from one type to another.
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
An instruction for storing to memory.
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
void takeName(Value *V)
Transfer the name from V to this value.
Type * getScalarType() const LLVM_READONLY
If this is a vector type, return the element type, otherwise return 'this'.
Type * getElementType() const
bool isInBounds() const
Determine whether the GEP has the inbounds flag.
Class to represent pointers.
static GCRegistry::Add< CoreCLRGC > E("coreclr","CoreCLR-compatible GC")
Value * CreateFCmp(CmpInst::Predicate P, Value *LHS, Value *RHS, const Twine &Name="", MDNode *FPMathTag=nullptr)
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
MinAlign - A and B are either alignments or offsets.
An instruction for type-safe pointer arithmetic to access elements of arrays and structs ...
This instruction inserts a single (scalar) element into a VectorType value.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
LLVM Basic Block Representation.
The instances of the Type class are immutable: once they are created, they are never changed...
static Function * getScalarIntrinsicDeclaration(Module *M, Intrinsic::ID ID, VectorType *Ty)
void getAllMetadataOtherThanDebugLoc(SmallVectorImpl< std::pair< unsigned, MDNode * >> &MDs) const
This does the same thing as getAllMetadata, except that it filters out the debug location.
uint64_t getTypeStoreSizeInBits(Type *Ty) const
Returns the maximum number of bits that may be overwritten by storing the specified type; always a mu...
bool isVectorTy() const
True if this is an instance of VectorType.
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
static bool isTriviallyScalariable(Intrinsic::ID ID)
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
This instruction compares its operands according to the predicate given to the constructor.
ValT getOption() const
Query for a debug option's value.
uint64_t getNumElements() const
FunctionPass class - This class is used to implement most global optimizations.
unsigned getMDKindID(StringRef Name) const
getMDKindID - Return a unique non-zero ID for the specified metadata kind.
Value * getOperand(unsigned i) const
Value * getPointerOperand()
self_iterator getIterator()
Type * getSrcTy() const
Return the source type, as a convenience.
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
INITIALIZE_PASS_WITH_OPTIONS(Scalarizer,"scalarizer","Scalarize vector operations", false, false) Scatterer
unsigned getABITypeAlignment(Type *Ty) const
Returns the minimum ABI-required alignment for the specified type.
Iterator for intrusive lists based on ilist_node.
This is the shared class of boolean and integer constants.
Value * getIncomingValue(unsigned i) const
Return incoming value number i.
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
Type * getType() const
All values are typed, get the type of this value.
void initializeScalarizerPass(PassRegistry &)
Type * getDestTy() const
Return the destination type, as a convenience.
Function * getCalledFunction() const
Return the function called, or null if this is an indirect function invocation.
const BasicBlock & getEntryBlock() const
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx)
Identifies if the intrinsic has a scalar operand.
static void registerOption(StringRef ArgStr, StringRef Desc, const ValT &InitValue)
Registers an option with the OptionRegistry singleton.
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
void setOperand(unsigned i, Value *Val)
Class to represent vector types.
void push_back(pointer val)
unsigned getVectorNumElements() const
static int getMaskValue(Constant *Mask, unsigned Elt)
Return the shuffle mask value for the specified element of the mask.
const DataLayout & getDataLayout() const
Get the data layout for the module's target platform.
unsigned getAlignment() const
Return the alignment of the access that is being performed.
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
VectorType * getType() const
Overload to return most specific vector type.
uint64_t getTypeStoreSize(Type *Ty) const
Returns the maximum number of bytes that may be overwritten by storing the specified type...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Module * getParent()
Get the module that this global value is contained inside of...
LLVM Value Representation.
unsigned getOpcode() const
Returns a member of one of the enums like Instruction::Add.
static VectorType * get(Type *ElementType, unsigned NumElements)
This static method is the primary way to construct a VectorType.
uint64_t getTypeSizeInBits(Type *Ty) const
Size examples:
FunctionPass * createScalarizerPass()
iterator getFirstInsertionPt()
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
static void Split(std::vector< std::string > &V, StringRef S)
Split - Splits a string of comma-separated items into a vector of strings.
Value * getPointerOperand()
const BasicBlock * getParent() const
LLVMContext & getContext() const
Get the global data context.
bool isVoidTy() const
Return true if this is 'void'.
bool isTriviallyVectorizable(Intrinsic::ID ID)
Identify if the intrinsic is trivially vectorizable.