27#define DEBUG_TYPE "x86-fixup-vector-constants"
29STATISTIC(NumInstChanges,
"Number of instructions changes");
39 return "X86 Fixup Vector Constants";
49 MachineFunctionProperties::Property::NoVRegs);
59char X86FixupVectorConstantsPass::ID = 0;
64 return new X86FixupVectorConstantsPass();
74 if (isa<UndefValue>(OpC))
86 unsigned NumBits =
C->getType()->getPrimitiveSizeInBits();
88 if (isa<UndefValue>(
C))
91 if (
auto *CInt = dyn_cast<ConstantInt>(
C))
92 return CInt->getValue();
94 if (
auto *CFP = dyn_cast<ConstantFP>(
C))
95 return CFP->getValue().bitcastToAPInt();
97 if (
auto *CV = dyn_cast<ConstantVector>(
C)) {
100 assert((NumBits % Bits->getBitWidth()) == 0 &&
"Illegal splat");
106 for (
unsigned I = 0, E = CV->getNumOperands();
I != E; ++
I) {
111 assert(NumBits == (E * SubBits->getBitWidth()) &&
112 "Illegal vector element size");
113 Bits.insertBits(*SubBits,
I * SubBits->getBitWidth());
118 if (
auto *CDS = dyn_cast<ConstantDataSequential>(
C)) {
119 bool IsInteger = CDS->getElementType()->isIntegerTy();
120 bool IsFloat = CDS->getElementType()->isHalfTy() ||
121 CDS->getElementType()->isBFloatTy() ||
122 CDS->getElementType()->isFloatTy() ||
123 CDS->getElementType()->isDoubleTy();
124 if (IsInteger || IsFloat) {
126 unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
127 for (
unsigned I = 0, E = CDS->getNumElements();
I != E; ++
I) {
129 Bits.insertBits(CDS->getElementAsAPInt(
I),
I * EltBits);
131 Bits.insertBits(CDS->getElementAsAPFloat(
I).bitcastToAPInt(),
144 return Bits->zextOrTrunc(NumBits);
151 unsigned SplatBitWidth) {
152 const Type *Ty =
C->getType();
154 "Illegal splat width");
157 if (Bits->isSplat(SplatBitWidth))
158 return Bits->trunc(SplatBitWidth);
162 if (
auto *CV = dyn_cast<ConstantVector>(
C)) {
163 unsigned NumOps = CV->getNumOperands();
165 unsigned NumScaleOps = SplatBitWidth / NumEltsBits;
166 if ((SplatBitWidth % NumEltsBits) == 0) {
170 for (
unsigned Idx = 0;
Idx != NumOps; ++
Idx) {
172 if (isa<UndefValue>(Elt))
174 unsigned SplatIdx =
Idx % NumScaleOps;
175 if (!Sequence[SplatIdx] || Sequence[SplatIdx] == Elt) {
176 Sequence[SplatIdx] = Elt;
185 for (
unsigned I = 0;
I != NumScaleOps; ++
I) {
189 SplatBits.
insertBits(*Bits,
I * Bits->getBitWidth());
204 const APInt &Bits,
unsigned NumSclBits) {
205 unsigned BitWidth = Bits.getBitWidth();
207 if (NumSclBits == 8) {
210 RawBits.
push_back(Bits.extractBits(8,
I).getZExtValue());
214 if (NumSclBits == 16) {
217 RawBits.
push_back(Bits.extractBits(16,
I).getZExtValue());
223 if (NumSclBits == 32) {
226 RawBits.
push_back(Bits.extractBits(32,
I).getZExtValue());
232 assert(NumSclBits == 64 &&
"Unhandled vector element width");
236 RawBits.
push_back(Bits.extractBits(64,
I).getZExtValue());
245 unsigned ,
unsigned SplatBitWidth) {
253 Type *SclTy =
C->getType()->getScalarType();
255 NumSclBits = std::min<unsigned>(NumSclBits, SplatBitWidth);
258 NumSclBits = (NumSclBits == 8 || NumSclBits == 16 || NumSclBits == 32)
268 unsigned ScalarBitWidth) {
269 Type *SclTy =
C->getType()->getScalarType();
273 if (NumBits > ScalarBitWidth) {
276 if (Bits->countLeadingZeros() >= (NumBits - ScalarBitWidth)) {
279 if (ScalarBitWidth > NumSclBits && (ScalarBitWidth % NumSclBits) == 0)
283 APInt RawBits = Bits->zextOrTrunc(ScalarBitWidth);
284 return ConstantInt::get(Ctx, RawBits);
293 unsigned NumBits,
unsigned NumElts,
294 unsigned SrcEltBitWidth) {
295 unsigned DstEltBitWidth = NumBits / NumElts;
296 assert((NumBits % NumElts) == 0 && (NumBits % SrcEltBitWidth) == 0 &&
297 (DstEltBitWidth % SrcEltBitWidth) == 0 &&
298 (DstEltBitWidth > SrcEltBitWidth) &&
"Illegal extension width");
301 assert((Bits->getBitWidth() / DstEltBitWidth) == NumElts &&
302 (Bits->getBitWidth() % DstEltBitWidth) == 0 &&
303 "Unexpected constant extension");
307 for (
unsigned I = 0;
I != NumElts; ++
I) {
308 APInt Elt = Bits->extractBits(DstEltBitWidth,
I * DstEltBitWidth);
315 Type *Ty =
C->getType();
323 unsigned NumElts,
unsigned SrcEltBitWidth) {
327 unsigned NumElts,
unsigned SrcEltBitWidth) {
331bool X86FixupVectorConstantsPass::processInstruction(
MachineFunction &MF,
334 unsigned Opc =
MI.getOpcode();
336 bool HasSSE41 =
ST->hasSSE41();
337 bool HasAVX2 =
ST->hasAVX2();
338 bool HasDQI =
ST->hasDQI();
339 bool HasBWI =
ST->hasBWI();
340 bool HasVLX =
ST->hasVLX();
350 unsigned OperandNo) {
351#ifdef EXPENSIVE_CHECKS
353 [](
const FixupEntry &
A,
const FixupEntry &
B) {
354 return (
A.NumCstElts *
A.MemBitWidth) <
355 (
B.NumCstElts *
B.MemBitWidth);
357 "Constant fixup table not sorted in ascending constant size");
360 "Unexpected number of operands!");
363 RegBitWidth ? RegBitWidth :
C->getType()->getPrimitiveSizeInBits();
364 for (
const FixupEntry &
Fixup : Fixups) {
369 C, RegBitWidth,
Fixup.NumCstElts,
Fixup.MemBitWidth)) {
371 CP->getConstantPoolIndex(NewCst,
Align(
Fixup.MemBitWidth / 8));
410 case X86::VMOVAPDYrm:
411 case X86::VMOVAPSYrm:
412 case X86::VMOVUPDYrm:
413 case X86::VMOVUPSYrm:
418 case X86::VMOVAPDZ128rm:
419 case X86::VMOVAPSZ128rm:
420 case X86::VMOVUPDZ128rm:
421 case X86::VMOVUPSZ128rm:
427 case X86::VMOVAPDZ256rm:
428 case X86::VMOVAPSZ256rm:
429 case X86::VMOVUPDZ256rm:
430 case X86::VMOVUPSZ256rm:
431 return FixupConstant(
436 case X86::VMOVAPDZrm:
437 case X86::VMOVAPSZrm:
438 case X86::VMOVUPDZrm:
439 case X86::VMOVUPSZrm:
447 case X86::MOVDQUrm: {
463 return FixupConstant(Fixups, 128, 1);
466 case X86::VMOVDQUrm: {
473 {HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm, 1, 32,
480 {HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm, 1, 64,
488 return FixupConstant(Fixups, 128, 1);
490 case X86::VMOVDQAYrm:
491 case X86::VMOVDQUYrm: {
495 {HasAVX2 ? X86::VPBROADCASTDYrm : X86::VBROADCASTSSYrm, 1, 32,
499 {HasAVX2 ? X86::VPBROADCASTQYrm : X86::VBROADCASTSDYrm, 1, 64,
505 {HasAVX2 ? X86::VBROADCASTI128rm : X86::VBROADCASTF128rm, 1, 128,
513 return FixupConstant(Fixups, 256, 1);
515 case X86::VMOVDQA32Z128rm:
516 case X86::VMOVDQA64Z128rm:
517 case X86::VMOVDQU32Z128rm:
518 case X86::VMOVDQU64Z128rm: {
538 return FixupConstant(Fixups, 128, 1);
540 case X86::VMOVDQA32Z256rm:
541 case X86::VMOVDQA64Z256rm:
542 case X86::VMOVDQU32Z256rm:
543 case X86::VMOVDQU64Z256rm: {
562 return FixupConstant(Fixups, 256, 1);
564 case X86::VMOVDQA32Zrm:
565 case X86::VMOVDQA64Zrm:
566 case X86::VMOVDQU32Zrm:
567 case X86::VMOVDQU64Zrm: {
587 return FixupConstant(Fixups, 512, 1);
591 auto ConvertToBroadcastAVX512 = [&](
unsigned OpSrc32,
unsigned OpSrc64) {
592 unsigned OpBcst32 = 0, OpBcst64 = 0;
593 unsigned OpNoBcst32 = 0, OpNoBcst64 = 0;
597 OpBcst32 = Mem2Bcst->DstOp;
604 OpBcst64 = Mem2Bcst->DstOp;
608 assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) &&
609 "OperandNo mismatch");
611 if (OpBcst32 || OpBcst64) {
612 unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
617 return FixupConstant(Fixups, 0, OpNo);
625 return ConvertToBroadcastAVX512(Opc, Opc);
629 if (HasVLX && !HasDQI) {
630 unsigned OpSrc32 = 0, OpSrc64 = 0;
635 OpSrc32 = X86 ::VPANDDZ128rm;
636 OpSrc64 = X86 ::VPANDQZ128rm;
641 OpSrc32 = X86 ::VPANDDZ256rm;
642 OpSrc64 = X86 ::VPANDQZ256rm;
647 OpSrc32 = X86 ::VPANDNDZ128rm;
648 OpSrc64 = X86 ::VPANDNQZ128rm;
650 case X86::VANDNPDYrm:
651 case X86::VANDNPSYrm:
653 OpSrc32 = X86 ::VPANDNDZ256rm;
654 OpSrc64 = X86 ::VPANDNQZ256rm;
659 OpSrc32 = X86 ::VPORDZ128rm;
660 OpSrc64 = X86 ::VPORQZ128rm;
665 OpSrc32 = X86 ::VPORDZ256rm;
666 OpSrc64 = X86 ::VPORQZ256rm;
671 OpSrc32 = X86 ::VPXORDZ128rm;
672 OpSrc64 = X86 ::VPXORQZ128rm;
677 OpSrc32 = X86 ::VPXORDZ256rm;
678 OpSrc64 = X86 ::VPXORQZ256rm;
681 if (OpSrc32 || OpSrc64)
682 return ConvertToBroadcastAVX512(OpSrc32, OpSrc64);
688bool X86FixupVectorConstantsPass::runOnMachineFunction(
MachineFunction &MF) {
690 bool Changed =
false;
692 TII =
ST->getInstrInfo();
693 SM = &
ST->getSchedModel();
697 if (processInstruction(MF,
MBB,
MI)) {
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const HexagonInstrInfo * TII
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static Constant * rebuildSplatCst(const Constant *C, unsigned, unsigned, unsigned SplatBitWidth)
static std::optional< APInt > getSplatableConstant(const Constant *C, unsigned SplatBitWidth)
static Constant * rebuildZExtCst(const Constant *C, unsigned NumBits, unsigned NumElts, unsigned SrcEltBitWidth)
static std::optional< APInt > extractConstantBits(const Constant *C)
static Constant * getSplatValueAllowUndef(const ConstantVector *C)
Normally, we only allow poison in vector splats.
static Constant * rebuildExtCst(const Constant *C, bool IsSExt, unsigned NumBits, unsigned NumElts, unsigned SrcEltBitWidth)
static Constant * rebuildZeroUpperCst(const Constant *C, unsigned NumBits, unsigned, unsigned ScalarBitWidth)
static Constant * rebuildSExtCst(const Constant *C, unsigned NumBits, unsigned NumElts, unsigned SrcEltBitWidth)
static Constant * rebuildConstant(LLVMContext &Ctx, Type *SclTy, const APInt &Bits, unsigned NumSclBits)
Class for arbitrary precision integers.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
static Constant * getFP(Type *ElementType, ArrayRef< uint16_t > Elts)
getFP() constructors - Return a constant of vector type with a float element type taken from argument...
Constant Vector Declarations.
This is an important base class in LLVM.
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
This class represents an Operation in the Expression.
FunctionPass class - This class is used to implement most global optimizations.
This is an important class for using LLVM in a threaded context.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool is16bitFPTy() const
Return true if this is a 16-bit float type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Value * getOperand(unsigned i) const
LLVM Value Representation.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ EVEX
EVEX - Specifies that this instruction use EVEX form which provides syntax support up to 32 512-bit r...
const Constant * getConstantFromPool(const MachineInstr &MI, unsigned OpNo)
Find any constant pool entry associated with a specific instruction operand.
This is an optimization pass for GlobalISel generic memory operations.
const X86FoldTableEntry * lookupBroadcastFoldTableBySize(unsigned MemOp, unsigned BroadcastBits)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
DWARFExpression::Operation Op
constexpr unsigned BitWidth
FunctionPass * createX86FixupVectorConstants()
Return a pass that reduces the size of vector constant pool loads.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Machine model for scheduling, bundling, and heuristics.