27#define DEBUG_TYPE "x86-fixup-vector-constants"
29STATISTIC(NumInstChanges,
"Number of instructions changes");
39 return "X86 Fixup Vector Constants";
49 MachineFunctionProperties::Property::NoVRegs);
59char X86FixupVectorConstantsPass::ID = 0;
64 return new X86FixupVectorConstantsPass();
74 if (isa<UndefValue>(OpC))
86 unsigned NumBits =
C->getType()->getPrimitiveSizeInBits();
88 if (isa<UndefValue>(
C))
91 if (
auto *CInt = dyn_cast<ConstantInt>(
C))
92 return CInt->getValue();
94 if (
auto *CFP = dyn_cast<ConstantFP>(
C))
95 return CFP->getValue().bitcastToAPInt();
97 if (
auto *CV = dyn_cast<ConstantVector>(
C)) {
100 assert((NumBits % Bits->getBitWidth()) == 0 &&
"Illegal splat");
106 for (
unsigned I = 0, E = CV->getNumOperands();
I != E; ++
I) {
111 assert(NumBits == (E * SubBits->getBitWidth()) &&
112 "Illegal vector element size");
113 Bits.insertBits(*SubBits,
I * SubBits->getBitWidth());
118 if (
auto *CDS = dyn_cast<ConstantDataSequential>(
C)) {
119 bool IsInteger = CDS->getElementType()->isIntegerTy();
120 bool IsFloat = CDS->getElementType()->isHalfTy() ||
121 CDS->getElementType()->isBFloatTy() ||
122 CDS->getElementType()->isFloatTy() ||
123 CDS->getElementType()->isDoubleTy();
124 if (IsInteger || IsFloat) {
126 unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
127 for (
unsigned I = 0, E = CDS->getNumElements();
I != E; ++
I) {
129 Bits.insertBits(CDS->getElementAsAPInt(
I),
I * EltBits);
131 Bits.insertBits(CDS->getElementAsAPFloat(
I).bitcastToAPInt(),
144 return Bits->zextOrTrunc(NumBits);
151 unsigned SplatBitWidth) {
152 const Type *Ty =
C->getType();
154 "Illegal splat width");
157 if (Bits->isSplat(SplatBitWidth))
158 return Bits->trunc(SplatBitWidth);
162 if (
auto *CV = dyn_cast<ConstantVector>(
C)) {
163 unsigned NumOps = CV->getNumOperands();
165 unsigned NumScaleOps = SplatBitWidth / NumEltsBits;
166 if ((SplatBitWidth % NumEltsBits) == 0) {
170 for (
unsigned Idx = 0;
Idx != NumOps; ++
Idx) {
172 if (isa<UndefValue>(Elt))
174 unsigned SplatIdx =
Idx % NumScaleOps;
175 if (!Sequence[SplatIdx] || Sequence[SplatIdx] == Elt) {
176 Sequence[SplatIdx] = Elt;
185 for (
unsigned I = 0;
I != NumScaleOps; ++
I) {
189 SplatBits.
insertBits(*Bits,
I * Bits->getBitWidth());
204 const APInt &Bits,
unsigned NumSclBits) {
205 unsigned BitWidth = Bits.getBitWidth();
207 if (NumSclBits == 8) {
210 RawBits.
push_back(Bits.extractBits(8,
I).getZExtValue());
214 if (NumSclBits == 16) {
217 RawBits.
push_back(Bits.extractBits(16,
I).getZExtValue());
223 if (NumSclBits == 32) {
226 RawBits.
push_back(Bits.extractBits(32,
I).getZExtValue());
232 assert(NumSclBits == 64 &&
"Unhandled vector element width");
236 RawBits.
push_back(Bits.extractBits(64,
I).getZExtValue());
245 unsigned ,
unsigned SplatBitWidth) {
253 Type *SclTy =
C->getType()->getScalarType();
255 NumSclBits = std::min<unsigned>(NumSclBits, SplatBitWidth);
258 NumSclBits = (NumSclBits == 8 || NumSclBits == 16 || NumSclBits == 32)
268 unsigned ScalarBitWidth) {
269 Type *SclTy =
C->getType()->getScalarType();
273 if (NumBits > ScalarBitWidth) {
276 if (Bits->countLeadingZeros() >= (NumBits - ScalarBitWidth)) {
279 if (ScalarBitWidth > NumSclBits && (ScalarBitWidth % NumSclBits) == 0)
283 APInt RawBits = Bits->zextOrTrunc(ScalarBitWidth);
284 return ConstantInt::get(Ctx, RawBits);
293 unsigned NumBits,
unsigned NumElts,
294 unsigned SrcEltBitWidth) {
295 unsigned DstEltBitWidth = NumBits / NumElts;
296 assert((NumBits % NumElts) == 0 && (NumBits % SrcEltBitWidth) == 0 &&
297 (DstEltBitWidth % SrcEltBitWidth) == 0 &&
298 (DstEltBitWidth > SrcEltBitWidth) &&
"Illegal extension width");
301 assert((Bits->getBitWidth() / DstEltBitWidth) == NumElts &&
302 (Bits->getBitWidth() % DstEltBitWidth) == 0 &&
303 "Unexpected constant extension");
307 for (
unsigned I = 0;
I != NumElts; ++
I) {
308 APInt Elt = Bits->extractBits(DstEltBitWidth,
I * DstEltBitWidth);
315 Type *Ty =
C->getType();
323 unsigned NumElts,
unsigned SrcEltBitWidth) {
327 unsigned NumElts,
unsigned SrcEltBitWidth) {
331bool X86FixupVectorConstantsPass::processInstruction(
MachineFunction &MF,
334 unsigned Opc =
MI.getOpcode();
336 bool HasSSE41 =
ST->hasSSE41();
337 bool HasAVX2 =
ST->hasAVX2();
338 bool HasDQI =
ST->hasDQI();
339 bool HasBWI =
ST->hasBWI();
340 bool HasVLX =
ST->hasVLX();
341 bool MultiDomain =
ST->hasAVX512() ||
ST->hasNoDomainDelayMov();
351 unsigned OperandNo) {
352#ifdef EXPENSIVE_CHECKS
354 [](
const FixupEntry &
A,
const FixupEntry &
B) {
355 return (
A.NumCstElts *
A.MemBitWidth) <
356 (
B.NumCstElts *
B.MemBitWidth);
358 "Constant fixup table not sorted in ascending constant size");
361 "Unexpected number of operands!");
364 RegBitWidth ? RegBitWidth :
C->getType()->getPrimitiveSizeInBits();
365 for (
const FixupEntry &
Fixup : Fixups) {
370 C, RegBitWidth,
Fixup.NumCstElts,
Fixup.MemBitWidth)) {
372 CP->getConstantPoolIndex(NewCst,
Align(
Fixup.MemBitWidth / 8));
405 case X86::VMOVUPSrm: {
421 return FixupConstant(Fixups, 128, 1);
423 case X86::VMOVAPDYrm:
424 case X86::VMOVAPSYrm:
425 case X86::VMOVUPDYrm:
426 case X86::VMOVUPSYrm: {
429 {HasAVX2 && MultiDomain ? X86::VPMOVSXBQYrm : 0, 4, 8,
rebuildSExtCst},
430 {HasAVX2 && MultiDomain ? X86::VPMOVZXBQYrm : 0, 4, 8,
rebuildZExtCst},
432 {HasAVX2 && MultiDomain ? X86::VPMOVSXBDYrm : 0, 8, 8,
rebuildSExtCst},
433 {HasAVX2 && MultiDomain ? X86::VPMOVZXBDYrm : 0, 8, 8,
rebuildZExtCst},
434 {HasAVX2 && MultiDomain ? X86::VPMOVSXWQYrm : 0, 4, 16,
rebuildSExtCst},
435 {HasAVX2 && MultiDomain ? X86::VPMOVZXWQYrm : 0, 4, 16,
rebuildZExtCst},
437 {HasAVX2 && MultiDomain ? X86::VPMOVSXWDYrm : 0, 8, 16,
rebuildSExtCst},
438 {HasAVX2 && MultiDomain ? X86::VPMOVZXWDYrm : 0, 8, 16,
rebuildZExtCst},
439 {HasAVX2 && MultiDomain ? X86::VPMOVSXDQYrm : 0, 4, 32,
rebuildSExtCst},
440 {HasAVX2 && MultiDomain ? X86::VPMOVZXDQYrm : 0, 4, 32,
442 return FixupConstant(Fixups, 256, 1);
444 case X86::VMOVAPDZ128rm:
445 case X86::VMOVAPSZ128rm:
446 case X86::VMOVUPDZ128rm:
447 case X86::VMOVUPSZ128rm: {
463 return FixupConstant(Fixups, 128, 1);
465 case X86::VMOVAPDZ256rm:
466 case X86::VMOVAPSZ256rm:
467 case X86::VMOVUPDZ256rm:
468 case X86::VMOVUPSZ256rm: {
483 return FixupConstant(Fixups, 256, 1);
485 case X86::VMOVAPDZrm:
486 case X86::VMOVAPSZrm:
487 case X86::VMOVUPDZrm:
488 case X86::VMOVUPSZrm: {
504 return FixupConstant(Fixups, 512, 1);
508 case X86::MOVDQUrm: {
524 return FixupConstant(Fixups, 128, 1);
527 case X86::VMOVDQUrm: {
534 {HasAVX2 ? X86::VPBROADCASTDrm : X86::VBROADCASTSSrm, 1, 32,
541 {HasAVX2 ? X86::VPBROADCASTQrm : X86::VMOVDDUPrm, 1, 64,
549 return FixupConstant(Fixups, 128, 1);
551 case X86::VMOVDQAYrm:
552 case X86::VMOVDQUYrm: {
556 {HasAVX2 ? X86::VPBROADCASTDYrm : X86::VBROADCASTSSYrm, 1, 32,
560 {HasAVX2 ? X86::VPBROADCASTQYrm : X86::VBROADCASTSDYrm, 1, 64,
566 {HasAVX2 ? X86::VBROADCASTI128rm : X86::VBROADCASTF128rm, 1, 128,
574 return FixupConstant(Fixups, 256, 1);
576 case X86::VMOVDQA32Z128rm:
577 case X86::VMOVDQA64Z128rm:
578 case X86::VMOVDQU32Z128rm:
579 case X86::VMOVDQU64Z128rm: {
599 return FixupConstant(Fixups, 128, 1);
601 case X86::VMOVDQA32Z256rm:
602 case X86::VMOVDQA64Z256rm:
603 case X86::VMOVDQU32Z256rm:
604 case X86::VMOVDQU64Z256rm: {
623 return FixupConstant(Fixups, 256, 1);
625 case X86::VMOVDQA32Zrm:
626 case X86::VMOVDQA64Zrm:
627 case X86::VMOVDQU32Zrm:
628 case X86::VMOVDQU64Zrm: {
648 return FixupConstant(Fixups, 512, 1);
652 auto ConvertToBroadcastAVX512 = [&](
unsigned OpSrc32,
unsigned OpSrc64) {
653 unsigned OpBcst32 = 0, OpBcst64 = 0;
654 unsigned OpNoBcst32 = 0, OpNoBcst64 = 0;
658 OpBcst32 = Mem2Bcst->DstOp;
665 OpBcst64 = Mem2Bcst->DstOp;
669 assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) &&
670 "OperandNo mismatch");
672 if (OpBcst32 || OpBcst64) {
673 unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
678 return FixupConstant(Fixups, 0, OpNo);
686 return ConvertToBroadcastAVX512(Opc, Opc);
690 if (HasVLX && !HasDQI) {
691 unsigned OpSrc32 = 0, OpSrc64 = 0;
696 OpSrc32 = X86 ::VPANDDZ128rm;
697 OpSrc64 = X86 ::VPANDQZ128rm;
702 OpSrc32 = X86 ::VPANDDZ256rm;
703 OpSrc64 = X86 ::VPANDQZ256rm;
708 OpSrc32 = X86 ::VPANDNDZ128rm;
709 OpSrc64 = X86 ::VPANDNQZ128rm;
711 case X86::VANDNPDYrm:
712 case X86::VANDNPSYrm:
714 OpSrc32 = X86 ::VPANDNDZ256rm;
715 OpSrc64 = X86 ::VPANDNQZ256rm;
720 OpSrc32 = X86 ::VPORDZ128rm;
721 OpSrc64 = X86 ::VPORQZ128rm;
726 OpSrc32 = X86 ::VPORDZ256rm;
727 OpSrc64 = X86 ::VPORQZ256rm;
732 OpSrc32 = X86 ::VPXORDZ128rm;
733 OpSrc64 = X86 ::VPXORQZ128rm;
738 OpSrc32 = X86 ::VPXORDZ256rm;
739 OpSrc64 = X86 ::VPXORQZ256rm;
742 if (OpSrc32 || OpSrc64)
743 return ConvertToBroadcastAVX512(OpSrc32, OpSrc64);
749bool X86FixupVectorConstantsPass::runOnMachineFunction(
MachineFunction &MF) {
751 bool Changed =
false;
753 TII =
ST->getInstrInfo();
754 SM = &
ST->getSchedModel();
758 if (processInstruction(MF,
MBB,
MI)) {
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const HexagonInstrInfo * TII
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
PowerPC TLS Dynamic Call Fixup
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static Constant * rebuildSplatCst(const Constant *C, unsigned, unsigned, unsigned SplatBitWidth)
static std::optional< APInt > getSplatableConstant(const Constant *C, unsigned SplatBitWidth)
static Constant * rebuildZExtCst(const Constant *C, unsigned NumBits, unsigned NumElts, unsigned SrcEltBitWidth)
static std::optional< APInt > extractConstantBits(const Constant *C)
static Constant * getSplatValueAllowUndef(const ConstantVector *C)
Normally, we only allow poison in vector splats.
static Constant * rebuildExtCst(const Constant *C, bool IsSExt, unsigned NumBits, unsigned NumElts, unsigned SrcEltBitWidth)
static Constant * rebuildZeroUpperCst(const Constant *C, unsigned NumBits, unsigned, unsigned ScalarBitWidth)
static Constant * rebuildSExtCst(const Constant *C, unsigned NumBits, unsigned NumElts, unsigned SrcEltBitWidth)
static Constant * rebuildConstant(LLVMContext &Ctx, Type *SclTy, const APInt &Bits, unsigned NumSclBits)
Class for arbitrary precision integers.
unsigned getActiveBits() const
Compute the number of active bits in the value.
APInt trunc(unsigned width) const
Truncate to new width.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
unsigned getSignificantBits() const
Get the minimum bit size for this signed APInt.
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
static Constant * getFP(Type *ElementType, ArrayRef< uint16_t > Elts)
getFP() constructors - Return a constant of vector type with a float element type taken from argument...
Constant Vector Declarations.
This is an important base class in LLVM.
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
This class represents an Operation in the Expression.
FunctionPass class - This class is used to implement most global optimizations.
This is an important class for using LLVM in a threaded context.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool is16bitFPTy() const
Return true if this is a 16-bit float type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Value * getOperand(unsigned i) const
LLVM Value Representation.
@ C
The default llvm calling convention, compatible with C.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ EVEX
EVEX - Specifies that this instruction use EVEX form which provides syntax support up to 32 512-bit r...
const Constant * getConstantFromPool(const MachineInstr &MI, unsigned OpNo)
Find any constant pool entry associated with a specific instruction operand.
This is an optimization pass for GlobalISel generic memory operations.
const X86FoldTableEntry * lookupBroadcastFoldTableBySize(unsigned MemOp, unsigned BroadcastBits)
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
DWARFExpression::Operation Op
constexpr unsigned BitWidth
FunctionPass * createX86FixupVectorConstants()
Return a pass that reduces the size of vector constant pool loads.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Machine model for scheduling, bundling, and heuristics.