26#define DEBUG_TYPE "x86-fixup-vector-constants"
28STATISTIC(NumInstChanges,
"Number of instructions changes");
38 return "X86 Fixup Vector Constants";
48 MachineFunctionProperties::Property::NoVRegs);
58char X86FixupVectorConstantsPass::ID = 0;
63 return new X86FixupVectorConstantsPass();
68 if (!
Op.isCPI() ||
Op.getOffset() != 0)
72 MI.getParent()->getParent()->getConstantPool()->getConstants();
85 unsigned NumBits =
C->getType()->getPrimitiveSizeInBits();
87 if (
auto *CInt = dyn_cast<ConstantInt>(
C))
88 return CInt->getValue();
90 if (
auto *CFP = dyn_cast<ConstantFP>(
C))
91 return CFP->getValue().bitcastToAPInt();
93 if (
auto *CV = dyn_cast<ConstantVector>(
C)) {
94 if (
auto *CVSplat = CV->getSplatValue(
true)) {
96 assert((NumBits % Bits->getBitWidth()) == 0 &&
"Illegal splat");
102 if (
auto *CDS = dyn_cast<ConstantDataSequential>(
C)) {
103 bool IsInteger = CDS->getElementType()->isIntegerTy();
104 bool IsFloat = CDS->getElementType()->isHalfTy() ||
105 CDS->getElementType()->isBFloatTy() ||
106 CDS->getElementType()->isFloatTy() ||
107 CDS->getElementType()->isDoubleTy();
108 if (IsInteger || IsFloat) {
110 unsigned EltBits = CDS->getElementType()->getPrimitiveSizeInBits();
111 for (
unsigned I = 0,
E = CDS->getNumElements();
I !=
E; ++
I) {
113 Bits.insertBits(CDS->getElementAsAPInt(
I),
I * EltBits);
115 Bits.insertBits(CDS->getElementAsAPFloat(
I).bitcastToAPInt(),
128 unsigned SplatBitWidth) {
129 const Type *Ty =
C->getType();
131 "Illegal splat width");
134 if (Bits->isSplat(SplatBitWidth))
135 return Bits->trunc(SplatBitWidth);
139 if (
auto *CV = dyn_cast<ConstantVector>(
C)) {
140 unsigned NumOps = CV->getNumOperands();
142 unsigned NumScaleOps = SplatBitWidth / NumEltsBits;
143 if ((SplatBitWidth % NumEltsBits) == 0) {
147 for (
unsigned Idx = 0;
Idx != NumOps; ++
Idx) {
149 if (isa<UndefValue>(Elt))
151 unsigned SplatIdx =
Idx % NumScaleOps;
152 if (!Sequence[SplatIdx] || Sequence[SplatIdx] == Elt) {
153 Sequence[SplatIdx] = Elt;
162 for (
unsigned I = 0;
I != NumScaleOps; ++
I) {
166 SplatBits.
insertBits(*Bits,
I * Bits->getBitWidth());
182 unsigned SplatBitWidth) {
189 const Type *OriginalType =
C->getType();
192 NumSclBits = std::min<unsigned>(NumSclBits, SplatBitWidth);
194 if (NumSclBits == 8) {
196 for (
unsigned I = 0;
I != SplatBitWidth;
I += 8)
201 if (NumSclBits == 16) {
203 for (
unsigned I = 0;
I != SplatBitWidth;
I += 16)
210 if (NumSclBits == 32) {
212 for (
unsigned I = 0;
I != SplatBitWidth;
I += 32)
221 for (
unsigned I = 0;
I != SplatBitWidth;
I += 64)
228bool X86FixupVectorConstantsPass::processInstruction(
MachineFunction &MF,
231 unsigned Opc =
MI.getOpcode();
233 bool HasDQI =
ST->hasDQI();
234 bool HasBWI =
ST->hasBWI();
236 auto ConvertToBroadcast = [&](
unsigned OpBcst256,
unsigned OpBcst128,
237 unsigned OpBcst64,
unsigned OpBcst32,
238 unsigned OpBcst16,
unsigned OpBcst8,
239 unsigned OperandNo) {
241 "Unexpected number of operands!");
246 std::pair<unsigned, unsigned> Broadcasts[] = {
247 {8, OpBcst8}, {16, OpBcst16}, {32, OpBcst32},
248 {64, OpBcst64}, {128, OpBcst128}, {256, OpBcst256},
250 for (
auto [
BitWidth, OpBcst] : Broadcasts) {
257 MI.setDesc(
TII->get(OpBcst));
280 return ConvertToBroadcast(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0,
282 case X86::VMOVAPDYrm:
283 case X86::VMOVAPSYrm:
284 case X86::VMOVUPDYrm:
285 case X86::VMOVUPSYrm:
286 return ConvertToBroadcast(0, X86::VBROADCASTF128, X86::VBROADCASTSDYrm,
287 X86::VBROADCASTSSYrm, 0, 0, 1);
288 case X86::VMOVAPDZ128rm:
289 case X86::VMOVAPSZ128rm:
290 case X86::VMOVUPDZ128rm:
291 case X86::VMOVUPSZ128rm:
292 return ConvertToBroadcast(0, 0, X86::VMOVDDUPZ128rm,
293 X86::VBROADCASTSSZ128rm, 0, 0, 1);
294 case X86::VMOVAPDZ256rm:
295 case X86::VMOVAPSZ256rm:
296 case X86::VMOVUPDZ256rm:
297 case X86::VMOVUPSZ256rm:
298 return ConvertToBroadcast(
299 0, HasDQI ? X86::VBROADCASTF64X2Z128rm : X86::VBROADCASTF32X4Z256rm,
300 X86::VBROADCASTSDZ256rm, X86::VBROADCASTSSZ256rm, 0, 0, 1);
301 case X86::VMOVAPDZrm:
302 case X86::VMOVAPSZrm:
303 case X86::VMOVUPDZrm:
304 case X86::VMOVUPSZrm:
305 return ConvertToBroadcast(
306 HasDQI ? X86::VBROADCASTF32X8rm : X86::VBROADCASTF64X4rm,
307 HasDQI ? X86::VBROADCASTF64X2rm : X86::VBROADCASTF32X4rm,
308 X86::VBROADCASTSDZrm, X86::VBROADCASTSSZrm, 0, 0, 1);
313 return ConvertToBroadcast(0, 0, X86::VPBROADCASTQrm, X86::VPBROADCASTDrm,
314 X86::VPBROADCASTWrm, X86::VPBROADCASTBrm, 1);
315 return ConvertToBroadcast(0, 0, X86::VMOVDDUPrm, X86::VBROADCASTSSrm, 0, 0,
317 case X86::VMOVDQAYrm:
318 case X86::VMOVDQUYrm:
320 return ConvertToBroadcast(0, X86::VBROADCASTI128, X86::VPBROADCASTQYrm,
321 X86::VPBROADCASTDYrm, X86::VPBROADCASTWYrm,
322 X86::VPBROADCASTBYrm, 1);
323 return ConvertToBroadcast(0, X86::VBROADCASTF128, X86::VBROADCASTSDYrm,
324 X86::VBROADCASTSSYrm, 0, 0, 1);
325 case X86::VMOVDQA32Z128rm:
326 case X86::VMOVDQA64Z128rm:
327 case X86::VMOVDQU32Z128rm:
328 case X86::VMOVDQU64Z128rm:
329 return ConvertToBroadcast(0, 0, X86::VPBROADCASTQZ128rm,
330 X86::VPBROADCASTDZ128rm,
331 HasBWI ? X86::VPBROADCASTWZ128rm : 0,
332 HasBWI ? X86::VPBROADCASTBZ128rm : 0, 1);
333 case X86::VMOVDQA32Z256rm:
334 case X86::VMOVDQA64Z256rm:
335 case X86::VMOVDQU32Z256rm:
336 case X86::VMOVDQU64Z256rm:
337 return ConvertToBroadcast(
338 0, HasDQI ? X86::VBROADCASTI64X2Z128rm : X86::VBROADCASTI32X4Z256rm,
339 X86::VPBROADCASTQZ256rm, X86::VPBROADCASTDZ256rm,
340 HasBWI ? X86::VPBROADCASTWZ256rm : 0,
341 HasBWI ? X86::VPBROADCASTBZ256rm : 0, 1);
342 case X86::VMOVDQA32Zrm:
343 case X86::VMOVDQA64Zrm:
344 case X86::VMOVDQU32Zrm:
345 case X86::VMOVDQU64Zrm:
346 return ConvertToBroadcast(
347 HasDQI ? X86::VBROADCASTI32X8rm : X86::VBROADCASTI64X4rm,
348 HasDQI ? X86::VBROADCASTI64X2rm : X86::VBROADCASTI32X4rm,
349 X86::VPBROADCASTQZrm, X86::VPBROADCASTDZrm,
350 HasBWI ? X86::VPBROADCASTWZrm : 0, HasBWI ? X86::VPBROADCASTBZrm : 0,
357 unsigned OpBcst32 = 0, OpBcst64 = 0;
358 unsigned OpNoBcst32 = 0, OpNoBcst64 = 0;
361 OpBcst32 = Mem2Bcst->DstOp;
366 OpBcst64 = Mem2Bcst->DstOp;
369 assert(((OpBcst32 == 0) || (OpBcst64 == 0) || (OpNoBcst32 == OpNoBcst64)) &&
370 "OperandNo mismatch");
372 if (OpBcst32 || OpBcst64) {
373 unsigned OpNo = OpBcst32 == 0 ? OpNoBcst64 : OpNoBcst32;
374 return ConvertToBroadcast(0, 0, OpBcst64, OpBcst32, 0, 0, OpNo);
381bool X86FixupVectorConstantsPass::runOnMachineFunction(
MachineFunction &MF) {
383 bool Changed =
false;
385 TII =
ST->getInstrInfo();
386 SM = &
ST->getSchedModel();
390 if (processInstruction(MF,
MBB,
MI)) {
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx.
const HexagonInstrInfo * TII
This file declares the MachineConstantPool class which is an abstract constant pool to keep track of ...
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static std::optional< APInt > getSplatableConstant(const Constant *C, unsigned SplatBitWidth)
static std::optional< APInt > extractConstantBits(const Constant *C)
static const Constant * getConstantFromPool(const MachineInstr &MI, const MachineOperand &Op)
static Constant * rebuildSplatableConstant(const Constant *C, unsigned SplatBitWidth)
Class for arbitrary precision integers.
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
void insertBits(const APInt &SubBits, unsigned bitPosition)
Insert the bits from a smaller APInt starting at bitPosition.
static APInt getZero(unsigned numBits)
Get the '0' value for the specified bit-width.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
static Constant * get(LLVMContext &Context, ArrayRef< uint8_t > Elts)
get() constructors - Return a constant with vector type with an element count and element type matchi...
static Constant * getFP(Type *ElementType, ArrayRef< uint16_t > Elts)
getFP() constructors - Return a constant of vector type with a float element type taken from argument...
This is an important base class in LLVM.
Constant * getAggregateElement(unsigned Elt) const
For aggregates (struct/array/vector) return the constant that corresponds to the specified element if...
This class represents an Operation in the Expression.
FunctionPass class - This class is used to implement most global optimizations.
This class is a data container for one entry in a MachineConstantPool.
bool isMachineConstantPoolEntry() const
isMachineConstantPoolEntry - Return true if the MachineConstantPoolEntry is indeed a target specific ...
const Constant * ConstVal
union llvm::MachineConstantPoolEntry::@195 Val
The constant itself.
The MachineConstantPool class keeps track of constants referenced by a function which must be spilled...
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
The instances of the Type class are immutable: once they are created, they are never changed.
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
bool is16bitFPTy() const
Return true if this is a 16-bit float type.
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
bool isDoubleTy() const
Return true if this is 'double', a 64-bit IEEE fp type.
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
@ C
The default llvm calling convention, compatible with C.
@ AddrNumOperands
AddrNumOperands - Total number of operands in a memory reference.
This is an optimization pass for GlobalISel generic memory operations.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
const X86MemoryFoldTableEntry * lookupBroadcastFoldTable(unsigned MemOp, unsigned BroadcastBits)
constexpr unsigned BitWidth
FunctionPass * createX86FixupVectorConstants()
Return a pass that reduces the size of vector constant pool loads.
This struct is a compact representation of a valid (non-zero power of two) alignment.
Machine model for scheduling, bundling, and heuristics.