62#define COMP_EVEX_DESC "Compressing EVEX instrs when possible"
63#define COMP_EVEX_NAME "x86-compress-evex"
65#define DEBUG_TYPE COMP_EVEX_NAME
71#define GET_X86_COMPRESS_EVEX_TABLE
72#include "X86GenInstrMapping.inc"
90char CompressEVEXLegacy::ID = 0;
95 if (
Reg >= X86::XMM16 &&
Reg <= X86::XMM31)
98 if (
Reg >= X86::YMM16 &&
Reg <= X86::YMM31)
114 "ZMM instructions should not be in the EVEX->VEX tables");
125 unsigned Opc =
MI.getOpcode();
127 case X86::VALIGNDZ128rri:
128 case X86::VALIGNDZ128rmi:
129 case X86::VALIGNQZ128rri:
130 case X86::VALIGNQZ128rmi: {
131 assert((NewOpc == X86::VPALIGNRrri || NewOpc == X86::VPALIGNRrmi) &&
132 "Unexpected new opcode!");
134 (
Opc == X86::VALIGNQZ128rri ||
Opc == X86::VALIGNQZ128rmi) ? 8 : 4;
136 Imm.setImm(Imm.getImm() * Scale);
139 case X86::VSHUFF32X4Z256rmi:
140 case X86::VSHUFF32X4Z256rri:
141 case X86::VSHUFF64X2Z256rmi:
142 case X86::VSHUFF64X2Z256rri:
143 case X86::VSHUFI32X4Z256rmi:
144 case X86::VSHUFI32X4Z256rri:
145 case X86::VSHUFI64X2Z256rmi:
146 case X86::VSHUFI64X2Z256rri: {
147 assert((NewOpc == X86::VPERM2F128rri || NewOpc == X86::VPERM2I128rri ||
148 NewOpc == X86::VPERM2F128rmi || NewOpc == X86::VPERM2I128rmi) &&
149 "Unexpected new opcode!");
151 int64_t ImmVal = Imm.getImm();
153 Imm.setImm(0x20 | ((ImmVal & 2) << 3) | (ImmVal & 1));
156 case X86::VRNDSCALEPDZ128rri:
157 case X86::VRNDSCALEPDZ128rmi:
158 case X86::VRNDSCALEPSZ128rri:
159 case X86::VRNDSCALEPSZ128rmi:
160 case X86::VRNDSCALEPDZ256rri:
161 case X86::VRNDSCALEPDZ256rmi:
162 case X86::VRNDSCALEPSZ256rri:
163 case X86::VRNDSCALEPSZ256rmi:
164 case X86::VRNDSCALESDZrri:
165 case X86::VRNDSCALESDZrmi:
166 case X86::VRNDSCALESSZrri:
167 case X86::VRNDSCALESSZrmi:
168 case X86::VRNDSCALESDZrri_Int:
169 case X86::VRNDSCALESDZrmi_Int:
170 case X86::VRNDSCALESSZrri_Int:
171 case X86::VRNDSCALESSZrmi_Int:
173 int64_t ImmVal = Imm.getImm();
175 if ((ImmVal & 0xf) != ImmVal)
184 unsigned VPMOVBits = 0;
186 case X86::VPMOVQ2MZ128kr:
189 case X86::VPMOVQ2MZ256kr:
190 case X86::VPMOVD2MZ128kr:
193 case X86::VPMOVD2MZ256kr:
196 case X86::VPMOVB2MZ128kr:
199 case X86::VPMOVB2MZ256kr:
206 unsigned KMOVSize = 0;
221 return KMOVSize < VPMOVBits;
234 unsigned Opc =
MI.getOpcode();
235 if (
Opc != X86::VPMOVD2MZ128kr &&
Opc != X86::VPMOVD2MZ256kr &&
236 Opc != X86::VPMOVQ2MZ128kr &&
Opc != X86::VPMOVQ2MZ256kr &&
237 Opc != X86::VPMOVB2MZ128kr &&
Opc != X86::VPMOVB2MZ256kr)
241 Register SrcVecReg =
MI.getOperand(1).getReg();
243 unsigned MovMskOpc = 0;
245 case X86::VPMOVD2MZ128kr:
246 MovMskOpc = X86::VMOVMSKPSrr;
248 case X86::VPMOVD2MZ256kr:
249 MovMskOpc = X86::VMOVMSKPSYrr;
251 case X86::VPMOVQ2MZ128kr:
252 MovMskOpc = X86::VMOVMSKPDrr;
254 case X86::VPMOVQ2MZ256kr:
255 MovMskOpc = X86::VMOVMSKPDYrr;
257 case X86::VPMOVB2MZ128kr:
258 MovMskOpc = X86::VPMOVMSKBrr;
260 case X86::VPMOVB2MZ256kr:
261 MovMskOpc = X86::VPMOVMSKBYrr;
271 if (CurMI.modifiesRegister(MaskReg,
TRI)) {
277 if (CurMI.readsRegister(MaskReg,
TRI)) {
281 unsigned UseOpc = CurMI.getOpcode();
282 bool IsKMOV = UseOpc == X86::KMOVBrk || UseOpc == X86::KMOVWrk ||
283 UseOpc == X86::KMOVDrk;
285 if (IsKMOV && CurMI.getOperand(1).getReg() == MaskReg &&
305 if (MO.getParent()->getParent() != &
MBB)
334 auto IsRedundantNewDataDest = [&](
unsigned &
Opc) {
342 X86::isCFCMOVCC(
MI.getOpcode()))
349 if (!
Desc.isCommutable() ||
Desc.getNumOperands() < 3 ||
350 !
MI.getOperand(2).isReg() ||
MI.getOperand(2).getReg() != Reg0)
353 ST.getInstrInfo()->commuteInstruction(
MI,
false, 1, 2);
354 Opc =
MI.getOpcode();
369 unsigned Opc =
MI.getOpcode();
370 bool IsSetZUCCm =
Opc == X86::SETZUCCm;
374 bool IsNDLike = IsND ||
Opc == X86::MOVBE32rr ||
Opc == X86::MOVBE64rr;
375 bool IsRedundantNDD = IsNDLike ? IsRedundantNewDataDest(
Opc) :
false;
377 auto GetCompressedOpc = [&](
unsigned Opc) ->
unsigned {
380 if (
I == Table.
end() ||
I->OldOpc !=
Opc)
390 if (IsRedundantNDD) {
400 if (
MI.definesRegister(Super,
nullptr))
401 IsRedundantNDD =
false;
409 "Unexpected NDD instruction with relocation!");
410 }
else if (
Opc == X86::ADD32ri_ND ||
Opc == X86::ADD64ri32_ND ||
411 Opc == X86::ADD32rr_ND ||
Opc == X86::ADD64rr_ND) {
416 MI.registerDefIsDead(X86::EFLAGS,
nullptr)) {
419 bool Is32BitReg =
Opc == X86::ADD32ri_ND ||
Opc == X86::ADD32rr_ND;
421 ST.getInstrInfo()->get(Is32BitReg ? X86::LEA64_32r : X86::LEA64r);
427 if (
Opc == X86::ADD32ri_ND ||
Opc == X86::ADD64ri32_ND)
434 MI.removeFromParent();
441 unsigned NewOpc = IsRedundantNDD
443 : ((IsNDLike && ST.hasNF() &&
444 MI.registerDefIsDead(X86::EFLAGS,
nullptr))
446 : GetCompressedOpc(
Opc));
451 const MCInstrDesc &NewDesc = ST.getInstrInfo()->get(NewOpc);
464 "Unknown EVEX2EVEX compression");
469 MI.setAsmPrinterFlag(AsmComment);
471 MI.tieOperands(0, 1);
480 static std::atomic<bool> TableChecked(
false);
481 if (!TableChecked.load(std::memory_order_relaxed)) {
483 "X86CompressEVEXTable is not sorted!");
484 TableChecked.store(
true, std::memory_order_relaxed);
488 if (!ST.hasAVX512() && !ST.hasEGPR() && !ST.hasNDD() && !ST.hasZU())
501 MI->eraseFromParent();
512 return new CompressEVEXLegacy();
unsigned const MachineRegisterInfo * MRI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
const HexagonInstrInfo * TII
Register const TargetRegisterInfo * TRI
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
This file defines the SmallVector class.
static bool tryCompressVPMOVPattern(MachineInstr &MI, MachineBasicBlock &MBB, const X86Subtarget &ST, SmallVectorImpl< MachineInstr * > &ToErase)
static bool performCustomAdjustments(MachineInstr &MI, unsigned NewOpc)
static bool CompressEVEXImpl(MachineInstr &MI, MachineBasicBlock &MBB, const X86Subtarget &ST, SmallVectorImpl< MachineInstr * > &ToErase)
cl::opt< bool > X86EnableAPXForRelocation
static bool isKMovNarrowing(unsigned VPMOVOpc, unsigned KMOVOpc)
static bool runOnMF(MachineFunction &MF)
static bool usesExtendedRegister(const MachineInstr &MI)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Represents analyses that only rely on functions' control flow.
FunctionPass class - This class is used to implement most global optimizations.
Describe properties that are true of each instruction in the target description file.
Wrapper class representing physical registers. Should be passed by value.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
bool modifiesRegister(Register Reg, const TargetRegisterInfo *TRI) const
Return true if the MachineInstr modifies (fully define or partially define) the specified register.
void setAsmPrinterFlag(uint8_t Flag)
Set a flag for the AsmPrinter.
LLVM_ABI void setDesc(const MCInstrDesc &TID)
Replace the instruction descriptor (thus opcode) of the current instruction with a new one.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
LLVM_ABI void setReg(Register Reg)
Change the register this operand corresponds to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
A set of analyses that are preserved following a run of a transformation pass.
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Wrapper class representing virtual and physical registers.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
Pass manager infrastructure for declaring and invalidating analyses.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
bool isZMMReg(MCRegister Reg)
bool hasNewDataDest(uint64_t TSFlags)
@ EVEX
EVEX - Specifies that this instruction use EVEX form which provides syntax support up to 32 512-bit r...
@ VEX
VEX - encoding using 0xC4/0xC5.
@ LEGACY
LEGACY - encoding using REX/REX2 or w/o opcode prefix.
bool isApxExtendedReg(MCRegister Reg)
int getFirstAddrOperandIdx(const MachineInstr &MI)
Return the index of the instruction's first address operand, if it has a memory reference,...
unsigned getNonNDVariant(unsigned Opc)
unsigned getNFVariant(unsigned Opc)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createX86CompressEVEXLegacyPass()
static bool isAddMemInstrWithRelocation(const MachineInstr &MI)
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
MCRegister getX86SubSuperRegister(MCRegister Reg, unsigned Size, bool High=false)
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
auto lower_bound(R &&Range, T &&Value)
Provide wrappers to std::lower_bound which take ranges instead of having to pass begin/end explicitly...
ArrayRef(const T &OneElt) -> ArrayRef< T >