42#define DEBUG_TYPE "loongarch-opt-w-instrs"
43#define LOONGARCH_OPT_W_INSTRS_NAME "LoongArch Optimize W Instructions"
45STATISTIC(NumRemovedSExtW,
"Number of removed sign-extensions");
47 "Number of instructions transformed to W-ops");
51 cl::desc(
"Disable removal of sign-extend insn"),
55 cl::desc(
"Disable convert to D suffix"),
87char LoongArchOptWInstrs::ID = 0;
92 return new LoongArchOptWInstrs();
105 Worklist.
push_back(std::make_pair(&OrigMI, OrigBits));
107 while (!Worklist.
empty()) {
110 unsigned Bits =
P.second;
116 if (
MI->getNumExplicitDefs() != 1)
119 Register DestReg =
MI->getOperand(0).getReg();
123 for (
auto &UserOp :
MRI.use_nodbg_operands(DestReg)) {
125 unsigned OpIdx = UserOp.getOperandNo();
132 case LoongArch::ADD_W:
133 case LoongArch::ADDI_W:
134 case LoongArch::SUB_W:
135 case LoongArch::ALSL_W:
136 case LoongArch::ALSL_WU:
137 case LoongArch::MUL_W:
138 case LoongArch::MULH_W:
139 case LoongArch::MULH_WU:
140 case LoongArch::MULW_D_W:
141 case LoongArch::MULW_D_WU:
147 case LoongArch::SLL_W:
148 case LoongArch::SLLI_W:
149 case LoongArch::SRL_W:
150 case LoongArch::SRLI_W:
151 case LoongArch::SRA_W:
152 case LoongArch::SRAI_W:
153 case LoongArch::ROTR_W:
154 case LoongArch::ROTRI_W:
155 case LoongArch::CLO_W:
156 case LoongArch::CLZ_W:
157 case LoongArch::CTO_W:
158 case LoongArch::CTZ_W:
159 case LoongArch::BYTEPICK_W:
160 case LoongArch::REVB_2H:
161 case LoongArch::BITREV_4B:
162 case LoongArch::BITREV_W:
163 case LoongArch::BSTRINS_W:
164 case LoongArch::BSTRPICK_W:
165 case LoongArch::CRC_W_W_W:
166 case LoongArch::CRCC_W_W_W:
167 case LoongArch::MOVGR2FCSR:
168 case LoongArch::MOVGR2FRH_W:
169 case LoongArch::MOVGR2FR_W_64:
173 case LoongArch::MOVGR2CF:
177 case LoongArch::EXT_W_B:
181 case LoongArch::EXT_W_H:
186 case LoongArch::SRLI_D: {
192 Worklist.
push_back(std::make_pair(UserMI, Bits - ShAmt));
200 case LoongArch::SLLI_D:
203 Worklist.
push_back(std::make_pair(UserMI, Bits));
205 case LoongArch::ANDI: {
209 Worklist.
push_back(std::make_pair(UserMI, Bits));
212 case LoongArch::ORI: {
214 if (Bits >= (
unsigned)llvm::bit_width<uint64_t>(~Imm))
216 Worklist.
push_back(std::make_pair(UserMI, Bits));
220 case LoongArch::SLL_D:
223 if (Bits >=
Log2_32(ST.getGRLen()))
227 Worklist.
push_back(std::make_pair(UserMI, Bits));
230 case LoongArch::SRA_D:
231 case LoongArch::SRL_D:
232 case LoongArch::ROTR_D:
234 if (OpIdx == 2 && Bits >=
Log2_32(ST.getGRLen()))
238 case LoongArch::ST_B:
239 case LoongArch::STX_B:
240 case LoongArch::STGT_B:
241 case LoongArch::STLE_B:
242 case LoongArch::IOCSRWR_B:
244 if (OpIdx == 0 && Bits >= 8)
247 case LoongArch::ST_H:
248 case LoongArch::STX_H:
249 case LoongArch::STGT_H:
250 case LoongArch::STLE_H:
251 case LoongArch::IOCSRWR_H:
253 if (OpIdx == 0 && Bits >= 16)
256 case LoongArch::ST_W:
257 case LoongArch::STX_W:
258 case LoongArch::SCREL_W:
259 case LoongArch::STPTR_W:
260 case LoongArch::STGT_W:
261 case LoongArch::STLE_W:
262 case LoongArch::IOCSRWR_W:
264 if (OpIdx == 0 && Bits >= 32)
268 case LoongArch::CRC_W_B_W:
269 case LoongArch::CRCC_W_B_W:
270 if ((OpIdx == 1 && Bits >= 8) || (OpIdx == 2 && Bits >= 32))
273 case LoongArch::CRC_W_H_W:
274 case LoongArch::CRCC_W_H_W:
275 if ((OpIdx == 1 && Bits >= 16) || (OpIdx == 2 && Bits >= 32))
278 case LoongArch::CRC_W_D_W:
279 case LoongArch::CRCC_W_D_W:
280 if (OpIdx == 2 && Bits >= 32)
286 case LoongArch::COPY:
288 case LoongArch::ADD_D:
289 case LoongArch::ADDI_D:
290 case LoongArch::SUB_D:
291 case LoongArch::MUL_D:
296 case LoongArch::XORI:
297 case LoongArch::ANDN:
299 Worklist.
push_back(std::make_pair(UserMI, Bits));
302 case LoongArch::MASKNEZ:
303 case LoongArch::MASKEQZ:
306 Worklist.
push_back(std::make_pair(UserMI, Bits));
325 switch (
MI.getOpcode()) {
327 case LoongArch::ADD_W:
328 case LoongArch::SUB_W:
329 case LoongArch::ADDI_W:
330 case LoongArch::ALSL_W:
331 case LoongArch::LU12I_W:
333 case LoongArch::SLTU:
334 case LoongArch::SLTI:
335 case LoongArch::SLTUI:
336 case LoongArch::ANDI:
337 case LoongArch::MUL_W:
338 case LoongArch::MULH_W:
339 case LoongArch::MULH_WU:
340 case LoongArch::DIV_W:
341 case LoongArch::MOD_W:
342 case LoongArch::DIV_WU:
343 case LoongArch::MOD_WU:
344 case LoongArch::SLL_W:
345 case LoongArch::SRL_W:
346 case LoongArch::SRA_W:
347 case LoongArch::ROTR_W:
348 case LoongArch::SLLI_W:
349 case LoongArch::SRLI_W:
350 case LoongArch::SRAI_W:
351 case LoongArch::ROTRI_W:
352 case LoongArch::EXT_W_B:
353 case LoongArch::EXT_W_H:
354 case LoongArch::CLO_W:
355 case LoongArch::CLZ_W:
356 case LoongArch::CTO_W:
357 case LoongArch::CTZ_W:
358 case LoongArch::BYTEPICK_W:
359 case LoongArch::REVB_2H:
360 case LoongArch::BITREV_4B:
361 case LoongArch::BITREV_W:
362 case LoongArch::BSTRINS_W:
363 case LoongArch::BSTRPICK_W:
364 case LoongArch::LD_B:
365 case LoongArch::LD_H:
366 case LoongArch::LD_W:
367 case LoongArch::LD_BU:
368 case LoongArch::LD_HU:
369 case LoongArch::LL_W:
370 case LoongArch::LLACQ_W:
371 case LoongArch::RDTIMEL_W:
372 case LoongArch::RDTIMEH_W:
373 case LoongArch::CPUCFG:
374 case LoongArch::LDX_B:
375 case LoongArch::LDX_H:
376 case LoongArch::LDX_W:
377 case LoongArch::LDX_BU:
378 case LoongArch::LDX_HU:
379 case LoongArch::LDPTR_W:
380 case LoongArch::LDGT_B:
381 case LoongArch::LDGT_H:
382 case LoongArch::LDGT_W:
383 case LoongArch::LDLE_B:
384 case LoongArch::LDLE_H:
385 case LoongArch::LDLE_W:
386 case LoongArch::AMSWAP_B:
387 case LoongArch::AMSWAP_H:
388 case LoongArch::AMSWAP_W:
389 case LoongArch::AMADD_B:
390 case LoongArch::AMADD_H:
391 case LoongArch::AMADD_W:
392 case LoongArch::AMAND_W:
393 case LoongArch::AMOR_W:
394 case LoongArch::AMXOR_W:
395 case LoongArch::AMMAX_W:
396 case LoongArch::AMMIN_W:
397 case LoongArch::AMMAX_WU:
398 case LoongArch::AMMIN_WU:
399 case LoongArch::AMSWAP__DB_B:
400 case LoongArch::AMSWAP__DB_H:
401 case LoongArch::AMSWAP__DB_W:
402 case LoongArch::AMADD__DB_B:
403 case LoongArch::AMADD__DB_H:
404 case LoongArch::AMADD__DB_W:
405 case LoongArch::AMAND__DB_W:
406 case LoongArch::AMOR__DB_W:
407 case LoongArch::AMXOR__DB_W:
408 case LoongArch::AMMAX__DB_W:
409 case LoongArch::AMMIN__DB_W:
410 case LoongArch::AMMAX__DB_WU:
411 case LoongArch::AMMIN__DB_WU:
412 case LoongArch::AMCAS_B:
413 case LoongArch::AMCAS_H:
414 case LoongArch::AMCAS_W:
415 case LoongArch::AMCAS__DB_B:
416 case LoongArch::AMCAS__DB_H:
417 case LoongArch::AMCAS__DB_W:
418 case LoongArch::CRC_W_B_W:
419 case LoongArch::CRC_W_H_W:
420 case LoongArch::CRC_W_W_W:
421 case LoongArch::CRC_W_D_W:
422 case LoongArch::CRCC_W_B_W:
423 case LoongArch::CRCC_W_H_W:
424 case LoongArch::CRCC_W_W_W:
425 case LoongArch::CRCC_W_D_W:
426 case LoongArch::IOCSRRD_B:
427 case LoongArch::IOCSRRD_H:
428 case LoongArch::IOCSRRD_W:
429 case LoongArch::MOVFR2GR_S:
430 case LoongArch::MOVFCSR2GR:
431 case LoongArch::MOVCF2GR:
432 case LoongArch::MOVFRH2GR_S:
433 case LoongArch::MOVFR2GR_S_64:
438 case LoongArch::SRAI_D:
439 return MI.getOperand(2).getImm() >= 32;
440 case LoongArch::SRLI_D:
441 return MI.getOperand(2).getImm() > 32;
443 case LoongArch::ADDI_D:
445 return MI.getOperand(1).isReg() &&
446 MI.getOperand(1).getReg() == LoongArch::R0;
448 case LoongArch::BSTRPICK_D:
449 return MI.getOperand(2).getImm() < 31;
451 case LoongArch::COPY:
452 return MI.getOperand(1).getReg() == LoongArch::R0;
454 case LoongArch::PseudoMaskedAtomicSwap32:
455 case LoongArch::PseudoAtomicSwap32:
456 case LoongArch::PseudoMaskedAtomicLoadAdd32:
457 case LoongArch::PseudoMaskedAtomicLoadSub32:
458 case LoongArch::PseudoAtomicLoadNand32:
459 case LoongArch::PseudoMaskedAtomicLoadNand32:
460 case LoongArch::PseudoAtomicLoadAdd32:
461 case LoongArch::PseudoAtomicLoadSub32:
462 case LoongArch::PseudoAtomicLoadAnd32:
463 case LoongArch::PseudoAtomicLoadOr32:
464 case LoongArch::PseudoAtomicLoadXor32:
465 case LoongArch::PseudoMaskedAtomicLoadUMax32:
466 case LoongArch::PseudoMaskedAtomicLoadUMin32:
467 case LoongArch::PseudoCmpXchg32:
468 case LoongArch::PseudoMaskedCmpXchg32:
469 case LoongArch::PseudoMaskedAtomicLoadMax32:
470 case LoongArch::PseudoMaskedAtomicLoadMin32:
483 auto AddRegToWorkList = [&](
Register SrcReg) {
490 if (!AddRegToWorkList(SrcReg))
493 while (!Worklist.
empty()) {
497 if (!Visited.
insert(Reg).second)
504 int OpNo =
MI->findRegisterDefOperandIdx(Reg,
nullptr);
505 assert(OpNo != -1 &&
"Couldn't find register");
512 switch (
MI->getOpcode()) {
516 case LoongArch::COPY: {
523 if (
MI->getParent() == &MF->
front()) {
529 Register CopySrcReg =
MI->getOperand(1).getReg();
530 if (CopySrcReg == LoongArch::R4) {
540 auto II =
MI->getIterator();
554 auto *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
558 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
559 unsigned BitWidth = IntTy->getBitWidth();
560 if ((
BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) ||
561 (
BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt)))
565 if (!AddRegToWorkList(CopySrcReg))
572 case LoongArch::MOD_D:
573 case LoongArch::ANDI:
575 case LoongArch::XORI:
579 if (!AddRegToWorkList(
MI->getOperand(1).getReg()))
583 case LoongArch::MOD_DU:
587 case LoongArch::ANDN:
589 case LoongArch::PHI: {
595 unsigned B = 1, E = 3,
D = 1;
596 switch (
MI->getOpcode()) {
598 E =
MI->getNumOperands();
603 for (
unsigned I =
B;
I != E;
I +=
D) {
604 if (!
MI->getOperand(
I).isReg())
607 if (!AddRegToWorkList(
MI->getOperand(
I).getReg()))
614 case LoongArch::MASKEQZ:
615 case LoongArch::MASKNEZ:
618 if (!AddRegToWorkList(
MI->getOperand(1).getReg()))
624 case LoongArch::SLLI_D:
626 if (
MI->getOperand(2).getImm() >= 32)
629 case LoongArch::ADDI_D:
630 case LoongArch::ADD_D:
631 case LoongArch::LD_D:
632 case LoongArch::LD_WU:
633 case LoongArch::MUL_D:
634 case LoongArch::SUB_D:
650 case LoongArch::ADDI_D:
651 return LoongArch::ADDI_W;
652 case LoongArch::ADD_D:
653 return LoongArch::ADD_W;
654 case LoongArch::LD_D:
655 case LoongArch::LD_WU:
656 return LoongArch::LD_W;
657 case LoongArch::MUL_D:
658 return LoongArch::MUL_W;
659 case LoongArch::SLLI_D:
660 return LoongArch::SLLI_W;
661 case LoongArch::SUB_D:
662 return LoongArch::SUB_W;
675 bool MadeChange =
false;
694 if (!
MRI.constrainRegClass(SrcReg,
MRI.getRegClass(DstReg)))
700 Fixable->setDesc(
TII.get(
getWOp(Fixable->getOpcode())));
701 Fixable->clearFlag(MachineInstr::MIFlag::NoSWrap);
702 Fixable->clearFlag(MachineInstr::MIFlag::NoUWrap);
703 Fixable->clearFlag(MachineInstr::MIFlag::IsExact);
705 ++NumTransformedToWInstrs;
709 MRI.replaceRegWith(DstReg, SrcReg);
710 MRI.clearKillFlags(SrcReg);
711 MI.eraseFromParent();
724 bool MadeChange =
false;
728 switch (
MI.getOpcode()) {
731 case LoongArch::ADDI_W:
732 Opc = LoongArch::ADDI_D;
737 MI.setDesc(
TII.get(Opc));
750 bool MadeChange =
false;
755 switch (
MI.getOpcode()) {
758 case LoongArch::ADD_D:
759 WOpc = LoongArch::ADD_W;
761 case LoongArch::ADDI_D:
762 WOpc = LoongArch::ADDI_W;
764 case LoongArch::SUB_D:
765 WOpc = LoongArch::SUB_W;
767 case LoongArch::MUL_D:
768 WOpc = LoongArch::MUL_W;
770 case LoongArch::SLLI_D:
772 if (
MI.getOperand(2).getImm() >= 32)
774 WOpc = LoongArch::SLLI_W;
776 case LoongArch::LD_D:
777 case LoongArch::LD_WU:
778 WOpc = LoongArch::LD_W;
784 MI.setDesc(
TII.get(WOpc));
785 MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
786 MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
787 MI.clearFlag(MachineInstr::MIFlag::IsExact);
789 ++NumTransformedToWInstrs;
809 bool MadeChange =
false;
810 MadeChange |= removeSExtWInstrs(MF,
TII, ST,
MRI);
813 MadeChange |= convertToDSuffixes(MF,
TII, ST,
MRI);
815 if (
ST.preferWInst())
816 MadeChange |= convertToWSuffixes(MF,
TII, ST,
MRI);
unsigned const MachineRegisterInfo * MRI
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
const HexagonInstrInfo * TII
static bool isSignExtendingOpW(const MachineInstr &MI, const MachineRegisterInfo &MRI, unsigned OpNo)
static cl::opt< bool > DisableSExtWRemoval("loongarch-disable-sextw-removal", cl::desc("Disable removal of sign-extend insn"), cl::init(false), cl::Hidden)
static bool hasAllWUsers(const MachineInstr &OrigMI, const LoongArchSubtarget &ST, const MachineRegisterInfo &MRI)
static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST, const MachineRegisterInfo &MRI, SmallPtrSetImpl< MachineInstr * > &FixableDef)
#define LOONGARCH_OPT_W_INSTRS_NAME
static bool hasAllNBitUsers(const MachineInstr &OrigMI, const LoongArchSubtarget &ST, const MachineRegisterInfo &MRI, unsigned OrigBits)
static unsigned getWOp(unsigned Opcode)
static cl::opt< bool > DisableCvtToDSuffix("loongarch-disable-cvt-to-d-suffix", cl::desc("Disable convert to D suffix"), cl::init(false), cl::Hidden)
uint64_t IntrinsicInst * II
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
FunctionPass class - This class is used to implement most global optimizations.
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
bool isSExt32Register(Register Reg) const
instr_iterator instr_begin()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
bool isCall(QueryType Type=AnyInBundle) const
const MachineOperand & getOperand(unsigned i) const
const GlobalValue * getGlobal() const
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Wrapper class representing virtual and physical registers.
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
bool isSEXT_W(const MachineInstr &MI)
initializer< Ty > init(const Ty &Val)
This is an optimization pass for GlobalISel generic memory operations.
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
FunctionPass * createLoongArchOptWInstrsPass()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
constexpr unsigned BitWidth