LLVM 20.0.0git
LoongArchOptWInstrs.cpp
Go to the documentation of this file.
1//===- LoongArchOptWInstrs.cpp - MI W instruction optimizations ----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===---------------------------------------------------------------------===//
8//
9// This pass does some optimizations for *W instructions at the MI level.
10//
11// First it removes unneeded sext(addi.w rd, rs, 0) instructions. Either
12// because the sign extended bits aren't consumed or because the input was
13// already sign extended by an earlier instruction.
14//
15// Then:
16// 1. Unless explicitly disabled or the target prefers instructions with W suffix,
17// it removes the -w suffix from opw instructions whenever all users are
18// dependent only on the lower word of the result of the instruction.
19// The cases handled are:
20// * addi.w because it helps reduce test differences between LA32 and LA64
21// w/o being a pessimization.
22//
23// 2. Or if explicitly enabled or the target prefers instructions with W suffix,
24// it adds the W suffix to the instruction whenever all users are dependent
25// only on the lower word of the result of the instruction.
26// The cases handled are:
27// * add.d/addi.d/sub.d/mul.d.
28// * slli.d with imm < 32.
29// * ld.d/ld.wu.
30//===---------------------------------------------------------------------===//
31
32#include "LoongArch.h"
34#include "LoongArchSubtarget.h"
35#include "llvm/ADT/SmallSet.h"
36#include "llvm/ADT/Statistic.h"
39
40using namespace llvm;
41
42#define DEBUG_TYPE "loongarch-opt-w-instrs"
43#define LOONGARCH_OPT_W_INSTRS_NAME "LoongArch Optimize W Instructions"
44
// Pass statistics: NumRemovedSExtW counts erased sign-extension instructions,
// NumTransformedToWInstrs counts instructions whose opcode was switched to a
// W-suffixed variant.
45STATISTIC(NumRemovedSExtW, "Number of removed sign-extensions");
46STATISTIC(NumTransformedToWInstrs,
47 "Number of instructions transformed to W-ops");
48
// Hidden command-line switches; both default to false.
49static cl::opt<bool>
50 DisableSExtWRemoval("loongarch-disable-sextw-removal",
51 cl::desc("Disable removal of sign-extend insn"),
52 cl::init(false), cl::Hidden);
// When set, runOnMachineFunction() does not call convertToDSuffixes().
53static cl::opt<bool>
54 DisableCvtToDSuffix("loongarch-disable-cvt-to-d-suffix",
55 cl::desc("Disable convert to D suffix"),
56 cl::init(false), cl::Hidden);
57
58namespace {
59
// Machine-function pass that optimizes LoongArch *W instructions. The three
// helper methods below are invoked from runOnMachineFunction().
// NOTE(review): the last parameter line of each helper declaration is not
// visible in this listing.
60class LoongArchOptWInstrs : public MachineFunctionPass {
61public:
// Pass identification.
62 static char ID;
63
64 LoongArchOptWInstrs() : MachineFunctionPass(ID) {}
65
66 bool runOnMachineFunction(MachineFunction &MF) override;
// Removes redundant sign-extension instructions; returns true on change.
67 bool removeSExtWInstrs(MachineFunction &MF, const LoongArchInstrInfo &TII,
68 const LoongArchSubtarget &ST,
// Rewrites ADDI_W to ADDI_D when only the low word of the result is used.
70 bool convertToDSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
71 const LoongArchSubtarget &ST,
// Rewrites selected D-suffixed instructions to their W-suffixed forms when
// only the low word of the result is used.
73 bool convertToWSuffixes(MachineFunction &MF, const LoongArchInstrInfo &TII,
74 const LoongArchSubtarget &ST,
76
// The pass declares that it preserves the CFG.
77 void getAnalysisUsage(AnalysisUsage &AU) const override {
78 AU.setPreservesCFG();
80 }
81
82 StringRef getPassName() const override { return LOONGARCH_OPT_W_INSTRS_NAME; }
83};
84
85} // end anonymous namespace
86
87char LoongArchOptWInstrs::ID = 0;
89 false, false)
90
92 return new LoongArchOptWInstrs();
93}
94
95// Checks if all users only demand the lower \p OrigBits of the original
96// instruction's result.
97// TODO: handle multiple interdependent transformations
//
// The search is worklist driven: each entry pairs an instruction with the
// number of low bits still demanded of its result. A user that reads only a
// bounded number of low bits ends the search along that path; a user that
// merely forwards the low bits (e.g. COPY, PHI, ADD_D) is queued so that its
// own users are inspected too. Any user opcode not listed in the switch makes
// the function return false.
//
// \param OrigMI   Instruction whose result is examined.
// \param ST       Subtarget, queried for the register width via getGRLen().
// \param MRI      Register info used to enumerate the non-debug uses.
// \param OrigBits Number of low bits of the result that users may demand.
// \returns true if no visited user demands bits above the tracked width.
98static bool hasAllNBitUsers(const MachineInstr &OrigMI,
99 const LoongArchSubtarget &ST,
100 const MachineRegisterInfo &MRI, unsigned OrigBits) {
101
// NOTE(review): Worklist and Visited are used below, but their declarations
// are not visible in this listing.
104
105 Worklist.push_back(std::make_pair(&OrigMI, OrigBits));
106
107 while (!Worklist.empty()) {
108 auto P = Worklist.pop_back_val();
109 const MachineInstr *MI = P.first;
110 unsigned Bits = P.second;
111
// Skip (instruction, bit-width) pairs that were already processed.
112 if (!Visited.insert(P).second)
113 continue;
114
115 // Only handle instructions with one def.
116 if (MI->getNumExplicitDefs() != 1)
117 return false;
118
119 Register DestReg = MI->getOperand(0).getReg();
120 if (!DestReg.isVirtual())
121 return false;
122
123 for (auto &UserOp : MRI.use_nodbg_operands(DestReg)) {
124 const MachineInstr *UserMI = UserOp.getParent();
125 unsigned OpIdx = UserOp.getOperandNo();
126
127 switch (UserMI->getOpcode()) {
128 default:
129 // TODO: Add vector
130 return false;
131
// These users are accepted whenever at least 32 low bits are tracked.
132 case LoongArch::ADD_W:
133 case LoongArch::ADDI_W:
134 case LoongArch::SUB_W:
135 case LoongArch::ALSL_W:
136 case LoongArch::ALSL_WU:
137 case LoongArch::MUL_W:
138 case LoongArch::MULH_W:
139 case LoongArch::MULH_WU:
140 case LoongArch::MULW_D_W:
141 case LoongArch::MULW_D_WU:
142 // TODO: {DIV,MOD}.{W,WU} consumes the upper 32 bits before LA664+.
143 // case LoongArch::DIV_W:
144 // case LoongArch::DIV_WU:
145 // case LoongArch::MOD_W:
146 // case LoongArch::MOD_WU:
147 case LoongArch::SLL_W:
148 case LoongArch::SLLI_W:
149 case LoongArch::SRL_W:
150 case LoongArch::SRLI_W:
151 case LoongArch::SRA_W:
152 case LoongArch::SRAI_W:
153 case LoongArch::ROTR_W:
154 case LoongArch::ROTRI_W:
155 case LoongArch::CLO_W:
156 case LoongArch::CLZ_W:
157 case LoongArch::CTO_W:
158 case LoongArch::CTZ_W:
159 case LoongArch::BYTEPICK_W:
160 case LoongArch::REVB_2H:
161 case LoongArch::BITREV_4B:
162 case LoongArch::BITREV_W:
163 case LoongArch::BSTRINS_W:
164 case LoongArch::BSTRPICK_W:
165 case LoongArch::CRC_W_W_W:
166 case LoongArch::CRCC_W_W_W:
167 case LoongArch::MOVGR2FCSR:
168 case LoongArch::MOVGR2FRH_W:
169 case LoongArch::MOVGR2FR_W_64:
170 if (Bits >= 32)
171 break;
172 return false;
// Accepted when at least 1 low bit is tracked.
173 case LoongArch::MOVGR2CF:
174 if (Bits >= 1)
175 break;
176 return false;
// Accepted when at least 8 low bits are tracked.
177 case LoongArch::EXT_W_B:
178 if (Bits >= 8)
179 break;
180 return false;
// Accepted when at least 16 low bits are tracked.
181 case LoongArch::EXT_W_H:
182 if (Bits >= 16)
183 break;
184 return false;
185
186 case LoongArch::SRLI_D: {
187 // If we are shifting right by less than Bits, and users don't demand
188 // any bits that were shifted into [Bits-1:0], then we can consider this
189 // as an N-Bit user.
190 unsigned ShAmt = UserMI->getOperand(2).getImm();
191 if (Bits > ShAmt) {
// Continue with the shift's users, tracking the narrower width.
192 Worklist.push_back(std::make_pair(UserMI, Bits - ShAmt));
193 break;
194 }
195 return false;
196 }
197
198 // these overwrite higher input bits, otherwise the lower word of output
199 // depends only on the lower word of input. So check their uses read W.
200 case LoongArch::SLLI_D:
// When Bits >= GRLen - shamt the user is accepted outright; otherwise its
// own users are inspected with the same width.
201 if (Bits >= (ST.getGRLen() - UserMI->getOperand(2).getImm()))
202 break;
203 Worklist.push_back(std::make_pair(UserMI, Bits));
204 break;
205 case LoongArch::ANDI: {
// Accepted outright when the mask immediate fits in the low Bits bits.
206 uint64_t Imm = UserMI->getOperand(2).getImm();
207 if (Bits >= (unsigned)llvm::bit_width(Imm))
208 break;
209 Worklist.push_back(std::make_pair(UserMI, Bits));
210 break;
211 }
212 case LoongArch::ORI: {
// NOTE(review): if the ORI immediate is a non-negative (zero-extended) value,
// ~Imm has its upper bits set and bit_width() is 64, so this early exit would
// never trigger and the user is always queued — confirm against the ISA.
213 uint64_t Imm = UserMI->getOperand(2).getImm();
214 if (Bits >= (unsigned)llvm::bit_width<uint64_t>(~Imm))
215 break;
216 Worklist.push_back(std::make_pair(UserMI, Bits));
217 break;
218 }
219
220 case LoongArch::SLL_D:
221 // Operand 2 is the shift amount which uses log2(grlen) bits.
222 if (OpIdx == 2) {
223 if (Bits >= Log2_32(ST.getGRLen()))
224 break;
225 return false;
226 }
// Used as the shifted value: inspect the shift's own users.
227 Worklist.push_back(std::make_pair(UserMI, Bits));
228 break;
229
230 case LoongArch::SRA_D:
231 case LoongArch::SRL_D:
232 case LoongArch::ROTR_D:
233 // Operand 2 is the shift amount which uses 6 bits.
// A use in any other operand position is rejected.
234 if (OpIdx == 2 && Bits >= Log2_32(ST.getGRLen()))
235 break;
236 return false;
237
238 case LoongArch::ST_B:
239 case LoongArch::STX_B:
240 case LoongArch::STGT_B:
241 case LoongArch::STLE_B:
242 case LoongArch::IOCSRWR_B:
243 // The first argument is the value to store.
244 if (OpIdx == 0 && Bits >= 8)
245 break;
246 return false;
247 case LoongArch::ST_H:
248 case LoongArch::STX_H:
249 case LoongArch::STGT_H:
250 case LoongArch::STLE_H:
251 case LoongArch::IOCSRWR_H:
252 // The first argument is the value to store.
253 if (OpIdx == 0 && Bits >= 16)
254 break;
255 return false;
256 case LoongArch::ST_W:
257 case LoongArch::STX_W:
258 case LoongArch::SCREL_W:
259 case LoongArch::STPTR_W:
260 case LoongArch::STGT_W:
261 case LoongArch::STLE_W:
262 case LoongArch::IOCSRWR_W:
263 // The first argument is the value to store.
264 if (OpIdx == 0 && Bits >= 32)
265 break;
266 return false;
267
// CRC users: the width required depends on which operand the value feeds.
268 case LoongArch::CRC_W_B_W:
269 case LoongArch::CRCC_W_B_W:
270 if ((OpIdx == 1 && Bits >= 8) || (OpIdx == 2 && Bits >= 32))
271 break;
272 return false;
273 case LoongArch::CRC_W_H_W:
274 case LoongArch::CRCC_W_H_W:
275 if ((OpIdx == 1 && Bits >= 16) || (OpIdx == 2 && Bits >= 32))
276 break;
277 return false;
278 case LoongArch::CRC_W_D_W:
279 case LoongArch::CRCC_W_D_W:
280 if (OpIdx == 2 && Bits >= 32)
281 break;
282 return false;
283
284 // For these, lower word of output in these operations, depends only on
285 // the lower word of input. So, we check all uses only read lower word.
286 case LoongArch::COPY:
287 case LoongArch::PHI:
288 case LoongArch::ADD_D:
289 case LoongArch::ADDI_D:
290 case LoongArch::SUB_D:
291 case LoongArch::MUL_D:
292 case LoongArch::AND:
293 case LoongArch::OR:
294 case LoongArch::NOR:
295 case LoongArch::XOR:
296 case LoongArch::XORI:
297 case LoongArch::ANDN:
298 case LoongArch::ORN:
299 Worklist.push_back(std::make_pair(UserMI, Bits));
300 break;
301
302 case LoongArch::MASKNEZ:
303 case LoongArch::MASKEQZ:
// Only a use as operand 1 is followed; any other operand position is rejected.
304 if (OpIdx != 1)
305 return false;
306 Worklist.push_back(std::make_pair(UserMI, Bits));
307 break;
308 }
309 }
310 }
311
// Every visited user was accepted.
312 return true;
313}
314
315static bool hasAllWUsers(const MachineInstr &OrigMI,
316 const LoongArchSubtarget &ST,
317 const MachineRegisterInfo &MRI) {
318 return hasAllNBitUsers(OrigMI, ST, MRI, 32);
319}
320
321// This function returns true if the machine instruction always outputs a value
322// where bits 63:32 match bit 31.
//
// NOTE(review): the first line of the signature is not visible in this
// listing. In the visible body, MRI is not referenced and OpNo is consulted
// only for the 32-bit atomic pseudos at the end of the switch.
// Opcodes not listed in the switch yield false.
324 const MachineRegisterInfo &MRI, unsigned OpNo) {
325 switch (MI.getOpcode()) {
326 // Normal cases
327 case LoongArch::ADD_W:
328 case LoongArch::SUB_W:
329 case LoongArch::ADDI_W:
330 case LoongArch::ALSL_W:
331 case LoongArch::LU12I_W:
332 case LoongArch::SLT:
333 case LoongArch::SLTU:
334 case LoongArch::SLTI:
335 case LoongArch::SLTUI:
336 case LoongArch::ANDI:
337 case LoongArch::MUL_W:
338 case LoongArch::MULH_W:
339 case LoongArch::MULH_WU:
340 case LoongArch::DIV_W:
341 case LoongArch::MOD_W:
342 case LoongArch::DIV_WU:
343 case LoongArch::MOD_WU:
344 case LoongArch::SLL_W:
345 case LoongArch::SRL_W:
346 case LoongArch::SRA_W:
347 case LoongArch::ROTR_W:
348 case LoongArch::SLLI_W:
349 case LoongArch::SRLI_W:
350 case LoongArch::SRAI_W:
351 case LoongArch::ROTRI_W:
352 case LoongArch::EXT_W_B:
353 case LoongArch::EXT_W_H:
354 case LoongArch::CLO_W:
355 case LoongArch::CLZ_W:
356 case LoongArch::CTO_W:
357 case LoongArch::CTZ_W:
358 case LoongArch::BYTEPICK_W:
359 case LoongArch::REVB_2H:
360 case LoongArch::BITREV_4B:
361 case LoongArch::BITREV_W:
362 case LoongArch::BSTRINS_W:
363 case LoongArch::BSTRPICK_W:
364 case LoongArch::LD_B:
365 case LoongArch::LD_H:
366 case LoongArch::LD_W:
367 case LoongArch::LD_BU:
368 case LoongArch::LD_HU:
369 case LoongArch::LL_W:
370 case LoongArch::LLACQ_W:
371 case LoongArch::RDTIMEL_W:
372 case LoongArch::RDTIMEH_W:
373 case LoongArch::CPUCFG:
374 case LoongArch::LDX_B:
375 case LoongArch::LDX_H:
376 case LoongArch::LDX_W:
377 case LoongArch::LDX_BU:
378 case LoongArch::LDX_HU:
379 case LoongArch::LDPTR_W:
380 case LoongArch::LDGT_B:
381 case LoongArch::LDGT_H:
382 case LoongArch::LDGT_W:
383 case LoongArch::LDLE_B:
384 case LoongArch::LDLE_H:
385 case LoongArch::LDLE_W:
386 case LoongArch::AMSWAP_B:
387 case LoongArch::AMSWAP_H:
388 case LoongArch::AMSWAP_W:
389 case LoongArch::AMADD_B:
390 case LoongArch::AMADD_H:
391 case LoongArch::AMADD_W:
392 case LoongArch::AMAND_W:
393 case LoongArch::AMOR_W:
394 case LoongArch::AMXOR_W:
395 case LoongArch::AMMAX_W:
396 case LoongArch::AMMIN_W:
397 case LoongArch::AMMAX_WU:
398 case LoongArch::AMMIN_WU:
399 case LoongArch::AMSWAP__DB_B:
400 case LoongArch::AMSWAP__DB_H:
401 case LoongArch::AMSWAP__DB_W:
402 case LoongArch::AMADD__DB_B:
403 case LoongArch::AMADD__DB_H:
404 case LoongArch::AMADD__DB_W:
405 case LoongArch::AMAND__DB_W:
406 case LoongArch::AMOR__DB_W:
407 case LoongArch::AMXOR__DB_W:
408 case LoongArch::AMMAX__DB_W:
409 case LoongArch::AMMIN__DB_W:
410 case LoongArch::AMMAX__DB_WU:
411 case LoongArch::AMMIN__DB_WU:
412 case LoongArch::AMCAS_B:
413 case LoongArch::AMCAS_H:
414 case LoongArch::AMCAS_W:
415 case LoongArch::AMCAS__DB_B:
416 case LoongArch::AMCAS__DB_H:
417 case LoongArch::AMCAS__DB_W:
418 case LoongArch::CRC_W_B_W:
419 case LoongArch::CRC_W_H_W:
420 case LoongArch::CRC_W_W_W:
421 case LoongArch::CRC_W_D_W:
422 case LoongArch::CRCC_W_B_W:
423 case LoongArch::CRCC_W_H_W:
424 case LoongArch::CRCC_W_W_W:
425 case LoongArch::CRCC_W_D_W:
426 case LoongArch::IOCSRRD_B:
427 case LoongArch::IOCSRRD_H:
428 case LoongArch::IOCSRRD_W:
429 case LoongArch::MOVFR2GR_S:
430 case LoongArch::MOVFCSR2GR:
431 case LoongArch::MOVCF2GR:
432 case LoongArch::MOVFRH2GR_S:
433 case LoongArch::MOVFR2GR_S_64:
434 // TODO: Add vector
435 return true;
436 // Special cases that require checking operands.
437 // shifting right sufficiently makes the value 32-bit sign-extended
// The arithmetic shift qualifies from a shift amount of 32; the logical shift
// only from 33.
438 case LoongArch::SRAI_D:
439 return MI.getOperand(2).getImm() >= 32;
440 case LoongArch::SRLI_D:
441 return MI.getOperand(2).getImm() > 32;
442 // The LI pattern ADDI rd, R0, imm and ORI rd, R0, imm are sign extended.
443 case LoongArch::ADDI_D:
444 case LoongArch::ORI:
445 return MI.getOperand(1).isReg() &&
446 MI.getOperand(1).getReg() == LoongArch::R0;
447 // A bits extract is sign extended if the msb is less than 31.
// The msb is read from operand 2.
448 case LoongArch::BSTRPICK_D:
449 return MI.getOperand(2).getImm() < 31;
450 // Copying from R0 produces zero.
451 case LoongArch::COPY:
452 return MI.getOperand(1).getReg() == LoongArch::R0;
453 // Ignore the scratch register destination.
// Only the definition at operand index 0 counts as sign extending.
454 case LoongArch::PseudoMaskedAtomicSwap32:
455 case LoongArch::PseudoAtomicSwap32:
456 case LoongArch::PseudoMaskedAtomicLoadAdd32:
457 case LoongArch::PseudoMaskedAtomicLoadSub32:
458 case LoongArch::PseudoAtomicLoadNand32:
459 case LoongArch::PseudoMaskedAtomicLoadNand32:
460 case LoongArch::PseudoAtomicLoadAdd32:
461 case LoongArch::PseudoAtomicLoadSub32:
462 case LoongArch::PseudoAtomicLoadAnd32:
463 case LoongArch::PseudoAtomicLoadOr32:
464 case LoongArch::PseudoAtomicLoadXor32:
465 case LoongArch::PseudoMaskedAtomicLoadUMax32:
466 case LoongArch::PseudoMaskedAtomicLoadUMin32:
467 case LoongArch::PseudoCmpXchg32:
468 case LoongArch::PseudoMaskedCmpXchg32:
469 case LoongArch::PseudoMaskedAtomicLoadMax32:
470 case LoongArch::PseudoMaskedAtomicLoadMin32:
471 return OpNo == 0;
472 }
473
// Any other opcode is not known to sign extend.
474 return false;
475}
476
// Determines whether the value in SrcReg is known to be sign extended from 32
// bits by walking backwards through its defining instructions.
//
// A definition is accepted when isSignExtendingOpW() says it sign extends, when
// it only propagates sign extension and all relevant inputs are accepted in
// turn, or when it can be rewritten to a W-suffixed opcode because all of its
// users read only the low word; such definitions are recorded in FixableDef.
// Returns false as soon as a physical register or an unhandled opcode is
// reached.
// NOTE(review): the remaining signature lines and the Worklist declaration are
// not visible in this listing.
477static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST,
480 SmallSet<Register, 4> Visited;
482
// Queues a register for inspection; physical registers are rejected.
483 auto AddRegToWorkList = [&](Register SrcReg) {
484 if (!SrcReg.isVirtual())
485 return false;
486 Worklist.push_back(SrcReg);
487 return true;
488 };
489
490 if (!AddRegToWorkList(SrcReg))
491 return false;
492
493 while (!Worklist.empty()) {
494 Register Reg = Worklist.pop_back_val();
495
496 // If we already visited this register, we don't need to check it again.
497 if (!Visited.insert(Reg).second)
498 continue;
499
// A register without a defining instruction is skipped rather than rejected.
500 MachineInstr *MI = MRI.getVRegDef(Reg);
501 if (!MI)
502 continue;
503
504 int OpNo = MI->findRegisterDefOperandIdx(Reg, /*TRI=*/nullptr);
505 assert(OpNo != -1 && "Couldn't find register");
506
507 // If this is a sign extending operation we don't need to look any further.
508 if (isSignExtendingOpW(*MI, MRI, OpNo))
509 continue;
510
511 // Is this an instruction that propagates sign extend?
512 switch (MI->getOpcode()) {
513 default:
514 // Unknown opcode, give up.
515 return false;
516 case LoongArch::COPY: {
517 const MachineFunction *MF = MI->getMF();
// NOTE(review): the initializer of LAFI is on a line not visible here.
518 const LoongArchMachineFunctionInfo *LAFI =
520
521 // If this is the entry block and the register is livein, see if we know
522 // it is sign extended.
523 if (MI->getParent() == &MF->front()) {
524 Register VReg = MI->getOperand(0).getReg();
525 if (MF->getRegInfo().isLiveIn(VReg) && LAFI->isSExt32Register(VReg))
526 continue;
527 }
528
529 Register CopySrcReg = MI->getOperand(1).getReg();
530 if (CopySrcReg == LoongArch::R4) {
531 // For a method return value, we check the ZExt/SExt flags in attribute.
532 // We assume the following code sequence for method call.
533 // PseudoCALL @bar, ...
534 // ADJCALLSTACKUP 0, 0, implicit-def dead $r3, implicit $r3
535 // %0:gpr = COPY $r4
536 //
537 // We use the PseudoCall to look up the IR function being called to find
538 // its return attributes.
539 const MachineBasicBlock *MBB = MI->getParent();
540 auto II = MI->getIterator();
541 if (II == MBB->instr_begin() ||
542 (--II)->getOpcode() != LoongArch::ADJCALLSTACKUP)
543 return false;
544
// NOTE(review): this second decrement is not checked against instr_begin();
// it relies on ADJCALLSTACKUP never being the first instruction of the block —
// confirm.
545 const MachineInstr &CallMI = *(--II);
546 if (!CallMI.isCall() || !CallMI.getOperand(0).isGlobal())
547 return false;
548
549 auto *CalleeFn =
550 dyn_cast_if_present<Function>(CallMI.getOperand(0).getGlobal());
551 if (!CalleeFn)
552 return false;
553
554 auto *IntTy = dyn_cast<IntegerType>(CalleeFn->getReturnType());
555 if (!IntTy)
556 return false;
557
// Accept a signext return of at most 32 bits, or a zeroext return narrower
// than 32 bits.
558 const AttributeSet &Attrs = CalleeFn->getAttributes().getRetAttrs();
559 unsigned BitWidth = IntTy->getBitWidth();
560 if ((BitWidth <= 32 && Attrs.hasAttribute(Attribute::SExt)) ||
561 (BitWidth < 32 && Attrs.hasAttribute(Attribute::ZExt)))
562 continue;
563 }
564
// Otherwise the copy source itself must be sign extended; a physical source
// register fails here.
565 if (!AddRegToWorkList(CopySrcReg))
566 return false;
567
568 break;
569 }
570
571 // For these, we just need to check if the 1st operand is sign extended.
572 case LoongArch::MOD_D:
573 case LoongArch::ANDI:
574 case LoongArch::ORI:
575 case LoongArch::XORI:
576 // |Remainder| is always <= |Dividend|. If D is 32-bit, then so is R.
577 // DIV doesn't work because of the edge case 0xf..f 8000 0000 / (long)-1
578 // Logical operations use a sign extended 12-bit immediate.
// NOTE(review): the LoongArch manual describes the ANDI/ORI/XORI immediate as
// zero-extended rather than sign-extended — confirm and reword the line above.
// ANDI is also already accepted by isSignExtendingOpW() before this switch.
579 if (!AddRegToWorkList(MI->getOperand(1).getReg()))
580 return false;
581
582 break;
583 case LoongArch::MOD_DU:
584 case LoongArch::AND:
585 case LoongArch::OR:
586 case LoongArch::XOR:
587 case LoongArch::ANDN:
588 case LoongArch::ORN:
589 case LoongArch::PHI: {
590 // If all incoming values are sign-extended, the output of AND, OR, XOR,
591 // or PHI is also sign-extended.
592
593 // The input registers for PHI are operand 1, 3, ...
594 // The input registers for others are operand 1 and 2.
// B, E and D are the begin index, end index and stride of the operand walk.
595 unsigned B = 1, E = 3, D = 1;
596 switch (MI->getOpcode()) {
597 case LoongArch::PHI:
598 E = MI->getNumOperands();
599 D = 2;
600 break;
601 }
602
603 for (unsigned I = B; I != E; I += D) {
604 if (!MI->getOperand(I).isReg())
605 return false;
606
607 if (!AddRegToWorkList(MI->getOperand(I).getReg()))
608 return false;
609 }
610
611 break;
612 }
613
614 case LoongArch::MASKEQZ:
615 case LoongArch::MASKNEZ:
616 // Instructions return zero or operand 1. Result is sign extended if
617 // operand 1 is sign extended.
618 if (!AddRegToWorkList(MI->getOperand(1).getReg()))
619 return false;
620 break;
621
622 // With these opcode, we can "fix" them with the W-version
623 // if we know all users of the result only rely on bits 31:0
624 case LoongArch::SLLI_D:
625 // SLLI_W reads the lowest 5 bits, while SLLI_D reads lowest 6 bits
626 if (MI->getOperand(2).getImm() >= 32)
627 return false;
628 [[fallthrough]];
629 case LoongArch::ADDI_D:
630 case LoongArch::ADD_D:
631 case LoongArch::LD_D:
632 case LoongArch::LD_WU:
633 case LoongArch::MUL_D:
634 case LoongArch::SUB_D:
// Record the definition so the caller can rewrite it to its W form.
635 if (hasAllWUsers(*MI, ST, MRI)) {
636 FixableDef.insert(MI);
637 break;
638 }
639 return false;
640 }
641 }
642
643 // If we get here, then every node we visited produces a sign extended value
644 // or propagated sign extended values. So the result must be sign extended.
645 return true;
646}
647
648static unsigned getWOp(unsigned Opcode) {
649 switch (Opcode) {
650 case LoongArch::ADDI_D:
651 return LoongArch::ADDI_W;
652 case LoongArch::ADD_D:
653 return LoongArch::ADD_W;
654 case LoongArch::LD_D:
655 case LoongArch::LD_WU:
656 return LoongArch::LD_W;
657 case LoongArch::MUL_D:
658 return LoongArch::MUL_W;
659 case LoongArch::SLLI_D:
660 return LoongArch::SLLI_W;
661 case LoongArch::SUB_D:
662 return LoongArch::SUB_W;
663 default:
664 llvm_unreachable("Unexpected opcode for replacement with W variant");
665 }
666}
667
// Erases sign-extension instructions whose result is either never read above
// the low word or whose source is already known to be sign extended, rewriting
// the recorded "fixable" definitions to their W-suffixed opcodes along the way.
// Returns true if any instruction was removed.
// NOTE(review): several lines of this function (the last parameter, the early
// exit condition, the inner loop header, the sext.w test and the FixableDefs
// declaration) are not visible in this listing.
668bool LoongArchOptWInstrs::removeSExtWInstrs(MachineFunction &MF,
669 const LoongArchInstrInfo &TII,
670 const LoongArchSubtarget &ST,
673 return false;
674
675 bool MadeChange = false;
676 for (MachineBasicBlock &MBB : MF) {
678 // We're looking for the sext.w pattern ADDI.W rd, rs, 0.
680 continue;
681
682 Register SrcReg = MI.getOperand(1).getReg();
683
685
686 // If all users only use the lower bits, this sext.w is redundant.
687 // Or if all definitions reaching MI sign-extend their output,
688 // then sext.w is redundant.
689 if (!hasAllWUsers(MI, ST, MRI) &&
690 !isSignExtendedW(SrcReg, ST, MRI, FixableDefs))
691 continue;
692
// The source register is about to replace the destination, so it must be
// constrainable to the destination's register class.
693 Register DstReg = MI.getOperand(0).getReg();
694 if (!MRI.constrainRegClass(SrcReg, MRI.getRegClass(DstReg)))
695 continue;
696
697 // Convert Fixable instructions to their W versions.
698 for (MachineInstr *Fixable : FixableDefs) {
699 LLVM_DEBUG(dbgs() << "Replacing " << *Fixable);
700 Fixable->setDesc(TII.get(getWOp(Fixable->getOpcode())));
// The NoSWrap, NoUWrap and IsExact flags are dropped after the opcode change.
701 Fixable->clearFlag(MachineInstr::MIFlag::NoSWrap);
702 Fixable->clearFlag(MachineInstr::MIFlag::NoUWrap);
703 Fixable->clearFlag(MachineInstr::MIFlag::IsExact);
704 LLVM_DEBUG(dbgs() << " with " << *Fixable);
705 ++NumTransformedToWInstrs;
706 }
707
708 LLVM_DEBUG(dbgs() << "Removing redundant sign-extension\n");
// Redirect all uses of the result to the source, clear the source's kill
// flags, then delete the instruction.
709 MRI.replaceRegWith(DstReg, SrcReg);
710 MRI.clearKillFlags(SrcReg);
711 MI.eraseFromParent();
712 ++NumRemovedSExtW;
713 MadeChange = true;
714 }
715 }
716
717 return MadeChange;
718}
719
// Rewrites ADDI_W to ADDI_D wherever all users of the result read only its low
// word. Returns true if any instruction was changed.
// NOTE(review): the last parameter line is not visible in this listing.
720bool LoongArchOptWInstrs::convertToDSuffixes(MachineFunction &MF,
721 const LoongArchInstrInfo &TII,
722 const LoongArchSubtarget &ST,
724 bool MadeChange = false;
725 for (MachineBasicBlock &MBB : MF) {
726 for (MachineInstr &MI : MBB) {
// Opc is the replacement opcode; instructions without one are skipped.
727 unsigned Opc;
728 switch (MI.getOpcode()) {
729 default:
730 continue;
731 case LoongArch::ADDI_W:
732 Opc = LoongArch::ADDI_D;
733 break;
734 }
735
736 if (hasAllWUsers(MI, ST, MRI)) {
737 MI.setDesc(TII.get(Opc));
738 MadeChange = true;
739 }
740 }
741 }
742
743 return MadeChange;
744}
745
// Rewrites ADD_D, ADDI_D, SUB_D, MUL_D, SLLI_D (shift amount below 32), LD_D
// and LD_WU to their W-suffixed counterparts wherever all users of the result
// read only its low word. Returns true if any instruction was changed.
// NOTE(review): the last parameter line is not visible in this listing.
746bool LoongArchOptWInstrs::convertToWSuffixes(MachineFunction &MF,
747 const LoongArchInstrInfo &TII,
748 const LoongArchSubtarget &ST,
750 bool MadeChange = false;
751 for (MachineBasicBlock &MBB : MF) {
752 for (MachineInstr &MI : MBB) {
// WOpc is the replacement opcode; instructions without one are skipped.
753 unsigned WOpc;
754 // TODO: Add more?
755 switch (MI.getOpcode()) {
756 default:
757 continue;
758 case LoongArch::ADD_D:
759 WOpc = LoongArch::ADD_W;
760 break;
761 case LoongArch::ADDI_D:
762 WOpc = LoongArch::ADDI_W;
763 break;
764 case LoongArch::SUB_D:
765 WOpc = LoongArch::SUB_W;
766 break;
767 case LoongArch::MUL_D:
768 WOpc = LoongArch::MUL_W;
769 break;
770 case LoongArch::SLLI_D:
771 // SLLI.W reads the lowest 5 bits, while SLLI.D reads lowest 6 bits
772 if (MI.getOperand(2).getImm() >= 32)
773 continue;
774 WOpc = LoongArch::SLLI_W;
775 break;
776 case LoongArch::LD_D:
777 case LoongArch::LD_WU:
778 WOpc = LoongArch::LD_W;
779 break;
780 }
781
782 if (hasAllWUsers(MI, ST, MRI)) {
783 LLVM_DEBUG(dbgs() << "Replacing " << MI);
784 MI.setDesc(TII.get(WOpc));
// The NoSWrap, NoUWrap and IsExact flags are dropped after the opcode change.
785 MI.clearFlag(MachineInstr::MIFlag::NoSWrap);
786 MI.clearFlag(MachineInstr::MIFlag::NoUWrap);
787 MI.clearFlag(MachineInstr::MIFlag::IsExact);
788 LLVM_DEBUG(dbgs() << " with " << MI);
789 ++NumTransformedToWInstrs;
790 MadeChange = true;
791 }
792 }
793 }
794
795 return MadeChange;
796}
797
// Pass entry point. Does nothing for skipped functions and non-64-bit
// subtargets. Otherwise it first removes redundant sign extensions, then
// applies one of the two suffix conversions depending on ST.preferWInst().
// Returns true if any step changed the function.
// NOTE(review): the declarations of ST and MRI are on lines not visible in
// this listing.
798bool LoongArchOptWInstrs::runOnMachineFunction(MachineFunction &MF) {
799 if (skipFunction(MF.getFunction()))
800 return false;
801
804 const LoongArchInstrInfo &TII = *ST.getInstrInfo();
805
806 if (!ST.is64Bit())
807 return false;
808
809 bool MadeChange = false;
810 MadeChange |= removeSExtWInstrs(MF, TII, ST, MRI);
811
// W -> D conversion runs only when it is not disabled on the command line and
// the subtarget does not prefer W instructions.
812 if (!(DisableCvtToDSuffix || ST.preferWInst()))
813 MadeChange |= convertToDSuffixes(MF, TII, ST, MRI);
814
// D -> W conversion runs only when the subtarget prefers W instructions.
815 if (ST.preferWInst())
816 MadeChange |= convertToWSuffixes(MF, TII, ST, MRI);
817
818 return MadeChange;
819}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< StatepointGC > D("statepoint-example", "an example strategy for statepoint")
#define LLVM_DEBUG(X)
Definition: Debug.h:101
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
static bool isSignExtendingOpW(const MachineInstr &MI, const MachineRegisterInfo &MRI, unsigned OpNo)
static cl::opt< bool > DisableSExtWRemoval("loongarch-disable-sextw-removal", cl::desc("Disable removal of sign-extend insn"), cl::init(false), cl::Hidden)
static bool hasAllWUsers(const MachineInstr &OrigMI, const LoongArchSubtarget &ST, const MachineRegisterInfo &MRI)
static bool isSignExtendedW(Register SrcReg, const LoongArchSubtarget &ST, const MachineRegisterInfo &MRI, SmallPtrSetImpl< MachineInstr * > &FixableDef)
#define LOONGARCH_OPT_W_INSTRS_NAME
static bool hasAllNBitUsers(const MachineInstr &OrigMI, const LoongArchSubtarget &ST, const MachineRegisterInfo &MRI, unsigned OrigBits)
#define DEBUG_TYPE
static unsigned getWOp(unsigned Opcode)
static cl::opt< bool > DisableCvtToDSuffix("loongarch-disable-cvt-to-d-suffix", cl::desc("Disable convert to D suffix"), cl::init(false), cl::Hidden)
#define I(x, y, z)
Definition: MD5.cpp:58
uint64_t IntrinsicInst * II
#define P(N)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the SmallSet class.
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:256
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:310
LoongArchMachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private Lo...
instr_iterator instr_begin()
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
const MachineBasicBlock & front() const
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:569
const MachineBasicBlock * getParent() const
Definition: MachineInstr.h:346
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:950
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:579
const GlobalValue * getGlobal() const
int64_t getImm() const
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isLiveIn(Register Reg) const
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
Definition: SmallPtrSet.h:346
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:367
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:502
SmallSet - This maintains a set of unique values, optimizing for the case when the set is small (less...
Definition: SmallSet.h:135
std::pair< const_iterator, bool > insert(const T &V)
insert - Insert an element into the set if it isn't already there.
Definition: SmallSet.h:179
bool empty() const
Definition: SmallVector.h:94
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
bool isSEXT_W(const MachineInstr &MI)
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:443
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
int bit_width(T Value)
Returns the number of bits needed to represent Value if Value is nonzero.
Definition: bit.h:317
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:656
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:340
FunctionPass * createLoongArchOptWInstrsPass()
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191