LLVM 22.0.0git
LoongArchMergeBaseOffset.cpp
Go to the documentation of this file.
1//===---- LoongArchMergeBaseOffset.cpp - Optimise address calculations ----===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Merge the offset of address calculation into the offset field
10// of instructions in a global address lowering sequence.
11//
12//===----------------------------------------------------------------------===//
13
14#include "LoongArch.h"
17#include "llvm/CodeGen/Passes.h"
19#include "llvm/Support/Debug.h"
21#include <optional>
22
23using namespace llvm;
24
25#define DEBUG_TYPE "loongarch-merge-base-offset"
26#define LoongArch_MERGE_BASE_OFFSET_NAME "LoongArch Merge Base Offset"
27
28namespace {
29
30class LoongArchMergeBaseOffsetOpt : public MachineFunctionPass {
31 const LoongArchSubtarget *ST = nullptr;
33
34public:
35 static char ID;
36 bool runOnMachineFunction(MachineFunction &Fn) override;
37 bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Lo12,
38 MachineInstr *&Lo20, MachineInstr *&Hi12,
40 bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Add,
41 MachineInstr *&Lo12);
42
43 bool detectAndFoldOffset(MachineInstr &Hi20, MachineInstr &Lo12,
44 MachineInstr *&Lo20, MachineInstr *&Hi12,
46 void foldOffset(MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
48 int64_t Offset);
49 bool foldLargeOffset(MachineInstr &Hi20, MachineInstr &Lo12,
50 MachineInstr *&Lo20, MachineInstr *&Hi12,
51 MachineInstr *&Last, MachineInstr &TailAdd,
52 Register GAReg);
53
54 bool foldIntoMemoryOps(MachineInstr &Hi20, MachineInstr &Lo12,
55 MachineInstr *&Lo20, MachineInstr *&Hi12,
57
58 LoongArchMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
59
60 MachineFunctionProperties getRequiredProperties() const override {
61 return MachineFunctionProperties().setIsSSA();
62 }
63
64 void getAnalysisUsage(AnalysisUsage &AU) const override {
65 AU.setPreservesCFG();
67 }
68
69 StringRef getPassName() const override {
71 }
72};
73} // end anonymous namespace
74
75char LoongArchMergeBaseOffsetOpt::ID = 0;
76INITIALIZE_PASS(LoongArchMergeBaseOffsetOpt, DEBUG_TYPE,
78
79// Detect either of the patterns:
80//
81// 1. (small/medium):
82// pcalau12i vreg1, %pc_hi20(s)
83// addi.d vreg2, vreg1, %pc_lo12(s)
84//
85// 2. (large):
86// pcalau12i vreg1, %pc_hi20(s)
87// addi.d vreg2, $zero, %pc_lo12(s)
88// lu32i.d vreg3, vreg2, %pc64_lo20(s)
89// lu52i.d vreg4, vreg3, %pc64_hi12(s)
90// add.d vreg5, vreg4, vreg1
91
92// The pattern is only accepted if:
93// 1) For small and medium pattern, the first instruction has only one use,
94// which is the ADDI.
95// 2) For large pattern, the first four instructions each have only one use,
96// and the user of the fourth instruction is ADD.
97// 3) The address operands have the appropriate type, reflecting the
98// lowering of a global address or constant pool using the pattern.
99// 4) The offset value in the Global Address or Constant Pool is 0.
100bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
101 MachineInstr *&Lo12,
102 MachineInstr *&Lo20,
103 MachineInstr *&Hi12,
104 MachineInstr *&Last) {
105 if (Hi20.getOpcode() != LoongArch::PCALAU12I)
106 return false;
107
108 const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
110 return false;
111
112 auto isGlobalOrCPIOrBlockAddress = [](const MachineOperand &Op) {
113 return Op.isGlobal() || Op.isCPI() || Op.isBlockAddress();
114 };
115
116 if (!isGlobalOrCPIOrBlockAddress(Hi20Op1) || Hi20Op1.getOffset() != 0)
117 return false;
118
119 Register HiDestReg = Hi20.getOperand(0).getReg();
120 if (!MRI->hasOneUse(HiDestReg))
121 return false;
122
123 MachineInstr *UseInst = &*MRI->use_instr_begin(HiDestReg);
124 if (UseInst->getOpcode() != LoongArch::ADD_D) {
125 Lo12 = UseInst;
126 if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
127 (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
128 return false;
129 } else {
130 assert(ST->is64Bit());
131 Last = UseInst;
132
133 Register LastOp1Reg = Last->getOperand(1).getReg();
134 if (!LastOp1Reg.isVirtual())
135 return false;
136 Hi12 = MRI->getVRegDef(LastOp1Reg);
137 const MachineOperand &Hi12Op2 = Hi12->getOperand(2);
138 if (Hi12Op2.getTargetFlags() != LoongArchII::MO_PCREL64_HI)
139 return false;
140 if (!isGlobalOrCPIOrBlockAddress(Hi12Op2) || Hi12Op2.getOffset() != 0)
141 return false;
142 if (!MRI->hasOneUse(Hi12->getOperand(0).getReg()))
143 return false;
144
145 Lo20 = MRI->getVRegDef(Hi12->getOperand(1).getReg());
146 const MachineOperand &Lo20Op2 = Lo20->getOperand(2);
147 if (Lo20Op2.getTargetFlags() != LoongArchII::MO_PCREL64_LO)
148 return false;
149 if (!isGlobalOrCPIOrBlockAddress(Lo20Op2) || Lo20Op2.getOffset() != 0)
150 return false;
151 if (!MRI->hasOneUse(Lo20->getOperand(0).getReg()))
152 return false;
153
154 Lo12 = MRI->getVRegDef(Lo20->getOperand(1).getReg());
155 if (!MRI->hasOneUse(Lo12->getOperand(0).getReg()))
156 return false;
157 }
158
159 const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
160 assert(Hi20.getOpcode() == LoongArch::PCALAU12I);
162 !(isGlobalOrCPIOrBlockAddress(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
163 Lo12Op2.getOffset() != 0)
164 return false;
165
166 if (Hi20Op1.isGlobal()) {
167 LLVM_DEBUG(dbgs() << " Found lowered global address: "
168 << *Hi20Op1.getGlobal() << "\n");
169 } else if (Hi20Op1.isBlockAddress()) {
170 LLVM_DEBUG(dbgs() << " Found lowered basic address: "
171 << *Hi20Op1.getBlockAddress() << "\n");
172 } else if (Hi20Op1.isCPI()) {
173 LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << Hi20Op1.getIndex()
174 << "\n");
175 }
176
177 return true;
178}
179
180// Detect the pattern:
181//
182// (small/medium):
183// lu12i.w vreg1, %le_hi20_r(s)
184// add.w/d vreg2, vreg1, r2, %le_add_r(s)
185// addi.w/d vreg3, vreg2, %le_lo12_r(s)
186
187// The pattern is only accepted if:
188// 1) The first instruction has only one use, which is the PseudoAddTPRel.
189// The second instruction has only one use, which is the ADDI. The
190// second instruction's last operand is the tp register.
191// 2) The address operands have the appropriate type, reflecting the
192// lowering of a thread_local global address using the pattern.
193// 3) The offset value in the ThreadLocal Global Address is 0.
194bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
196 MachineInstr *&Lo12) {
197 if (Hi20.getOpcode() != LoongArch::LU12I_W)
198 return false;
199
200 auto isGlobalOrCPI = [](const MachineOperand &Op) {
201 return Op.isGlobal() || Op.isCPI();
202 };
203
204 const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
206 !isGlobalOrCPI(Hi20Op1) || Hi20Op1.getOffset() != 0)
207 return false;
208
209 Register HiDestReg = Hi20.getOperand(0).getReg();
210 if (!MRI->hasOneUse(HiDestReg))
211 return false;
212
213 Add = &*MRI->use_instr_begin(HiDestReg);
214 if ((ST->is64Bit() && Add->getOpcode() != LoongArch::PseudoAddTPRel_D) ||
215 (!ST->is64Bit() && Add->getOpcode() != LoongArch::PseudoAddTPRel_W))
216 return false;
217
218 if (Add->getOperand(2).getReg() != LoongArch::R2)
219 return false;
220
221 const MachineOperand &AddOp3 = Add->getOperand(3);
223 !(isGlobalOrCPI(AddOp3) || AddOp3.isMCSymbol()) ||
224 AddOp3.getOffset() != 0)
225 return false;
226
227 Register AddDestReg = Add->getOperand(0).getReg();
228 if (!MRI->hasOneUse(AddDestReg))
229 return false;
230
231 Lo12 = &*MRI->use_instr_begin(AddDestReg);
232 if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
233 (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
234 return false;
235
236 const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
238 !(isGlobalOrCPI(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
239 Lo12Op2.getOffset() != 0)
240 return false;
241
242 if (Hi20Op1.isGlobal()) {
243 LLVM_DEBUG(dbgs() << " Found lowered global address: "
244 << *Hi20Op1.getGlobal() << "\n");
245 } else if (Hi20Op1.isCPI()) {
246 LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << Hi20Op1.getIndex()
247 << "\n");
248 }
249
250 return true;
251}
252
253// Update the offset in Hi20, (Add), Lo12, (Lo20 and Hi12) instructions.
254// Delete the tail instruction and update all the uses to use the
255// output from Last.
256void LoongArchMergeBaseOffsetOpt::foldOffset(
257 MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
258 MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
259 int64_t Offset) {
260 // Put the offset back in Hi and the Lo
261 Hi20.getOperand(1).setOffset(Offset);
262 Lo12.getOperand(2).setOffset(Offset);
263 if (Lo20 && Hi12) {
264 Lo20->getOperand(2).setOffset(Offset);
265 Hi12->getOperand(2).setOffset(Offset);
266 }
267
268 // For tls-le, offset of the second PseudoAddTPRel instr should also be
269 // updated.
270 MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
271 if (Hi20.getOpcode() == LoongArch::LU12I_W)
272 Add->getOperand(3).setOffset(Offset);
273
274 // Delete the tail instruction.
275 MachineInstr *Def = Last ? Last : &Lo12;
276 MRI->constrainRegClass(Def->getOperand(0).getReg(),
277 MRI->getRegClass(Tail.getOperand(0).getReg()));
278 MRI->replaceRegWith(Tail.getOperand(0).getReg(), Def->getOperand(0).getReg());
279 Tail.eraseFromParent();
280
281 LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
282 << " " << Hi20;);
283 if (Hi20.getOpcode() == LoongArch::LU12I_W) {
284 LLVM_DEBUG(dbgs() << " " << *Add;);
285 }
286 LLVM_DEBUG(dbgs() << " " << Lo12;);
287 if (Lo20 && Hi12) {
288 LLVM_DEBUG(dbgs() << " " << *Lo20 << " " << *Hi12;);
289 }
290}
291
292// Detect patterns for large offsets that are passed into an ADD instruction.
293// If the pattern is found, updates the offset in Hi20, (Add), Lo12,
294// (Lo20 and Hi12) instructions and deletes TailAdd and the instructions that
295// produced the offset.
296//
297// (The instructions marked with "!" are not necessarily present)
298//
299// Base address lowering is of the form:
300// 1) pcala:
301// Hi20: pcalau12i vreg1, %pc_hi20(s)
302// +--- Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
303// | Lo20: lu32i.d vreg2, %pc64_lo20(s) !
304// +--- Hi12: lu52i.d vreg2, vreg2, %pc64_hi12(s) !
305// |
306// | 2) tls-le:
307// | Hi20: lu12i.w vreg1, %le_hi20_r(s)
308// | Add: add.w/d vreg1, vreg1, r2, %le_add_r(s)
309// +--- Lo12: addi.w/d vreg2, vreg1, %le_lo12_r(s)
310// |
311// | The large offset can be one of the forms:
312// |
313// +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
314// | OffsetHi20: lu12i.w vreg3, 4
315// | OffsetLo12: ori voff, vreg3, 188 ------------------+
316// | |
317// +-> 2) Offset that has non zero bits in Hi20 bits only: |
318// | OffsetHi20: lu12i.w voff, 128 ------------------+
319// | |
320// +-> 3) Offset that has non zero bits in Lo20 bits: |
321// | OffsetHi20: lu12i.w vreg3, 121 ! |
322// | OffsetLo12: ori voff, vreg3, 122 ! |
323// | OffsetLo20: lu32i.d voff, 123 ------------------+
324// +-> 4) Offset that has non zero bits in Hi12 bits: |
325// OffsetHi20: lu12i.w vreg3, 121 ! |
326// OffsetLo12: ori voff, vreg3, 122 ! |
327// OffsetLo20: lu32i.d vreg3, 123 ! |
328// OffsetHi12: lu52i.d voff, vreg3, 124 ------------------+
329// |
330// TailAdd: add.d vreg4, vreg2, voff <------------------+
331//
332bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
333 MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
334 MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
335 Register GAReg) {
336 assert((TailAdd.getOpcode() == LoongArch::ADD_W ||
337 TailAdd.getOpcode() == LoongArch::ADD_D) &&
338 "Expected ADD instruction!");
339 Register Rs = TailAdd.getOperand(1).getReg();
340 Register Rt = TailAdd.getOperand(2).getReg();
341 Register Reg = Rs == GAReg ? Rt : Rs;
342 SmallVector<MachineInstr *, 4> Instrs;
343 int64_t Offset = 0;
344 int64_t Mask = -1;
345
346 // This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
347 for (int i = 0; i < 4; i++) {
348 // Handle Reg is R0.
349 if (Reg == LoongArch::R0)
350 break;
351
352 // Can't fold if the register has more than one use.
353 if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
354 return false;
355
356 MachineInstr *Curr = MRI->getVRegDef(Reg);
357 if (!Curr)
358 break;
359
360 switch (Curr->getOpcode()) {
361 default:
362 // Can't fold if the instruction opcode is unexpected.
363 return false;
364 case LoongArch::ORI: {
365 MachineOperand ImmOp = Curr->getOperand(2);
367 return false;
368 Offset += ImmOp.getImm();
369 Reg = Curr->getOperand(1).getReg();
370 Instrs.push_back(Curr);
371 break;
372 }
373 case LoongArch::LU12I_W: {
374 MachineOperand ImmOp = Curr->getOperand(1);
376 return false;
377 Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask;
378 Reg = LoongArch::R0;
379 Instrs.push_back(Curr);
380 break;
381 }
382 case LoongArch::LU32I_D: {
383 MachineOperand ImmOp = Curr->getOperand(2);
384 if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20)
385 return false;
386 Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask;
387 Mask ^= 0x000FFFFF00000000ULL;
388 Reg = Curr->getOperand(1).getReg();
389 Instrs.push_back(Curr);
390 break;
391 }
392 case LoongArch::LU52I_D: {
393 MachineOperand ImmOp = Curr->getOperand(2);
394 if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12)
395 return false;
396 Offset += ImmOp.getImm() << 52;
397 Mask ^= 0xFFF0000000000000ULL;
398 Reg = Curr->getOperand(1).getReg();
399 Instrs.push_back(Curr);
400 break;
401 }
402 }
403 }
404
405 // Can't fold if the offset is not extracted.
406 if (!Offset)
407 return false;
408
409 foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
410 LLVM_DEBUG(dbgs() << " Offset Instrs:\n");
411 for (auto I : Instrs) {
412 LLVM_DEBUG(dbgs() << " " << *I);
413 I->eraseFromParent();
414 }
415
416 return true;
417}
418
419bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
420 MachineInstr &Lo12,
421 MachineInstr *&Lo20,
422 MachineInstr *&Hi12,
423 MachineInstr *&Last) {
424 Register DestReg =
425 Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();
426
427 // Look for arithmetic instructions we can get an offset from.
428 // We might be able to remove the arithmetic instructions by folding the
429 // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I) or
430 // LU12I_W+PseudoAddTPRel+ADDI.
431 if (!MRI->hasOneUse(DestReg))
432 return false;
433
434 // DestReg has only one use.
435 MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
436 switch (Tail.getOpcode()) {
437 default:
438 LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
439 << Tail);
440 break;
441 case LoongArch::ADDI_W:
442 if (ST->is64Bit())
443 return false;
444 [[fallthrough]];
445 case LoongArch::ADDI_D:
446 case LoongArch::ADDU16I_D: {
447 // Offset is simply an immediate operand.
448 int64_t Offset = Tail.getOperand(2).getImm();
449 if (Tail.getOpcode() == LoongArch::ADDU16I_D)
451
452 // We might have two ADDIs in a row.
453 Register TailDestReg = Tail.getOperand(0).getReg();
454 if (MRI->hasOneUse(TailDestReg)) {
455 MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
456 if (ST->is64Bit() && TailTail.getOpcode() == LoongArch::ADDI_W)
457 return false;
458 if (TailTail.getOpcode() == LoongArch::ADDI_W ||
459 TailTail.getOpcode() == LoongArch::ADDI_D) {
460 Offset += TailTail.getOperand(2).getImm();
461 LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
462 foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailTail, Offset);
463 Tail.eraseFromParent();
464 return true;
465 }
466 }
467
468 LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
469 foldOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, Offset);
470 return true;
471 }
472 case LoongArch::ADD_W:
473 if (ST->is64Bit())
474 return false;
475 [[fallthrough]];
476 case LoongArch::ADD_D:
477 // The offset is too large to fit in the immediate field of ADDI.
478 return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
479 break;
480 }
481
482 return false;
483}
484
485// Memory access opcode mapping for transforms.
486static unsigned getNewOpc(unsigned Op, bool isLarge) {
487 switch (Op) {
488 case LoongArch::LD_B:
489 return isLarge ? LoongArch::LDX_B : LoongArch::LD_B;
490 case LoongArch::LD_H:
491 return isLarge ? LoongArch::LDX_H : LoongArch::LD_H;
492 case LoongArch::LD_W:
493 case LoongArch::LDPTR_W:
494 return isLarge ? LoongArch::LDX_W : LoongArch::LD_W;
495 case LoongArch::LD_D:
496 case LoongArch::LDPTR_D:
497 return isLarge ? LoongArch::LDX_D : LoongArch::LD_D;
498 case LoongArch::LD_BU:
499 return isLarge ? LoongArch::LDX_BU : LoongArch::LD_BU;
500 case LoongArch::LD_HU:
501 return isLarge ? LoongArch::LDX_HU : LoongArch::LD_HU;
502 case LoongArch::LD_WU:
503 return isLarge ? LoongArch::LDX_WU : LoongArch::LD_WU;
504 case LoongArch::FLD_S:
505 return isLarge ? LoongArch::FLDX_S : LoongArch::FLD_S;
506 case LoongArch::FLD_D:
507 return isLarge ? LoongArch::FLDX_D : LoongArch::FLD_D;
508 case LoongArch::VLD:
509 return isLarge ? LoongArch::VLDX : LoongArch::VLD;
510 case LoongArch::XVLD:
511 return isLarge ? LoongArch::XVLDX : LoongArch::XVLD;
512 case LoongArch::VLDREPL_B:
513 return LoongArch::VLDREPL_B;
514 case LoongArch::XVLDREPL_B:
515 return LoongArch::XVLDREPL_B;
516 case LoongArch::ST_B:
517 return isLarge ? LoongArch::STX_B : LoongArch::ST_B;
518 case LoongArch::ST_H:
519 return isLarge ? LoongArch::STX_H : LoongArch::ST_H;
520 case LoongArch::ST_W:
521 case LoongArch::STPTR_W:
522 return isLarge ? LoongArch::STX_W : LoongArch::ST_W;
523 case LoongArch::ST_D:
524 case LoongArch::STPTR_D:
525 return isLarge ? LoongArch::STX_D : LoongArch::ST_D;
526 case LoongArch::FST_S:
527 return isLarge ? LoongArch::FSTX_S : LoongArch::FST_S;
528 case LoongArch::FST_D:
529 return isLarge ? LoongArch::FSTX_D : LoongArch::FST_D;
530 case LoongArch::VST:
531 return isLarge ? LoongArch::VSTX : LoongArch::VST;
532 case LoongArch::XVST:
533 return isLarge ? LoongArch::XVSTX : LoongArch::XVST;
534 default:
535 llvm_unreachable("Unexpected opcode for replacement");
536 }
537}
538
539bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
540 MachineInstr &Lo12,
541 MachineInstr *&Lo20,
542 MachineInstr *&Hi12,
543 MachineInstr *&Last) {
544 Register DestReg =
545 Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();
546
547 // If all the uses are memory ops with the same offset, we can transform:
548 //
549 // 1. (small/medium):
550 // 1.1. pcala
551 // pcalau12i vreg1, %pc_hi20(s)
552 // addi.d vreg2, vreg1, %pc_lo12(s)
553 // ld.w vreg3, 8(vreg2)
554 //
555 // =>
556 //
557 // pcalau12i vreg1, %pc_hi20(s+8)
558 // ld.w vreg3, vreg1, %pc_lo12(s+8)(vreg1)
559 //
560 // 1.2. tls-le
561 // lu12i.w vreg1, %le_hi20_r(s)
562 // add.w/d vreg2, vreg1, r2, %le_add_r(s)
563 // addi.w/d vreg3, vreg2, %le_lo12_r(s)
564 // ld.w vreg4, 8(vreg3)
565 //
566 // =>
567 //
568 // lu12i.w vreg1, %le_hi20_r(s+8)
569 // add.w/d vreg2, vreg1, r2, %le_add_r(s+8)
570 // ld.w vreg4, vreg2, %le_lo12_r(s+8)(vreg2)
571 //
572 // 2. (large):
573 // pcalau12i vreg1, %pc_hi20(s)
574 // addi.d vreg2, $zero, %pc_lo12(s)
575 // lu32i.d vreg3, vreg2, %pc64_lo20(s)
576 // lu52i.d vreg4, vreg3, %pc64_hi12(s)
577 // add.d vreg5, vreg4, vreg1
578 // ld.w vreg6, 8(vreg5)
579 //
580 // =>
581 //
582 // pcalau12i vreg1, %pc_hi20(s+8)
583 // addi.d vreg2, $zero, %pc_lo12(s+8)
584 // lu32i.d vreg3, vreg2, %pc64_lo20(s+8)
585 // lu52i.d vreg4, vreg3, %pc64_hi12(s+8)
586 // ldx.w vreg6, vreg4, vreg1
587
588 std::optional<int64_t> CommonOffset;
589 DenseMap<const MachineInstr *, SmallVector<unsigned>>
590 InlineAsmMemoryOpIndexesMap;
591 for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
592 switch (UseMI.getOpcode()) {
593 default:
594 LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
595 return false;
596 case LoongArch::VLDREPL_B:
597 case LoongArch::XVLDREPL_B:
598 // We can't do this for large pattern.
599 if (Last)
600 return false;
601 [[fallthrough]];
602 case LoongArch::LD_B:
603 case LoongArch::LD_H:
604 case LoongArch::LD_W:
605 case LoongArch::LD_D:
606 case LoongArch::LD_BU:
607 case LoongArch::LD_HU:
608 case LoongArch::LD_WU:
609 case LoongArch::LDPTR_W:
610 case LoongArch::LDPTR_D:
611 case LoongArch::FLD_S:
612 case LoongArch::FLD_D:
613 case LoongArch::VLD:
614 case LoongArch::XVLD:
615 case LoongArch::ST_B:
616 case LoongArch::ST_H:
617 case LoongArch::ST_W:
618 case LoongArch::ST_D:
619 case LoongArch::STPTR_W:
620 case LoongArch::STPTR_D:
621 case LoongArch::FST_S:
622 case LoongArch::FST_D:
623 case LoongArch::VST:
624 case LoongArch::XVST: {
625 if (UseMI.getOperand(1).isFI())
626 return false;
627 // Register defined by Lo should not be the value register.
628 if (DestReg == UseMI.getOperand(0).getReg())
629 return false;
630 assert(DestReg == UseMI.getOperand(1).getReg() &&
631 "Expected base address use");
632 // All load/store instructions must use the same offset.
633 int64_t Offset = UseMI.getOperand(2).getImm();
634 if (CommonOffset && Offset != CommonOffset)
635 return false;
636 CommonOffset = Offset;
637 break;
638 }
639 case LoongArch::INLINEASM:
640 case LoongArch::INLINEASM_BR: {
641 // We can't do this for large pattern.
642 if (Last)
643 return false;
644 SmallVector<unsigned> InlineAsmMemoryOpIndexes;
645 unsigned NumOps = 0;
646 for (unsigned I = InlineAsm::MIOp_FirstOperand;
647 I < UseMI.getNumOperands(); I += 1 + NumOps) {
648 const MachineOperand &FlagsMO = UseMI.getOperand(I);
649 // Should be an imm.
650 if (!FlagsMO.isImm())
651 continue;
652
653 const InlineAsm::Flag Flags(FlagsMO.getImm());
654 NumOps = Flags.getNumOperandRegisters();
655
656 // Memory constraints have two operands.
657 if (NumOps != 2 || !Flags.isMemKind()) {
658 // If the register is used by something other than a memory contraint,
659 // we should not fold.
660 for (unsigned J = 0; J < NumOps; ++J) {
661 const MachineOperand &MO = UseMI.getOperand(I + 1 + J);
662 if (MO.isReg() && MO.getReg() == DestReg)
663 return false;
664 }
665 continue;
666 }
667
668 // We can only do this for constraint m.
669 if (Flags.getMemoryConstraintID() != InlineAsm::ConstraintCode::m)
670 return false;
671
672 const MachineOperand &AddrMO = UseMI.getOperand(I + 1);
673 if (!AddrMO.isReg() || AddrMO.getReg() != DestReg)
674 continue;
675
676 const MachineOperand &OffsetMO = UseMI.getOperand(I + 2);
677 if (!OffsetMO.isImm())
678 continue;
679
680 // All inline asm memory operands must use the same offset.
681 int64_t Offset = OffsetMO.getImm();
682 if (CommonOffset && Offset != CommonOffset)
683 return false;
684 CommonOffset = Offset;
685 InlineAsmMemoryOpIndexes.push_back(I + 1);
686 }
687 InlineAsmMemoryOpIndexesMap.insert(
688 std::make_pair(&UseMI, InlineAsmMemoryOpIndexes));
689 break;
690 }
691 }
692 }
693
694 // We found a common offset.
695 // Update the offsets in global address lowering.
696 // We may have already folded some arithmetic so we need to add to any
697 // existing offset.
698 int64_t NewOffset = Hi20.getOperand(1).getOffset() + *CommonOffset;
699 // LA32 ignores the upper 32 bits.
700 if (!ST->is64Bit())
701 NewOffset = SignExtend64<32>(NewOffset);
702 // We can only fold simm32 offsets.
703 if (!isInt<32>(NewOffset))
704 return false;
705
706 // If optimized by this pass successfully, MO_RELAX bitmask target-flag should
707 // be removed from the pcala code sequence. Code sequence of tls-le can still
708 // be relaxed after being optimized.
709 //
710 // For example:
711 // pcalau12i $a0, %pc_hi20(symbol)
712 // addi.d $a0, $a0, %pc_lo12(symbol)
713 // ld.w $a0, $a0, 0
714 //
715 // =>
716 //
717 // pcalau12i $a0, %pc_hi20(symbol)
718 // ld.w $a0, $a0, %pc_lo12(symbol)
719 //
720 // Code sequence optimized before can be relax by linker. But after being
721 // optimized, it cannot be relaxed any more. So MO_RELAX flag should not be
722 // carried by them.
723 Hi20.getOperand(1).setOffset(NewOffset);
724 MachineOperand &ImmOp = Lo12.getOperand(2);
725 ImmOp.setOffset(NewOffset);
726 if (Lo20 && Hi12) {
727 Lo20->getOperand(2).setOffset(NewOffset);
728 Hi12->getOperand(2).setOffset(NewOffset);
729 }
730 if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
734 } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
735 MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
736 Add->getOperand(3).setOffset(NewOffset);
737 }
738
739 // Update the immediate in the load/store instructions to add the offset.
740 const LoongArchInstrInfo &TII = *ST->getInstrInfo();
741 for (MachineInstr &UseMI :
742 llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
743 if (UseMI.getOpcode() == LoongArch::INLINEASM ||
744 UseMI.getOpcode() == LoongArch::INLINEASM_BR) {
745 auto &InlineAsmMemoryOpIndexes = InlineAsmMemoryOpIndexesMap[&UseMI];
746 for (unsigned I : InlineAsmMemoryOpIndexes) {
747 MachineOperand &MO = UseMI.getOperand(I + 1);
748 switch (ImmOp.getType()) {
750 MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(),
752 break;
754 MO.ChangeToMCSymbol(ImmOp.getMCSymbol(),
756 MO.setOffset(ImmOp.getOffset());
757 break;
759 MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(),
761 break;
762 default:
763 report_fatal_error("unsupported machine operand type");
764 break;
765 }
766 }
767 } else {
768 UseMI.setDesc(TII.get(getNewOpc(UseMI.getOpcode(), Last)));
769 if (Last) {
770 UseMI.removeOperand(2);
771 UseMI.removeOperand(1);
772 UseMI.addOperand(Last->getOperand(1));
773 UseMI.addOperand(Last->getOperand(2));
774 UseMI.getOperand(1).setIsKill(false);
775 UseMI.getOperand(2).setIsKill(false);
776 } else {
777 UseMI.removeOperand(2);
778 UseMI.addOperand(ImmOp);
779 }
780 }
781 }
782
783 if (Last) {
784 Last->eraseFromParent();
785 return true;
786 }
787
788 if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
789 MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
790 Hi20.getOperand(0).getReg());
791 } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
792 MachineInstr *Add = &*MRI->use_instr_begin(Hi20.getOperand(0).getReg());
793 MRI->replaceRegWith(Lo12.getOperand(0).getReg(),
794 Add->getOperand(0).getReg());
795 }
796 Lo12.eraseFromParent();
797 return true;
798}
799
800bool LoongArchMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
801 if (skipFunction(Fn.getFunction()))
802 return false;
803
804 ST = &Fn.getSubtarget<LoongArchSubtarget>();
805
806 bool MadeChange = false;
807 MRI = &Fn.getRegInfo();
808 for (MachineBasicBlock &MBB : Fn) {
809 LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
810 for (MachineInstr &Hi20 : MBB) {
811 MachineInstr *Lo12 = nullptr;
812 MachineInstr *Lo20 = nullptr;
813 MachineInstr *Hi12 = nullptr;
814 MachineInstr *Last = nullptr;
815 if (Hi20.getOpcode() == LoongArch::PCALAU12I) {
816 // Detect foldable pcala code sequence in small/medium/large code model.
817 if (!detectFoldable(Hi20, Lo12, Lo20, Hi12, Last))
818 continue;
819 } else if (Hi20.getOpcode() == LoongArch::LU12I_W) {
820 MachineInstr *Add = nullptr;
821 // Detect foldable tls-le code sequence in small/medium code model.
822 if (!detectFoldable(Hi20, Add, Lo12))
823 continue;
824 } else {
825 continue;
826 }
827 // For tls-le, we do not pass the second PseudoAddTPRel instr in order to
828 // reuse the existing hooks and the last three paramaters should always be
829 // nullptr.
830 MadeChange |= detectAndFoldOffset(Hi20, *Lo12, Lo20, Hi12, Last);
831 MadeChange |= foldIntoMemoryOps(Hi20, *Lo12, Lo20, Hi12, Last);
832 }
833 }
834
835 return MadeChange;
836}
837
838/// Returns an instance of the Merge Base Offset Optimization pass.
840 return new LoongArchMergeBaseOffsetOpt();
841}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
#define DEBUG_TYPE
const HexagonInstrInfo * TII
const size_t AbstractManglingParser< Derived, Alloc >::NumOps
static unsigned getNewOpc(unsigned Op, bool isLarge)
#define LoongArch_MERGE_BASE_OFFSET_NAME
#define I(x, y, z)
Definition MD5.cpp:58
Register Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
if(PassOpts->AAPipeline)
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
#define LLVM_DEBUG(...)
Definition Debug.h:119
Represent the analysis usage information of a pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
std::pair< iterator, bool > insert(const std::pair< KeyT, ValueT > &KV)
Definition DenseMap.h:214
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
const LoongArchInstrInfo * getInstrInfo() const override
LLVM_ABI StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
Properties which a MachineFunction may have at a given point in time.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
LLVM_ABI void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
LLVM_ABI void ChangeToMCSymbol(MCSymbol *Sym, unsigned TargetFlags=0)
ChangeToMCSymbol - Replace this operand with a new MC symbol operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
LLVM_ABI void ChangeToBA(const BlockAddress *BA, int64_t Offset, unsigned TargetFlags=0)
ChangeToBA - Replace this operand with a new block address operand.
const BlockAddress * getBlockAddress() const
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
Register getReg() const
getReg - Returns the register number.
void setTargetFlags(unsigned F)
MCSymbol * getMCSymbol() const
@ MO_MCSymbol
MCSymbol reference (for debug/eh info)
@ MO_GlobalAddress
Address of a global value.
@ MO_BlockAddress
Address of a basic block.
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
void push_back(const T &Elt)
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
static unsigned getDirectFlags(const MachineOperand &MO)
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:174
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:646
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
@ Add
Sum of integers.
DWARFExpression::Operation Op
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:577
FunctionPass * createLoongArchMergeBaseOffsetOptPass()
Returns an instance of the Merge Base Offset Optimization pass.