//===---- LoongArchMergeBaseOffset.cpp - Optimise address calculations ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Merge the offset of address calculation into the offset field
// of instructions in a global address lowering sequence.
//
//===----------------------------------------------------------------------===//

#include "LoongArch.h"
#include "LoongArchTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "loongarch-merge-base-offset"
#define LoongArch_MERGE_BASE_OFFSET_NAME "LoongArch Merge Base Offset"

namespace {

class LoongArchMergeBaseOffsetOpt : public MachineFunctionPass {
  const LoongArchSubtarget *ST = nullptr;
  MachineRegisterInfo *MRI;

public:
  static char ID;
  bool runOnMachineFunction(MachineFunction &Fn) override;
  bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Lo12,
                      MachineInstr *&Lo20, MachineInstr *&Hi12,
                      MachineInstr *&Last);

  bool detectAndFoldOffset(MachineInstr &Hi20, MachineInstr &Lo12,
                           MachineInstr *&Lo20, MachineInstr *&Hi12,
                           MachineInstr *&Last);
  void foldOffset(MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
                  MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
                  int64_t Offset);
  bool foldLargeOffset(MachineInstr &Hi20, MachineInstr &Lo12,
                       MachineInstr *&Lo20, MachineInstr *&Hi12,
                       MachineInstr *&Last, MachineInstr &TailAdd,
                       Register GAReg);

  bool foldIntoMemoryOps(MachineInstr &Hi20, MachineInstr &Lo12,
                         MachineInstr *&Lo20, MachineInstr *&Hi12,
                         MachineInstr *&Last);

  LoongArchMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::IsSSA);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return LoongArch_MERGE_BASE_OFFSET_NAME;
  }
};
} // end anonymous namespace

char LoongArchMergeBaseOffsetOpt::ID = 0;
INITIALIZE_PASS(LoongArchMergeBaseOffsetOpt, DEBUG_TYPE,
                LoongArch_MERGE_BASE_OFFSET_NAME, false, false)

// Detect either of the patterns:
//
// 1. (small/medium):
//   pcalau12i vreg1, %pc_hi20(s)
//   addi.d vreg2, vreg1, %pc_lo12(s)
//
// 2. (large):
//   pcalau12i vreg1, %pc_hi20(s)
//   addi.d vreg2, $zero, %pc_lo12(s)
//   lu32i.d vreg3, vreg2, %pc64_lo20(s)
//   lu52i.d vreg4, vreg3, %pc64_hi12(s)
//   add.d vreg5, vreg4, vreg1

// The pattern is only accepted if:
//   1) For the small/medium pattern, the first instruction has only one use,
//      which is the ADDI.
//   2) For the large pattern, the first four instructions each have only one
//      use, and the user of the fourth instruction is ADD.
//   3) The address operands have the appropriate type, reflecting the
//      lowering of a global address or constant pool using the pattern.
//   4) The offset value in the Global Address or Constant Pool is 0.
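//
// For example (illustrative virtual registers; the symbol `g` is
// hypothetical), a medium code model load of a 32-bit global enters the
// pass as:
//   pcalau12i vreg1, %pc_hi20(g)
//   addi.d vreg2, vreg1, %pc_lo12(g)
//   ld.w vreg3, 0(vreg2)
// detectFoldable matches the first two instructions; detectAndFoldOffset
// and foldIntoMemoryOps below can then fold an offset or the memory access
// into the relocations.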
bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
                                                 MachineInstr *&Lo12,
                                                 MachineInstr *&Lo20,
                                                 MachineInstr *&Hi12,
                                                 MachineInstr *&Last) {
  if (Hi20.getOpcode() != LoongArch::PCALAU12I)
    return false;

  const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
  if (Hi20Op1.getTargetFlags() != LoongArchII::MO_PCREL_HI)
    return false;

  auto isGlobalOrCPIOrBlockAddress = [](const MachineOperand &Op) {
    return Op.isGlobal() || Op.isCPI() || Op.isBlockAddress();
  };

  if (!isGlobalOrCPIOrBlockAddress(Hi20Op1) || Hi20Op1.getOffset() != 0)
    return false;

  Register HiDestReg = Hi20.getOperand(0).getReg();
  if (!MRI->hasOneUse(HiDestReg))
    return false;

  MachineInstr *UseInst = &*MRI->use_instr_begin(HiDestReg);
  if (UseInst->getOpcode() != LoongArch::ADD_D) {
    Lo12 = UseInst;
    if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
        (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
      return false;
  } else {
    assert(ST->is64Bit());
    Last = UseInst;

    Register LastOp1Reg = Last->getOperand(1).getReg();
    if (!LastOp1Reg.isVirtual())
      return false;
    Hi12 = MRI->getVRegDef(LastOp1Reg);
    const MachineOperand &Hi12Op2 = Hi12->getOperand(2);
    if (Hi12Op2.getTargetFlags() != LoongArchII::MO_PCREL64_HI)
      return false;
    if (!isGlobalOrCPIOrBlockAddress(Hi12Op2) || Hi12Op2.getOffset() != 0)
      return false;
    if (!MRI->hasOneUse(Hi12->getOperand(0).getReg()))
      return false;

    Lo20 = MRI->getVRegDef(Hi12->getOperand(1).getReg());
    const MachineOperand &Lo20Op2 = Lo20->getOperand(2);
    if (Lo20Op2.getTargetFlags() != LoongArchII::MO_PCREL64_LO)
      return false;
    if (!isGlobalOrCPIOrBlockAddress(Lo20Op2) || Lo20Op2.getOffset() != 0)
      return false;
    if (!MRI->hasOneUse(Lo20->getOperand(0).getReg()))
      return false;

    Lo12 = MRI->getVRegDef(Lo20->getOperand(1).getReg());
    if (!MRI->hasOneUse(Lo12->getOperand(0).getReg()))
      return false;
  }

  const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
  assert(Hi20.getOpcode() == LoongArch::PCALAU12I);
  if (Lo12Op2.getTargetFlags() != LoongArchII::MO_PCREL_LO ||
      !(isGlobalOrCPIOrBlockAddress(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
      Lo12Op2.getOffset() != 0)
    return false;

  if (Hi20Op1.isGlobal()) {
    LLVM_DEBUG(dbgs() << " Found lowered global address: "
                      << *Hi20Op1.getGlobal() << "\n");
  } else if (Hi20Op1.isBlockAddress()) {
    LLVM_DEBUG(dbgs() << " Found lowered block address: "
                      << *Hi20Op1.getBlockAddress() << "\n");
  } else if (Hi20Op1.isCPI()) {
    LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << Hi20Op1.getIndex()
                      << "\n");
  }

  return true;
}

// Update the offset in Hi20, Lo12, Lo20 and Hi12 instructions.
// Delete the tail instruction and update all the uses to use the
// output from Last.
void LoongArchMergeBaseOffsetOpt::foldOffset(
    MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
    MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
    int64_t Offset) {
  // Put the offset back in Hi20 and Lo12.
  Hi20.getOperand(1).setOffset(Offset);
  Lo12.getOperand(2).setOffset(Offset);
  if (Lo20 && Hi12) {
    Lo20->getOperand(2).setOffset(Offset);
    Hi12->getOperand(2).setOffset(Offset);
  }
  // Delete the tail instruction.
  MachineInstr *Def = Last ? Last : &Lo12;
  MRI->constrainRegClass(Def->getOperand(0).getReg(),
                         MRI->getRegClass(Tail.getOperand(0).getReg()));
  MRI->replaceRegWith(Tail.getOperand(0).getReg(), Def->getOperand(0).getReg());
  Tail.eraseFromParent();
  LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
                    << " " << Hi20 << " " << Lo12;);
  if (Lo20 && Hi12) {
    LLVM_DEBUG(dbgs() << " " << *Lo20 << " " << *Hi12;);
  }
}

// Detect patterns for large offsets that are passed into an ADD instruction.
// If the pattern is found, updates the offset in Hi20, Lo12, Lo20 and Hi12
// instructions and deletes TailAdd and the instructions that produced the
// offset.
//
// (The instructions marked with "!" are not necessarily present)
//
// Base address lowering is of the form:
//      Hi20:  pcalau12i vreg1, %pc_hi20(s)
//   +- Lo12:  addi.d vreg2, vreg1, %pc_lo12(s)
//   |  Lo20:  lu32i.d vreg2, %pc64_lo20(s)        !
//   +- Hi12:  lu52i.d vreg2, vreg2, %pc64_hi12(s) !
//   |
//   | The large offset can be one of the forms:
//   |
//   +-> 1) Offset that has non zero bits in Hi20 and Lo12 bits:
//   |      OffsetHi20: lu12i.w vreg3, 4
//   |      OffsetLo12: ori voff, vreg3, 188 ---------------------+
//   |                                                            |
//   +-> 2) Offset that has non zero bits in Hi20 bits only:      |
//   |      OffsetHi20: lu12i.w voff, 128 ------------------------+
//   |                                                            |
//   +-> 3) Offset that has non zero bits in Lo20 bits:           |
//   |      OffsetHi20: lu12i.w vreg3, 121 !                      |
//   |      OffsetLo12: ori voff, vreg3, 122 !                    |
//   |      OffsetLo20: lu32i.d voff, 123 ------------------------+
//   +-> 4) Offset that has non zero bits in Hi12 bits:           |
//          OffsetHi20: lu12i.w vreg3, 121 !                      |
//          OffsetLo12: ori voff, vreg3, 122 !                    |
//          OffsetLo20: lu32i.d vreg3, 123 !                      |
//          OffsetHi12: lu52i.d voff, vreg3, 124 -----------------+
//                                                                |
// TailAdd: add.d vreg4, vreg2, voff <----------------------------+
//
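// Worked example for form 1 above, using the values from the diagram: the
// walk in foldLargeOffset visits ORI first and then LU12I.W, accumulating
//   Offset = 188 + (4 << 12) = 16572 (0x40BC)
// which foldOffset then merges into the %pc_hi20/%pc_lo12 (and, for the
// large pattern, %pc64_lo20/%pc64_hi12) relocations.
//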
bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
    MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
    MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
    Register GAReg) {
  assert((TailAdd.getOpcode() == LoongArch::ADD_W ||
          TailAdd.getOpcode() == LoongArch::ADD_D) &&
         "Expected ADD instruction!");
  Register Rs = TailAdd.getOperand(1).getReg();
  Register Rt = TailAdd.getOperand(2).getReg();
  Register Reg = Rs == GAReg ? Rt : Rs;
  SmallVector<MachineInstr *, 4> Instrs;
  int64_t Offset = 0;
  int64_t Mask = -1;
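  // Mask tracks which high bit ranges LU52I.D/LU32I.D have already
  // contributed (the use-def chain is walked from TailAdd upwards), so the
  // sign extension of the parts visited later does not double-count those
  // bits.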

  // This can point to one of [ORI, LU12I.W, LU32I.D, LU52I.D]:
  for (int i = 0; i < 4; i++) {
    // Stop once the chain reaches R0.
    if (Reg == LoongArch::R0)
      break;

    // Can't fold if the register has more than one use.
    if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
      return false;

    MachineInstr *Curr = MRI->getVRegDef(Reg);
    if (!Curr)
      break;

    switch (Curr->getOpcode()) {
    default:
      // Can't fold if the instruction opcode is unexpected.
      return false;
    case LoongArch::ORI: {
      MachineOperand ImmOp = Curr->getOperand(2);
      if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
        return false;
      Offset += ImmOp.getImm();
      Reg = Curr->getOperand(1).getReg();
      Instrs.push_back(Curr);
      break;
    }
    case LoongArch::LU12I_W: {
      MachineOperand ImmOp = Curr->getOperand(1);
      if (ImmOp.getTargetFlags() != LoongArchII::MO_None)
        return false;
      Offset += SignExtend64<32>(ImmOp.getImm() << 12) & Mask;
      Reg = LoongArch::R0;
      Instrs.push_back(Curr);
      break;
    }
    case LoongArch::LU32I_D: {
      MachineOperand ImmOp = Curr->getOperand(2);
      if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Lo20)
        return false;
      Offset += SignExtend64<52>(ImmOp.getImm() << 32) & Mask;
      Mask ^= 0x000FFFFF00000000ULL;
      Reg = Curr->getOperand(1).getReg();
      Instrs.push_back(Curr);
      break;
    }
    case LoongArch::LU52I_D: {
      MachineOperand ImmOp = Curr->getOperand(2);
      if (ImmOp.getTargetFlags() != LoongArchII::MO_None || !Hi12)
        return false;
      Offset += ImmOp.getImm() << 52;
      Mask ^= 0xFFF0000000000000ULL;
      Reg = Curr->getOperand(1).getReg();
      Instrs.push_back(Curr);
      break;
    }
    }
  }

  // Can't fold if the offset is not extracted.
  if (!Offset)
    return false;

  foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
  LLVM_DEBUG(dbgs() << " Offset Instrs:\n");
  for (auto I : Instrs) {
    LLVM_DEBUG(dbgs() << " " << *I);
    I->eraseFromParent();
  }

  return true;
}

bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
                                                      MachineInstr &Lo12,
                                                      MachineInstr *&Lo20,
                                                      MachineInstr *&Hi12,
                                                      MachineInstr *&Last) {
  Register DestReg =
      Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();

  // Look for arithmetic instructions we can get an offset from.
  // We might be able to remove the arithmetic instructions by folding the
  // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I).
  if (!MRI->hasOneUse(DestReg))
    return false;

  // DestReg has only one use.
  MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
  switch (Tail.getOpcode()) {
  default:
    LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
                      << Tail);
    break;
  case LoongArch::ADDI_W:
    if (ST->is64Bit())
      return false;
    [[fallthrough]];
  case LoongArch::ADDI_D:
  case LoongArch::ADDU16I_D: {
    // Offset is simply an immediate operand.
    int64_t Offset = Tail.getOperand(2).getImm();
    if (Tail.getOpcode() == LoongArch::ADDU16I_D)
      Offset = SignExtend64<32>(Offset << 16);

    // We might have two ADDIs in a row.
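    // For example (illustrative values), an offset of 4094 exceeds the
    // simm12 range of ADDI and is materialized as:
    //   addi.d vreg3, vreg2, 2047
    //   addi.d vreg4, vreg3, 2047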
    Register TailDestReg = Tail.getOperand(0).getReg();
    if (MRI->hasOneUse(TailDestReg)) {
      MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
      if (ST->is64Bit() && TailTail.getOpcode() == LoongArch::ADDI_W)
        return false;
      if (TailTail.getOpcode() == LoongArch::ADDI_W ||
          TailTail.getOpcode() == LoongArch::ADDI_D) {
        Offset += TailTail.getOperand(2).getImm();
        LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
        foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailTail, Offset);
        Tail.eraseFromParent();
        return true;
      }
    }

    LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
    foldOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, Offset);
    return true;
  }
  case LoongArch::ADD_W:
    if (ST->is64Bit())
      return false;
    [[fallthrough]];
  case LoongArch::ADD_D:
    // The offset is too large to fit in the immediate field of ADDI.
    return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
  }

  return false;
}

// Memory access opcode mapping for transforms.
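// For the small/medium pattern the access keeps its base+imm12 form (with
// LDPTR/STPTR normalized to plain LD/ST) and the immediate is replaced by
// the %pc_lo12 operand; for the large pattern the base address ends up
// split across two registers, so the reg+reg indexed variants (e.g.
// LDX_W) are used instead.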
static unsigned getNewOpc(unsigned Op, bool isLarge) {
  switch (Op) {
  case LoongArch::LD_B:
    return isLarge ? LoongArch::LDX_B : LoongArch::LD_B;
  case LoongArch::LD_H:
    return isLarge ? LoongArch::LDX_H : LoongArch::LD_H;
  case LoongArch::LD_W:
  case LoongArch::LDPTR_W:
    return isLarge ? LoongArch::LDX_W : LoongArch::LD_W;
  case LoongArch::LD_D:
  case LoongArch::LDPTR_D:
    return isLarge ? LoongArch::LDX_D : LoongArch::LD_D;
  case LoongArch::LD_BU:
    return isLarge ? LoongArch::LDX_BU : LoongArch::LD_BU;
  case LoongArch::LD_HU:
    return isLarge ? LoongArch::LDX_HU : LoongArch::LD_HU;
  case LoongArch::LD_WU:
    return isLarge ? LoongArch::LDX_WU : LoongArch::LD_WU;
  case LoongArch::FLD_S:
    return isLarge ? LoongArch::FLDX_S : LoongArch::FLD_S;
  case LoongArch::FLD_D:
    return isLarge ? LoongArch::FLDX_D : LoongArch::FLD_D;
  case LoongArch::VLD:
    return isLarge ? LoongArch::VLDX : LoongArch::VLD;
  case LoongArch::XVLD:
    return isLarge ? LoongArch::XVLDX : LoongArch::XVLD;
  case LoongArch::VLDREPL_B:
    return LoongArch::VLDREPL_B;
  case LoongArch::XVLDREPL_B:
    return LoongArch::XVLDREPL_B;
  case LoongArch::ST_B:
    return isLarge ? LoongArch::STX_B : LoongArch::ST_B;
  case LoongArch::ST_H:
    return isLarge ? LoongArch::STX_H : LoongArch::ST_H;
  case LoongArch::ST_W:
  case LoongArch::STPTR_W:
    return isLarge ? LoongArch::STX_W : LoongArch::ST_W;
  case LoongArch::ST_D:
  case LoongArch::STPTR_D:
    return isLarge ? LoongArch::STX_D : LoongArch::ST_D;
  case LoongArch::FST_S:
    return isLarge ? LoongArch::FSTX_S : LoongArch::FST_S;
  case LoongArch::FST_D:
    return isLarge ? LoongArch::FSTX_D : LoongArch::FST_D;
  case LoongArch::VST:
    return isLarge ? LoongArch::VSTX : LoongArch::VST;
  case LoongArch::XVST:
    return isLarge ? LoongArch::XVSTX : LoongArch::XVST;
  default:
    llvm_unreachable("Unexpected opcode for replacement");
  }
}

bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
                                                    MachineInstr &Lo12,
                                                    MachineInstr *&Lo20,
                                                    MachineInstr *&Hi12,
                                                    MachineInstr *&Last) {
  Register DestReg =
      Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();

  // If all the uses are memory ops with the same offset, we can transform:
  //
  // 1. (small/medium):
  //   pcalau12i vreg1, %pc_hi20(s)
  //   addi.d vreg2, vreg1, %pc_lo12(s)
  //   ld.w vreg3, 8(vreg2)
  //
  //   =>
  //
  //   pcalau12i vreg1, %pc_hi20(s+8)
  //   ld.w vreg3, vreg1, %pc_lo12(s+8)
  //
  // 2. (large):
  //   pcalau12i vreg1, %pc_hi20(s)
  //   addi.d vreg2, $zero, %pc_lo12(s)
  //   lu32i.d vreg3, vreg2, %pc64_lo20(s)
  //   lu52i.d vreg4, vreg3, %pc64_hi12(s)
  //   add.d vreg5, vreg4, vreg1
  //   ld.w vreg6, 8(vreg5)
  //
  //   =>
  //
  //   pcalau12i vreg1, %pc_hi20(s+8)
  //   addi.d vreg2, $zero, %pc_lo12(s+8)
  //   lu32i.d vreg3, vreg2, %pc64_lo20(s+8)
  //   lu52i.d vreg4, vreg3, %pc64_hi12(s+8)
  //   ldx.w vreg6, vreg4, vreg1
  std::optional<int64_t> CommonOffset;
  DenseMap<const MachineInstr *, SmallVector<unsigned>>
      InlineAsmMemoryOpIndexesMap;
  for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
    switch (UseMI.getOpcode()) {
    default:
      LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
      return false;
    case LoongArch::VLDREPL_B:
    case LoongArch::XVLDREPL_B:
      // We can't do this for the large pattern.
      if (Last)
        return false;
      [[fallthrough]];
    case LoongArch::LD_B:
    case LoongArch::LD_H:
    case LoongArch::LD_W:
    case LoongArch::LD_D:
    case LoongArch::LD_BU:
    case LoongArch::LD_HU:
    case LoongArch::LD_WU:
    case LoongArch::LDPTR_W:
    case LoongArch::LDPTR_D:
    case LoongArch::FLD_S:
    case LoongArch::FLD_D:
    case LoongArch::VLD:
    case LoongArch::XVLD:
    case LoongArch::ST_B:
    case LoongArch::ST_H:
    case LoongArch::ST_W:
    case LoongArch::ST_D:
    case LoongArch::STPTR_W:
    case LoongArch::STPTR_D:
    case LoongArch::FST_S:
    case LoongArch::FST_D:
    case LoongArch::VST:
    case LoongArch::XVST: {
      if (UseMI.getOperand(1).isFI())
        return false;
      // The register defined by Lo12 should not be the value register.
      if (DestReg == UseMI.getOperand(0).getReg())
        return false;
      assert(DestReg == UseMI.getOperand(1).getReg() &&
             "Expected base address use");
      // All load/store instructions must use the same offset.
      int64_t Offset = UseMI.getOperand(2).getImm();
      if (CommonOffset && Offset != CommonOffset)
        return false;
      CommonOffset = Offset;
      break;
    }
    case LoongArch::INLINEASM:
    case LoongArch::INLINEASM_BR: {
      // We can't do this for the large pattern.
      if (Last)
        return false;
      SmallVector<unsigned> InlineAsmMemoryOpIndexes;
      unsigned NumOps = 0;
      for (unsigned I = InlineAsm::MIOp_FirstOperand;
           I < UseMI.getNumOperands(); I += 1 + NumOps) {
        const MachineOperand &FlagsMO = UseMI.getOperand(I);
        // Should be an imm.
        if (!FlagsMO.isImm())
          continue;

        const InlineAsm::Flag Flags(FlagsMO.getImm());
        NumOps = Flags.getNumOperandRegisters();

        // Memory constraints have two operands.
        if (NumOps != 2 || !Flags.isMemKind()) {
          // If the register is used by something other than a memory
          // constraint, we should not fold.
          for (unsigned J = 0; J < NumOps; ++J) {
            const MachineOperand &MO = UseMI.getOperand(I + 1 + J);
            if (MO.isReg() && MO.getReg() == DestReg)
              return false;
          }
          continue;
        }

        // We can only do this for constraint m.
        if (Flags.getMemoryConstraintID() != InlineAsm::ConstraintCode::m)
          return false;

        const MachineOperand &AddrMO = UseMI.getOperand(I + 1);
        if (!AddrMO.isReg() || AddrMO.getReg() != DestReg)
          continue;

        const MachineOperand &OffsetMO = UseMI.getOperand(I + 2);
        if (!OffsetMO.isImm())
          continue;

        // All inline asm memory operands must use the same offset.
        int64_t Offset = OffsetMO.getImm();
        if (CommonOffset && Offset != CommonOffset)
          return false;
        CommonOffset = Offset;
        InlineAsmMemoryOpIndexes.push_back(I + 1);
      }
      InlineAsmMemoryOpIndexesMap.insert(
          std::make_pair(&UseMI, InlineAsmMemoryOpIndexes));
      break;
    }
    }
  }

  // We found a common offset.
  // Update the offsets in global address lowering.
  // We may have already folded some arithmetic so we need to add to any
  // existing offset.
  int64_t NewOffset = Hi20.getOperand(1).getOffset() + *CommonOffset;
  // LA32 ignores the upper 32 bits.
  if (!ST->is64Bit())
    NewOffset = SignExtend64<32>(NewOffset);
  // We can only fold simm32 offsets.
  if (!isInt<32>(NewOffset))
    return false;

  Hi20.getOperand(1).setOffset(NewOffset);
  MachineOperand &ImmOp = Lo12.getOperand(2);
  ImmOp.setOffset(NewOffset);
  if (Lo20 && Hi12) {
    Lo20->getOperand(2).setOffset(NewOffset);
    Hi12->getOperand(2).setOffset(NewOffset);
  }

  // Update the immediate in the load/store instructions to add the offset.
  const LoongArchInstrInfo &TII = *ST->getInstrInfo();
  for (MachineInstr &UseMI :
       llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
    if (UseMI.getOpcode() == LoongArch::INLINEASM ||
        UseMI.getOpcode() == LoongArch::INLINEASM_BR) {
      auto &InlineAsmMemoryOpIndexes = InlineAsmMemoryOpIndexesMap[&UseMI];
      for (unsigned I : InlineAsmMemoryOpIndexes) {
        MachineOperand &MO = UseMI.getOperand(I + 1);
        switch (ImmOp.getType()) {
        case MachineOperand::MO_GlobalAddress:
          MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(),
                        ImmOp.getTargetFlags());
          break;
        case MachineOperand::MO_MCSymbol:
          MO.ChangeToMCSymbol(ImmOp.getMCSymbol(), ImmOp.getTargetFlags());
          MO.setOffset(ImmOp.getOffset());
          break;
        case MachineOperand::MO_BlockAddress:
          MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(),
                        ImmOp.getTargetFlags());
          break;
        default:
          report_fatal_error("unsupported machine operand type");
          break;
        }
      }
    } else {
      UseMI.setDesc(TII.get(getNewOpc(UseMI.getOpcode(), Last)));
      if (Last) {
        UseMI.removeOperand(2);
        UseMI.removeOperand(1);
        UseMI.addOperand(Last->getOperand(1));
        UseMI.addOperand(Last->getOperand(2));
        UseMI.getOperand(1).setIsKill(false);
        UseMI.getOperand(2).setIsKill(false);
      } else {
        UseMI.removeOperand(2);
        UseMI.addOperand(ImmOp);
      }
    }
  }

  if (Last) {
    Last->eraseFromParent();
    return true;
  }

  MRI->replaceRegWith(Lo12.getOperand(0).getReg(), Hi20.getOperand(0).getReg());
  Lo12.eraseFromParent();
  return true;
}

bool LoongArchMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  ST = &Fn.getSubtarget<LoongArchSubtarget>();

  bool MadeChange = false;
  MRI = &Fn.getRegInfo();
  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &Hi20 : MBB) {
      MachineInstr *Lo12 = nullptr;
      MachineInstr *Lo20 = nullptr;
      MachineInstr *Hi12 = nullptr;
      MachineInstr *Last = nullptr;
      if (!detectFoldable(Hi20, Lo12, Lo20, Hi12, Last))
        continue;
      MadeChange |= detectAndFoldOffset(Hi20, *Lo12, Lo20, Hi12, Last);
      MadeChange |= foldIntoMemoryOps(Hi20, *Lo12, Lo20, Hi12, Last);
    }
  }

  return MadeChange;
}

/// Returns an instance of the Merge Base Offset Optimization pass.
FunctionPass *llvm::createLoongArchMergeBaseOffsetOptPass() {
  return new LoongArchMergeBaseOffsetOpt();
}
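
// A sketch of how this pass is expected to be scheduled, assuming the
// usual LoongArchPassConfig hook in LoongArchTargetMachine.cpp:
//
//   void LoongArchPassConfig::addPreRegAlloc() {
//     addPass(createLoongArchMergeBaseOffsetOptPass());
//   }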