//===---- LoongArchMergeBaseOffset.cpp - Optimise address calculations ----===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Merge the offset of address calculation into the offset field
// of instructions in a global address lowering sequence.
//
//===----------------------------------------------------------------------===//

#include "LoongArch.h"
#include "LoongArchTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/MC/TargetRegistry.h"
#include "llvm/Support/Debug.h"
#include "llvm/Target/TargetOptions.h"
#include <optional>

using namespace llvm;

#define DEBUG_TYPE "loongarch-merge-base-offset"
#define LoongArch_MERGE_BASE_OFFSET_NAME "LoongArch Merge Base Offset"

namespace {

class LoongArchMergeBaseOffsetOpt : public MachineFunctionPass {
  const LoongArchSubtarget *ST = nullptr;
  MachineRegisterInfo *MRI;

public:
  static char ID;
  bool runOnMachineFunction(MachineFunction &Fn) override;
  bool detectFoldable(MachineInstr &Hi20, MachineInstr *&Lo12,
                      MachineInstr *&Lo20, MachineInstr *&Hi12,
                      MachineInstr *&Last);

  bool detectAndFoldOffset(MachineInstr &Hi20, MachineInstr &Lo12,
                           MachineInstr *&Lo20, MachineInstr *&Hi12,
                           MachineInstr *&Last);
  void foldOffset(MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
                  MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
                  int64_t Offset);
  bool foldLargeOffset(MachineInstr &Hi20, MachineInstr &Lo12,
                       MachineInstr *&Lo20, MachineInstr *&Hi12,
                       MachineInstr *&Last, MachineInstr &TailAdd,
                       Register GAReg);

  bool foldIntoMemoryOps(MachineInstr &Hi20, MachineInstr &Lo12,
                         MachineInstr *&Lo20, MachineInstr *&Hi12,
                         MachineInstr *&Last);

  LoongArchMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::IsSSA);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return LoongArch_MERGE_BASE_OFFSET_NAME;
  }
};
} // end anonymous namespace

char LoongArchMergeBaseOffsetOpt::ID = 0;
INITIALIZE_PASS(LoongArchMergeBaseOffsetOpt, DEBUG_TYPE,
                LoongArch_MERGE_BASE_OFFSET_NAME, false, false)

// Detect either of the patterns:
//
// 1. (small/medium):
//   pcalau12i vreg1, %pc_hi20(s)
//   addi.d vreg2, vreg1, %pc_lo12(s)
//
// 2. (large):
//   pcalau12i vreg1, %pc_hi20(s)
//   addi.d vreg2, $zero, %pc_lo12(s)
//   lu32i.d vreg3, vreg2, %pc64_lo20(s)
//   lu52i.d vreg4, vreg3, %pc64_hi12(s)
//   add.d vreg5, vreg4, vreg1
//
// The pattern is only accepted if:
//   1) For the small and medium patterns, the first instruction has only one
//      use, which is the ADDI.
//   2) For the large pattern, the first four instructions each have only one
//      use, and the user of the fourth instruction is ADD.
//   3) The address operands have the appropriate type, reflecting the
//      lowering of a global address or constant pool using the pattern.
//   4) The offset value in the Global Address or Constant Pool is 0.
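//
// For example (an illustrative sketch, not taken from a specific test),
// folding a byte offset of 8 into the small/medium pattern rewrites:
//   pcalau12i vreg1, %pc_hi20(s)
//   addi.d vreg2, vreg1, %pc_lo12(s)
//   addi.d vreg3, vreg2, 8
// into:
//   pcalau12i vreg1, %pc_hi20(s+8)
//   addi.d vreg2, vreg1, %pc_lo12(s+8)
// with former uses of vreg3 rewritten to read vreg2.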
bool LoongArchMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi20,
                                                 MachineInstr *&Lo12,
                                                 MachineInstr *&Lo20,
                                                 MachineInstr *&Hi12,
                                                 MachineInstr *&Last) {
  if (Hi20.getOpcode() != LoongArch::PCALAU12I)
    return false;

  const MachineOperand &Hi20Op1 = Hi20.getOperand(1);
  if (Hi20Op1.getTargetFlags() != LoongArchII::MO_PCREL_HI)
    return false;

  auto isGlobalOrCPIOrBlockAddress = [](const MachineOperand &Op) {
    return Op.isGlobal() || Op.isCPI() || Op.isBlockAddress();
  };

  if (!isGlobalOrCPIOrBlockAddress(Hi20Op1) || Hi20Op1.getOffset() != 0)
    return false;

  Register HiDestReg = Hi20.getOperand(0).getReg();
  if (!MRI->hasOneUse(HiDestReg))
    return false;

  MachineInstr *UseInst = &*MRI->use_instr_begin(HiDestReg);
  if (UseInst->getOpcode() != LoongArch::ADD_D) {
    Lo12 = UseInst;
    if ((ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_D) ||
        (!ST->is64Bit() && Lo12->getOpcode() != LoongArch::ADDI_W))
      return false;
  } else {
    assert(ST->is64Bit());
    Last = UseInst;

    Register LastOp1Reg = Last->getOperand(1).getReg();
    if (!LastOp1Reg.isVirtual())
      return false;
    Hi12 = MRI->getVRegDef(LastOp1Reg);
    const MachineOperand &Hi12Op2 = Hi12->getOperand(2);
    if (Hi12Op2.getTargetFlags() != LoongArchII::MO_PCREL64_HI)
      return false;
    if (!isGlobalOrCPIOrBlockAddress(Hi12Op2) || Hi12Op2.getOffset() != 0)
      return false;
    if (!MRI->hasOneUse(Hi12->getOperand(0).getReg()))
      return false;

    Lo20 = MRI->getVRegDef(Hi12->getOperand(1).getReg());
    const MachineOperand &Lo20Op2 = Lo20->getOperand(2);
    if (Lo20Op2.getTargetFlags() != LoongArchII::MO_PCREL64_LO)
      return false;
    if (!isGlobalOrCPIOrBlockAddress(Lo20Op2) || Lo20Op2.getOffset() != 0)
      return false;
    if (!MRI->hasOneUse(Lo20->getOperand(0).getReg()))
      return false;

    Lo12 = MRI->getVRegDef(Lo20->getOperand(1).getReg());
    if (!MRI->hasOneUse(Lo12->getOperand(0).getReg()))
      return false;
  }

  const MachineOperand &Lo12Op2 = Lo12->getOperand(2);
  assert(Hi20.getOpcode() == LoongArch::PCALAU12I);
  if (Lo12Op2.getTargetFlags() != LoongArchII::MO_PCREL_LO ||
      !(isGlobalOrCPIOrBlockAddress(Lo12Op2) || Lo12Op2.isMCSymbol()) ||
      Lo12Op2.getOffset() != 0)
    return false;

  if (Hi20Op1.isGlobal()) {
    LLVM_DEBUG(dbgs() << "  Found lowered global address: "
                      << *Hi20Op1.getGlobal() << "\n");
  } else if (Hi20Op1.isBlockAddress()) {
    LLVM_DEBUG(dbgs() << "  Found lowered block address: "
                      << *Hi20Op1.getBlockAddress() << "\n");
  } else if (Hi20Op1.isCPI()) {
    LLVM_DEBUG(dbgs() << "  Found lowered constant pool: " << Hi20Op1.getIndex()
                      << "\n");
  }

  return true;
}

// Update the offset in Hi20, Lo12, Lo20 and Hi12 instructions.
// Delete the tail instruction and update all the uses to use the
// output from Last.
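//
// An illustrative sketch for the large pattern, folding an offset of 16:
//   pcalau12i vreg1, %pc_hi20(s+16)
//   addi.d vreg2, $zero, %pc_lo12(s+16)
//   lu32i.d vreg3, vreg2, %pc64_lo20(s+16)
//   lu52i.d vreg4, vreg3, %pc64_hi12(s+16)
//   add.d vreg5, vreg4, vreg1
// The tail instruction that added 16 is deleted, and its uses are rewritten
// to read vreg5 (the output of Last).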
void LoongArchMergeBaseOffsetOpt::foldOffset(
    MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
    MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &Tail,
    int64_t Offset) {
  assert(isInt<32>(Offset) && "Unexpected offset");
  // Put the offset back in Hi20 and the Lo12 instruction.
  Hi20.getOperand(1).setOffset(Offset);
  Lo12.getOperand(2).setOffset(Offset);
  if (Lo20 && Hi12) {
    Lo20->getOperand(2).setOffset(Offset);
    Hi12->getOperand(2).setOffset(Offset);
  }
  // Delete the tail instruction.
  MachineInstr *Def = Last ? Last : &Lo12;
  MRI->constrainRegClass(Def->getOperand(0).getReg(),
                         MRI->getRegClass(Tail.getOperand(0).getReg()));
  MRI->replaceRegWith(Tail.getOperand(0).getReg(), Def->getOperand(0).getReg());
  Tail.eraseFromParent();
  LLVM_DEBUG(dbgs() << "  Merged offset " << Offset << " into base.\n"
                    << "     " << Hi20 << "     " << Lo12;);
  if (Lo20 && Hi12) {
    LLVM_DEBUG(dbgs() << "     " << *Lo20 << "     " << *Hi12;);
  }
}

// Detect patterns for large offsets that are passed into an ADD instruction.
// If the pattern is found, updates the offset in Hi20, Lo12, Lo20 and Hi12
// instructions and deletes TailAdd and the instructions that produced the
// offset.
//
// Base address lowering is of the form:
//   Hi20: pcalau12i vreg1, %pc_hi20(s)
//   Lo12: addi.d vreg2, vreg1, %pc_lo12(s)
//         /                                  \
//        /                                    \
//       /                                      \
//      /  The large offset can be of two forms: \
//  1) Offset that has non-zero bits in lower     2) Offset that has non-zero
//     12 bits and upper 20 bits                     bits in upper 20 bits only
//   OffsetHi: lu12i.w vreg3, 4
//   OffsetLo: ori voff, vreg3, 188                OffsetHi: lu12i.w voff, 128
//        \                                         /
//         \                                       /
//          \                                     /
//           \                                   /
//            TailAdd: add.d vreg4, vreg2, voff
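//
// E.g. in form 1 above, lu12i.w materializes 4 << 12 = 16384 and the ori
// contributes 188, so the recovered constant is 16384 + 188 = 16572, which
// foldOffset then merges into the relocation operands.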
bool LoongArchMergeBaseOffsetOpt::foldLargeOffset(
    MachineInstr &Hi20, MachineInstr &Lo12, MachineInstr *&Lo20,
    MachineInstr *&Hi12, MachineInstr *&Last, MachineInstr &TailAdd,
    Register GAReg) {
  assert((TailAdd.getOpcode() == LoongArch::ADD_W ||
          TailAdd.getOpcode() == LoongArch::ADD_D) &&
         "Expected ADD instruction!");
  Register Rs = TailAdd.getOperand(1).getReg();
  Register Rt = TailAdd.getOperand(2).getReg();
  Register Reg = Rs == GAReg ? Rt : Rs;

  // Can't fold if the register has more than one use.
  if (!Reg.isVirtual() || !MRI->hasOneUse(Reg))
    return false;
  // This can point to an ORI or a LU12I.W:
  MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
  if (OffsetTail.getOpcode() == LoongArch::ORI) {
    // The offset value has non-zero bits in both %hi and %lo parts.
    // Detect an ORI that feeds from a LU12I.W instruction.
    MachineOperand &OriImmOp = OffsetTail.getOperand(2);
    if (OriImmOp.getTargetFlags() != LoongArchII::MO_None)
      return false;
    Register OriReg = OffsetTail.getOperand(1).getReg();
    int64_t OffLo = OriImmOp.getImm();

    // Handle the case where rs1 of the ORI is R0.
    if (OriReg == LoongArch::R0) {
      LLVM_DEBUG(dbgs() << "  Offset Instrs: " << OffsetTail);
      foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, OffLo);
      OffsetTail.eraseFromParent();
      return true;
    }

    MachineInstr &OffsetLu12i = *MRI->getVRegDef(OriReg);
    MachineOperand &Lu12iImmOp = OffsetLu12i.getOperand(1);
    if (OffsetLu12i.getOpcode() != LoongArch::LU12I_W ||
        Lu12iImmOp.getTargetFlags() != LoongArchII::MO_None ||
        !MRI->hasOneUse(OffsetLu12i.getOperand(0).getReg()))
      return false;
    int64_t Offset = SignExtend64<32>(Lu12iImmOp.getImm() << 12);
    Offset += OffLo;
    // LU12I.W+ORI sign extends the result.
    Offset = SignExtend64<32>(Offset);
    LLVM_DEBUG(dbgs() << "  Offset Instrs: " << OffsetTail
                      << "                 " << OffsetLu12i);
    foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
    OffsetTail.eraseFromParent();
    OffsetLu12i.eraseFromParent();
    return true;
  } else if (OffsetTail.getOpcode() == LoongArch::LU12I_W) {
    // The offset value has all zero bits in the lower 12 bits. Only LU12I.W
    // exists.
    LLVM_DEBUG(dbgs() << "  Offset Instr: " << OffsetTail);
    int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
    foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailAdd, Offset);
    OffsetTail.eraseFromParent();
    return true;
  }
  return false;
}

bool LoongArchMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi20,
                                                      MachineInstr &Lo12,
                                                      MachineInstr *&Lo20,
                                                      MachineInstr *&Hi12,
                                                      MachineInstr *&Last) {
  Register DestReg =
      Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();

  // Look for arithmetic instructions we can get an offset from.
  // We might be able to remove the arithmetic instructions by folding the
  // offset into the PCALAU12I+(ADDI/ADDI+LU32I+LU52I).
  if (!MRI->hasOneUse(DestReg))
    return false;

  // DestReg has only one use.
  MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
  switch (Tail.getOpcode()) {
  default:
    LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
                      << Tail);
    break;
  case LoongArch::ADDI_W:
    if (ST->is64Bit())
      return false;
    [[fallthrough]];
  case LoongArch::ADDI_D:
  case LoongArch::ADDU16I_D: {
    // Offset is simply an immediate operand.
    int64_t Offset = Tail.getOperand(2).getImm();
    if (Tail.getOpcode() == LoongArch::ADDU16I_D)
      Offset = SignExtend64<32>(Offset << 16);
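    // ADDU16I_D adds its simm16 shifted left by 16: e.g. an immediate of 2
    // contributes 2 << 16 = 131072, and -1 contributes -65536 after the sign
    // extension above.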

    // We might have two ADDIs in a row.
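    // (Illustrative case: an offset of 2048 does not fit in a single simm12,
    // so it may be materialized as addi.d +2047 followed by addi.d +1; the
    // two immediates are summed below.)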
    Register TailDestReg = Tail.getOperand(0).getReg();
    if (MRI->hasOneUse(TailDestReg)) {
      MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
      if (ST->is64Bit() && TailTail.getOpcode() == LoongArch::ADDI_W)
        return false;
      if (TailTail.getOpcode() == LoongArch::ADDI_W ||
          TailTail.getOpcode() == LoongArch::ADDI_D) {
        Offset += TailTail.getOperand(2).getImm();
        LLVM_DEBUG(dbgs() << "  Offset Instrs: " << Tail << TailTail);
        foldOffset(Hi20, Lo12, Lo20, Hi12, Last, TailTail, Offset);
        Tail.eraseFromParent();
        return true;
      }
    }

    LLVM_DEBUG(dbgs() << "  Offset Instr: " << Tail);
    foldOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, Offset);
    return true;
  }
  case LoongArch::ADD_W:
    if (ST->is64Bit())
      return false;
    [[fallthrough]];
  case LoongArch::ADD_D:
    // The offset is too large to fit in the immediate field of ADDI.
    // This can be in two forms:
    // 1) LU12I.W hi_offset followed by:
    //    ORI lo_offset
    //    This happens in case the offset has non-zero bits in
    //    both hi 20 and lo 12 bits.
    // 2) LU12I.W (offset20)
    //    This happens in case the lower 12 bits of the offset are zeros.
    return foldLargeOffset(Hi20, Lo12, Lo20, Hi12, Last, Tail, DestReg);
    break;
  }

  return false;
}

// Memory access opcode mapping for transforms: return the reg+reg indexed
// form (e.g. LDX_W) when folding the large pattern, otherwise the immediate
// form.
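// Note: LDPTR_W/LDPTR_D and STPTR_W/STPTR_D are rewritten to the plain LD/ST
// forms, whose simm12 immediate can carry the folded %pc_lo12 operand, while
// [X]VLDREPL_B keeps its opcode because it has no indexed form (the large
// pattern is rejected for it in foldIntoMemoryOps).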
static unsigned getNewOpc(unsigned Op, bool isLarge) {
  switch (Op) {
  case LoongArch::LD_B:
    return isLarge ? LoongArch::LDX_B : LoongArch::LD_B;
  case LoongArch::LD_H:
    return isLarge ? LoongArch::LDX_H : LoongArch::LD_H;
  case LoongArch::LD_W:
  case LoongArch::LDPTR_W:
    return isLarge ? LoongArch::LDX_W : LoongArch::LD_W;
  case LoongArch::LD_D:
  case LoongArch::LDPTR_D:
    return isLarge ? LoongArch::LDX_D : LoongArch::LD_D;
  case LoongArch::LD_BU:
    return isLarge ? LoongArch::LDX_BU : LoongArch::LD_BU;
  case LoongArch::LD_HU:
    return isLarge ? LoongArch::LDX_HU : LoongArch::LD_HU;
  case LoongArch::LD_WU:
    return isLarge ? LoongArch::LDX_WU : LoongArch::LD_WU;
  case LoongArch::FLD_S:
    return isLarge ? LoongArch::FLDX_S : LoongArch::FLD_S;
  case LoongArch::FLD_D:
    return isLarge ? LoongArch::FLDX_D : LoongArch::FLD_D;
  case LoongArch::VLD:
    return isLarge ? LoongArch::VLDX : LoongArch::VLD;
  case LoongArch::XVLD:
    return isLarge ? LoongArch::XVLDX : LoongArch::XVLD;
  case LoongArch::VLDREPL_B:
    return LoongArch::VLDREPL_B;
  case LoongArch::XVLDREPL_B:
    return LoongArch::XVLDREPL_B;
  case LoongArch::ST_B:
    return isLarge ? LoongArch::STX_B : LoongArch::ST_B;
  case LoongArch::ST_H:
    return isLarge ? LoongArch::STX_H : LoongArch::ST_H;
  case LoongArch::ST_W:
  case LoongArch::STPTR_W:
    return isLarge ? LoongArch::STX_W : LoongArch::ST_W;
  case LoongArch::ST_D:
  case LoongArch::STPTR_D:
    return isLarge ? LoongArch::STX_D : LoongArch::ST_D;
  case LoongArch::FST_S:
    return isLarge ? LoongArch::FSTX_S : LoongArch::FST_S;
  case LoongArch::FST_D:
    return isLarge ? LoongArch::FSTX_D : LoongArch::FST_D;
  case LoongArch::VST:
    return isLarge ? LoongArch::VSTX : LoongArch::VST;
  case LoongArch::XVST:
    return isLarge ? LoongArch::XVSTX : LoongArch::XVST;
  default:
    llvm_unreachable("Unexpected opcode for replacement");
  }
}

bool LoongArchMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi20,
                                                    MachineInstr &Lo12,
                                                    MachineInstr *&Lo20,
                                                    MachineInstr *&Hi12,
                                                    MachineInstr *&Last) {
  Register DestReg =
      Last ? Last->getOperand(0).getReg() : Lo12.getOperand(0).getReg();

  // If all the uses are memory ops with the same offset, we can transform:
  //
  // 1. (small/medium):
  //   pcalau12i vreg1, %pc_hi20(s)
  //   addi.d vreg2, vreg1, %pc_lo12(s)
  //   ld.w vreg3, 8(vreg2)
  //
  // =>
  //
  //   pcalau12i vreg1, %pc_hi20(s+8)
  //   ld.w vreg3, vreg1, %pc_lo12(s+8)
  //
  // 2. (large):
  //   pcalau12i vreg1, %pc_hi20(s)
  //   addi.d vreg2, $zero, %pc_lo12(s)
  //   lu32i.d vreg3, vreg2, %pc64_lo20(s)
  //   lu52i.d vreg4, vreg3, %pc64_hi12(s)
  //   add.d vreg5, vreg4, vreg1
  //   ld.w vreg6, 8(vreg5)
  //
  // =>
  //
  //   pcalau12i vreg1, %pc_hi20(s+8)
  //   addi.d vreg2, $zero, %pc_lo12(s+8)
  //   lu32i.d vreg3, vreg2, %pc64_lo20(s+8)
  //   lu52i.d vreg4, vreg3, %pc64_hi12(s+8)
  //   ldx.w vreg6, vreg4, vreg1

  std::optional<int64_t> CommonOffset;
  DenseMap<const MachineInstr *, SmallVector<unsigned>>
      InlineAsmMemoryOpIndexesMap;
  for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
    switch (UseMI.getOpcode()) {
    default:
      LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
      return false;
    case LoongArch::VLDREPL_B:
    case LoongArch::XVLDREPL_B:
      // We can't do this for the large pattern.
      if (Last)
        return false;
      [[fallthrough]];
    case LoongArch::LD_B:
    case LoongArch::LD_H:
    case LoongArch::LD_W:
    case LoongArch::LD_D:
    case LoongArch::LD_BU:
    case LoongArch::LD_HU:
    case LoongArch::LD_WU:
    case LoongArch::LDPTR_W:
    case LoongArch::LDPTR_D:
    case LoongArch::FLD_S:
    case LoongArch::FLD_D:
    case LoongArch::VLD:
    case LoongArch::XVLD:
    case LoongArch::ST_B:
    case LoongArch::ST_H:
    case LoongArch::ST_W:
    case LoongArch::ST_D:
    case LoongArch::STPTR_W:
    case LoongArch::STPTR_D:
    case LoongArch::FST_S:
    case LoongArch::FST_D:
    case LoongArch::VST:
    case LoongArch::XVST: {
      if (UseMI.getOperand(1).isFI())
        return false;
      // The register defined by Lo should not be the value register.
      if (DestReg == UseMI.getOperand(0).getReg())
        return false;
      assert(DestReg == UseMI.getOperand(1).getReg() &&
             "Expected base address use");
      // All load/store instructions must use the same offset.
      int64_t Offset = UseMI.getOperand(2).getImm();
      if (CommonOffset && Offset != CommonOffset)
        return false;
      CommonOffset = Offset;
      break;
    }
    case LoongArch::INLINEASM:
    case LoongArch::INLINEASM_BR: {
      // We can't do this for the large pattern.
      if (Last)
        return false;
      SmallVector<unsigned> InlineAsmMemoryOpIndexes;
      unsigned NumOps = 0;
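      // Inline asm machine operands come in groups: a flag immediate encoding
      // the group's kind and operand count, followed by that group's
      // operands. For a mem-kind group the two operands are the address base
      // register and the offset immediate, in that order.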
      for (unsigned I = InlineAsm::MIOp_FirstOperand;
           I < UseMI.getNumOperands(); I += 1 + NumOps) {
        const MachineOperand &FlagsMO = UseMI.getOperand(I);
        // Should be an imm.
        if (!FlagsMO.isImm())
          continue;

        const InlineAsm::Flag Flags(FlagsMO.getImm());
        NumOps = Flags.getNumOperandRegisters();

        // Memory constraints have two operands.
        if (NumOps != 2 || !Flags.isMemKind()) {
          // If the register is used by something other than a memory
          // constraint, we should not fold.
          for (unsigned J = 0; J < NumOps; ++J) {
            const MachineOperand &MO = UseMI.getOperand(I + 1 + J);
            if (MO.isReg() && MO.getReg() == DestReg)
              return false;
          }
          continue;
        }

        // We can only do this for constraint m.
        if (Flags.getMemoryConstraintID() != InlineAsm::ConstraintCode::m)
          return false;

        const MachineOperand &AddrMO = UseMI.getOperand(I + 1);
        if (!AddrMO.isReg() || AddrMO.getReg() != DestReg)
          continue;

        const MachineOperand &OffsetMO = UseMI.getOperand(I + 2);
        if (!OffsetMO.isImm())
          continue;

        // All inline asm memory operands must use the same offset.
        int64_t Offset = OffsetMO.getImm();
        if (CommonOffset && Offset != CommonOffset)
          return false;
        CommonOffset = Offset;
        InlineAsmMemoryOpIndexes.push_back(I + 1);
      }
      InlineAsmMemoryOpIndexesMap.insert(
          std::make_pair(&UseMI, InlineAsmMemoryOpIndexes));
      break;
    }
    }
  }

  // We found a common offset.
  // Update the offsets in global address lowering.
  // We may have already folded some arithmetic so we need to add to any
  // existing offset.
  int64_t NewOffset = Hi20.getOperand(1).getOffset() + *CommonOffset;
  // LA32 ignores the upper 32 bits.
  if (!ST->is64Bit())
    NewOffset = SignExtend64<32>(NewOffset);
  // We can only fold simm32 offsets.
  if (!isInt<32>(NewOffset))
    return false;

  Hi20.getOperand(1).setOffset(NewOffset);
  MachineOperand &ImmOp = Lo12.getOperand(2);
  ImmOp.setOffset(NewOffset);
  if (Lo20 && Hi12) {
    Lo20->getOperand(2).setOffset(NewOffset);
    Hi12->getOperand(2).setOffset(NewOffset);
  }

  // Update the immediate in the load/store instructions to add the offset.
  const LoongArchInstrInfo &TII = *ST->getInstrInfo();
  for (MachineInstr &UseMI :
       llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
    if (UseMI.getOpcode() == LoongArch::INLINEASM ||
        UseMI.getOpcode() == LoongArch::INLINEASM_BR) {
      auto &InlineAsmMemoryOpIndexes = InlineAsmMemoryOpIndexesMap[&UseMI];
      for (unsigned I : InlineAsmMemoryOpIndexes) {
        MachineOperand &MO = UseMI.getOperand(I + 1);
        switch (ImmOp.getType()) {
        case MachineOperand::MO_GlobalAddress:
          MO.ChangeToGA(ImmOp.getGlobal(), ImmOp.getOffset(),
                        ImmOp.getTargetFlags());
          break;
        case MachineOperand::MO_MCSymbol:
          MO.ChangeToMCSymbol(ImmOp.getMCSymbol(), ImmOp.getTargetFlags());
          MO.setOffset(ImmOp.getOffset());
          break;
        case MachineOperand::MO_BlockAddress:
          MO.ChangeToBA(ImmOp.getBlockAddress(), ImmOp.getOffset(),
                        ImmOp.getTargetFlags());
          break;
        default:
          report_fatal_error("unsupported machine operand type");
          break;
        }
      }
    } else {
      UseMI.setDesc(TII.get(getNewOpc(UseMI.getOpcode(), Last)));
      if (Last) {
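        // Large pattern: turn the load/store into its indexed form by
        // replacing the base+imm operands with the two register operands of
        // the tail ADD (the lu52i.d result and the pcalau12i result).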
        UseMI.removeOperand(2);
        UseMI.removeOperand(1);
        UseMI.addOperand(Last->getOperand(1));
        UseMI.addOperand(Last->getOperand(2));
        UseMI.getOperand(1).setIsKill(false);
        UseMI.getOperand(2).setIsKill(false);
      } else {
        UseMI.removeOperand(2);
        UseMI.addOperand(ImmOp);
      }
    }
  }

  if (Last) {
    Last->eraseFromParent();
    return true;
  }

  MRI->replaceRegWith(Lo12.getOperand(0).getReg(), Hi20.getOperand(0).getReg());
  Lo12.eraseFromParent();
  return true;
}

bool LoongArchMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  ST = &Fn.getSubtarget<LoongArchSubtarget>();

  bool MadeChange = false;
  MRI = &Fn.getRegInfo();
  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &Hi20 : MBB) {
      MachineInstr *Lo12 = nullptr;
      MachineInstr *Lo20 = nullptr;
      MachineInstr *Hi12 = nullptr;
      MachineInstr *Last = nullptr;
      if (!detectFoldable(Hi20, Lo12, Lo20, Hi12, Last))
        continue;
      MadeChange |= detectAndFoldOffset(Hi20, *Lo12, Lo20, Hi12, Last);
      MadeChange |= foldIntoMemoryOps(Hi20, *Lo12, Lo20, Hi12, Last);
    }
  }

  return MadeChange;
}

/// Returns an instance of the Merge Base Offset Optimization pass.
FunctionPass *llvm::createLoongArchMergeBaseOffsetOptPass() {
  return new LoongArchMergeBaseOffsetOpt();
}