//===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Merge the offset of address calculation into the offset field
// of instructions in a global address lowering sequence.
//
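// For example (illustrative), a medlow-lowered global access such as
//   lui  a0, %hi(g)
//   addi a0, a0, %lo(g)
//   lw   a1, 8(a0)
// can become
//   lui  a0, %hi(g+8)
//   lw   a1, %lo(g+8)(a0)
//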
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include <optional>
#include <set>
using namespace llvm;

#define DEBUG_TYPE "riscv-merge-base-offset"
#define RISCV_MERGE_BASE_OFFSET_NAME "RISC-V Merge Base Offset"
namespace {

class RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
  const RISCVSubtarget *ST = nullptr;
  MachineRegisterInfo *MRI;

public:
  static char ID;
  bool runOnMachineFunction(MachineFunction &Fn) override;
  bool detectFoldable(MachineInstr &Hi, MachineInstr *&Lo);

  bool detectAndFoldOffset(MachineInstr &Hi, MachineInstr &Lo);
  void foldOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &Tail,
                  int64_t Offset);
  bool foldLargeOffset(MachineInstr &Hi, MachineInstr &Lo,
                       MachineInstr &TailAdd, Register GSReg);
  bool foldShiftedOffset(MachineInstr &Hi, MachineInstr &Lo,
                         MachineInstr &TailShXAdd, Register GSReg);

  bool foldIntoMemoryOps(MachineInstr &Hi, MachineInstr &Lo);

  RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}

  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::IsSSA);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    MachineFunctionPass::getAnalysisUsage(AU);
  }

  StringRef getPassName() const override {
    return RISCV_MERGE_BASE_OFFSET_NAME;
  }
};
} // end anonymous namespace

char RISCVMergeBaseOffsetOpt::ID = 0;
INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
                RISCV_MERGE_BASE_OFFSET_NAME, false, false)

// Detect either of the patterns:
//
// 1. (medlow pattern):
//   lui   vreg1, %hi(s)
//   addi  vreg2, vreg1, %lo(s)
//
// 2. (medany pattern):
// .Lpcrel_hi1:
//   auipc vreg1, %pcrel_hi(s)
//   addi  vreg2, vreg1, %pcrel_lo(.Lpcrel_hi1)
//
// The pattern is only accepted if:
//   1) The first instruction has only one use, which is the ADDI.
//   2) The address operands have the appropriate type, reflecting the
//      lowering of a global address or constant pool using medlow or medany.
//   3) The offset value in the Global Address or Constant Pool is 0.
bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
                                             MachineInstr *&Lo) {
  if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC)
    return false;

  const MachineOperand &HiOp1 = Hi.getOperand(1);
  unsigned ExpectedFlags =
      Hi.getOpcode() == RISCV::AUIPC ? RISCVII::MO_PCREL_HI : RISCVII::MO_HI;
  if (HiOp1.getTargetFlags() != ExpectedFlags)
    return false;

  if (!(HiOp1.isGlobal() || HiOp1.isCPI() || HiOp1.isBlockAddress()) ||
      HiOp1.getOffset() != 0)
    return false;

  Register HiDestReg = Hi.getOperand(0).getReg();
  if (!MRI->hasOneUse(HiDestReg))
    return false;

  Lo = &*MRI->use_instr_begin(HiDestReg);
  if (Lo->getOpcode() != RISCV::ADDI)
    return false;

  const MachineOperand &LoOp2 = Lo->getOperand(2);
  if (Hi.getOpcode() == RISCV::LUI) {
    if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
        !(LoOp2.isGlobal() || LoOp2.isCPI() || LoOp2.isBlockAddress()) ||
        LoOp2.getOffset() != 0)
      return false;
  } else {
    assert(Hi.getOpcode() == RISCV::AUIPC);
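    // A %pcrel_lo operand does not name the symbol directly; it refers to the
    // label on the AUIPC via an MCSymbol, so only the target flag and operand
    // kind are checked here.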
    if (LoOp2.getTargetFlags() != RISCVII::MO_PCREL_LO ||
        LoOp2.getType() != MachineOperand::MO_MCSymbol)
      return false;
  }

  if (HiOp1.isGlobal()) {
    LLVM_DEBUG(dbgs() << "  Found lowered global address: "
                      << *HiOp1.getGlobal() << "\n");
  } else if (HiOp1.isBlockAddress()) {
    LLVM_DEBUG(dbgs() << "  Found lowered block address: "
                      << *HiOp1.getBlockAddress() << "\n");
  } else if (HiOp1.isCPI()) {
    LLVM_DEBUG(dbgs() << "  Found lowered constant pool: " << HiOp1.getIndex()
                      << "\n");
  }

  return true;
}

// Update the offset in the Hi and Lo instructions.
// Delete the tail instruction and update all the uses to use the
// output from Lo.
void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
                                         MachineInstr &Tail, int64_t Offset) {
  assert(isInt<32>(Offset) && "Unexpected offset");
  // Put the offset back in Hi and Lo.
  Hi.getOperand(1).setOffset(Offset);
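  // For AUIPC the %pcrel_lo operand points back at the AUIPC's label, so the
  // offset is carried only on the %pcrel_hi operand.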
  if (Hi.getOpcode() != RISCV::AUIPC)
    Lo.getOperand(2).setOffset(Offset);
  // Delete the tail instruction.
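  // Constrain Lo's result to Tail's register class first so that all of
  // Tail's users remain valid after the replacement.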
  MRI->constrainRegClass(Lo.getOperand(0).getReg(),
                         MRI->getRegClass(Tail.getOperand(0).getReg()));
  MRI->replaceRegWith(Tail.getOperand(0).getReg(), Lo.getOperand(0).getReg());
  Tail.eraseFromParent();
  LLVM_DEBUG(dbgs() << "  Merged offset " << Offset << " into base.\n"
                    << "  " << Hi << "  " << Lo;);
}

// Detect patterns for large offsets that are passed into an ADD instruction.
// If the pattern is found, updates the offset in Hi and Lo instructions
// and deletes TailAdd and the instructions that produced the offset.
//
// Base address lowering is of the form:
//   Hi:  lui   vreg1, %hi(s)
//   Lo:  addi  vreg2, vreg1, %lo(s)
//
// The large offset can be of two forms:
//   1) Offset with non-zero bits in both the    2) Offset with non-zero bits in
//      lower 12 bits and upper 20 bits:            the upper 20 bits only:
//        OffsetLui:  lui  vreg3, 4
//        OffsetTail: addi voff, vreg3, 188         OffsetTail: lui voff, 128
//
// Either form feeds the final add:
//   TailAdd: add vreg4, vreg2, voff
bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
                                              MachineInstr &Lo,
                                              MachineInstr &TailAdd,
                                              Register GAReg) {
  assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!");
  Register Rs = TailAdd.getOperand(1).getReg();
  Register Rt = TailAdd.getOperand(2).getReg();
  Register Reg = Rs == GAReg ? Rt : Rs;

  // Can't fold if the register has more than one use.
  if (!MRI->hasOneUse(Reg))
    return false;
  // This can point to an ADDI(W) or a LUI:
  MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
  if (OffsetTail.getOpcode() == RISCV::ADDI ||
      OffsetTail.getOpcode() == RISCV::ADDIW) {
    // The offset value has non-zero bits in both %hi and %lo parts.
    // Detect an ADDI that feeds from a LUI instruction.
    MachineOperand &AddiImmOp = OffsetTail.getOperand(2);
    if (AddiImmOp.getTargetFlags() != RISCVII::MO_None)
      return false;
    int64_t OffLo = AddiImmOp.getImm();
    MachineInstr &OffsetLui =
        *MRI->getVRegDef(OffsetTail.getOperand(1).getReg());
    MachineOperand &LuiImmOp = OffsetLui.getOperand(1);
    if (OffsetLui.getOpcode() != RISCV::LUI ||
        LuiImmOp.getTargetFlags() != RISCVII::MO_None ||
        !MRI->hasOneUse(OffsetLui.getOperand(0).getReg()))
      return false;
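    // Reconstruct the folded constant, e.g. lui vreg3, 4 plus addi voff,
    // vreg3, 188 gives (4 << 12) + 188 = 16572.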
    int64_t Offset = SignExtend64<32>(LuiImmOp.getImm() << 12);
    Offset += OffLo;
    // RV32 ignores the upper 32 bits. ADDIW sign extends the result.
    if (!ST->is64Bit() || OffsetTail.getOpcode() == RISCV::ADDIW)
      Offset = SignExtend64<32>(Offset);
    // We can only fold simm32 offsets.
    if (!isInt<32>(Offset))
      return false;
    LLVM_DEBUG(dbgs() << "  Offset Instrs: " << OffsetTail
                      << "  " << OffsetLui);
    foldOffset(Hi, Lo, TailAdd, Offset);
    OffsetTail.eraseFromParent();
    OffsetLui.eraseFromParent();
    return true;
  } else if (OffsetTail.getOpcode() == RISCV::LUI) {
    // The offset value has all zero bits in the lower 12 bits. Only LUI
    // exists.
    LLVM_DEBUG(dbgs() << "  Offset Instr: " << OffsetTail);
    int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
    foldOffset(Hi, Lo, TailAdd, Offset);
    OffsetTail.eraseFromParent();
    return true;
  }
  return false;
}

// Detect patterns for offsets that are passed into a SHXADD instruction.
// The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, simm15.
// The constant is created with addi voff, x0, C, and shXadd is used to
// insert the trailing zeros and do the addition.
// If the pattern is found, updates the offset in Hi and Lo instructions
// and deletes TailShXAdd and the instructions that produced the offset.
//
//   Hi:         lui    vreg1, %hi(s)
//   Lo:         addi   vreg2, vreg1, %lo(s)
//   OffsetTail: addi   voff, x0, C
//   TailShXAdd: shXadd vreg4, voff, vreg2
bool RISCVMergeBaseOffsetOpt::foldShiftedOffset(MachineInstr &Hi,
                                                MachineInstr &Lo,
                                                MachineInstr &TailShXAdd,
                                                Register GAReg) {
  assert((TailShXAdd.getOpcode() == RISCV::SH1ADD ||
          TailShXAdd.getOpcode() == RISCV::SH2ADD ||
          TailShXAdd.getOpcode() == RISCV::SH3ADD) &&
         "Expected SHXADD instruction!");

  // The first source is the shifted operand.
  Register Rs1 = TailShXAdd.getOperand(1).getReg();

  if (GAReg != TailShXAdd.getOperand(2).getReg())
    return false;

  // Can't fold if the register has more than one use.
  if (!MRI->hasOneUse(Rs1))
    return false;
  // This can point to an ADDI X0, C.
  MachineInstr &OffsetTail = *MRI->getVRegDef(Rs1);
  if (OffsetTail.getOpcode() != RISCV::ADDI)
    return false;
  if (!OffsetTail.getOperand(1).isReg() ||
      OffsetTail.getOperand(1).getReg() != RISCV::X0 ||
      !OffsetTail.getOperand(2).isImm())
    return false;

  int64_t Offset = OffsetTail.getOperand(2).getImm();
  assert(isInt<12>(Offset) && "Unexpected offset");

  unsigned ShAmt;
  switch (TailShXAdd.getOpcode()) {
  default: llvm_unreachable("Unexpected opcode");
  case RISCV::SH1ADD: ShAmt = 1; break;
  case RISCV::SH2ADD: ShAmt = 2; break;
  case RISCV::SH3ADD: ShAmt = 3; break;
  }

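  // Reconstruct the scaled offset, e.g. addi voff, x0, 100 feeding a SH2ADD
  // yields 100 << 2 = 400.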
  Offset = (uint64_t)Offset << ShAmt;

  LLVM_DEBUG(dbgs() << "  Offset Instr: " << OffsetTail);
  foldOffset(Hi, Lo, TailShXAdd, Offset);
  OffsetTail.eraseFromParent();
  return true;
}

bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi,
                                                  MachineInstr &Lo) {
  Register DestReg = Lo.getOperand(0).getReg();

  // Look for arithmetic instructions we can get an offset from.
  // We might be able to remove the arithmetic instructions by folding the
  // offset into the LUI+ADDI.
  if (!MRI->hasOneUse(DestReg))
    return false;

  // Lo has only one use.
  MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
  switch (Tail.getOpcode()) {
  default:
    LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
                      << Tail);
    break;
  case RISCV::ADDI: {
    // Offset is simply an immediate operand.
    int64_t Offset = Tail.getOperand(2).getImm();

    // We might have two ADDIs in a row.
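    // e.g. addi vreg3, vreg2, 2047 followed by addi vreg4, vreg3, 1 encodes a
    // combined offset of 2048, which does not fit a single simm12.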
    Register TailDestReg = Tail.getOperand(0).getReg();
    if (MRI->hasOneUse(TailDestReg)) {
      MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
      if (TailTail.getOpcode() == RISCV::ADDI) {
        Offset += TailTail.getOperand(2).getImm();
        LLVM_DEBUG(dbgs() << "  Offset Instrs: " << Tail << TailTail);
        foldOffset(Hi, Lo, TailTail, Offset);
        Tail.eraseFromParent();
        return true;
      }
    }

    LLVM_DEBUG(dbgs() << "  Offset Instr: " << Tail);
    foldOffset(Hi, Lo, Tail, Offset);
    return true;
  }
  case RISCV::ADD:
    // The offset is too large to fit in the immediate field of ADDI.
    // This can be in two forms:
    // 1) LUI hi_Offset followed by:
    //    ADDI lo_offset
    //    This happens in case the offset has non-zero bits in
    //    both hi 20 and lo 12 bits.
    // 2) LUI (offset20)
    //    This happens in case the lower 12 bits of the offset are zeros.
    return foldLargeOffset(Hi, Lo, Tail, DestReg);
  case RISCV::SH1ADD:
  case RISCV::SH2ADD:
  case RISCV::SH3ADD:
    // The offset is too large to fit in the immediate field of ADDI.
    // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or
    // (SH3ADD (ADDI X0, C), DestReg).
    return foldShiftedOffset(Hi, Lo, Tail, DestReg);
  }

  return false;
}

bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
                                                MachineInstr &Lo) {
  Register DestReg = Lo.getOperand(0).getReg();

  // If all the uses are memory ops with the same offset, we can transform:
  //
  // 1. (medlow pattern):
  //   Hi:   lui vreg1, %hi(foo)              --->  lui vreg1, %hi(foo+8)
  //   Lo:   addi vreg2, vreg1, %lo(foo)      --->  lw vreg3, %lo(foo+8)(vreg1)
  //   Tail: lw vreg3, 8(vreg2)
  //
  // 2. (medany pattern):
  //   Hi:   1:auipc vreg1, %pcrel_hi(foo)    --->  auipc vreg1, %pcrel_hi(foo+8)
  //   Lo:   addi vreg2, vreg1, %pcrel_lo(1b) --->  lw vreg3, %pcrel_lo(1b)(vreg1)
  //   Tail: lw vreg3, 8(vreg2)

  std::optional<int64_t> CommonOffset;
  for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
    switch (UseMI.getOpcode()) {
    default:
      LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
      return false;
    case RISCV::LB:
    case RISCV::LH:
    case RISCV::LW:
    case RISCV::LBU:
    case RISCV::LHU:
    case RISCV::LWU:
    case RISCV::LD:
    case RISCV::FLH:
    case RISCV::FLW:
    case RISCV::FLD:
    case RISCV::SB:
    case RISCV::SH:
    case RISCV::SW:
    case RISCV::SD:
    case RISCV::FSH:
    case RISCV::FSW:
    case RISCV::FSD: {
      if (UseMI.getOperand(1).isFI())
        return false;
      // Register defined by Lo should not be the value register.
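      // e.g. sw vreg2, 8(vreg2) stores the address itself; rewriting the base
      // would also change the stored value.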
      if (DestReg == UseMI.getOperand(0).getReg())
        return false;
      assert(DestReg == UseMI.getOperand(1).getReg() &&
             "Expected base address use");
      // All load/store instructions must use the same offset.
      int64_t Offset = UseMI.getOperand(2).getImm();
      if (CommonOffset && Offset != CommonOffset)
        return false;
      CommonOffset = Offset;
    }
    }
  }

  // We found a common offset.
  // Update the offsets in global address lowering.
  // We may have already folded some arithmetic so we need to add to any
  // existing offset.
  int64_t NewOffset = Hi.getOperand(1).getOffset() + *CommonOffset;
  // RV32 ignores the upper 32 bits.
  if (!ST->is64Bit())
    NewOffset = SignExtend64<32>(NewOffset);
  // We can only fold simm32 offsets.
  if (!isInt<32>(NewOffset))
    return false;

  Hi.getOperand(1).setOffset(NewOffset);
  MachineOperand &ImmOp = Lo.getOperand(2);
  if (Hi.getOpcode() != RISCV::AUIPC)
    ImmOp.setOffset(NewOffset);

  // Update the immediate in the load/store instructions to add the offset.
  for (MachineInstr &UseMI :
       llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
    UseMI.removeOperand(2);
    UseMI.addOperand(ImmOp);
    // Update the base reg in the Tail instruction to feed from LUI.
    // Output of Hi is only used in Lo, no need to use MRI->replaceRegWith().
    UseMI.getOperand(1).setReg(Hi.getOperand(0).getReg());
  }

  Lo.eraseFromParent();
  return true;
}

bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
  if (skipFunction(Fn.getFunction()))
    return false;

  ST = &Fn.getSubtarget<RISCVSubtarget>();

  bool MadeChange = false;
  MRI = &Fn.getRegInfo();
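  // The pass runs on SSA-form MIR (see getRequiredProperties), so the
  // hasOneUse and getVRegDef queries used throughout are well defined.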
  for (MachineBasicBlock &MBB : Fn) {
    LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
    for (MachineInstr &Hi : MBB) {
      MachineInstr *Lo = nullptr;
      if (!detectFoldable(Hi, Lo))
        continue;
      MadeChange |= detectAndFoldOffset(Hi, *Lo);
      MadeChange |= foldIntoMemoryOps(Hi, *Lo);
    }
  }

  return MadeChange;
}

/// Returns an instance of the Merge Base Offset Optimization pass.
FunctionPass *llvm::createRISCVMergeBaseOffsetOptPass() {
  return new RISCVMergeBaseOffsetOpt();
}