LLVM 17.0.0git
RISCVMergeBaseOffset.cpp
Go to the documentation of this file.
//===----- RISCVMergeBaseOffset.cpp - Optimise address calculations ------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Merge the offset of address calculation into the offset field
// of instructions in a global address lowering sequence.
//
//===----------------------------------------------------------------------===//
13
#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/Support/Debug.h"
#include <optional>
#include <set>
23using namespace llvm;
24
25#define DEBUG_TYPE "riscv-merge-base-offset"
26#define RISCV_MERGE_BASE_OFFSET_NAME "RISCV Merge Base Offset"
27namespace {
28
29struct RISCVMergeBaseOffsetOpt : public MachineFunctionPass {
30private:
31 const RISCVSubtarget *ST = nullptr;
32
33public:
34 static char ID;
35 bool runOnMachineFunction(MachineFunction &Fn) override;
36 bool detectFoldable(MachineInstr &Hi, MachineInstr *&Lo);
37
38 bool detectAndFoldOffset(MachineInstr &Hi, MachineInstr &Lo);
39 void foldOffset(MachineInstr &Hi, MachineInstr &Lo, MachineInstr &Tail,
40 int64_t Offset);
41 bool foldLargeOffset(MachineInstr &Hi, MachineInstr &Lo,
42 MachineInstr &TailAdd, Register GSReg);
43 bool foldShiftedOffset(MachineInstr &Hi, MachineInstr &Lo,
44 MachineInstr &TailShXAdd, Register GSReg);
45
46 bool foldIntoMemoryOps(MachineInstr &Hi, MachineInstr &Lo);
47
48 RISCVMergeBaseOffsetOpt() : MachineFunctionPass(ID) {}
49
52 MachineFunctionProperties::Property::IsSSA);
53 }
54
55 void getAnalysisUsage(AnalysisUsage &AU) const override {
56 AU.setPreservesCFG();
58 }
59
60 StringRef getPassName() const override {
62 }
63
64private:
66};
67} // end anonymous namespace
68
69char RISCVMergeBaseOffsetOpt::ID = 0;
70INITIALIZE_PASS(RISCVMergeBaseOffsetOpt, DEBUG_TYPE,
71 RISCV_MERGE_BASE_OFFSET_NAME, false, false)
72
73// Detect either of the patterns:
74//
75// 1. (medlow pattern):
76// lui vreg1, %hi(s)
77// addi vreg2, vreg1, %lo(s)
78//
79// 2. (medany pattern):
80// .Lpcrel_hi1:
81// auipc vreg1, %pcrel_hi(s)
82// addi vreg2, vreg1, %pcrel_lo(.Lpcrel_hi1)
83//
84// The pattern is only accepted if:
85// 1) The first instruction has only one use, which is the ADDI.
86// 2) The address operands have the appropriate type, reflecting the
87// lowering of a global address or constant pool using medlow or medany.
88// 3) The offset value in the Global Address or Constant Pool is 0.
89bool RISCVMergeBaseOffsetOpt::detectFoldable(MachineInstr &Hi,
90 MachineInstr *&Lo) {
91 if (Hi.getOpcode() != RISCV::LUI && Hi.getOpcode() != RISCV::AUIPC)
92 return false;
93
94 const MachineOperand &HiOp1 = Hi.getOperand(1);
95 unsigned ExpectedFlags =
96 Hi.getOpcode() == RISCV::AUIPC ? RISCVII::MO_PCREL_HI : RISCVII::MO_HI;
97 if (HiOp1.getTargetFlags() != ExpectedFlags)
98 return false;
99
100 if (!(HiOp1.isGlobal() || HiOp1.isCPI()) || HiOp1.getOffset() != 0)
101 return false;
102
103 Register HiDestReg = Hi.getOperand(0).getReg();
104 if (!MRI->hasOneUse(HiDestReg))
105 return false;
106
107 Lo = &*MRI->use_instr_begin(HiDestReg);
108 if (Lo->getOpcode() != RISCV::ADDI)
109 return false;
110
111 const MachineOperand &LoOp2 = Lo->getOperand(2);
112 if (Hi.getOpcode() == RISCV::LUI) {
113 if (LoOp2.getTargetFlags() != RISCVII::MO_LO ||
114 !(LoOp2.isGlobal() || LoOp2.isCPI()) || LoOp2.getOffset() != 0)
115 return false;
116 } else {
117 assert(Hi.getOpcode() == RISCV::AUIPC);
118 if (LoOp2.getTargetFlags() != RISCVII::MO_PCREL_LO ||
120 return false;
121 }
122
123 if (HiOp1.isGlobal()) {
124 LLVM_DEBUG(dbgs() << " Found lowered global address: "
125 << *HiOp1.getGlobal() << "\n");
126 } else {
127 assert(HiOp1.isCPI());
128 LLVM_DEBUG(dbgs() << " Found lowered constant pool: " << HiOp1.getIndex()
129 << "\n");
130 }
131
132 return true;
133}
134
135// Update the offset in Hi and Lo instructions.
136// Delete the tail instruction and update all the uses to use the
137// output from Lo.
138void RISCVMergeBaseOffsetOpt::foldOffset(MachineInstr &Hi, MachineInstr &Lo,
139 MachineInstr &Tail, int64_t Offset) {
140 assert(isInt<32>(Offset) && "Unexpected offset");
141 // Put the offset back in Hi and the Lo
142 Hi.getOperand(1).setOffset(Offset);
143 if (Hi.getOpcode() != RISCV::AUIPC)
144 Lo.getOperand(2).setOffset(Offset);
145 // Delete the tail instruction.
146 MRI->replaceRegWith(Tail.getOperand(0).getReg(), Lo.getOperand(0).getReg());
147 Tail.eraseFromParent();
148 LLVM_DEBUG(dbgs() << " Merged offset " << Offset << " into base.\n"
149 << " " << Hi << " " << Lo;);
150}
151
152// Detect patterns for large offsets that are passed into an ADD instruction.
153// If the pattern is found, updates the offset in Hi and Lo instructions
154// and deletes TailAdd and the instructions that produced the offset.
155//
156// Base address lowering is of the form:
157// Hi: lui vreg1, %hi(s)
158// Lo: addi vreg2, vreg1, %lo(s)
159// / \
160// / \
161// / \
162// / The large offset can be of two forms: \
163// 1) Offset that has non zero bits in lower 2) Offset that has non zero
164// 12 bits and upper 20 bits bits in upper 20 bits only
165// OffseLUI: lui vreg3, 4
166// OffsetTail: addi voff, vreg3, 188 OffsetTail: lui voff, 128
167// \ /
168// \ /
169// \ /
170// \ /
171// TailAdd: add vreg4, vreg2, voff
172bool RISCVMergeBaseOffsetOpt::foldLargeOffset(MachineInstr &Hi,
174 MachineInstr &TailAdd,
175 Register GAReg) {
176 assert((TailAdd.getOpcode() == RISCV::ADD) && "Expected ADD instruction!");
177 Register Rs = TailAdd.getOperand(1).getReg();
178 Register Rt = TailAdd.getOperand(2).getReg();
179 Register Reg = Rs == GAReg ? Rt : Rs;
180
181 // Can't fold if the register has more than one use.
182 if (!MRI->hasOneUse(Reg))
183 return false;
184 // This can point to an ADDI(W) or a LUI:
185 MachineInstr &OffsetTail = *MRI->getVRegDef(Reg);
186 if (OffsetTail.getOpcode() == RISCV::ADDI ||
187 OffsetTail.getOpcode() == RISCV::ADDIW) {
188 // The offset value has non zero bits in both %hi and %lo parts.
189 // Detect an ADDI that feeds from a LUI instruction.
190 MachineOperand &AddiImmOp = OffsetTail.getOperand(2);
191 if (AddiImmOp.getTargetFlags() != RISCVII::MO_None)
192 return false;
193 int64_t OffLo = AddiImmOp.getImm();
194 MachineInstr &OffsetLui =
195 *MRI->getVRegDef(OffsetTail.getOperand(1).getReg());
196 MachineOperand &LuiImmOp = OffsetLui.getOperand(1);
197 if (OffsetLui.getOpcode() != RISCV::LUI ||
198 LuiImmOp.getTargetFlags() != RISCVII::MO_None ||
199 !MRI->hasOneUse(OffsetLui.getOperand(0).getReg()))
200 return false;
201 int64_t Offset = SignExtend64<32>(LuiImmOp.getImm() << 12);
202 Offset += OffLo;
203 // RV32 ignores the upper 32 bits. ADDIW sign extends the result.
204 if (!ST->is64Bit() || OffsetTail.getOpcode() == RISCV::ADDIW)
205 Offset = SignExtend64<32>(Offset);
206 // We can only fold simm32 offsets.
207 if (!isInt<32>(Offset))
208 return false;
209 LLVM_DEBUG(dbgs() << " Offset Instrs: " << OffsetTail
210 << " " << OffsetLui);
211 foldOffset(Hi, Lo, TailAdd, Offset);
212 OffsetTail.eraseFromParent();
213 OffsetLui.eraseFromParent();
214 return true;
215 } else if (OffsetTail.getOpcode() == RISCV::LUI) {
216 // The offset value has all zero bits in the lower 12 bits. Only LUI
217 // exists.
218 LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
219 int64_t Offset = SignExtend64<32>(OffsetTail.getOperand(1).getImm() << 12);
220 foldOffset(Hi, Lo, TailAdd, Offset);
221 OffsetTail.eraseFromParent();
222 return true;
223 }
224 return false;
225}
226
227// Detect patterns for offsets that are passed into a SHXADD instruction.
228// The offset has 1, 2, or 3 trailing zeros and fits in simm13, simm14, simm15.
229// The constant is created with addi voff, x0, C, and shXadd is used to
230// fill insert the trailing zeros and do the addition.
231// If the pattern is found, updates the offset in Hi and Lo instructions
232// and deletes TailShXAdd and the instructions that produced the offset.
233//
234// Hi: lui vreg1, %hi(s)
235// Lo: addi vreg2, vreg1, %lo(s)
236// OffsetTail: addi voff, x0, C
237// TailAdd: shXadd vreg4, voff, vreg2
238bool RISCVMergeBaseOffsetOpt::foldShiftedOffset(MachineInstr &Hi,
240 MachineInstr &TailShXAdd,
241 Register GAReg) {
242 assert((TailShXAdd.getOpcode() == RISCV::SH1ADD ||
243 TailShXAdd.getOpcode() == RISCV::SH2ADD ||
244 TailShXAdd.getOpcode() == RISCV::SH3ADD) &&
245 "Expected SHXADD instruction!");
246
247 // The first source is the shifted operand.
248 Register Rs1 = TailShXAdd.getOperand(1).getReg();
249
250 if (GAReg != TailShXAdd.getOperand(2).getReg())
251 return false;
252
253 // Can't fold if the register has more than one use.
254 if (!MRI->hasOneUse(Rs1))
255 return false;
256 // This can point to an ADDI X0, C.
257 MachineInstr &OffsetTail = *MRI->getVRegDef(Rs1);
258 if (OffsetTail.getOpcode() != RISCV::ADDI)
259 return false;
260 if (!OffsetTail.getOperand(1).isReg() ||
261 OffsetTail.getOperand(1).getReg() != RISCV::X0 ||
262 !OffsetTail.getOperand(2).isImm())
263 return false;
264
265 int64_t Offset = OffsetTail.getOperand(2).getImm();
266 assert(isInt<12>(Offset) && "Unexpected offset");
267
268 unsigned ShAmt;
269 switch (TailShXAdd.getOpcode()) {
270 default: llvm_unreachable("Unexpected opcode");
271 case RISCV::SH1ADD: ShAmt = 1; break;
272 case RISCV::SH2ADD: ShAmt = 2; break;
273 case RISCV::SH3ADD: ShAmt = 3; break;
274 }
275
276 Offset = (uint64_t)Offset << ShAmt;
277
278 LLVM_DEBUG(dbgs() << " Offset Instr: " << OffsetTail);
279 foldOffset(Hi, Lo, TailShXAdd, Offset);
280 OffsetTail.eraseFromParent();
281 return true;
282}
283
284bool RISCVMergeBaseOffsetOpt::detectAndFoldOffset(MachineInstr &Hi,
285 MachineInstr &Lo) {
286 Register DestReg = Lo.getOperand(0).getReg();
287
288 // Look for arithmetic instructions we can get an offset from.
289 // We might be able to remove the arithmetic instructions by folding the
290 // offset into the LUI+ADDI.
291 if (!MRI->hasOneUse(DestReg))
292 return false;
293
294 // Lo has only one use.
295 MachineInstr &Tail = *MRI->use_instr_begin(DestReg);
296 switch (Tail.getOpcode()) {
297 default:
298 LLVM_DEBUG(dbgs() << "Don't know how to get offset from this instr:"
299 << Tail);
300 break;
301 case RISCV::ADDI: {
302 // Offset is simply an immediate operand.
303 int64_t Offset = Tail.getOperand(2).getImm();
304
305 // We might have two ADDIs in a row.
306 Register TailDestReg = Tail.getOperand(0).getReg();
307 if (MRI->hasOneUse(TailDestReg)) {
308 MachineInstr &TailTail = *MRI->use_instr_begin(TailDestReg);
309 if (TailTail.getOpcode() == RISCV::ADDI) {
310 Offset += TailTail.getOperand(2).getImm();
311 LLVM_DEBUG(dbgs() << " Offset Instrs: " << Tail << TailTail);
312 foldOffset(Hi, Lo, TailTail, Offset);
313 Tail.eraseFromParent();
314 return true;
315 }
316 }
317
318 LLVM_DEBUG(dbgs() << " Offset Instr: " << Tail);
319 foldOffset(Hi, Lo, Tail, Offset);
320 return true;
321 }
322 case RISCV::ADD:
323 // The offset is too large to fit in the immediate field of ADDI.
324 // This can be in two forms:
325 // 1) LUI hi_Offset followed by:
326 // ADDI lo_offset
327 // This happens in case the offset has non zero bits in
328 // both hi 20 and lo 12 bits.
329 // 2) LUI (offset20)
330 // This happens in case the lower 12 bits of the offset are zeros.
331 return foldLargeOffset(Hi, Lo, Tail, DestReg);
332 case RISCV::SH1ADD:
333 case RISCV::SH2ADD:
334 case RISCV::SH3ADD:
335 // The offset is too large to fit in the immediate field of ADDI.
336 // It may be encoded as (SH2ADD (ADDI X0, C), DestReg) or
337 // (SH3ADD (ADDI X0, C), DestReg).
338 return foldShiftedOffset(Hi, Lo, Tail, DestReg);
339 }
340
341 return false;
342}
343
344bool RISCVMergeBaseOffsetOpt::foldIntoMemoryOps(MachineInstr &Hi,
345 MachineInstr &Lo) {
346 Register DestReg = Lo.getOperand(0).getReg();
347
348 // If all the uses are memory ops with the same offset, we can transform:
349 //
350 // 1. (medlow pattern):
351 // Hi: lui vreg1, %hi(foo) ---> lui vreg1, %hi(foo+8)
352 // Lo: addi vreg2, vreg1, %lo(foo) ---> lw vreg3, lo(foo+8)(vreg1)
353 // Tail: lw vreg3, 8(vreg2)
354 //
355 // 2. (medany pattern):
356 // Hi: 1:auipc vreg1, %pcrel_hi(s) ---> auipc vreg1, %pcrel_hi(foo+8)
357 // Lo: addi vreg2, vreg1, %pcrel_lo(1b) ---> lw vreg3, %pcrel_lo(1b)(vreg1)
358 // Tail: lw vreg3, 8(vreg2)
359
360 std::optional<int64_t> CommonOffset;
361 for (const MachineInstr &UseMI : MRI->use_instructions(DestReg)) {
362 switch (UseMI.getOpcode()) {
363 default:
364 LLVM_DEBUG(dbgs() << "Not a load or store instruction: " << UseMI);
365 return false;
366 case RISCV::LB:
367 case RISCV::LH:
368 case RISCV::LW:
369 case RISCV::LBU:
370 case RISCV::LHU:
371 case RISCV::LWU:
372 case RISCV::LD:
373 case RISCV::FLH:
374 case RISCV::FLW:
375 case RISCV::FLD:
376 case RISCV::SB:
377 case RISCV::SH:
378 case RISCV::SW:
379 case RISCV::SD:
380 case RISCV::FSH:
381 case RISCV::FSW:
382 case RISCV::FSD: {
383 if (UseMI.getOperand(1).isFI())
384 return false;
385 // Register defined by Lo should not be the value register.
386 if (DestReg == UseMI.getOperand(0).getReg())
387 return false;
388 assert(DestReg == UseMI.getOperand(1).getReg() &&
389 "Expected base address use");
390 // All load/store instructions must use the same offset.
391 int64_t Offset = UseMI.getOperand(2).getImm();
392 if (CommonOffset && Offset != CommonOffset)
393 return false;
394 CommonOffset = Offset;
395 }
396 }
397 }
398
399 // We found a common offset.
400 // Update the offsets in global address lowering.
401 // We may have already folded some arithmetic so we need to add to any
402 // existing offset.
403 int64_t NewOffset = Hi.getOperand(1).getOffset() + *CommonOffset;
404 // RV32 ignores the upper 32 bits.
405 if (!ST->is64Bit())
406 NewOffset = SignExtend64<32>(NewOffset);
407 // We can only fold simm32 offsets.
408 if (!isInt<32>(NewOffset))
409 return false;
410
411 Hi.getOperand(1).setOffset(NewOffset);
412 MachineOperand &ImmOp = Lo.getOperand(2);
413 if (Hi.getOpcode() != RISCV::AUIPC)
414 ImmOp.setOffset(NewOffset);
415
416 // Update the immediate in the load/store instructions to add the offset.
417 for (MachineInstr &UseMI :
418 llvm::make_early_inc_range(MRI->use_instructions(DestReg))) {
419 UseMI.removeOperand(2);
420 UseMI.addOperand(ImmOp);
421 // Update the base reg in the Tail instruction to feed from LUI.
422 // Output of Hi is only used in Lo, no need to use MRI->replaceRegWith().
423 UseMI.getOperand(1).setReg(Hi.getOperand(0).getReg());
424 }
425
426 Lo.eraseFromParent();
427 return true;
428}
429
430bool RISCVMergeBaseOffsetOpt::runOnMachineFunction(MachineFunction &Fn) {
431 if (skipFunction(Fn.getFunction()))
432 return false;
433
435
436 bool MadeChange = false;
437 MRI = &Fn.getRegInfo();
438 for (MachineBasicBlock &MBB : Fn) {
439 LLVM_DEBUG(dbgs() << "MBB: " << MBB.getName() << "\n");
440 for (MachineInstr &Hi : MBB) {
441 MachineInstr *Lo = nullptr;
442 if (!detectFoldable(Hi, Lo))
443 continue;
444 MadeChange |= detectAndFoldOffset(Hi, *Lo);
445 MadeChange |= foldIntoMemoryOps(Hi, *Lo);
446 }
447 }
448
449 return MadeChange;
450}
451
452/// Returns an instance of the Merge Base Offset Optimization pass.
454 return new RISCVMergeBaseOffsetOpt();
455}
unsigned const MachineRegisterInfo * MRI
MachineInstrBuilder & UseMI
MachineBasicBlock & MBB
#define LLVM_DEBUG(X)
Definition: Debug.h:101
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:38
#define RISCV_MERGE_BASE_OFFSET_NAME
#define DEBUG_TYPE
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Represent the analysis usage information of a pass.
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:265
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
StringRef getName() const
Return the name of the corresponding LLVM basic block, or an empty string.
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
virtual bool runOnMachineFunction(MachineFunction &MF)=0
runOnMachineFunction - This method must be overloaded to perform the desired machine code transformat...
virtual MachineFunctionProperties getRequiredProperties() const
Properties which a MachineFunction may have at a given point in time.
MachineFunctionProperties & set(Property P)
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Register getReg(unsigned Idx) const
Get the register for the operand index.
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:516
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isCPI() const
isCPI - Tests if this is a MO_ConstantPoolIndex operand.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void setOffset(int64_t Offset)
unsigned getTargetFlags() const
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
MachineOperandType getType() const
getType - Returns the MachineOperandType for this operand.
Register getReg() const
getReg - Returns the register number.
@ MO_MCSymbol
MCSymbol reference (for debug/eh info)
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
virtual StringRef getPassName() const
getPassName - Return a nice clean name for a pass.
Definition: Pass.cpp:81
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
Reg
All possible values of the reg field in the ModR/M byte.
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:406
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:732
FunctionPass * createRISCVMergeBaseOffsetOptPass()
Returns an instance of the Merge Base Offset Optimization pass.
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163