RISCVCodeGenPrepare.cpp
//===----- RISCVCodeGenPrepare.cpp ----------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This is a RISC-V specific version of CodeGenPrepare.
// It munges the code in the input function to better prepare it for
// SelectionDAG-based code generation. This works around limitations in its
// basic-block-at-a-time approach.
//
//===----------------------------------------------------------------------===//

#include "RISCV.h"
#include "RISCVTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/TargetPassConfig.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"

using namespace llvm;

#define DEBUG_TYPE "riscv-codegenprepare"
#define PASS_NAME "RISC-V CodeGenPrepare"

namespace {

class RISCVCodeGenPrepare : public FunctionPass,
                            public InstVisitor<RISCVCodeGenPrepare, bool> {
  const DataLayout *DL;
  const DominatorTree *DT;
  const RISCVSubtarget *ST;

public:
  static char ID;

  RISCVCodeGenPrepare() : FunctionPass(ID) {}

  bool runOnFunction(Function &F) override;

  StringRef getPassName() const override { return PASS_NAME; }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetPassConfig>();
  }

  bool visitInstruction(Instruction &I) { return false; }
  bool visitAnd(BinaryOperator &BO);
  bool visitIntrinsicInst(IntrinsicInst &I);
  bool expandVPStrideLoad(IntrinsicInst &I);
};

} // end anonymous namespace

// Try to optimize (i64 (and (zext/sext (i32 X), C1))) if C1 has bit 31 set,
// but bits 63:32 are zero. If we know that bit 31 of X is 0, we can fill
// the upper 32 bits with ones.
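//
// A worked example (the constant is illustrative, not taken from a test):
// C1 = 0xfffff800 fits in 32 bits, has bit 31 set, and is not a simm12 as a
// 64-bit value, so it would otherwise have to be materialized in a register.
// Sign extending from bit 31 gives 0xfffffffffffff800 (-2048), which is a
// valid simm12, so the AND can be selected as ANDI. The nneg flag on the zext
// tells us bit 31 of X is 0, which is what makes filling bits 63:32 of the
// constant with ones safe.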
bool RISCVCodeGenPrepare::visitAnd(BinaryOperator &BO) {
  if (!ST->is64Bit())
    return false;

  if (!BO.getType()->isIntegerTy(64))
    return false;

  using namespace PatternMatch;

  // Left hand side should be a zext nneg.
  Value *LHSSrc;
  if (!match(BO.getOperand(0), m_NNegZExt(m_Value(LHSSrc))))
    return false;

  if (!LHSSrc->getType()->isIntegerTy(32))
    return false;

  // Right hand side should be a constant.
  Value *RHS = BO.getOperand(1);

  auto *CI = dyn_cast<ConstantInt>(RHS);
  if (!CI)
    return false;
  uint64_t C = CI->getZExtValue();

  // Look for constants that fit in 32 bits but not simm12, and can be made
  // into simm12 by sign extending bit 31. This will allow use of ANDI.
  // TODO: Is it worth making simm32?
  if (!isUInt<32>(C) || isInt<12>(C) || !isInt<12>(SignExtend64<32>(C)))
    return false;

  // Sign extend the constant and replace the And operand.
  C = SignExtend64<32>(C);
  BO.setOperand(1, ConstantInt::get(RHS->getType(), C));

  return true;
}

// LLVM vector reduction intrinsics return a scalar result, but on RISC-V,
// vector reduction instructions write the result in the first element of a
// vector register. So when a reduction in a loop uses a scalar phi, we end up
// with unnecessary scalar moves:
//
// loop:
// vfmv.s.f v10, fa0
// vfredosum.vs v8, v8, v10
// vfmv.f.s fa0, v8
//
// This mainly affects ordered fadd reductions, since other types of reduction
// typically use element-wise vectorisation in the loop body. This tries to
// vectorize any scalar phis that feed into a fadd reduction:
//
// loop:
// %phi = phi float [ ..., %entry ], [ %acc, %loop ]
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi, <vscale x 2 x float> %vec)
//
// ->
//
// loop:
// %phi = phi <vscale x 2 x float> [ ..., %entry ], [ %acc.vec, %loop ]
// %phi.scalar = extractelement <vscale x 2 x float> %phi, i64 0
// %acc = call float @llvm.vector.reduce.fadd.nxv2f32(float %phi.scalar, <vscale x 2 x float> %vec)
// %acc.vec = insertelement <vscale x 2 x float> poison, float %acc, i64 0
//
// This eliminates the scalar -> vector -> scalar crossing during instruction
// selection.
bool RISCVCodeGenPrepare::visitIntrinsicInst(IntrinsicInst &I) {
  if (expandVPStrideLoad(I))
    return true;

  if (I.getIntrinsicID() != Intrinsic::vector_reduce_fadd)
    return false;

  auto *PHI = dyn_cast<PHINode>(I.getOperand(0));
  if (!PHI || !PHI->hasOneUse() ||
      !llvm::is_contained(PHI->incoming_values(), &I))
    return false;

  Type *VecTy = I.getOperand(1)->getType();
  IRBuilder<> Builder(PHI);
  auto *VecPHI = Builder.CreatePHI(VecTy, PHI->getNumIncomingValues());

  for (auto *BB : PHI->blocks()) {
    Builder.SetInsertPoint(BB->getTerminator());
    Value *InsertElt = Builder.CreateInsertElement(
        VecTy, PHI->getIncomingValueForBlock(BB), (uint64_t)0);
    VecPHI->addIncoming(InsertElt, BB);
  }

  Builder.SetInsertPoint(&I);
  I.setOperand(0, Builder.CreateExtractElement(VecPHI, (uint64_t)0));

  PHI->eraseFromParent();

  return true;
}

// Always expand zero strided loads so we match more .vx splat patterns, even if
// we have +optimized-zero-stride-loads. RISCVDAGToDAGISel::Select will convert
// it back to a strided load if it's optimized.
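//
// A sketch of the rewrite (illustrative IR, argument details elided): a call
// like
//   %v = vp.strided.load(ptr %p, stride 0, all-ones mask, i32 %vl)
// with %vl known to be non-zero becomes a scalar load of %p followed by a
// riscv.vmv.v.x / riscv.vfmv.v.f splat of that value over the same VL.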
bool RISCVCodeGenPrepare::expandVPStrideLoad(IntrinsicInst &II) {
  Value *BasePtr, *VL;

  using namespace PatternMatch;
  if (!match(&II, m_Intrinsic<Intrinsic::experimental_vp_strided_load>(
                      m_Value(BasePtr), m_Zero(), m_AllOnes(), m_Value(VL))))
    return false;

  // If SEW>XLEN then a splat will get lowered as a zero strided load anyway, so
  // avoid expanding here.
  if (II.getType()->getScalarSizeInBits() > ST->getXLen())
    return false;

  if (!isKnownNonZero(VL, {*DL, DT, nullptr, &II}))
    return false;

  auto *VTy = cast<VectorType>(II.getType());

  IRBuilder<> Builder(&II);

  // Extend VL from i32 to XLen if needed.
  if (ST->is64Bit())
    VL = Builder.CreateZExt(VL, Builder.getInt64Ty());

  Type *STy = VTy->getElementType();
  Value *Val = Builder.CreateLoad(STy, BasePtr);
  const auto &TLI = *ST->getTargetLowering();
  Value *Res;

  // TODO: Also support fixed/illegal vector types to splat with evl = vl.
  if (isa<ScalableVectorType>(VTy) && TLI.isTypeLegal(EVT::getEVT(VTy))) {
    unsigned VMVOp = STy->isFloatingPointTy() ? Intrinsic::riscv_vfmv_v_f
                                              : Intrinsic::riscv_vmv_v_x;
    Res = Builder.CreateIntrinsic(VMVOp, {VTy, VL->getType()},
                                  {PoisonValue::get(VTy), Val, VL});
  } else {
    Res = Builder.CreateVectorSplat(VTy->getElementCount(), Val);
  }

  II.replaceAllUsesWith(Res);
  II.eraseFromParent();
  return true;
}

bool RISCVCodeGenPrepare::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  auto &TPC = getAnalysis<TargetPassConfig>();
  auto &TM = TPC.getTM<RISCVTargetMachine>();
  ST = &TM.getSubtarget<RISCVSubtarget>(F);

  DL = &F.getDataLayout();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

  bool MadeChange = false;
  for (auto &BB : F)
    for (Instruction &I : llvm::make_early_inc_range(BB))
      MadeChange |= visit(I);

  return MadeChange;
}

INITIALIZE_PASS_BEGIN(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(RISCVCodeGenPrepare, DEBUG_TYPE, PASS_NAME, false, false)

char RISCVCodeGenPrepare::ID = 0;

FunctionPass *llvm::createRISCVCodeGenPreparePass() {
  return new RISCVCodeGenPrepare();
}
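
// How the pass is typically wired in (a sketch under the assumption that the
// target's pass config schedules it before the generic CodeGenPrepare; see
// RISCVTargetMachine.cpp for the authoritative code):
//
//   void RISCVPassConfig::addCodeGenPrepare() {
//     if (getOptLevel() != CodeGenOptLevel::None)
//       addPass(createRISCVCodeGenPreparePass()); // this pass
//     TargetPassConfig::addCodeGenPrepare();
//   }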