//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "RISCVMatInt.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
  if (!HasRVC)
    return Res.size();

  int Cost = 0;
  for (auto Instr : Res) {
    bool Compressed;
    switch (Instr.Opc) {
    default:
      llvm_unreachable("Unexpected opcode");
    case RISCV::SLLI:
    case RISCV::SRLI:
      Compressed = true;
      break;
    case RISCV::ADDI:
    case RISCV::ADDIW:
    case RISCV::LUI:
      Compressed = isInt<6>(Instr.Imm);
      break;
    case RISCV::ADDUW:
      Compressed = false;
      break;
    }
    // Two RVC instructions take the same space as one RVI instruction, but
    // can take longer to execute than the single RVI instruction. Thus, we
    // consider that two RVC instructions are slightly more costly than one
    // RVI instruction. For longer sequences of RVC instructions the space
    // savings can be worth it, though. The costs below try to model that.
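    // Worked numbers for these weights (illustrative): two compressed
    // instructions are modelled as 2 * 70 = 140, i.e. dearer than a single
    // full-size instruction (100), while four compressed instructions (280)
    // still undercut three full-size ones (300).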
    if (!Compressed)
      Cost += 100; // Baseline cost of one RVI instruction: 100%.
    else
      Cost += 70; // 70% cost of baseline.
  }
  return Cost;
}

// Recursively generate a sequence for materializing an integer.
static void generateInstSeqImpl(int64_t Val,
                                const FeatureBitset &ActiveFeatures,
                                RISCVMatInt::InstSeq &Res) {
  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];

  if (isInt<32>(Val)) {
    // Depending on the active bits in the immediate Value v, the following
    // instruction sequences are emitted:
    //
    // v == 0                        : ADDI
    // v[0,12) != 0 && v[12,32) == 0 : ADDI
    // v[0,12) == 0 && v[12,32) != 0 : LUI
    // v[0,32) != 0                  : LUI+ADDI(W)
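    //
    // For example (illustrative): v = 0x12345678 gives Hi20 = 0x12345 and
    // Lo12 = 0x678, i.e. LUI 0x12345 followed by ADDI(W) 0x678. The +0x800
    // rounding below makes the split work when Lo12 is negative: v = 0xfff
    // gives Hi20 = 1 and Lo12 = -1, i.e. LUI 1; ADDI(W) -1 = 0x1000 - 1.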
    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
    int64_t Lo12 = SignExtend64<12>(Val);

    if (Hi20)
      Res.push_back(RISCVMatInt::Inst(RISCV::LUI, Hi20));

    if (Lo12 || Hi20 == 0) {
      unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
      Res.push_back(RISCVMatInt::Inst(AddiOpc, Lo12));
    }
    return;
  }

  assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");

  // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
  // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
  // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
  // while the following ADDI instructions contribute up to 12 bits each.
  //
  // At first glance, implementing this seems possible by simply emitting the
  // most significant 32 bits (LUI+ADDIW) followed by as many left shifts
  // (SLLI) and immediate additions (ADDI) as needed. However, because ADDI
  // performs a sign-extended addition, doing it like that would only be
  // possible when at most 11 bits of the ADDI instructions are used. Using all
  // 12 bits of the ADDI instructions, as GAS does, requires that the constant
  // is processed starting with the least significant bit.
  //
  // In the following, constants are processed from LSB to MSB but instruction
  // emission is performed from MSB to LSB by recursively calling
  // generateInstSeq. In each recursion, first the lowest 12 bits are removed
  // from the constant and the optimal shift amount, which can be greater than
  // 12 bits if the constant is sparse, is determined. Then, the shifted
  // remaining constant is processed recursively and gets emitted as soon as it
  // fits into 32 bits. The emission of the shifts and additions is subsequently
  // performed when the recursion returns.
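  //
  // Illustrative example: Val = 0x100000001 is split into Lo12 = 1 and a
  // shifted remainder of 1 with ShiftAmount = 32 (the shift exceeds 12
  // because the constant is sparse). The recursion emits ADDI 1, and the
  // unwinding then appends SLLI 32 and ADDI 1.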

  int64_t Lo12 = SignExtend64<12>(Val);
  int64_t Hi52 = ((uint64_t)Val + 0x800ull) >> 12;
  int ShiftAmount = 12 + findFirstSet((uint64_t)Hi52);
  Hi52 = SignExtend64(Hi52 >> (ShiftAmount - 12), 64 - ShiftAmount);

  // If the remaining bits don't fit in 12 bits, we might be able to reduce the
  // shift amount in order to use LUI which will zero the lower 12 bits.
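  // As an illustration: if Hi52 = 0x12345 with ShiftAmount = 24, shifting the
  // zeros back in gives Hi52 = 0x12345000 with ShiftAmount = 12, which the
  // recursion can materialize with a single LUI instead of LUI+ADDIW.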
  bool Unsigned = false;
  if (ShiftAmount > 12 && !isInt<12>(Hi52)) {
    if (isInt<32>((uint64_t)Hi52 << 12)) {
      // Reduce the shift amount and add zeros to the LSBs so it will match LUI.
      ShiftAmount -= 12;
      Hi52 = (uint64_t)Hi52 << 12;
    } else if (isUInt<32>((uint64_t)Hi52 << 12) &&
               ActiveFeatures[RISCV::FeatureStdExtZba]) {
      // Reduce the shift amount and add zeros to the LSBs so it will match
      // LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
      ShiftAmount -= 12;
      Hi52 = ((uint64_t)Hi52 << 12) | (0xffffffffull << 32);
      Unsigned = true;
    }
  }

  // Try to use SLLIUW for Hi52 when it is uint32 but not int32.
  if (isUInt<32>((uint64_t)Hi52) && !isInt<32>((uint64_t)Hi52) &&
      ActiveFeatures[RISCV::FeatureStdExtZba]) {
    // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with SLLIUW.
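    // For instance (illustrative): Hi52 = 0x80000001 becomes
    // 0xffffffff80000001, which the recursion materializes as LUI 0x80000;
    // ADDIW 1. The SLLIUW emitted below then zero-extends the low 32 bits
    // while shifting, discarding the artificial upper ones again.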
    Hi52 = ((uint64_t)Hi52) | (0xffffffffull << 32);
    Unsigned = true;
  }

  generateInstSeqImpl(Hi52, ActiveFeatures, Res);

  if (Unsigned)
    Res.push_back(RISCVMatInt::Inst(RISCV::SLLIUW, ShiftAmount));
  else
    Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
  if (Lo12)
    Res.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
}

namespace llvm {
namespace RISCVMatInt {
InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
  RISCVMatInt::InstSeq Res;
  generateInstSeqImpl(Val, ActiveFeatures, Res);

  // If the constant is positive we might be able to generate a shifted constant
  // with no leading zeros and use a final SRLI to restore them.
  if (Val > 0 && Res.size() > 2) {
    assert(ActiveFeatures[RISCV::Feature64Bit] &&
           "Expected RV32 to only need 2 instructions");
    unsigned LeadingZeros = countLeadingZeros((uint64_t)Val);
    uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
    // Fill in the bits that will be shifted out with 1s. An example where this
    // helps is trailing one masks with 32 or more ones. This will generate
    // ADDI -1 and an SRLI.
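    // Concretely (illustrative): Val = 0xffffffff would otherwise need
    // ADDI 1; SLLI 32; ADDI -1, but shifting out the 32 leading zeros and
    // filling with ones yields -1, so ADDI -1; SRLI 32 suffices.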
    ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);

    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));

    // Keep the new sequence if it is an improvement.
    if (TmpSeq.size() < Res.size()) {
      Res = TmpSeq;
      // A 2 instruction sequence is the best we can do.
      if (Res.size() <= 2)
        return Res;
    }

    // Some cases can benefit from filling the lower bits with zeros instead.
    ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
    TmpSeq.clear();
    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));

    // Keep the new sequence if it is an improvement.
    if (TmpSeq.size() < Res.size()) {
      Res = TmpSeq;
      // A 2 instruction sequence is the best we can do.
      if (Res.size() <= 2)
        return Res;
    }

    // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
    // the end of the sequence.
    if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
      // Try replacing upper bits with 1.
      uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
      TmpSeq.clear();
      generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDUW, 0));

      // Keep the new sequence if it is an improvement.
      if (TmpSeq.size() < Res.size()) {
        Res = TmpSeq;
        // A 2 instruction sequence is the best we can do.
        if (Res.size() <= 2)
          return Res;
      }
    }
  }

  // Perform optimization with BCLRI/BSETI in the Zbs extension.
  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbs]) {
    assert(ActiveFeatures[RISCV::Feature64Bit] &&
           "Expected RV32 to only need 2 instructions");

    // 1. For values in range 0xffffffff 7fffffff ~ 0xffffffff 00000000,
    //    call generateInstSeqImpl with Val|0x80000000 (which is expected to
    //    be an int32), then emit (BCLRI r, 31).
    // 2. For values in range 0x80000000 ~ 0xffffffff, call generateInstSeqImpl
    //    with Val&~0x80000000 (which is expected to be an int32), then
    //    emit (BSETI r, 31).
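    //
    // Illustrative example for case 1: Val = 0xffffffff12345678 needs
    // LUI+ADDIW+SLLI+ADDI on the generic path, but Val|0x80000000 =
    // 0xffffffff92345678 is a sign-extended 32-bit value, so LUI+ADDIW
    // followed by (BCLRI r, 31) saves one instruction.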
    int64_t NewVal;
    unsigned Opc;
    if (Val < 0) {
      Opc = RISCV::BCLRI;
      NewVal = Val | 0x80000000ll;
    } else {
      Opc = RISCV::BSETI;
      NewVal = Val & ~0x80000000ll;
    }
    if (isInt<32>(NewVal)) {
      RISCVMatInt::InstSeq TmpSeq;
      generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(Opc, 31));
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    }

    // Try to use BCLRI for upper 32 bits if the original lower 32 bits are
    // negative int32, or use BSETI for upper 32 bits if the original lower
    // 32 bits are positive int32.
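    // For instance (illustrative): Val = 0x1000000001 would be materialized
    // as ADDI 1; SLLI 36; ADDI 1, whereas with Zbs the lower word is just
    // ADDI 1 and the single upper bit becomes BSETI 36, one instruction less.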
    int32_t Lo = Val;
    uint32_t Hi = Val >> 32;
    Opc = 0;
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(Lo, ActiveFeatures, TmpSeq);
    // Check if it is profitable to use BCLRI/BSETI.
    if (Lo > 0 && TmpSeq.size() + countPopulation(Hi) < Res.size()) {
      Opc = RISCV::BSETI;
    } else if (Lo < 0 && TmpSeq.size() + countPopulation(~Hi) < Res.size()) {
      Opc = RISCV::BCLRI;
      Hi = ~Hi;
    }
    // Search for each bit and build corresponding BCLRI/BSETI.
    if (Opc > 0) {
      while (Hi != 0) {
        unsigned Bit = countTrailingZeros(Hi);
        TmpSeq.push_back(RISCVMatInt::Inst(Opc, Bit + 32));
        Hi &= ~(1 << Bit);
      }
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    }
  }

  // Perform optimization with SH*ADD in the Zba extension.
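  // Illustrative example (assuming Zba only): Val = 0x100001007 = 3 *
  // 0x55555aad. The generic path needs LUI+ADDIW+SLLI+ADDI, but materializing
  // the 32-bit quotient with LUI+ADDIW and folding the multiply by 3 into a
  // single SH1ADD saves one instruction.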
  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
    assert(ActiveFeatures[RISCV::Feature64Bit] &&
           "Expected RV32 to only need 2 instructions");
    int64_t Div = 0;
    unsigned Opc = 0;
    RISCVMatInt::InstSeq TmpSeq;
    // Select the opcode and divisor.
    if ((Val % 3) == 0 && isInt<32>(Val / 3)) {
      Div = 3;
      Opc = RISCV::SH1ADD;
    } else if ((Val % 5) == 0 && isInt<32>(Val / 5)) {
      Div = 5;
      Opc = RISCV::SH2ADD;
    } else if ((Val % 9) == 0 && isInt<32>(Val / 9)) {
      Div = 9;
      Opc = RISCV::SH3ADD;
    }
    // Build the new instruction sequence.
    if (Div > 0) {
      generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    }
    // Try to use LUI+SH*ADD+ADDI.
    int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
    int64_t Lo12 = SignExtend64<12>(Val);
    Div = 0;
    if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) {
      Div = 3;
      Opc = RISCV::SH1ADD;
    } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) {
      Div = 5;
      Opc = RISCV::SH2ADD;
    } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) {
      Div = 9;
      Opc = RISCV::SH3ADD;
    }
    // Build the new instruction sequence.
    if (Div > 0) {
      // A Val with a zero Lo12 (i.e. Val equal to Hi52) should already have
      // been turned into LUI+SH*ADD by the previous optimization.
      assert(Lo12 != 0 &&
             "unexpected instruction sequence for immediate materialisation");
      generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    }
  }

  return Res;
}

int getIntMatCost(const APInt &Val, unsigned Size,
                  const FeatureBitset &ActiveFeatures, bool CompressionCost) {
  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
  bool HasRVC = CompressionCost && ActiveFeatures[RISCV::FeatureStdExtC];
  int PlatRegSize = IsRV64 ? 64 : 32;

  // Split the constant into platform register sized chunks, and calculate cost
  // of each chunk.
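  // For example (illustrative), a 128-bit constant on RV64 is split into two
  // 64-bit chunks; each chunk is materialized independently and the costs of
  // the resulting sequences are summed.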
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
    APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
    InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures);
    Cost += getInstSeqCost(MatSeq, HasRVC);
  }
  return std::max(1, Cost);
}
} // namespace RISCVMatInt
} // namespace llvm