RISCVMatInt.cpp
//===- RISCVMatInt.cpp - Immediate materialisation -------------*- C++ -*--===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "RISCVMatInt.h"
#include "MCTargetDesc/RISCVMCTargetDesc.h"
#include "llvm/ADT/APInt.h"
#include "llvm/Support/MathExtras.h"
using namespace llvm;

static int getInstSeqCost(RISCVMatInt::InstSeq &Res, bool HasRVC) {
  if (!HasRVC)
    return Res.size();

  int Cost = 0;
  for (auto Instr : Res) {
    // Assume instructions that aren't listed aren't compressible.
    bool Compressed = false;
    switch (Instr.Opc) {
    case RISCV::SLLI:
    case RISCV::SRLI:
      Compressed = true;
      break;
    case RISCV::ADDI:
    case RISCV::ADDIW:
    case RISCV::LUI:
      Compressed = isInt<6>(Instr.Imm);
      break;
    }
    // Two RVC instructions take the same space as one RVI instruction, but
    // can take longer to execute than the single RVI instruction. Thus, we
    // consider that two RVC instructions are slightly more costly than one
    // RVI instruction. For longer sequences of RVC instructions the space
    // savings can be worth it, though. The costs below try to model that.
    if (!Compressed)
      Cost += 100; // Baseline cost of one RVI instruction: 100%.
    else
      Cost += 70; // 70% cost of baseline.
  }
  return Cost;
}
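
// Illustrative example of the cost model above (not part of the original
// source): with RVC enabled, the sequence LUI 0x12345; ADDIW 0x678; SLLI 12
// costs 100 + 100 + 70 = 270, since neither immediate fits in 6 bits but SLLI
// is always treated as compressible. Without RVC the function simply returns
// the sequence length, 3.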

// Recursively generate a sequence for materializing an integer.
static void generateInstSeqImpl(int64_t Val,
                                const FeatureBitset &ActiveFeatures,
                                RISCVMatInt::InstSeq &Res) {
  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];

  if (isInt<32>(Val)) {
    // Depending on the active bits in the immediate Value v, the following
    // instruction sequences are emitted:
    //
    // v == 0                        : ADDI
    // v[0,12) != 0 && v[12,32) == 0 : ADDI
    // v[0,12) == 0 && v[12,32) != 0 : LUI
    // v[0,32) != 0                  : LUI+ADDI(W)
    int64_t Hi20 = ((Val + 0x800) >> 12) & 0xFFFFF;
    int64_t Lo12 = SignExtend64<12>(Val);

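    // Worked example (illustration, not in the original source): for
    // Val = 0x12345FFF, Hi20 = ((0x12345FFF + 0x800) >> 12) & 0xFFFFF =
    // 0x12346 and Lo12 = SignExtend64<12>(0xFFF) = -1, so the sequence is
    // LUI 0x12346; ADDI(W) -1, i.e. 0x12346000 - 1 = 0x12345FFF. The +0x800
    // rounds Hi20 up so the sign-extended Lo12 can subtract back down.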
    if (Hi20)
      Res.push_back(RISCVMatInt::Inst(RISCV::LUI, Hi20));

    if (Lo12 || Hi20 == 0) {
      unsigned AddiOpc = (IsRV64 && Hi20) ? RISCV::ADDIW : RISCV::ADDI;
      Res.push_back(RISCVMatInt::Inst(AddiOpc, Lo12));
    }
    return;
  }

  assert(IsRV64 && "Can't emit >32-bit imm for non-RV64 target");

  // Use BSETI for a single bit.
  if (ActiveFeatures[RISCV::FeatureStdExtZbs] && isPowerOf2_64(Val)) {
    Res.push_back(RISCVMatInt::Inst(RISCV::BSETI, Log2_64(Val)));
    return;
  }

  // In the worst case, for a full 64-bit constant, a sequence of 8 instructions
  // (i.e., LUI+ADDIW+SLLI+ADDI+SLLI+ADDI+SLLI+ADDI) has to be emitted. Note
  // that the first two instructions (LUI+ADDIW) can contribute up to 32 bits
  // while the following ADDI instructions contribute up to 12 bits each.
  //
  // At first glance, implementing this seems to be possible by simply emitting
  // the most significant 32 bits (LUI+ADDIW) followed by as many left shifts
  // (SLLI) and immediate additions (ADDI) as needed. However, because ADDI
  // performs a sign-extended addition, doing it like that would only be
  // possible when at most 11 bits of the ADDI instructions are used. Using all
  // 12 bits of the ADDI instructions, as GAS does, actually requires that the
  // constant is processed starting with the least significant bit.
  //
  // In the following, constants are processed from LSB to MSB but instruction
  // emission is performed from MSB to LSB by recursively calling
  // generateInstSeqImpl. In each recursion, first the lowest 12 bits are
  // removed from the constant and the optimal shift amount, which can be
  // greater than 12 bits if the constant is sparse, is determined. Then, the
  // shifted remaining constant is processed recursively and gets emitted as
  // soon as it fits into 32 bits. The emission of the shifts and additions is
  // subsequently performed when the recursion returns.

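  // Worked example (illustration, not in the original source): for
  // Val = 0x100000001, Lo12 = 1 and Val - Lo12 = 0x100000000, which does not
  // fit in 32 bits. ShiftAmount = 32 and the shifted value 1 is materialized
  // recursively with ADDI 1; unwinding the recursion then appends SLLI 32 and
  // ADDI 1, giving the sequence ADDI 1; SLLI 32; ADDI 1.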
  int64_t Lo12 = SignExtend64<12>(Val);
  Val = (uint64_t)Val - (uint64_t)Lo12;

  int ShiftAmount = 0;
  bool Unsigned = false;

  // Val might now be valid for LUI without needing a shift.
  if (!isInt<32>(Val)) {
    ShiftAmount = findFirstSet((uint64_t)Val);
    Val >>= ShiftAmount;

    // If the remaining bits don't fit in 12 bits, we might be able to reduce
    // the shift amount in order to use LUI which will zero the lower 12 bits.
    if (ShiftAmount > 12 && !isInt<12>(Val)) {
      if (isInt<32>((uint64_t)Val << 12)) {
        // Reduce the shift amount and add zeros to the LSBs so it will match
        // LUI.
        ShiftAmount -= 12;
        Val = (uint64_t)Val << 12;
      } else if (isUInt<32>((uint64_t)Val << 12) &&
                 ActiveFeatures[RISCV::FeatureStdExtZba]) {
        // Reduce the shift amount and add zeros to the LSBs so it will match
        // LUI, then shift left with SLLI.UW to clear the upper 32 set bits.
        ShiftAmount -= 12;
        Val = ((uint64_t)Val << 12) | (0xffffffffull << 32);
        Unsigned = true;
      }
    }

    // Try to use SLLI_UW for Val when it is uint32 but not int32.
    if (isUInt<32>((uint64_t)Val) && !isInt<32>((uint64_t)Val) &&
        ActiveFeatures[RISCV::FeatureStdExtZba]) {
      // Use LUI+ADDI or LUI to compose, then clear the upper 32 bits with
      // SLLI_UW.
      Val = ((uint64_t)Val) | (0xffffffffull << 32);
      Unsigned = true;
    }
  }

  generateInstSeqImpl(Val, ActiveFeatures, Res);

  // Skip shift if we were able to use LUI directly.
  if (ShiftAmount) {
    if (Unsigned)
      Res.push_back(RISCVMatInt::Inst(RISCV::SLLI_UW, ShiftAmount));
    else
      Res.push_back(RISCVMatInt::Inst(RISCV::SLLI, ShiftAmount));
  }

  if (Lo12)
    Res.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
}

static unsigned extractRotateInfo(int64_t Val) {
  // for case: 0b111..1..xxxxxx1..1..
  unsigned LeadingOnes = countLeadingOnes((uint64_t)Val);
  unsigned TrailingOnes = countTrailingOnes((uint64_t)Val);
  if (TrailingOnes > 0 && TrailingOnes < 64 &&
      (LeadingOnes + TrailingOnes) > (64 - 12))
    return 64 - TrailingOnes;

  // for case: 0bxxx1..1..1...xxx
  unsigned UpperTrailingOnes = countTrailingOnes(Hi_32(Val));
  unsigned LowerLeadingOnes = countLeadingOnes(Lo_32(Val));
  if (UpperTrailingOnes < 32 &&
      (UpperTrailingOnes + LowerLeadingOnes) > (64 - 12))
    return 32 - UpperTrailingOnes;

  return 0;
}
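
// Worked example (illustration, not in the original source): for
// Val = 0xFFFFFFFFFFFFF0FF there are 52 leading ones and 8 trailing ones, so
// extractRotateInfo returns 64 - 8 = 56. Rotating Val left by 56 gives
// 0xFFFFFFFFFFFFFFF0 = -16, which fits in 12 bits, so with Zbb the constant
// can be materialized as ADDI -16; RORI 56.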

namespace llvm {
namespace RISCVMatInt {
InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) {
  RISCVMatInt::InstSeq Res;
  generateInstSeqImpl(Val, ActiveFeatures, Res);

  // If there are trailing zeros, try generating a sign extended constant with
  // no trailing zeros and use a final SLLI to restore them.
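  // Worked example (illustration, not in the original source, assuming no
  // Zba/Zbs/Zbb): for Val = 0x123456780 the base sequence is four
  // instructions (LUI; ADDIW; SLLI 13; ADDI 0x780), whereas shifting out the
  // seven trailing zeros leaves 0x2468ACF, a signed 32-bit value, giving
  // LUI 0x2469; ADDIW -0x531; SLLI 7 instead.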
  if ((Val & 1) == 0 && Res.size() > 2) {
    unsigned TrailingZeros = countTrailingZeros((uint64_t)Val);
    int64_t ShiftedVal = Val >> TrailingZeros;
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SLLI, TrailingZeros));

    // Keep the new sequence if it is an improvement.
    if (TmpSeq.size() < Res.size()) {
      Res = TmpSeq;
      // A 2 instruction sequence is the best we can do.
      if (Res.size() <= 2)
        return Res;
    }
  }

  // If the constant is positive we might be able to generate a shifted
  // constant with no leading zeros and use a final SRLI to restore them.
  if (Val > 0 && Res.size() > 2) {
    assert(ActiveFeatures[RISCV::Feature64Bit] &&
           "Expected RV32 to only need 2 instructions");
    unsigned LeadingZeros = countLeadingZeros((uint64_t)Val);
    uint64_t ShiftedVal = (uint64_t)Val << LeadingZeros;
    // Fill in the bits that will be shifted out with 1s. An example where this
    // helps is trailing one masks with 32 or more ones. This will generate
    // ADDI -1 and an SRLI.
    ShiftedVal |= maskTrailingOnes<uint64_t>(LeadingZeros);
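    // Worked example (illustration, not in the original source): for
    // Val = 0x3FFFFFFFF (34 trailing ones), LeadingZeros = 30 and ShiftedVal
    // becomes 0xFFFFFFFFFFFFFFFF after the mask is ORed in, so the candidate
    // sequence is just ADDI -1; SRLI 30.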

    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));

    // Keep the new sequence if it is an improvement.
    if (TmpSeq.size() < Res.size()) {
      Res = TmpSeq;
      // A 2 instruction sequence is the best we can do.
      if (Res.size() <= 2)
        return Res;
    }

    // Some cases can benefit from filling the lower bits with zeros instead.
    ShiftedVal &= maskTrailingZeros<uint64_t>(LeadingZeros);
    TmpSeq.clear();
    generateInstSeqImpl(ShiftedVal, ActiveFeatures, TmpSeq);
    TmpSeq.push_back(RISCVMatInt::Inst(RISCV::SRLI, LeadingZeros));

    // Keep the new sequence if it is an improvement.
    if (TmpSeq.size() < Res.size()) {
      Res = TmpSeq;
      // A 2 instruction sequence is the best we can do.
      if (Res.size() <= 2)
        return Res;
    }

    // If we have exactly 32 leading zeros and Zba, we can try using zext.w at
    // the end of the sequence.
    if (LeadingZeros == 32 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
      // Try replacing upper bits with 1.
      uint64_t LeadingOnesVal = Val | maskLeadingOnes<uint64_t>(LeadingZeros);
      TmpSeq.clear();
      generateInstSeqImpl(LeadingOnesVal, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADD_UW, 0));

      // Keep the new sequence if it is an improvement.
      if (TmpSeq.size() < Res.size()) {
        Res = TmpSeq;
        // A 2 instruction sequence is the best we can do.
        if (Res.size() <= 2)
          return Res;
      }
    }
  }

  // Perform optimization with BCLRI/BSETI in the Zbs extension.
  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbs]) {
    assert(ActiveFeatures[RISCV::Feature64Bit] &&
           "Expected RV32 to only need 2 instructions");

    // 1. For values in range 0xffffffff 7fffffff ~ 0xffffffff 00000000,
    //    call generateInstSeqImpl with Val|0x80000000 (which is expected to be
    //    an int32), then emit (BCLRI r, 31).
    // 2. For values in range 0x80000000 ~ 0xffffffff, call generateInstSeqImpl
    //    with Val&~0x80000000 (which is expected to be an int32), then
    //    emit (BSETI r, 31).
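    // Worked example (illustration, not in the original source): for
    // Val = 0xffffffff00001234, NewVal = Val | 0x80000000 =
    // 0xffffffff80001234 fits in a signed 32-bit value, so it can be built
    // with LUI 0x80001; ADDIW 0x234, and BCLRI r, 31 then clears bit 31 to
    // recover the original constant.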
    int64_t NewVal;
    unsigned Opc;
    if (Val < 0) {
      Opc = RISCV::BCLRI;
      NewVal = Val | 0x80000000ll;
    } else {
      Opc = RISCV::BSETI;
      NewVal = Val & ~0x80000000ll;
    }
    if (isInt<32>(NewVal)) {
      RISCVMatInt::InstSeq TmpSeq;
      generateInstSeqImpl(NewVal, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(Opc, 31));
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    }

    // Try to use BCLRI for upper 32 bits if the original lower 32 bits are
    // negative int32, or use BSETI for upper 32 bits if the original lower
    // 32 bits are positive int32.
    int32_t Lo = Val;
    uint32_t Hi = Val >> 32;
    Opc = 0;
    RISCVMatInt::InstSeq TmpSeq;
    generateInstSeqImpl(Lo, ActiveFeatures, TmpSeq);
    // Check if it is profitable to use BCLRI/BSETI.
    if (Lo > 0 && TmpSeq.size() + countPopulation(Hi) < Res.size()) {
      Opc = RISCV::BSETI;
    } else if (Lo < 0 && TmpSeq.size() + countPopulation(~Hi) < Res.size()) {
      Opc = RISCV::BCLRI;
      Hi = ~Hi;
    }
    // Search for each bit and build corresponding BCLRI/BSETI.
    if (Opc > 0) {
      while (Hi != 0) {
        unsigned Bit = countTrailingZeros(Hi);
        TmpSeq.push_back(RISCVMatInt::Inst(Opc, Bit + 32));
        Hi &= ~(1 << Bit);
      }
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    }
  }

  // Perform optimization with SH*ADD in the Zba extension.
  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZba]) {
    assert(ActiveFeatures[RISCV::Feature64Bit] &&
           "Expected RV32 to only need 2 instructions");
    int64_t Div = 0;
    unsigned Opc = 0;
    RISCVMatInt::InstSeq TmpSeq;
    // Select the opcode and divisor.
    if ((Val % 3) == 0 && isInt<32>(Val / 3)) {
      Div = 3;
      Opc = RISCV::SH1ADD;
    } else if ((Val % 5) == 0 && isInt<32>(Val / 5)) {
      Div = 5;
      Opc = RISCV::SH2ADD;
    } else if ((Val % 9) == 0 && isInt<32>(Val / 9)) {
      Div = 9;
      Opc = RISCV::SH3ADD;
    }
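    // Illustration (not in the original source): SH1ADD rd, rs, rs computes
    // rs*3, SH2ADD gives rs*5 and SH3ADD gives rs*9, so e.g. a constant that
    // is 5 times a signed 32-bit value can be built by materializing Val/5
    // with LUI+ADDIW and then appending a single SH2ADD.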
    // Build the new instruction sequence.
    if (Div > 0) {
      generateInstSeqImpl(Val / Div, ActiveFeatures, TmpSeq);
      TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
      if (TmpSeq.size() < Res.size())
        Res = TmpSeq;
    } else {
      // Try to use LUI+SH*ADD+ADDI.
      int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull;
      int64_t Lo12 = SignExtend64<12>(Val);
      Div = 0;
      if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) {
        Div = 3;
        Opc = RISCV::SH1ADD;
      } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) {
        Div = 5;
        Opc = RISCV::SH2ADD;
      } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) {
        Div = 9;
        Opc = RISCV::SH3ADD;
      }
      // Build the new instruction sequence.
      if (Div > 0) {
        // A Val with zero Lo12 (i.e. Val == Hi52) should already have been
        // turned into LUI+SH*ADD by the previous optimization.
        assert(Lo12 != 0 &&
               "unexpected instruction sequence for immediate materialisation");
        assert(TmpSeq.empty() && "Expected empty TmpSeq");
        generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq);
        TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0));
        TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12));
        if (TmpSeq.size() < Res.size())
          Res = TmpSeq;
      }
    }
  }

  // Perform optimization with rori in the Zbb extension.
  if (Res.size() > 2 && ActiveFeatures[RISCV::FeatureStdExtZbb]) {
    if (unsigned Rotate = extractRotateInfo(Val)) {
      RISCVMatInt::InstSeq TmpSeq;
      uint64_t NegImm12 =
          ((uint64_t)Val >> (64 - Rotate)) | ((uint64_t)Val << Rotate);
      assert(isInt<12>(NegImm12));
      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, NegImm12));
      TmpSeq.push_back(RISCVMatInt::Inst(RISCV::RORI, Rotate));
      Res = TmpSeq;
    }
  }
  return Res;
}

int getIntMatCost(const APInt &Val, unsigned Size,
                  const FeatureBitset &ActiveFeatures, bool CompressionCost) {
  bool IsRV64 = ActiveFeatures[RISCV::Feature64Bit];
  bool HasRVC = CompressionCost && ActiveFeatures[RISCV::FeatureStdExtC];
  int PlatRegSize = IsRV64 ? 64 : 32;

  // Split the constant into platform register sized chunks, and calculate cost
  // of each chunk.
  int Cost = 0;
  for (unsigned ShiftVal = 0; ShiftVal < Size; ShiftVal += PlatRegSize) {
    APInt Chunk = Val.ashr(ShiftVal).sextOrTrunc(PlatRegSize);
    InstSeq MatSeq = generateInstSeq(Chunk.getSExtValue(), ActiveFeatures);
    Cost += getInstSeqCost(MatSeq, HasRVC);
  }
  return std::max(1, Cost);
}
} // namespace RISCVMatInt
} // namespace llvm
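
// Usage sketch (illustration, not part of the original source; assumes an
// RV64 subtarget whose feature bits are available as STI.getFeatureBits()):
//
//   RISCVMatInt::InstSeq Seq =
//       RISCVMatInt::generateInstSeq(0x100000001, STI.getFeatureBits());
//   for (const RISCVMatInt::Inst &I : Seq)
//     emitInstruction(I.Opc, I.Imm); // hypothetical emitter callback
//
// Each Inst pairs an opcode with its immediate operand; the caller is
// responsible for threading the destination register through the sequence.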