LLVM  9.0.0svn
AArch64ExpandImm.cpp
Go to the documentation of this file.
1 //===- AArch64ExpandImm.h - AArch64 Immediate Expansion -------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file implements the AArch64ExpandImm stuff.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64.h"
14 #include "AArch64ExpandImm.h"
16 
17 namespace llvm {
18 
19 namespace AArch64_IMM {
20 
21 /// Helper function which extracts the specified 16-bit chunk from a
22 /// 64-bit value.
23 static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx) {
24  assert(ChunkIdx < 4 && "Out of range chunk index specified!");
25 
26  return (Imm >> (ChunkIdx * 16)) & 0xFFFF;
27 }
28 
29 /// Check whether the given 16-bit chunk replicated to full 64-bit width
30 /// can be materialized with an ORR instruction.
31 static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding) {
32  Chunk = (Chunk << 48) | (Chunk << 32) | (Chunk << 16) | Chunk;
33 
34  return AArch64_AM::processLogicalImmediate(Chunk, 64, Encoding);
35 }
36 
37 /// Check for identical 16-bit chunks within the constant and if so
38 /// materialize them with a single ORR instruction. The remaining one or two
39 /// 16-bit chunks will be materialized with MOVK instructions.
40 ///
41 /// This allows us to materialize constants like |A|B|A|A| or |A|B|C|A| (order
42 /// of the chunks doesn't matter), assuming |A|A|A|A| can be materialized with
43 /// an ORR instruction.
44 static bool tryToreplicateChunks(uint64_t UImm,
46  using CountMap = DenseMap<uint64_t, unsigned>;
47 
48  CountMap Counts;
49 
50  // Scan the constant and count how often every chunk occurs.
51  for (unsigned Idx = 0; Idx < 4; ++Idx)
52  ++Counts[getChunk(UImm, Idx)];
53 
54  // Traverse the chunks to find one which occurs more than once.
55  for (CountMap::const_iterator Chunk = Counts.begin(), End = Counts.end();
56  Chunk != End; ++Chunk) {
57  const uint64_t ChunkVal = Chunk->first;
58  const unsigned Count = Chunk->second;
59 
60  uint64_t Encoding = 0;
61 
62  // We are looking for chunks which have two or three instances and can be
63  // materialized with an ORR instruction.
64  if ((Count != 2 && Count != 3) || !canUseOrr(ChunkVal, Encoding))
65  continue;
66 
67  const bool CountThree = Count == 3;
68 
69  Insn.push_back({ AArch64::ORRXri, 0, Encoding });
70 
71  unsigned ShiftAmt = 0;
72  uint64_t Imm16 = 0;
73  // Find the first chunk not materialized with the ORR instruction.
74  for (; ShiftAmt < 64; ShiftAmt += 16) {
75  Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
76 
77  if (Imm16 != ChunkVal)
78  break;
79  }
80 
81  // Create the first MOVK instruction.
82  Insn.push_back({ AArch64::MOVKXi, Imm16,
84 
85  // In case we have three instances the whole constant is now materialized
86  // and we can exit.
87  if (CountThree)
88  return true;
89 
90  // Find the remaining chunk which needs to be materialized.
91  for (ShiftAmt += 16; ShiftAmt < 64; ShiftAmt += 16) {
92  Imm16 = (UImm >> ShiftAmt) & 0xFFFF;
93 
94  if (Imm16 != ChunkVal)
95  break;
96  }
97  Insn.push_back({ AArch64::MOVKXi, Imm16,
99  return true;
100  }
101 
102  return false;
103 }
104 
105 /// Check whether this chunk matches the pattern '1...0...'. This pattern
106 /// starts a contiguous sequence of ones if we look at the bits from the LSB
107 /// towards the MSB.
108 static bool isStartChunk(uint64_t Chunk) {
109  if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
110  return false;
111 
112  return isMask_64(~Chunk);
113 }
114 
115 /// Check whether this chunk matches the pattern '0...1...' This pattern
116 /// ends a contiguous sequence of ones if we look at the bits from the LSB
117 /// towards the MSB.
118 static bool isEndChunk(uint64_t Chunk) {
119  if (Chunk == 0 || Chunk == std::numeric_limits<uint64_t>::max())
120  return false;
121 
122  return isMask_64(Chunk);
123 }
124 
125 /// Clear or set all bits in the chunk at the given index.
126 static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear) {
127  const uint64_t Mask = 0xFFFF;
128 
129  if (Clear)
130  // Clear chunk in the immediate.
131  Imm &= ~(Mask << (Idx * 16));
132  else
133  // Set all bits in the immediate for the particular chunk.
134  Imm |= Mask << (Idx * 16);
135 
136  return Imm;
137 }
138 
139 /// Check whether the constant contains a sequence of contiguous ones,
140 /// which might be interrupted by one or two chunks. If so, materialize the
141 /// sequence of contiguous ones with an ORR instruction.
142 /// Materialize the chunks which are either interrupting the sequence or outside
143 /// of the sequence with a MOVK instruction.
144 ///
145 /// Assuming S is a chunk which starts the sequence (1...0...), E is a chunk
146 /// which ends the sequence (0...1...). Then we are looking for constants which
147 /// contain at least one S and E chunk.
148 /// E.g. |E|A|B|S|, |A|E|B|S| or |A|B|E|S|.
149 ///
150 /// We are also looking for constants like |S|A|B|E| where the contiguous
151 /// sequence of ones wraps around the MSB into the LSB.
152 static bool trySequenceOfOnes(uint64_t UImm,
154  const int NotSet = -1;
155  const uint64_t Mask = 0xFFFF;
156 
157  int StartIdx = NotSet;
158  int EndIdx = NotSet;
159  // Try to find the chunks which start/end a contiguous sequence of ones.
160  for (int Idx = 0; Idx < 4; ++Idx) {
161  int64_t Chunk = getChunk(UImm, Idx);
162  // Sign extend the 16-bit chunk to 64-bit.
163  Chunk = (Chunk << 48) >> 48;
164 
165  if (isStartChunk(Chunk))
166  StartIdx = Idx;
167  else if (isEndChunk(Chunk))
168  EndIdx = Idx;
169  }
170 
171  // Early exit in case we can't find a start/end chunk.
172  if (StartIdx == NotSet || EndIdx == NotSet)
173  return false;
174 
175  // Outside of the contiguous sequence of ones everything needs to be zero.
176  uint64_t Outside = 0;
177  // Chunks between the start and end chunk need to have all their bits set.
178  uint64_t Inside = Mask;
179 
180  // If our contiguous sequence of ones wraps around from the MSB into the LSB,
181  // just swap indices and pretend we are materializing a contiguous sequence
182  // of zeros surrounded by a contiguous sequence of ones.
183  if (StartIdx > EndIdx) {
184  std::swap(StartIdx, EndIdx);
185  std::swap(Outside, Inside);
186  }
187 
188  uint64_t OrrImm = UImm;
189  int FirstMovkIdx = NotSet;
190  int SecondMovkIdx = NotSet;
191 
192  // Find out which chunks we need to patch up to obtain a contiguous sequence
193  // of ones.
194  for (int Idx = 0; Idx < 4; ++Idx) {
195  const uint64_t Chunk = getChunk(UImm, Idx);
196 
197  // Check whether we are looking at a chunk which is not part of the
198  // contiguous sequence of ones.
199  if ((Idx < StartIdx || EndIdx < Idx) && Chunk != Outside) {
200  OrrImm = updateImm(OrrImm, Idx, Outside == 0);
201 
202  // Remember the index we need to patch.
203  if (FirstMovkIdx == NotSet)
204  FirstMovkIdx = Idx;
205  else
206  SecondMovkIdx = Idx;
207 
208  // Check whether we are looking a chunk which is part of the contiguous
209  // sequence of ones.
210  } else if (Idx > StartIdx && Idx < EndIdx && Chunk != Inside) {
211  OrrImm = updateImm(OrrImm, Idx, Inside != Mask);
212 
213  // Remember the index we need to patch.
214  if (FirstMovkIdx == NotSet)
215  FirstMovkIdx = Idx;
216  else
217  SecondMovkIdx = Idx;
218  }
219  }
220  assert(FirstMovkIdx != NotSet && "Constant materializable with single ORR!");
221 
222  // Create the ORR-immediate instruction.
223  uint64_t Encoding = 0;
224  AArch64_AM::processLogicalImmediate(OrrImm, 64, Encoding);
225  Insn.push_back({ AArch64::ORRXri, 0, Encoding });
226 
227  const bool SingleMovk = SecondMovkIdx == NotSet;
228  Insn.push_back({ AArch64::MOVKXi, getChunk(UImm, FirstMovkIdx),
230  FirstMovkIdx * 16) });
231 
232  // Early exit in case we only need to emit a single MOVK instruction.
233  if (SingleMovk)
234  return true;
235 
236  // Create the second MOVK instruction.
237  Insn.push_back({ AArch64::MOVKXi, getChunk(UImm, SecondMovkIdx),
239  SecondMovkIdx * 16) });
240 
241  return true;
242 }
243 
244 /// \brief Expand a MOVi32imm or MOVi64imm pseudo instruction to a
245 /// MOVZ or MOVN of width BitSize followed by up to 3 MOVK instructions.
246 static inline void expandMOVImmSimple(uint64_t Imm, unsigned BitSize,
247  unsigned OneChunks, unsigned ZeroChunks,
249  const unsigned Mask = 0xFFFF;
250 
251  // Use a MOVZ or MOVN instruction to set the high bits, followed by one or
252  // more MOVK instructions to insert additional 16-bit portions into the
253  // lower bits.
254  bool isNeg = false;
255 
256  // Use MOVN to materialize the high bits if we have more all one chunks
257  // than all zero chunks.
258  if (OneChunks > ZeroChunks) {
259  isNeg = true;
260  Imm = ~Imm;
261  }
262 
263  unsigned FirstOpc;
264  if (BitSize == 32) {
265  Imm &= (1LL << 32) - 1;
266  FirstOpc = (isNeg ? AArch64::MOVNWi : AArch64::MOVZWi);
267  } else {
268  FirstOpc = (isNeg ? AArch64::MOVNXi : AArch64::MOVZXi);
269  }
270  unsigned Shift = 0; // LSL amount for high bits with MOVZ/MOVN
271  unsigned LastShift = 0; // LSL amount for last MOVK
272  if (Imm != 0) {
273  unsigned LZ = countLeadingZeros(Imm);
274  unsigned TZ = countTrailingZeros(Imm);
275  Shift = (TZ / 16) * 16;
276  LastShift = ((63 - LZ) / 16) * 16;
277  }
278  unsigned Imm16 = (Imm >> Shift) & Mask;
279 
280  Insn.push_back({ FirstOpc, Imm16,
282 
283  if (Shift == LastShift)
284  return;
285 
286  // If a MOVN was used for the high bits of a negative value, flip the rest
287  // of the bits back for use with MOVK.
288  if (isNeg)
289  Imm = ~Imm;
290 
291  unsigned Opc = (BitSize == 32 ? AArch64::MOVKWi : AArch64::MOVKXi);
292  while (Shift < LastShift) {
293  Shift += 16;
294  Imm16 = (Imm >> Shift) & Mask;
295  if (Imm16 == (isNeg ? Mask : 0))
296  continue; // This 16-bit portion is already set correctly.
297 
298  Insn.push_back({ Opc, Imm16,
300  }
301 }
302 
303 /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
304 /// real move-immediate instructions to synthesize the immediate.
305 void expandMOVImm(uint64_t Imm, unsigned BitSize,
307  const unsigned Mask = 0xFFFF;
308 
309  // Scan the immediate and count the number of 16-bit chunks which are either
310  // all ones or all zeros.
311  unsigned OneChunks = 0;
312  unsigned ZeroChunks = 0;
313  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
314  const unsigned Chunk = (Imm >> Shift) & Mask;
315  if (Chunk == Mask)
316  OneChunks++;
317  else if (Chunk == 0)
318  ZeroChunks++;
319  }
320 
321  // Prefer MOVZ/MOVN over ORR because of the rules for the "mov" alias.
322  if ((BitSize / 16) - OneChunks <= 1 || (BitSize / 16) - ZeroChunks <= 1) {
323  expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
324  return;
325  }
326 
327  // Try a single ORR.
328  uint64_t UImm = Imm << (64 - BitSize) >> (64 - BitSize);
329  uint64_t Encoding;
330  if (AArch64_AM::processLogicalImmediate(UImm, BitSize, Encoding)) {
331  unsigned Opc = (BitSize == 32 ? AArch64::ORRWri : AArch64::ORRXri);
332  Insn.push_back({ Opc, 0, Encoding });
333  return;
334  }
335 
336  // One to up three instruction sequences.
337  //
338  // Prefer MOVZ/MOVN followed by MOVK; it's more readable, and possibly the
339  // fastest sequence with fast literal generation.
340  if (OneChunks >= (BitSize / 16) - 2 || ZeroChunks >= (BitSize / 16) - 2) {
341  expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
342  return;
343  }
344 
345  assert(BitSize == 64 && "All 32-bit immediates can be expanded with a"
346  "MOVZ/MOVK pair");
347 
348  // Try other two-instruction sequences.
349 
350  // 64-bit ORR followed by MOVK.
351  // We try to construct the ORR immediate in three different ways: either we
352  // zero out the chunk which will be replaced, we fill the chunk which will
353  // be replaced with ones, or we take the bit pattern from the other half of
354  // the 64-bit immediate. This is comprehensive because of the way ORR
355  // immediates are constructed.
356  for (unsigned Shift = 0; Shift < BitSize; Shift += 16) {
357  uint64_t ShiftedMask = (0xFFFFULL << Shift);
358  uint64_t ZeroChunk = UImm & ~ShiftedMask;
359  uint64_t OneChunk = UImm | ShiftedMask;
360  uint64_t RotatedImm = (UImm << 32) | (UImm >> 32);
361  uint64_t ReplicateChunk = ZeroChunk | (RotatedImm & ShiftedMask);
362  if (AArch64_AM::processLogicalImmediate(ZeroChunk, BitSize, Encoding) ||
363  AArch64_AM::processLogicalImmediate(OneChunk, BitSize, Encoding) ||
364  AArch64_AM::processLogicalImmediate(ReplicateChunk, BitSize,
365  Encoding)) {
366  // Create the ORR-immediate instruction.
367  Insn.push_back({ AArch64::ORRXri, 0, Encoding });
368 
369  // Create the MOVK instruction.
370  const unsigned Imm16 = getChunk(UImm, Shift / 16);
371  Insn.push_back({ AArch64::MOVKXi, Imm16,
373  return;
374  }
375  }
376 
377  // FIXME: Add more two-instruction sequences.
378 
379  // Three instruction sequences.
380  //
381  // Prefer MOVZ/MOVN followed by two MOVK; it's more readable, and possibly
382  // the fastest sequence with fast literal generation. (If neither MOVK is
383  // part of a fast literal generation pair, it could be slower than the
384  // four-instruction sequence, but we won't worry about that for now.)
385  if (OneChunks || ZeroChunks) {
386  expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
387  return;
388  }
389 
390  // Check for identical 16-bit chunks within the constant and if so materialize
391  // them with a single ORR instruction. The remaining one or two 16-bit chunks
392  // will be materialized with MOVK instructions.
393  if (BitSize == 64 && tryToreplicateChunks(UImm, Insn))
394  return;
395 
396  // Check whether the constant contains a sequence of contiguous ones, which
397  // might be interrupted by one or two chunks. If so, materialize the sequence
398  // of contiguous ones with an ORR instruction. Materialize the chunks which
399  // are either interrupting the sequence or outside of the sequence with a
400  // MOVK instruction.
401  if (BitSize == 64 && trySequenceOfOnes(UImm, Insn))
402  return;
403 
404  // We found no possible two or three instruction sequence; use the general
405  // four-instruction sequence.
406  expandMOVImmSimple(Imm, BitSize, OneChunks, ZeroChunks, Insn);
407 }
408 
409 } // end namespace AArch64_AM
410 
411 } // end namespace llvm
GCNRegPressure max(const GCNRegPressure &P1, const GCNRegPressure &P2)
static bool isStartChunk(uint64_t Chunk)
Check whether this chunk matches the pattern &#39;1...0...&#39;.
This class represents lattice values for constants.
Definition: AllocatorList.h:23
static bool processLogicalImmediate(uint64_t Imm, unsigned RegSize, uint64_t &Encoding)
processLogicalImmediate - Determine if an immediate value can be encoded as the immediate operand of ...
unsigned countTrailingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the least significant bit to the most stopping at the first 1...
Definition: MathExtras.h:119
static uint64_t updateImm(uint64_t Imm, unsigned Idx, bool Clear)
Clear or set all bits in the chunk at the given index.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:41
constexpr bool isMask_64(uint64_t Value)
Return true if the argument is a non-empty sequence of ones starting at the least significant bit wit...
Definition: MathExtras.h:410
static bool tryToreplicateChunks(uint64_t UImm, SmallVectorImpl< ImmInsnModel > &Insn)
Check for identical 16-bit chunks within the constant and if so materialize them with a single ORR in...
static bool canUseOrr(uint64_t Chunk, uint64_t &Encoding)
Check whether the given 16-bit chunk replicated to full 64-bit width can be materialized with an ORR ...
static unsigned getShifterImm(AArch64_AM::ShiftExtendType ST, unsigned Imm)
getShifterImm - Encode the shift type and amount: imm: 6-bit shift amount shifter: 000 ==> lsl 001 ==...
unsigned countLeadingZeros(T Val, ZeroBehavior ZB=ZB_Width)
Count number of 0&#39;s from the most significant bit to the least stopping at the first 1...
Definition: MathExtras.h:188
static bool trySequenceOfOnes(uint64_t UImm, SmallVectorImpl< ImmInsnModel > &Insn)
Check whether the constant contains a sequence of contiguous ones, which might be interrupted by one ...
static uint64_t getChunk(uint64_t Imm, unsigned ChunkIdx)
Helper function which extracts the specified 16-bit chunk from a 64-bit value.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:940
void expandMOVImm(uint64_t Imm, unsigned BitSize, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more real move-immediate instructions to...
static bool isEndChunk(uint64_t Chunk)
Check whether this chunk matches the pattern &#39;0...1...&#39; This pattern ends a contiguous sequence of on...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E&#39;s largest value.
Definition: BitmaskEnum.h:80
static void expandMOVImmSimple(uint64_t Imm, unsigned BitSize, unsigned OneChunks, unsigned ZeroChunks, SmallVectorImpl< ImmInsnModel > &Insn)
Expand a MOVi32imm or MOVi64imm pseudo instruction to a MOVZ or MOVN of width BitSize followed by up ...