//===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Define several functions to decode x86 specific shuffle semantics into a
// generic vector mask.
//
//===----------------------------------------------------------------------===//

#include "X86ShuffleDecode.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"

//===----------------------------------------------------------------------===//
// Vector Mask Decoding
//===----------------------------------------------------------------------===//

namespace llvm {

void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
  // Default to copying the dest value.
  ShuffleMask.push_back(0);
  ShuffleMask.push_back(1);
  ShuffleMask.push_back(2);
  ShuffleMask.push_back(3);

  // Decode the immediate.
  unsigned ZMask = Imm & 15;
  unsigned CountD = (Imm >> 4) & 3;
  unsigned CountS = (Imm >> 6) & 3;

  // CountS selects which input element to use.
  unsigned InVal = 4 + CountS;
  // CountD specifies which element of destination to update.
  ShuffleMask[CountD] = InVal;
  // ZMask zaps values, potentially overriding the CountD elt.
  if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero;
  if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero;
  if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero;
  if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero;
}
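// Illustrative example (not part of the original source): Imm = 0xD1 decodes
// as CountS = 3, CountD = 1, ZMask = 0b0001, so:
//   SmallVector<int, 4> Mask;
//   DecodeINSERTPSMask(0xD1, Mask); // Mask == <SM_SentinelZero, 7, 2, 3>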

void DecodeInsertElementMask(unsigned NumElts, unsigned Idx, unsigned Len,
                             SmallVectorImpl<int> &ShuffleMask) {
  assert((Idx + Len) <= NumElts && "Insertion out of range");

  for (unsigned i = 0; i != NumElts; ++i)
    ShuffleMask.push_back(i);
  for (unsigned i = 0; i != Len; ++i)
    ShuffleMask[Idx + i] = NumElts + i;
}

// <3,1> or <6,7,2,3>
void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned i = NElts / 2; i != NElts; ++i)
    ShuffleMask.push_back(NElts + i);

  for (unsigned i = NElts / 2; i != NElts; ++i)
    ShuffleMask.push_back(i);
}

// <0,2> or <0,1,4,5>
void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned i = 0; i != NElts / 2; ++i)
    ShuffleMask.push_back(i);

  for (unsigned i = 0; i != NElts / 2; ++i)
    ShuffleMask.push_back(NElts + i);
}

void DecodeMOVSLDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
  for (int i = 0, e = NumElts / 2; i < e; ++i) {
    ShuffleMask.push_back(2 * i);
    ShuffleMask.push_back(2 * i);
  }
}
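// Illustrative: DecodeMOVSLDUPMask(4, Mask) duplicates the even elements,
// giving <0, 0, 2, 2>; DecodeMOVSHDUPMask below gives <1, 1, 3, 3>.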

void DecodeMOVSHDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
  for (int i = 0, e = NumElts / 2; i < e; ++i) {
    ShuffleMask.push_back(2 * i + 1);
    ShuffleMask.push_back(2 * i + 1);
  }
}

void DecodeMOVDDUPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
  const unsigned NumLaneElts = 2;

  for (unsigned l = 0; l < NumElts; l += NumLaneElts)
    for (unsigned i = 0; i < NumLaneElts; ++i)
      ShuffleMask.push_back(l);
}

void DecodePSLLDQMask(unsigned NumElts, unsigned Imm,
                      SmallVectorImpl<int> &ShuffleMask) {
  const unsigned NumLaneElts = 16;

  for (unsigned l = 0; l < NumElts; l += NumLaneElts)
    for (unsigned i = 0; i < NumLaneElts; ++i) {
      int M = SM_SentinelZero;
      if (i >= Imm) M = i - Imm + l;
      ShuffleMask.push_back(M);
    }
}
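// Illustrative: a v16i8 PSLLDQ by Imm = 4 zero-fills the low bytes, giving
//   <Z, Z, Z, Z, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11>  (Z = SM_SentinelZero).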

void DecodePSRLDQMask(unsigned NumElts, unsigned Imm,
                      SmallVectorImpl<int> &ShuffleMask) {
  const unsigned NumLaneElts = 16;

  for (unsigned l = 0; l < NumElts; l += NumLaneElts)
    for (unsigned i = 0; i < NumLaneElts; ++i) {
      unsigned Base = i + Imm;
      int M = Base + l;
      if (Base >= NumLaneElts) M = SM_SentinelZero;
      ShuffleMask.push_back(M);
    }
}

void DecodePALIGNRMask(unsigned NumElts, unsigned Imm,
                       SmallVectorImpl<int> &ShuffleMask) {
  const unsigned NumLaneElts = 16;

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      unsigned Base = i + Imm;
      // If i+imm is out of this lane then we actually need the other source.
      if (Base >= NumLaneElts) Base += NumElts - NumLaneElts;
      ShuffleMask.push_back(Base + l);
    }
  }
}
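// Illustrative: for a single 128-bit lane of bytes (NumElts = 16), Imm = 4
// yields <4, 5, ..., 15, 16, 17, 18, 19>, where indices >= 16 select the
// second source.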

void DecodeVALIGNMask(unsigned NumElts, unsigned Imm,
                      SmallVectorImpl<int> &ShuffleMask) {
  // Not all bits of the immediate are used so mask it.
  assert(isPowerOf2_32(NumElts) && "NumElts should be power of 2");
  Imm = Imm & (NumElts - 1);
  for (unsigned i = 0; i != NumElts; ++i)
    ShuffleMask.push_back(i + Imm);
}
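// Illustrative: DecodeVALIGNMask(8, 2, Mask) gives <2, 3, 4, 5, 6, 7, 8, 9>;
// indices 8 and 9 read from the second source.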

void DecodePSHUFMask(unsigned NumElts, unsigned ScalarBits, unsigned Imm,
                     SmallVectorImpl<int> &ShuffleMask) {
  unsigned Size = NumElts * ScalarBits;
  unsigned NumLanes = Size / 128;
  if (NumLanes == 0) NumLanes = 1; // Handle MMX
  unsigned NumLaneElts = NumElts / NumLanes;

  uint32_t SplatImm = (Imm & 0xff) * 0x01010101;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = 0; i != NumLaneElts; ++i) {
      ShuffleMask.push_back(SplatImm % NumLaneElts + l);
      SplatImm /= NumLaneElts;
    }
  }
}
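// Illustrative: a v4i32 PSHUFD with Imm = 0x1B (0b00011011) reverses the
// vector: <3, 2, 1, 0>.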

void DecodePSHUFHWMask(unsigned NumElts, unsigned Imm,
                       SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned l = 0; l != NumElts; l += 8) {
    unsigned NewImm = Imm;
    for (unsigned i = 0, e = 4; i != e; ++i) {
      ShuffleMask.push_back(l + i);
    }
    for (unsigned i = 4, e = 8; i != e; ++i) {
      ShuffleMask.push_back(l + 4 + (NewImm & 3));
      NewImm >>= 2;
    }
  }
}
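// Illustrative: PSHUFHW with Imm = 0x1B on v8i16 keeps the low quadword and
// reverses the high one: <0, 1, 2, 3, 7, 6, 5, 4>.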

void DecodePSHUFLWMask(unsigned NumElts, unsigned Imm,
                       SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned l = 0; l != NumElts; l += 8) {
    unsigned NewImm = Imm;
    for (unsigned i = 0, e = 4; i != e; ++i) {
      ShuffleMask.push_back(l + (NewImm & 3));
      NewImm >>= 2;
    }
    for (unsigned i = 4, e = 8; i != e; ++i) {
      ShuffleMask.push_back(l + i);
    }
  }
}

void DecodePSWAPMask(unsigned NumElts, SmallVectorImpl<int> &ShuffleMask) {
  unsigned NumHalfElts = NumElts / 2;

  for (unsigned l = 0; l != NumHalfElts; ++l)
    ShuffleMask.push_back(l + NumHalfElts);
  for (unsigned h = 0; h != NumHalfElts; ++h)
    ShuffleMask.push_back(h);
}

void DecodeSHUFPMask(unsigned NumElts, unsigned ScalarBits,
                     unsigned Imm, SmallVectorImpl<int> &ShuffleMask) {
  unsigned NumLaneElts = 128 / ScalarBits;

  unsigned NewImm = Imm;
  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    // Each half of a lane comes from a different source.
    for (unsigned s = 0; s != NumElts * 2; s += NumElts) {
      for (unsigned i = 0; i != NumLaneElts / 2; ++i) {
        ShuffleMask.push_back(NewImm % NumLaneElts + s + l);
        NewImm /= NumLaneElts;
      }
    }
    if (NumLaneElts == 4) NewImm = Imm; // reload imm
  }
}
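// Illustrative: SHUFPS with Imm = 0x4E on v4f32 gives <2, 3, 4, 5>: elements
// 2,3 of the first source followed by elements 0,1 of the second.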

void DecodeUNPCKHMask(unsigned NumElts, unsigned ScalarBits,
                      SmallVectorImpl<int> &ShuffleMask) {
  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
  // independently on 128-bit lanes.
  unsigned NumLanes = (NumElts * ScalarBits) / 128;
  if (NumLanes == 0) NumLanes = 1; // Handle MMX
  unsigned NumLaneElts = NumElts / NumLanes;

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = l + NumLaneElts / 2, e = l + NumLaneElts; i != e; ++i) {
      ShuffleMask.push_back(i);           // Reads from dest/src1
      ShuffleMask.push_back(i + NumElts); // Reads from src/src2
    }
  }
}

void DecodeUNPCKLMask(unsigned NumElts, unsigned ScalarBits,
                      SmallVectorImpl<int> &ShuffleMask) {
  // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate
  // independently on 128-bit lanes.
  unsigned NumLanes = (NumElts * ScalarBits) / 128;
  if (NumLanes == 0) NumLanes = 1; // Handle MMX
  unsigned NumLaneElts = NumElts / NumLanes;

  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
    for (unsigned i = l, e = l + NumLaneElts / 2; i != e; ++i) {
      ShuffleMask.push_back(i);           // Reads from dest/src1
      ShuffleMask.push_back(i + NumElts); // Reads from src/src2
    }
  }
}
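// Illustrative: on v4i32, DecodeUNPCKLMask gives <0, 4, 1, 5> and
// DecodeUNPCKHMask above gives <2, 6, 3, 7>.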

void DecodeVectorBroadcast(unsigned NumElts,
                           SmallVectorImpl<int> &ShuffleMask) {
  ShuffleMask.append(NumElts, 0);
}

void DecodeSubVectorBroadcast(unsigned DstNumElts, unsigned SrcNumElts,
                              SmallVectorImpl<int> &ShuffleMask) {
  unsigned Scale = DstNumElts / SrcNumElts;

  for (unsigned i = 0; i != Scale; ++i)
    for (unsigned j = 0; j != SrcNumElts; ++j)
      ShuffleMask.push_back(j);
}

void decodeVSHUF64x2FamilyMask(unsigned NumElts, unsigned ScalarSize,
                               unsigned Imm,
                               SmallVectorImpl<int> &ShuffleMask) {
  unsigned NumElementsInLane = 128 / ScalarSize;
  unsigned NumLanes = NumElts / NumElementsInLane;

  for (unsigned l = 0; l != NumElts; l += NumElementsInLane) {
    unsigned Index = (Imm % NumLanes) * NumElementsInLane;
    Imm /= NumLanes; // Discard the bits we just used.
    // We actually need the other source.
    if (l >= (NumElts / 2))
      Index += NumElts;
    for (unsigned i = 0; i != NumElementsInLane; ++i)
      ShuffleMask.push_back(Index + i);
  }
}
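// Illustrative: on v8f64 (ScalarSize = 64), Imm = 0x44 selects 128-bit lanes
// 0,1 of the first source and lanes 0,1 of the second: <0,1,2,3,8,9,10,11>.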

void DecodeVPERM2X128Mask(unsigned NumElts, unsigned Imm,
                          SmallVectorImpl<int> &ShuffleMask) {
  unsigned HalfSize = NumElts / 2;

  for (unsigned l = 0; l != 2; ++l) {
    unsigned HalfMask = Imm >> (l * 4);
    unsigned HalfBegin = (HalfMask & 0x3) * HalfSize;
    for (unsigned i = HalfBegin, e = HalfBegin + HalfSize; i != e; ++i)
      ShuffleMask.push_back((HalfMask & 8) ? SM_SentinelZero : (int)i);
  }
}
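// Illustrative: VPERM2F128 with Imm = 0x21 on v4i64 gives <2, 3, 4, 5> (high
// half of the first source, then low half of the second); bit 3 of either
// nibble zeroes that half instead.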

void DecodePSHUFBMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                      SmallVectorImpl<int> &ShuffleMask) {
  for (int i = 0, e = RawMask.size(); i < e; ++i) {
    uint64_t M = RawMask[i];
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }
    // For 256/512-bit vectors the base of the shuffle is the 128-bit
    // subvector we're inside.
    int Base = (i / 16) * 16;
    // If the high bit (7) of the byte is set, the element is zeroed.
    if (M & (1 << 7))
      ShuffleMask.push_back(SM_SentinelZero);
    else {
      // Only the least significant 4 bits of the byte are used.
      int Index = Base + (M & 0xf);
      ShuffleMask.push_back(Index);
    }
  }
}

void DecodeBLENDMask(unsigned NumElts, unsigned Imm,
                     SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned i = 0; i < NumElts; ++i) {
    // If there are more than 8 elements in the vector, then any immediate blend
    // mask wraps around.
    unsigned Bit = i % 8;
    ShuffleMask.push_back(((Imm >> Bit) & 1) ? NumElts + i : i);
  }
}
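// Illustrative: DecodeBLENDMask(8, 0xAA, Mask) gives
// <0, 9, 2, 11, 4, 13, 6, 15>: set immediate bits select the second source.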

void DecodeVPPERMMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                      SmallVectorImpl<int> &ShuffleMask) {
  assert(RawMask.size() == 16 && "Illegal VPPERM shuffle mask size");

  // VPPERM Operation
  // Bits[4:0] - Byte Index (0 - 31)
  // Bits[7:5] - Permute Operation
  //
  // Permute Operation:
  // 0 - Source byte (no logical operation).
  // 1 - Invert source byte.
  // 2 - Bit reverse of source byte.
  // 3 - Bit reverse of inverted source byte.
  // 4 - 00h (zero - fill).
  // 5 - FFh (ones - fill).
  // 6 - Most significant bit of source byte replicated in all bit positions.
  // 7 - Invert most significant bit of source byte and replicate in all bit
  //     positions.
  for (int i = 0, e = RawMask.size(); i < e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }

    uint64_t M = RawMask[i];
    uint64_t PermuteOp = (M >> 5) & 0x7;
    if (PermuteOp == 4) {
      ShuffleMask.push_back(SM_SentinelZero);
      continue;
    }
    if (PermuteOp != 0) {
      ShuffleMask.clear();
      return;
    }

    uint64_t Index = M & 0x1F;
    ShuffleMask.push_back((int)Index);
  }
}

void DecodeVPERMMask(unsigned NumElts, unsigned Imm,
                     SmallVectorImpl<int> &ShuffleMask) {
  for (unsigned l = 0; l != NumElts; l += 4)
    for (unsigned i = 0; i != 4; ++i)
      ShuffleMask.push_back(l + ((Imm >> (2 * i)) & 3));
}
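// Illustrative: VPERMQ with Imm = 0x1B reverses a v4i64: <3, 2, 1, 0>.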

void DecodeZeroExtendMask(unsigned SrcScalarBits, unsigned DstScalarBits,
                          unsigned NumDstElts, bool IsAnyExtend,
                          SmallVectorImpl<int> &ShuffleMask) {
  unsigned Scale = DstScalarBits / SrcScalarBits;
  assert(SrcScalarBits < DstScalarBits &&
         "Expected zero extension mask to increase scalar size");

  int Sentinel = IsAnyExtend ? SM_SentinelUndef : SM_SentinelZero;
  for (unsigned i = 0; i != NumDstElts; i++) {
    ShuffleMask.push_back(i);
    ShuffleMask.append(Scale - 1, Sentinel);
  }
}
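// Illustrative: PMOVZXBW (8-bit to 16-bit, 8 destination elements) gives
// <0, Z, 1, Z, 2, Z, 3, Z, 4, Z, 5, Z, 6, Z, 7, Z> with Z = SM_SentinelZero
// (SM_SentinelUndef for an any-extend).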

void DecodeZeroMoveLowMask(unsigned NumElts,
                           SmallVectorImpl<int> &ShuffleMask) {
  ShuffleMask.push_back(0);
  ShuffleMask.append(NumElts - 1, SM_SentinelZero);
}

void DecodeScalarMoveMask(unsigned NumElts, bool IsLoad,
                          SmallVectorImpl<int> &ShuffleMask) {
  // First element comes from the first element of second source.
  // Remaining elements: Load zero extends / Move copies from first source.
  ShuffleMask.push_back(NumElts);
  for (unsigned i = 1; i < NumElts; i++)
    ShuffleMask.push_back(IsLoad ? static_cast<int>(SM_SentinelZero) : i);
}

void DecodeEXTRQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
                      SmallVectorImpl<int> &ShuffleMask) {
  unsigned HalfElts = NumElts / 2;

  // Only the bottom 6 bits are valid for each immediate.
  Len &= 0x3F;
  Idx &= 0x3F;

  // We can only decode this bit extraction instruction as a shuffle if both the
  // length and index work with whole elements.
  if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
    return;

  // A length of zero is equivalent to a bit length of 64.
  if (Len == 0)
    Len = 64;

  // If the length + index exceeds the bottom 64 bits the result is undefined.
  if ((Len + Idx) > 64) {
    ShuffleMask.append(NumElts, SM_SentinelUndef);
    return;
  }

  // Convert length and index to work with elements.
  Len /= EltSize;
  Idx /= EltSize;

  // EXTRQ: Extract Len elements starting from Idx. Zero pad the remaining
  // elements of the lower 64-bits. The upper 64-bits are undefined.
  for (int i = 0; i != Len; ++i)
    ShuffleMask.push_back(i + Idx);
  for (int i = Len; i != (int)HalfElts; ++i)
    ShuffleMask.push_back(SM_SentinelZero);
  for (int i = HalfElts; i != (int)NumElts; ++i)
    ShuffleMask.push_back(SM_SentinelUndef);
}
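// Illustrative: on v16i8 (EltSize = 8), Len = 16 and Idx = 8 extract bytes
// 1 and 2: <1, 2, Z, Z, Z, Z, Z, Z, U, U, U, U, U, U, U, U>
// (Z = SM_SentinelZero, U = SM_SentinelUndef).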

void DecodeINSERTQIMask(unsigned NumElts, unsigned EltSize, int Len, int Idx,
                        SmallVectorImpl<int> &ShuffleMask) {
  unsigned HalfElts = NumElts / 2;

  // Only the bottom 6 bits are valid for each immediate.
  Len &= 0x3F;
  Idx &= 0x3F;

  // We can only decode this bit insertion instruction as a shuffle if both the
  // length and index work with whole elements.
  if (0 != (Len % EltSize) || 0 != (Idx % EltSize))
    return;

  // A length of zero is equivalent to a bit length of 64.
  if (Len == 0)
    Len = 64;

  // If the length + index exceeds the bottom 64 bits the result is undefined.
  if ((Len + Idx) > 64) {
    ShuffleMask.append(NumElts, SM_SentinelUndef);
    return;
  }

  // Convert length and index to work with elements.
  Len /= EltSize;
  Idx /= EltSize;

  // INSERTQ: Extract lowest Len elements from lower half of second source and
  // insert over first source starting at Idx element. The upper 64-bits are
  // undefined.
  for (int i = 0; i != Idx; ++i)
    ShuffleMask.push_back(i);
  for (int i = 0; i != Len; ++i)
    ShuffleMask.push_back(i + NumElts);
  for (int i = Idx + Len; i != (int)HalfElts; ++i)
    ShuffleMask.push_back(i);
  for (int i = HalfElts; i != (int)NumElts; ++i)
    ShuffleMask.push_back(SM_SentinelUndef);
}

void DecodeVPERMILPMask(unsigned NumElts, unsigned ScalarBits,
                        ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                        SmallVectorImpl<int> &ShuffleMask) {
  unsigned VecSize = NumElts * ScalarBits;
  unsigned NumLanes = VecSize / 128;
  unsigned NumEltsPerLane = NumElts / NumLanes;
  assert((VecSize == 128 || VecSize == 256 || VecSize == 512) &&
         "Unexpected vector size");
  assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");

  for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }
    uint64_t M = RawMask[i];
    M = (ScalarBits == 64 ? ((M >> 1) & 0x1) : (M & 0x3));
    unsigned LaneOffset = i & ~(NumEltsPerLane - 1);
    ShuffleMask.push_back((int)(LaneOffset + M));
  }
}

void DecodeVPERMIL2PMask(unsigned NumElts, unsigned ScalarBits, unsigned M2Z,
                         ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                         SmallVectorImpl<int> &ShuffleMask) {
  unsigned VecSize = NumElts * ScalarBits;
  unsigned NumLanes = VecSize / 128;
  unsigned NumEltsPerLane = NumElts / NumLanes;
  assert((VecSize == 128 || VecSize == 256) && "Unexpected vector size");
  assert((ScalarBits == 32 || ScalarBits == 64) && "Unexpected element size");
  assert((NumElts == RawMask.size()) && "Unexpected mask size");

  for (unsigned i = 0, e = RawMask.size(); i < e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }

    // VPERMIL2 Operation.
    // Bits[3] - Match Bit.
    // Bits[2:1] - (Per Lane) PD Shuffle Mask.
    // Bits[2:0] - (Per Lane) PS Shuffle Mask.
    uint64_t Selector = RawMask[i];
    unsigned MatchBit = (Selector >> 3) & 0x1;

    // M2Z[0:1]  MatchBit
    //   0Xb        X       Source selected by Selector index.
    //   10b        0       Source selected by Selector index.
    //   10b        1       Zero.
    //   11b        0       Zero.
    //   11b        1       Source selected by Selector index.
    if ((M2Z & 0x2) != 0 && MatchBit != (M2Z & 0x1)) {
      ShuffleMask.push_back(SM_SentinelZero);
      continue;
    }

    int Index = i & ~(NumEltsPerLane - 1);
    if (ScalarBits == 64)
      Index += (Selector >> 1) & 0x1;
    else
      Index += Selector & 0x3;

    int Src = (Selector >> 2) & 0x1;
    Index += Src * NumElts;
    ShuffleMask.push_back(Index);
  }
}

void DecodeVPERMVMask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                      SmallVectorImpl<int> &ShuffleMask) {
  uint64_t EltMaskSize = RawMask.size() - 1;
  for (int i = 0, e = RawMask.size(); i != e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }
    uint64_t M = RawMask[i];
    M &= EltMaskSize;
    ShuffleMask.push_back((int)M);
  }
}

void DecodeVPERMV3Mask(ArrayRef<uint64_t> RawMask, const APInt &UndefElts,
                       SmallVectorImpl<int> &ShuffleMask) {
  uint64_t EltMaskSize = (RawMask.size() * 2) - 1;
  for (int i = 0, e = RawMask.size(); i != e; ++i) {
    if (UndefElts[i]) {
      ShuffleMask.push_back(SM_SentinelUndef);
      continue;
    }
    uint64_t M = RawMask[i];
    M &= EltMaskSize;
    ShuffleMask.push_back((int)M);
  }
}

} // namespace llvm