clang  9.0.0
CGBuiltin.cpp
1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This contains code to emit Builtin calls as LLVM code.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "CGCXXABI.h"
14 #include "CGObjCRuntime.h"
15 #include "CGOpenCLRuntime.h"
16 #include "CGRecordLayout.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "ConstantEmitter.h"
20 #include "PatternInit.h"
21 #include "TargetInfo.h"
22 #include "clang/AST/ASTContext.h"
23 #include "clang/AST/Decl.h"
24 #include "clang/AST/OSLog.h"
25 #include "clang/Basic/TargetBuiltins.h"
26 #include "clang/Basic/TargetInfo.h"
27 #include "clang/CodeGen/CGFunctionInfo.h"
28 #include "llvm/ADT/SmallPtrSet.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/IR/DataLayout.h"
31 #include "llvm/IR/InlineAsm.h"
32 #include "llvm/IR/Intrinsics.h"
33 #include "llvm/IR/MDBuilder.h"
34 #include "llvm/Support/ConvertUTF.h"
35 #include "llvm/Support/ScopedPrinter.h"
36 #include "llvm/Support/TargetParser.h"
37 #include <sstream>
38 
39 using namespace clang;
40 using namespace CodeGen;
41 using namespace llvm;
42 
43 static
44 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
45  return std::min(High, std::max(Low, Value));
46 }
47 
48 static void initializeAlloca(CodeGenFunction &CGF, AllocaInst *AI, Value *Size, unsigned AlignmentInBytes) {
49  ConstantInt *Byte;
50  switch (CGF.getLangOpts().getTrivialAutoVarInit()) {
 51  case LangOptions::TrivialAutoVarInitKind::Uninitialized:
 52  // Nothing to initialize.
 53  return;
 54  case LangOptions::TrivialAutoVarInitKind::Zero:
 55  Byte = CGF.Builder.getInt8(0x00);
 56  break;
 57  case LangOptions::TrivialAutoVarInitKind::Pattern: {
 58  llvm::Type *Int8 = llvm::IntegerType::getInt8Ty(CGF.CGM.getLLVMContext());
59  Byte = llvm::dyn_cast<llvm::ConstantInt>(
60  initializationPatternFor(CGF.CGM, Int8));
61  break;
62  }
63  }
64  CGF.Builder.CreateMemSet(AI, Byte, Size, AlignmentInBytes);
65 }
66 
67 /// getBuiltinLibFunction - Given a builtin id for a function like
68 /// "__builtin_fabsf", return a Function* for "fabsf".
 69 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
 70  unsigned BuiltinID) {
71  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
72 
73  // Get the name, skip over the __builtin_ prefix (if necessary).
74  StringRef Name;
75  GlobalDecl D(FD);
76 
77  // If the builtin has been declared explicitly with an assembler label,
78  // use the mangled name. This differs from the plain label on platforms
79  // that prefix labels.
80  if (FD->hasAttr<AsmLabelAttr>())
81  Name = getMangledName(D);
82  else
83  Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
84 
85  llvm::FunctionType *Ty =
86  cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
87 
88  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
89 }
90 
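For illustration, a source-level call like the one below can reach this path: when EmitBuiltinExpr decides not to fold or intrinsify a library builtin, the call is emitted against the unprefixed library function ("__builtin_fabsf" becomes "fabsf", hence the +10 that skips the ten-character "__builtin_" prefix). A minimal sketch, not part of this file:

  #include <stdio.h>

  int main(void) {
    volatile float x = -2.5f;             // volatile discourages constant folding
    printf("%f\n", __builtin_fabsf(x));   // prints 2.500000
    return 0;
  }
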
91 /// Emit the conversions required to turn the given value into an
92 /// integer of the given size.
 93 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
 94  QualType T, llvm::IntegerType *IntType) {
95  V = CGF.EmitToMemory(V, T);
96 
97  if (V->getType()->isPointerTy())
98  return CGF.Builder.CreatePtrToInt(V, IntType);
99 
100  assert(V->getType() == IntType);
101  return V;
102 }
103 
 104 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
 105  QualType T, llvm::Type *ResultType) {
106  V = CGF.EmitFromMemory(V, T);
107 
108  if (ResultType->isPointerTy())
109  return CGF.Builder.CreateIntToPtr(V, ResultType);
110 
111  assert(V->getType() == ResultType);
112  return V;
113 }
114 
115 /// Utility to insert an atomic instruction based on Intrinsic::ID
116 /// and the expression node.
 117 static Value *MakeBinaryAtomicValue(
 118  CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E,
119  AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
120  QualType T = E->getType();
121  assert(E->getArg(0)->getType()->isPointerType());
122  assert(CGF.getContext().hasSameUnqualifiedType(T,
123  E->getArg(0)->getType()->getPointeeType()));
124  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
125 
126  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
127  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
128 
129  llvm::IntegerType *IntType =
130  llvm::IntegerType::get(CGF.getLLVMContext(),
131  CGF.getContext().getTypeSize(T));
132  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
133 
134  llvm::Value *Args[2];
135  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
136  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
137  llvm::Type *ValueType = Args[1]->getType();
138  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
139 
140  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
141  Kind, Args[0], Args[1], Ordering);
142  return EmitFromInt(CGF, Result, T, ValueType);
143 }
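
As a hedged illustration, the GCC-style __sync fetch-and-op builtins are among the callers lowered through this helper into a single atomicrmw instruction; a minimal sketch, not part of this file:

  #include <stdio.h>

  int main(void) {
    int counter = 5;
    // Becomes an atomicrmw add; the builtin returns the value held before
    // the addition, matching the fetch-and-op semantics modeled above.
    int old = __sync_fetch_and_add(&counter, 3);
    printf("old=%d new=%d\n", old, counter);   // old=5 new=8
    return 0;
  }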
144 
 145 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
 146  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
147  Value *Address = CGF.EmitScalarExpr(E->getArg(1));
148 
149  // Convert the type of the pointer to a pointer to the stored type.
150  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
151  Value *BC = CGF.Builder.CreateBitCast(
152  Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
153  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
154  LV.setNontemporal(true);
155  CGF.EmitStoreOfScalar(Val, LV, false);
156  return nullptr;
157 }
158 
 159 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
 160  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
161 
162  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
163  LV.setNontemporal(true);
164  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
165 }
166 
 167 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
 168  llvm::AtomicRMWInst::BinOp Kind,
169  const CallExpr *E) {
170  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
171 }
172 
173 /// Utility to insert an atomic instruction based Intrinsic::ID and
174 /// the expression node, where the return value is the result of the
175 /// operation.
 176 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
 177  llvm::AtomicRMWInst::BinOp Kind,
178  const CallExpr *E,
179  Instruction::BinaryOps Op,
180  bool Invert = false) {
181  QualType T = E->getType();
182  assert(E->getArg(0)->getType()->isPointerType());
183  assert(CGF.getContext().hasSameUnqualifiedType(T,
184  E->getArg(0)->getType()->getPointeeType()));
185  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
186 
187  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
188  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
189 
190  llvm::IntegerType *IntType =
191  llvm::IntegerType::get(CGF.getLLVMContext(),
192  CGF.getContext().getTypeSize(T));
193  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
194 
195  llvm::Value *Args[2];
196  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
197  llvm::Type *ValueType = Args[1]->getType();
198  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
199  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
200 
201  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
202  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
203  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
204  if (Invert)
205  Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
206  llvm::ConstantInt::get(IntType, -1));
207  Result = EmitFromInt(CGF, Result, T, ValueType);
208  return RValue::get(Result);
209 }
210 
211 /// Utility to insert an atomic cmpxchg instruction.
212 ///
213 /// @param CGF The current codegen function.
214 /// @param E Builtin call expression to convert to cmpxchg.
215 /// arg0 - address to operate on
216 /// arg1 - value to compare with
217 /// arg2 - new value
218 /// @param ReturnBool Specifies whether to return success flag of
219 /// cmpxchg result or the old value.
220 ///
221 /// @returns result of cmpxchg, according to ReturnBool
222 ///
223 /// Note: In order to lower Microsoft's _InterlockedCompareExchange* intrinsics
224 /// invoke the function EmitAtomicCmpXchgForMSIntrin.
 225 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
 226  bool ReturnBool) {
227  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
228  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
229  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
230 
231  llvm::IntegerType *IntType = llvm::IntegerType::get(
232  CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
233  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
234 
235  Value *Args[3];
236  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
237  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
238  llvm::Type *ValueType = Args[1]->getType();
239  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
240  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
241 
242  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
243  Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
244  llvm::AtomicOrdering::SequentiallyConsistent);
245  if (ReturnBool)
246  // Extract boolean success flag and zext it to int.
247  return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
248  CGF.ConvertType(E->getType()));
249  else
250  // Extract old value and emit it using the same type as compare value.
251  return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
252  ValueType);
253 }
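
For illustration, the two ReturnBool modes correspond to the __sync compare-and-swap builtins; a minimal sketch, assuming these builtins are lowered through this helper as in the rest of this file:

  #include <stdio.h>

  int main(void) {
    int v = 10;
    // ReturnBool == true: the zero-extended success flag of the cmpxchg.
    int ok  = __sync_bool_compare_and_swap(&v, 10, 11);  // ok = 1, v = 11
    // ReturnBool == false: the old value read by the cmpxchg.
    int old = __sync_val_compare_and_swap(&v, 99, 42);   // old = 11, v unchanged
    printf("%d %d %d\n", ok, old, v);                    // 1 11 11
    return 0;
  }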
254 
255 /// This function should be invoked to emit atomic cmpxchg for Microsoft's
256 /// _InterlockedCompareExchange* intrinsics which have the following signature:
257 /// T _InterlockedCompareExchange(T volatile *Destination,
258 /// T Exchange,
259 /// T Comparand);
260 ///
261 /// Whereas the llvm 'cmpxchg' instruction has the following syntax:
262 /// cmpxchg *Destination, Comparand, Exchange.
263 /// So we need to swap Comparand and Exchange when invoking
264 /// CreateAtomicCmpXchg. That is the reason we could not use the above utility
265 /// function MakeAtomicCmpXchgValue since it expects the arguments to be
266 /// already swapped.
267 
268 static
 269 Value *EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E,
 270  AtomicOrdering SuccessOrdering = AtomicOrdering::SequentiallyConsistent) {
271  assert(E->getArg(0)->getType()->isPointerType());
272  assert(CGF.getContext().hasSameUnqualifiedType(
273  E->getType(), E->getArg(0)->getType()->getPointeeType()));
274  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
275  E->getArg(1)->getType()));
276  assert(CGF.getContext().hasSameUnqualifiedType(E->getType(),
277  E->getArg(2)->getType()));
278 
279  auto *Destination = CGF.EmitScalarExpr(E->getArg(0));
280  auto *Comparand = CGF.EmitScalarExpr(E->getArg(2));
281  auto *Exchange = CGF.EmitScalarExpr(E->getArg(1));
282 
283  // For Release ordering, the failure ordering should be Monotonic.
284  auto FailureOrdering = SuccessOrdering == AtomicOrdering::Release ?
285  AtomicOrdering::Monotonic :
286  SuccessOrdering;
287 
288  auto *Result = CGF.Builder.CreateAtomicCmpXchg(
289  Destination, Comparand, Exchange,
290  SuccessOrdering, FailureOrdering);
291  Result->setVolatile(true);
292  return CGF.Builder.CreateExtractValue(Result, 0);
293 }
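
For reference, the MSVC-level call being lowered looks like the sketch below (assuming a Windows target where <intrin.h> declares the intrinsic); note the (Destination, Exchange, Comparand) argument order that forces the operand swap above:

  #include <intrin.h>

  long Demo(volatile long *Dest) {
    // If *Dest equals 10, store 11; the previous value of *Dest is returned
    // either way, which is what CreateExtractValue(Result, 0) yields above.
    return _InterlockedCompareExchange(Dest, /*Exchange=*/11, /*Comparand=*/10);
  }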
294 
 295 static Value *EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E,
 296  AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
297  assert(E->getArg(0)->getType()->isPointerType());
298 
299  auto *IntTy = CGF.ConvertType(E->getType());
300  auto *Result = CGF.Builder.CreateAtomicRMW(
301  AtomicRMWInst::Add,
302  CGF.EmitScalarExpr(E->getArg(0)),
303  ConstantInt::get(IntTy, 1),
304  Ordering);
305  return CGF.Builder.CreateAdd(Result, ConstantInt::get(IntTy, 1));
306 }
307 
 308 static Value *EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E,
 309  AtomicOrdering Ordering = AtomicOrdering::SequentiallyConsistent) {
310  assert(E->getArg(0)->getType()->isPointerType());
311 
312  auto *IntTy = CGF.ConvertType(E->getType());
313  auto *Result = CGF.Builder.CreateAtomicRMW(
314  AtomicRMWInst::Sub,
315  CGF.EmitScalarExpr(E->getArg(0)),
316  ConstantInt::get(IntTy, 1),
317  Ordering);
318  return CGF.Builder.CreateSub(Result, ConstantInt::get(IntTy, 1));
319 }
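
A short usage note: unlike the fetch-and-op builtins, _InterlockedIncrement and _InterlockedDecrement return the updated value, which is why both helpers add or subtract 1 from the atomicrmw result. A minimal sketch, assuming a Windows target with <intrin.h>:

  #include <intrin.h>

  void Demo(volatile long *Counter) {
    long up   = _InterlockedIncrement(Counter);   // returns *Counter after +1
    long down = _InterlockedDecrement(Counter);   // returns *Counter after -1
    (void)up;
    (void)down;
  }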
320 
321 // Build a plain volatile load.
 322 static Value *EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E) {
 323  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
324  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
325  CharUnits LoadSize = CGF.getContext().getTypeSizeInChars(ElTy);
326  llvm::Type *ITy =
327  llvm::IntegerType::get(CGF.getLLVMContext(), LoadSize.getQuantity() * 8);
328  Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo());
329  llvm::LoadInst *Load = CGF.Builder.CreateAlignedLoad(Ptr, LoadSize);
330  Load->setVolatile(true);
331  return Load;
332 }
333 
334 // Build a plain volatile store.
 335 static Value *EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E) {
 336  Value *Ptr = CGF.EmitScalarExpr(E->getArg(0));
337  Value *Value = CGF.EmitScalarExpr(E->getArg(1));
338  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
339  CharUnits StoreSize = CGF.getContext().getTypeSizeInChars(ElTy);
340  llvm::Type *ITy =
341  llvm::IntegerType::get(CGF.getLLVMContext(), StoreSize.getQuantity() * 8);
342  Ptr = CGF.Builder.CreateBitCast(Ptr, ITy->getPointerTo());
343  llvm::StoreInst *Store =
344  CGF.Builder.CreateAlignedStore(Value, Ptr, StoreSize);
345  Store->setVolatile(true);
346  return Store;
347 }
348 
349 // Emit a simple mangled intrinsic that has 1 argument and a return type
350 // matching the argument type.
 351 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
 352  const CallExpr *E,
353  unsigned IntrinsicID) {
354  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
355 
356  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
357  return CGF.Builder.CreateCall(F, Src0);
358 }
359 
360 // Emit an intrinsic that has 2 operands of the same type as its result.
 361 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
 362  const CallExpr *E,
363  unsigned IntrinsicID) {
364  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
365  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
366 
367  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
368  return CGF.Builder.CreateCall(F, { Src0, Src1 });
369 }
370 
371 // Emit an intrinsic that has 3 operands of the same type as its result.
 372 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
 373  const CallExpr *E,
374  unsigned IntrinsicID) {
375  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
376  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
377  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
378 
379  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
380  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
381 }
382 
383 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
 384 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
 385  const CallExpr *E,
386  unsigned IntrinsicID) {
387  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
388  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
389 
390  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
391  return CGF.Builder.CreateCall(F, {Src0, Src1});
392 }
393 
394 // Emit an intrinsic that has overloaded integer result and fp operand.
 395 static Value *emitFPToIntRoundBuiltin(CodeGenFunction &CGF,
 396  const CallExpr *E,
397  unsigned IntrinsicID) {
398  llvm::Type *ResultType = CGF.ConvertType(E->getType());
399  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
400 
401  Function *F = CGF.CGM.getIntrinsic(IntrinsicID,
402  {ResultType, Src0->getType()});
403  return CGF.Builder.CreateCall(F, Src0);
404 }
405 
406 /// EmitFAbs - Emit a call to @llvm.fabs().
 407 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
 408  Function *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
409  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
410  Call->setDoesNotAccessMemory();
411  return Call;
412 }
413 
414 /// Emit the computation of the sign bit for a floating point value. Returns
415 /// the i1 sign bit value.
 416 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
 417  LLVMContext &C = CGF.CGM.getLLVMContext();
418 
419  llvm::Type *Ty = V->getType();
420  int Width = Ty->getPrimitiveSizeInBits();
421  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
422  V = CGF.Builder.CreateBitCast(V, IntTy);
423  if (Ty->isPPC_FP128Ty()) {
424  // We want the sign bit of the higher-order double. The bitcast we just
425  // did works as if the double-double was stored to memory and then
426  // read as an i128. The "store" will put the higher-order double in the
427  // lower address in both little- and big-Endian modes, but the "load"
428  // will treat those bits as a different part of the i128: the low bits in
429  // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
430  // we need to shift the high bits down to the low before truncating.
431  Width >>= 1;
432  if (CGF.getTarget().isBigEndian()) {
433  Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
434  V = CGF.Builder.CreateLShr(V, ShiftCst);
435  }
436  // We are truncating value in order to extract the higher-order
437  // double, which we will be using to extract the sign from.
438  IntTy = llvm::IntegerType::get(C, Width);
439  V = CGF.Builder.CreateTrunc(V, IntTy);
440  }
441  Value *Zero = llvm::Constant::getNullValue(IntTy);
442  return CGF.Builder.CreateICmpSLT(V, Zero);
443 }
444 
 445 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
 446  const CallExpr *E, llvm::Constant *calleeValue) {
447  CGCallee callee = CGCallee::forDirect(calleeValue, GlobalDecl(FD));
448  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
449 }
450 
451 /// Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
452 /// depending on IntrinsicID.
453 ///
454 /// \arg CGF The current codegen function.
455 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
456 /// \arg X The first argument to the llvm.*.with.overflow.*.
457 /// \arg Y The second argument to the llvm.*.with.overflow.*.
458 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
459 /// \returns The result (i.e. sum/product) returned by the intrinsic.
 460 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
 461  const llvm::Intrinsic::ID IntrinsicID,
 462  llvm::Value *X, llvm::Value *Y,
 463  llvm::Value *&Carry) {
464  // Make sure we have integers of the same width.
465  assert(X->getType() == Y->getType() &&
466  "Arguments must be the same type. (Did you forget to make sure both "
467  "arguments have the same integer width?)");
468 
469  Function *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
470  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
471  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
472  return CGF.Builder.CreateExtractValue(Tmp, 0);
473 }
474 
 475 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
 476  unsigned IntrinsicID,
477  int low, int high) {
478  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
479  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
480  Function *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
481  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
482  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
483  return Call;
484 }
485 
486 namespace {
487  struct WidthAndSignedness {
488  unsigned Width;
489  bool Signed;
490  };
491 }
492 
493 static WidthAndSignedness
 494 getIntegerWidthAndSignedness(const clang::ASTContext &context,
 495  const clang::QualType Type) {
496  assert(Type->isIntegerType() && "Given type is not an integer.");
497  unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
498  bool Signed = Type->isSignedIntegerType();
499  return {Width, Signed};
500 }
501 
502 // Given one or more integer types, this function produces an integer type that
503 // encompasses them: any value in one of the given types could be expressed in
504 // the encompassing type.
505 static struct WidthAndSignedness
506 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
507  assert(Types.size() > 0 && "Empty list of types.");
508 
509  // If any of the given types is signed, we must return a signed type.
510  bool Signed = false;
511  for (const auto &Type : Types) {
512  Signed |= Type.Signed;
513  }
514 
515  // The encompassing type must have a width greater than or equal to the width
516  // of the specified types. Additionally, if the encompassing type is signed,
517  // its width must be strictly greater than the width of any unsigned types
518  // given.
519  unsigned Width = 0;
520  for (const auto &Type : Types) {
521  unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
522  if (Width < MinWidth) {
523  Width = MinWidth;
524  }
525  }
526 
527  return {Width, Signed};
528 }
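
A worked example of the rule above: encompassing {unsigned 32-bit, signed 32-bit} yields a signed 33-bit type, since a signed result must be strictly wider than any unsigned input to cover its full range, while encompassing {unsigned 16-bit, unsigned 32-bit} is simply unsigned 32-bit. Illustrative values, not from this file:

  // {i32 unsigned, i32 signed}   -> {Width = 33, Signed = true}
  // {i16 unsigned, i32 unsigned} -> {Width = 32, Signed = false}
  // {i8  signed,   i64 signed}   -> {Width = 64, Signed = true}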
529 
530 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
531  llvm::Type *DestType = Int8PtrTy;
532  if (ArgValue->getType() != DestType)
533  ArgValue =
534  Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
535 
536  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
537  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
538 }
539 
540 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
541 /// __builtin_object_size(p, @p To) is correct
542 static bool areBOSTypesCompatible(int From, int To) {
543  // Note: Our __builtin_object_size implementation currently treats Type=0 and
544  // Type=2 identically. Encoding this implementation detail here may make
545  // improving __builtin_object_size difficult in the future, so it's omitted.
546  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
547 }
548 
549 static llvm::Value *
550 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
551  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
552 }
553 
554 llvm::Value *
555 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
556  llvm::IntegerType *ResType,
557  llvm::Value *EmittedE,
558  bool IsDynamic) {
559  uint64_t ObjectSize;
560  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
561  return emitBuiltinObjectSize(E, Type, ResType, EmittedE, IsDynamic);
562  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
563 }
564 
565 /// Returns a Value corresponding to the size of the given expression.
566 /// This Value may be either of the following:
567 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
568 /// it)
569 /// - A call to the @llvm.objectsize intrinsic
570 ///
571 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
572 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
573 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
574 llvm::Value *
575 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
576  llvm::IntegerType *ResType,
577  llvm::Value *EmittedE, bool IsDynamic) {
578  // We need to reference an argument if the pointer is a parameter with the
579  // pass_object_size attribute.
580  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
581  auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
582  auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
583  if (Param != nullptr && PS != nullptr &&
584  areBOSTypesCompatible(PS->getType(), Type)) {
585  auto Iter = SizeArguments.find(Param);
586  assert(Iter != SizeArguments.end());
587 
588  const ImplicitParamDecl *D = Iter->second;
589  auto DIter = LocalDeclMap.find(D);
590  assert(DIter != LocalDeclMap.end());
591 
592  return EmitLoadOfScalar(DIter->second, /*Volatile=*/false,
593  getContext().getSizeType(), E->getBeginLoc());
594  }
595  }
596 
597  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
598  // evaluate E for side-effects. In either case, we shouldn't lower to
599  // @llvm.objectsize.
600  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
601  return getDefaultBuiltinObjectSizeResult(Type, ResType);
602 
603  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
604  assert(Ptr->getType()->isPointerTy() &&
605  "Non-pointer passed to __builtin_object_size?");
606 
607  Function *F =
608  CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
609 
610  // LLVM only supports 0 and 2, make sure that we pass along that as a boolean.
611  Value *Min = Builder.getInt1((Type & 2) != 0);
612  // For GCC compatibility, __builtin_object_size treat NULL as unknown size.
613  Value *NullIsUnknown = Builder.getTrue();
614  Value *Dynamic = Builder.getInt1(IsDynamic);
615  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown, Dynamic});
616 }
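
For illustration, the four __builtin_object_size types behave as sketched below; the exact constants assume the front end can evaluate the sizes, as it typically can for a simple local object like this (a hedged example, not part of this file):

  #include <stdio.h>

  struct S { char buf[16]; char tail[8]; };

  int main(void) {
    struct S s;
    char *p = s.buf;
    printf("%zu\n", __builtin_object_size(p, 0)); // 24: rest of the whole object
    printf("%zu\n", __builtin_object_size(p, 1)); // 16: just the enclosing field
    printf("%zu\n", __builtin_object_size(p, 2)); // lower bound variant
    printf("%zu\n", __builtin_object_size(p, 3)); // type 3 never reaches llvm.objectsize
    return 0;
  }

When the size cannot be determined, the default result is (size_t)-1 for types 0 and 1 and 0 for types 2 and 3, matching getDefaultBuiltinObjectSizeResult above.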
617 
618 namespace {
619 /// A struct to generically describe a bit test intrinsic.
620 struct BitTest {
621  enum ActionKind : uint8_t { TestOnly, Complement, Reset, Set };
622  enum InterlockingKind : uint8_t {
623  Unlocked,
624  Sequential,
625  Acquire,
626  Release,
627  NoFence
628  };
629 
630  ActionKind Action;
631  InterlockingKind Interlocking;
632  bool Is64Bit;
633 
634  static BitTest decodeBitTestBuiltin(unsigned BuiltinID);
635 };
636 } // namespace
637 
638 BitTest BitTest::decodeBitTestBuiltin(unsigned BuiltinID) {
639  switch (BuiltinID) {
640  // Main portable variants.
641  case Builtin::BI_bittest:
642  return {TestOnly, Unlocked, false};
643  case Builtin::BI_bittestandcomplement:
644  return {Complement, Unlocked, false};
645  case Builtin::BI_bittestandreset:
646  return {Reset, Unlocked, false};
647  case Builtin::BI_bittestandset:
648  return {Set, Unlocked, false};
649  case Builtin::BI_interlockedbittestandreset:
650  return {Reset, Sequential, false};
651  case Builtin::BI_interlockedbittestandset:
652  return {Set, Sequential, false};
653 
654  // X86-specific 64-bit variants.
655  case Builtin::BI_bittest64:
656  return {TestOnly, Unlocked, true};
657  case Builtin::BI_bittestandcomplement64:
658  return {Complement, Unlocked, true};
659  case Builtin::BI_bittestandreset64:
660  return {Reset, Unlocked, true};
661  case Builtin::BI_bittestandset64:
662  return {Set, Unlocked, true};
663  case Builtin::BI_interlockedbittestandreset64:
664  return {Reset, Sequential, true};
665  case Builtin::BI_interlockedbittestandset64:
666  return {Set, Sequential, true};
667 
668  // ARM/AArch64-specific ordering variants.
669  case Builtin::BI_interlockedbittestandset_acq:
670  return {Set, Acquire, false};
671  case Builtin::BI_interlockedbittestandset_rel:
672  return {Set, Release, false};
673  case Builtin::BI_interlockedbittestandset_nf:
674  return {Set, NoFence, false};
675  case Builtin::BI_interlockedbittestandreset_acq:
676  return {Reset, Acquire, false};
677  case Builtin::BI_interlockedbittestandreset_rel:
678  return {Reset, Release, false};
679  case Builtin::BI_interlockedbittestandreset_nf:
680  return {Reset, NoFence, false};
681  }
682  llvm_unreachable("expected only bittest intrinsics");
683 }
684 
685 static char bitActionToX86BTCode(BitTest::ActionKind A) {
686  switch (A) {
687  case BitTest::TestOnly: return '\0';
688  case BitTest::Complement: return 'c';
689  case BitTest::Reset: return 'r';
690  case BitTest::Set: return 's';
691  }
692  llvm_unreachable("invalid action");
693 }
694 
 695 static llvm::Value *EmitX86BitTestIntrinsic(CodeGenFunction &CGF,
 696  BitTest BT,
697  const CallExpr *E, Value *BitBase,
698  Value *BitPos) {
699  char Action = bitActionToX86BTCode(BT.Action);
700  char SizeSuffix = BT.Is64Bit ? 'q' : 'l';
701 
702  // Build the assembly.
703  SmallString<64> Asm;
704  raw_svector_ostream AsmOS(Asm);
705  if (BT.Interlocking != BitTest::Unlocked)
706  AsmOS << "lock ";
707  AsmOS << "bt";
708  if (Action)
709  AsmOS << Action;
710  AsmOS << SizeSuffix << " $2, ($1)\n\tsetc ${0:b}";
711 
712  // Build the constraints. FIXME: We should support immediates when possible.
713  std::string Constraints = "=r,r,r,~{cc},~{flags},~{fpsr}";
714  llvm::IntegerType *IntType = llvm::IntegerType::get(
715  CGF.getLLVMContext(),
716  CGF.getContext().getTypeSize(E->getArg(1)->getType()));
717  llvm::Type *IntPtrType = IntType->getPointerTo();
718  llvm::FunctionType *FTy =
719  llvm::FunctionType::get(CGF.Int8Ty, {IntPtrType, IntType}, false);
720 
721  llvm::InlineAsm *IA =
722  llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
723  return CGF.Builder.CreateCall(IA, {BitBase, BitPos});
724 }
725 
726 static llvm::AtomicOrdering
727 getBitTestAtomicOrdering(BitTest::InterlockingKind I) {
728  switch (I) {
729  case BitTest::Unlocked: return llvm::AtomicOrdering::NotAtomic;
730  case BitTest::Sequential: return llvm::AtomicOrdering::SequentiallyConsistent;
731  case BitTest::Acquire: return llvm::AtomicOrdering::Acquire;
732  case BitTest::Release: return llvm::AtomicOrdering::Release;
733  case BitTest::NoFence: return llvm::AtomicOrdering::Monotonic;
734  }
735  llvm_unreachable("invalid interlocking");
736 }
737 
738 /// Emit a _bittest* intrinsic. These intrinsics take a pointer to an array of
739 /// bits and a bit position and read and optionally modify the bit at that
740 /// position. The position index can be arbitrarily large, i.e. it can be larger
741 /// than 31 or 63, so we need an indexed load in the general case.
 742 static llvm::Value *EmitBitTestIntrinsic(CodeGenFunction &CGF,
 743  unsigned BuiltinID,
744  const CallExpr *E) {
745  Value *BitBase = CGF.EmitScalarExpr(E->getArg(0));
746  Value *BitPos = CGF.EmitScalarExpr(E->getArg(1));
747 
748  BitTest BT = BitTest::decodeBitTestBuiltin(BuiltinID);
749 
750  // X86 has special BT, BTC, BTR, and BTS instructions that handle the array
751  // indexing operation internally. Use them if possible.
752  llvm::Triple::ArchType Arch = CGF.getTarget().getTriple().getArch();
753  if (Arch == llvm::Triple::x86 || Arch == llvm::Triple::x86_64)
754  return EmitX86BitTestIntrinsic(CGF, BT, E, BitBase, BitPos);
755 
756  // Otherwise, use generic code to load one byte and test the bit. Use all but
757  // the bottom three bits as the array index, and the bottom three bits to form
758  // a mask.
759  // Bit = BitBaseI8[BitPos >> 3] & (1 << (BitPos & 0x7)) != 0;
760  Value *ByteIndex = CGF.Builder.CreateAShr(
761  BitPos, llvm::ConstantInt::get(BitPos->getType(), 3), "bittest.byteidx");
762  Value *BitBaseI8 = CGF.Builder.CreatePointerCast(BitBase, CGF.Int8PtrTy);
763  Address ByteAddr(CGF.Builder.CreateInBoundsGEP(CGF.Int8Ty, BitBaseI8,
764  ByteIndex, "bittest.byteaddr"),
765  CharUnits::One());
766  Value *PosLow =
767  CGF.Builder.CreateAnd(CGF.Builder.CreateTrunc(BitPos, CGF.Int8Ty),
768  llvm::ConstantInt::get(CGF.Int8Ty, 0x7));
769 
770  // The updating instructions will need a mask.
771  Value *Mask = nullptr;
772  if (BT.Action != BitTest::TestOnly) {
773  Mask = CGF.Builder.CreateShl(llvm::ConstantInt::get(CGF.Int8Ty, 1), PosLow,
774  "bittest.mask");
775  }
776 
777  // Check the action and ordering of the interlocked intrinsics.
778  llvm::AtomicOrdering Ordering = getBitTestAtomicOrdering(BT.Interlocking);
779 
780  Value *OldByte = nullptr;
781  if (Ordering != llvm::AtomicOrdering::NotAtomic) {
782  // Emit a combined atomicrmw load/store operation for the interlocked
783  // intrinsics.
784  llvm::AtomicRMWInst::BinOp RMWOp = llvm::AtomicRMWInst::Or;
785  if (BT.Action == BitTest::Reset) {
786  Mask = CGF.Builder.CreateNot(Mask);
787  RMWOp = llvm::AtomicRMWInst::And;
788  }
789  OldByte = CGF.Builder.CreateAtomicRMW(RMWOp, ByteAddr.getPointer(), Mask,
790  Ordering);
791  } else {
792  // Emit a plain load for the non-interlocked intrinsics.
793  OldByte = CGF.Builder.CreateLoad(ByteAddr, "bittest.byte");
794  Value *NewByte = nullptr;
795  switch (BT.Action) {
796  case BitTest::TestOnly:
797  // Don't store anything.
798  break;
799  case BitTest::Complement:
800  NewByte = CGF.Builder.CreateXor(OldByte, Mask);
801  break;
802  case BitTest::Reset:
803  NewByte = CGF.Builder.CreateAnd(OldByte, CGF.Builder.CreateNot(Mask));
804  break;
805  case BitTest::Set:
806  NewByte = CGF.Builder.CreateOr(OldByte, Mask);
807  break;
808  }
809  if (NewByte)
810  CGF.Builder.CreateStore(NewByte, ByteAddr);
811  }
812 
813  // However we loaded the old byte, either by plain load or atomicrmw, shift
814  // the bit into the low position and mask it to 0 or 1.
815  Value *ShiftedByte = CGF.Builder.CreateLShr(OldByte, PosLow, "bittest.shr");
816  return CGF.Builder.CreateAnd(
817  ShiftedByte, llvm::ConstantInt::get(CGF.Int8Ty, 1), "bittest.res");
818 }
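
A usage sketch for the path described above (assuming a Windows environment where <intrin.h> declares the _bittest family); an index larger than 31 exercises the indexed byte load rather than a simple shift-and-mask of one word:

  #include <intrin.h>
  #include <stdio.h>

  int main(void) {
    long bits[4] = {0};                                  // 128 addressable bit positions
    unsigned char was_set = _bittestandset(bits, 70);    // sets bit 70, returns 0
    printf("%d %d\n", was_set, _bittest(bits, 70));      // prints: 0 1
    return 0;
  }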
819 
820 namespace {
821 enum class MSVCSetJmpKind {
822  _setjmpex,
823  _setjmp3,
824  _setjmp
825 };
826 }
827 
828 /// MSVC handles setjmp a bit differently on different platforms. On every
829 /// architecture except 32-bit x86, the frame address is passed. On x86, extra
830 /// parameters can be passed as variadic arguments, but we always pass none.
 831 static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind,
 832  const CallExpr *E) {
833  llvm::Value *Arg1 = nullptr;
834  llvm::Type *Arg1Ty = nullptr;
835  StringRef Name;
836  bool IsVarArg = false;
837  if (SJKind == MSVCSetJmpKind::_setjmp3) {
838  Name = "_setjmp3";
839  Arg1Ty = CGF.Int32Ty;
840  Arg1 = llvm::ConstantInt::get(CGF.IntTy, 0);
841  IsVarArg = true;
842  } else {
843  Name = SJKind == MSVCSetJmpKind::_setjmp ? "_setjmp" : "_setjmpex";
844  Arg1Ty = CGF.Int8PtrTy;
845  if (CGF.getTarget().getTriple().getArch() == llvm::Triple::aarch64) {
846  Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::sponentry));
847  } else
848  Arg1 = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(Intrinsic::frameaddress),
849  llvm::ConstantInt::get(CGF.Int32Ty, 0));
850  }
851 
852  // Mark the call site and declaration with ReturnsTwice.
853  llvm::Type *ArgTypes[2] = {CGF.Int8PtrTy, Arg1Ty};
854  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
855  CGF.getLLVMContext(), llvm::AttributeList::FunctionIndex,
856  llvm::Attribute::ReturnsTwice);
857  llvm::FunctionCallee SetJmpFn = CGF.CGM.CreateRuntimeFunction(
858  llvm::FunctionType::get(CGF.IntTy, ArgTypes, IsVarArg), Name,
859  ReturnsTwiceAttr, /*Local=*/true);
860 
861  llvm::Value *Buf = CGF.Builder.CreateBitOrPointerCast(
862  CGF.EmitScalarExpr(E->getArg(0)), CGF.Int8PtrTy);
863  llvm::Value *Args[] = {Buf, Arg1};
864  llvm::CallBase *CB = CGF.EmitRuntimeCallOrInvoke(SetJmpFn, Args);
865  CB->setAttributes(ReturnsTwiceAttr);
866  return RValue::get(CB);
867 }
868 
869 // Many of MSVC builtins are on x64, ARM and AArch64; to avoid repeating code,
870 // we handle them here.
 871 enum class MSVCIntrin {
 872  _BitScanForward,
873  _BitScanReverse,
874  _InterlockedAnd,
875  _InterlockedDecrement,
876  _InterlockedExchange,
877  _InterlockedExchangeAdd,
878  _InterlockedExchangeSub,
879  _InterlockedIncrement,
880  _InterlockedOr,
881  _InterlockedXor,
882  _InterlockedExchangeAdd_acq,
883  _InterlockedExchangeAdd_rel,
884  _InterlockedExchangeAdd_nf,
885  _InterlockedExchange_acq,
886  _InterlockedExchange_rel,
887  _InterlockedExchange_nf,
888  _InterlockedCompareExchange_acq,
889  _InterlockedCompareExchange_rel,
890  _InterlockedCompareExchange_nf,
891  _InterlockedOr_acq,
892  _InterlockedOr_rel,
893  _InterlockedOr_nf,
894  _InterlockedXor_acq,
895  _InterlockedXor_rel,
896  _InterlockedXor_nf,
897  _InterlockedAnd_acq,
898  _InterlockedAnd_rel,
899  _InterlockedAnd_nf,
900  _InterlockedIncrement_acq,
901  _InterlockedIncrement_rel,
902  _InterlockedIncrement_nf,
903  _InterlockedDecrement_acq,
904  _InterlockedDecrement_rel,
905  _InterlockedDecrement_nf,
906  __fastfail,
907 };
908 
 909 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
 910  const CallExpr *E) {
911  switch (BuiltinID) {
912  case MSVCIntrin::_BitScanForward:
913  case MSVCIntrin::_BitScanReverse: {
914  Value *ArgValue = EmitScalarExpr(E->getArg(1));
915 
916  llvm::Type *ArgType = ArgValue->getType();
917  llvm::Type *IndexType =
918  EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
919  llvm::Type *ResultType = ConvertType(E->getType());
920 
921  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
922  Value *ResZero = llvm::Constant::getNullValue(ResultType);
923  Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
924 
925  BasicBlock *Begin = Builder.GetInsertBlock();
926  BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
927  Builder.SetInsertPoint(End);
928  PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
929 
930  Builder.SetInsertPoint(Begin);
931  Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
932  BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
933  Builder.CreateCondBr(IsZero, End, NotZero);
934  Result->addIncoming(ResZero, Begin);
935 
936  Builder.SetInsertPoint(NotZero);
937  Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
938 
939  if (BuiltinID == MSVCIntrin::_BitScanForward) {
940  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
941  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
942  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
943  Builder.CreateStore(ZeroCount, IndexAddress, false);
944  } else {
945  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
946  Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
947 
948  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
949  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
950  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
951  Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
952  Builder.CreateStore(Index, IndexAddress, false);
953  }
954  Builder.CreateBr(End);
955  Result->addIncoming(ResOne, NotZero);
956 
957  Builder.SetInsertPoint(End);
958  return Result;
959  }
960  case MSVCIntrin::_InterlockedAnd:
961  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
962  case MSVCIntrin::_InterlockedExchange:
963  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
964  case MSVCIntrin::_InterlockedExchangeAdd:
965  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
966  case MSVCIntrin::_InterlockedExchangeSub:
967  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
968  case MSVCIntrin::_InterlockedOr:
969  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
970  case MSVCIntrin::_InterlockedXor:
971  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
972  case MSVCIntrin::_InterlockedExchangeAdd_acq:
973  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
974  AtomicOrdering::Acquire);
975  case MSVCIntrin::_InterlockedExchangeAdd_rel:
976  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
977  AtomicOrdering::Release);
978  case MSVCIntrin::_InterlockedExchangeAdd_nf:
979  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E,
980  AtomicOrdering::Monotonic);
981  case MSVCIntrin::_InterlockedExchange_acq:
982  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
983  AtomicOrdering::Acquire);
984  case MSVCIntrin::_InterlockedExchange_rel:
985  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
986  AtomicOrdering::Release);
987  case MSVCIntrin::_InterlockedExchange_nf:
988  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E,
989  AtomicOrdering::Monotonic);
990  case MSVCIntrin::_InterlockedCompareExchange_acq:
991  return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Acquire);
992  case MSVCIntrin::_InterlockedCompareExchange_rel:
993  return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Release);
994  case MSVCIntrin::_InterlockedCompareExchange_nf:
995  return EmitAtomicCmpXchgForMSIntrin(*this, E, AtomicOrdering::Monotonic);
996  case MSVCIntrin::_InterlockedOr_acq:
997  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
998  AtomicOrdering::Acquire);
999  case MSVCIntrin::_InterlockedOr_rel:
1000  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1001  AtomicOrdering::Release);
1002  case MSVCIntrin::_InterlockedOr_nf:
1003  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E,
1004  AtomicOrdering::Monotonic);
1005  case MSVCIntrin::_InterlockedXor_acq:
1006  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1007  AtomicOrdering::Acquire);
1008  case MSVCIntrin::_InterlockedXor_rel:
1009  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1010  AtomicOrdering::Release);
1011  case MSVCIntrin::_InterlockedXor_nf:
1012  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E,
1013  AtomicOrdering::Monotonic);
1014  case MSVCIntrin::_InterlockedAnd_acq:
1015  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1016  AtomicOrdering::Acquire);
1017  case MSVCIntrin::_InterlockedAnd_rel:
1018  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1019  AtomicOrdering::Release);
1020  case MSVCIntrin::_InterlockedAnd_nf:
1021  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E,
1022  AtomicOrdering::Monotonic);
1023  case MSVCIntrin::_InterlockedIncrement_acq:
1024  return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Acquire);
1025  case MSVCIntrin::_InterlockedIncrement_rel:
1026  return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Release);
1027  case MSVCIntrin::_InterlockedIncrement_nf:
1028  return EmitAtomicIncrementValue(*this, E, AtomicOrdering::Monotonic);
1029  case MSVCIntrin::_InterlockedDecrement_acq:
1030  return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Acquire);
1031  case MSVCIntrin::_InterlockedDecrement_rel:
1032  return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Release);
1033  case MSVCIntrin::_InterlockedDecrement_nf:
1034  return EmitAtomicDecrementValue(*this, E, AtomicOrdering::Monotonic);
1035 
1036  case MSVCIntrin::_InterlockedDecrement:
1037  return EmitAtomicDecrementValue(*this, E);
1038  case MSVCIntrin::_InterlockedIncrement:
1039  return EmitAtomicIncrementValue(*this, E);
1040 
1041  case MSVCIntrin::__fastfail: {
1042  // Request immediate process termination from the kernel. The instruction
1043  // sequences to do this are documented on MSDN:
1044  // https://msdn.microsoft.com/en-us/library/dn774154.aspx
1045  llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
1046  StringRef Asm, Constraints;
1047  switch (ISA) {
1048  default:
1049  ErrorUnsupported(E, "__fastfail call for this architecture");
1050  break;
1051  case llvm::Triple::x86:
1052  case llvm::Triple::x86_64:
1053  Asm = "int $$0x29";
1054  Constraints = "{cx}";
1055  break;
1056  case llvm::Triple::thumb:
1057  Asm = "udf #251";
1058  Constraints = "{r0}";
1059  break;
1060  case llvm::Triple::aarch64:
1061  Asm = "brk #0xF003";
1062  Constraints = "{w0}";
1063  }
1064  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
1065  llvm::InlineAsm *IA =
1066  llvm::InlineAsm::get(FTy, Asm, Constraints, /*hasSideEffects=*/true);
1067  llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
1068  getLLVMContext(), llvm::AttributeList::FunctionIndex,
1069  llvm::Attribute::NoReturn);
1070  llvm::CallInst *CI = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
1071  CI->setAttributes(NoReturnAttr);
1072  return CI;
1073  }
1074  }
1075  llvm_unreachable("Incorrect MSVC intrinsic!");
1076 }
1077 
1078 namespace {
1079 // ARC cleanup for __builtin_os_log_format
1080 struct CallObjCArcUse final : EHScopeStack::Cleanup {
1081  CallObjCArcUse(llvm::Value *object) : object(object) {}
1082  llvm::Value *object;
1083 
1084  void Emit(CodeGenFunction &CGF, Flags flags) override {
1085  CGF.EmitARCIntrinsicUse(object);
1086  }
1087 };
1088 }
1089 
 1090 Value *CodeGenFunction::EmitCheckedArgForBuiltin(const Expr *E,
 1091  BuiltinCheckKind Kind) {
 1092  assert((Kind == BCK_CLZPassedZero || Kind == BCK_CTZPassedZero)
1093  && "Unsupported builtin check kind");
1094 
1095  Value *ArgValue = EmitScalarExpr(E);
1096  if (!SanOpts.has(SanitizerKind::Builtin) || !getTarget().isCLZForZeroUndef())
1097  return ArgValue;
1098 
1099  SanitizerScope SanScope(this);
1100  Value *Cond = Builder.CreateICmpNE(
1101  ArgValue, llvm::Constant::getNullValue(ArgValue->getType()));
1102  EmitCheck(std::make_pair(Cond, SanitizerKind::Builtin),
1103  SanitizerHandler::InvalidBuiltin,
1104  {EmitCheckSourceLocation(E->getExprLoc()),
1105  llvm::ConstantInt::get(Builder.getInt8Ty(), Kind)},
1106  None);
1107  return ArgValue;
1108 }
1109 
1110 /// Get the argument type for arguments to os_log_helper.
 1111 static CanQualType getOSLogArgType(ASTContext &C, int Size) {
 1112  QualType UnsignedTy = C.getIntTypeForBitwidth(Size * 8, /*Signed=*/false);
1113  return C.getCanonicalType(UnsignedTy);
1114 }
1115 
 1116 llvm::Function *CodeGenFunction::generateBuiltinOSLogHelperFunction(
 1117  const analyze_os_log::OSLogBufferLayout &Layout,
1118  CharUnits BufferAlignment) {
1119  ASTContext &Ctx = getContext();
1120 
1121  llvm::SmallString<64> Name;
1122  {
1123  raw_svector_ostream OS(Name);
1124  OS << "__os_log_helper";
1125  OS << "_" << BufferAlignment.getQuantity();
1126  OS << "_" << int(Layout.getSummaryByte());
1127  OS << "_" << int(Layout.getNumArgsByte());
1128  for (const auto &Item : Layout.Items)
1129  OS << "_" << int(Item.getSizeByte()) << "_"
1130  << int(Item.getDescriptorByte());
1131  }
1132 
1133  if (llvm::Function *F = CGM.getModule().getFunction(Name))
1134  return F;
1135 
 1136  SmallVector<QualType, 4> ArgTys;
 1137  FunctionArgList Args;
1138  Args.push_back(ImplicitParamDecl::Create(
1139  Ctx, nullptr, SourceLocation(), &Ctx.Idents.get("buffer"), Ctx.VoidPtrTy,
 1140  ImplicitParamDecl::Other));
 1141  ArgTys.emplace_back(Ctx.VoidPtrTy);
1142 
1143  for (unsigned int I = 0, E = Layout.Items.size(); I < E; ++I) {
1144  char Size = Layout.Items[I].getSizeByte();
1145  if (!Size)
1146  continue;
1147 
1148  QualType ArgTy = getOSLogArgType(Ctx, Size);
1149  Args.push_back(ImplicitParamDecl::Create(
1150  Ctx, nullptr, SourceLocation(),
1151  &Ctx.Idents.get(std::string("arg") + llvm::to_string(I)), ArgTy,
 1152  ImplicitParamDecl::Other));
 1153  ArgTys.emplace_back(ArgTy);
1154  }
1155 
1156  QualType ReturnTy = Ctx.VoidTy;
1157  QualType FuncionTy = Ctx.getFunctionType(ReturnTy, ArgTys, {});
1158 
1159  // The helper function has linkonce_odr linkage to enable the linker to merge
1160  // identical functions. To ensure the merging always happens, 'noinline' is
1161  // attached to the function when compiling with -Oz.
1162  const CGFunctionInfo &FI =
1163  CGM.getTypes().arrangeBuiltinFunctionDeclaration(ReturnTy, Args);
1164  llvm::FunctionType *FuncTy = CGM.getTypes().GetFunctionType(FI);
1165  llvm::Function *Fn = llvm::Function::Create(
1166  FuncTy, llvm::GlobalValue::LinkOnceODRLinkage, Name, &CGM.getModule());
1167  Fn->setVisibility(llvm::GlobalValue::HiddenVisibility);
1168  CGM.SetLLVMFunctionAttributes(GlobalDecl(), FI, Fn);
1169  CGM.SetLLVMFunctionAttributesForDefinition(nullptr, Fn);
1170  Fn->setDoesNotThrow();
1171 
1172  // Attach 'noinline' at -Oz.
1173  if (CGM.getCodeGenOpts().OptimizeSize == 2)
1174  Fn->addFnAttr(llvm::Attribute::NoInline);
1175 
1176  auto NL = ApplyDebugLocation::CreateEmpty(*this);
1177  IdentifierInfo *II = &Ctx.Idents.get(Name);
 1178  FunctionDecl *FD = FunctionDecl::Create(
 1179  Ctx, Ctx.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II,
 1180  FuncionTy, nullptr, SC_PrivateExtern, false, false);
1181 
1182  StartFunction(FD, ReturnTy, Fn, FI, Args);
1183 
1184  // Create a scope with an artificial location for the body of this function.
1185  auto AL = ApplyDebugLocation::CreateArtificial(*this);
1186 
1187  CharUnits Offset;
1188  Address BufAddr(Builder.CreateLoad(GetAddrOfLocalVar(Args[0]), "buf"),
1189  BufferAlignment);
1190  Builder.CreateStore(Builder.getInt8(Layout.getSummaryByte()),
1191  Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
1192  Builder.CreateStore(Builder.getInt8(Layout.getNumArgsByte()),
1193  Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
1194 
1195  unsigned I = 1;
1196  for (const auto &Item : Layout.Items) {
1197  Builder.CreateStore(
1198  Builder.getInt8(Item.getDescriptorByte()),
1199  Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
1200  Builder.CreateStore(
1201  Builder.getInt8(Item.getSizeByte()),
1202  Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
1203 
1204  CharUnits Size = Item.size();
1205  if (!Size.getQuantity())
1206  continue;
1207 
1208  Address Arg = GetAddrOfLocalVar(Args[I]);
1209  Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset, "argData");
1210  Addr = Builder.CreateBitCast(Addr, Arg.getPointer()->getType(),
1211  "argDataCast");
1212  Builder.CreateStore(Builder.CreateLoad(Arg), Addr);
1213  Offset += Size;
1214  ++I;
1215  }
1216 
1217  FinishFunction();
1218 
1219  return Fn;
1220 }
1221 
 1222 RValue CodeGenFunction::emitBuiltinOSLogFormat(const CallExpr &E) {
 1223  assert(E.getNumArgs() >= 2 &&
1224  "__builtin_os_log_format takes at least 2 arguments");
1225  ASTContext &Ctx = getContext();
 1226  analyze_os_log::OSLogBufferLayout Layout;
 1227  analyze_os_log::computeOSLogBufferLayout(Ctx, &E, Layout);
 1228  Address BufAddr = EmitPointerWithAlignment(E.getArg(0));
1229  llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
1230 
1231  // Ignore argument 1, the format string. It is not currently used.
1232  CallArgList Args;
1233  Args.add(RValue::get(BufAddr.getPointer()), Ctx.VoidPtrTy);
1234 
1235  for (const auto &Item : Layout.Items) {
1236  int Size = Item.getSizeByte();
1237  if (!Size)
1238  continue;
1239 
1240  llvm::Value *ArgVal;
1241 
1242  if (Item.getKind() == analyze_os_log::OSLogBufferItem::MaskKind) {
1243  uint64_t Val = 0;
1244  for (unsigned I = 0, E = Item.getMaskType().size(); I < E; ++I)
1245  Val |= ((uint64_t)Item.getMaskType()[I]) << I * 8;
1246  ArgVal = llvm::Constant::getIntegerValue(Int64Ty, llvm::APInt(64, Val));
1247  } else if (const Expr *TheExpr = Item.getExpr()) {
1248  ArgVal = EmitScalarExpr(TheExpr, /*Ignore*/ false);
1249 
1250  // Check if this is a retainable type.
1251  if (TheExpr->getType()->isObjCRetainableType()) {
1252  assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
1253  "Only scalar can be a ObjC retainable type");
1254  // Check if the object is constant, if not, save it in
1255  // RetainableOperands.
1256  if (!isa<Constant>(ArgVal))
1257  RetainableOperands.push_back(ArgVal);
1258  }
1259  } else {
1260  ArgVal = Builder.getInt32(Item.getConstValue().getQuantity());
1261  }
1262 
1263  unsigned ArgValSize =
1264  CGM.getDataLayout().getTypeSizeInBits(ArgVal->getType());
1265  llvm::IntegerType *IntTy = llvm::Type::getIntNTy(getLLVMContext(),
1266  ArgValSize);
1267  ArgVal = Builder.CreateBitOrPointerCast(ArgVal, IntTy);
1268  CanQualType ArgTy = getOSLogArgType(Ctx, Size);
1269  // If ArgVal has type x86_fp80, zero-extend ArgVal.
1270  ArgVal = Builder.CreateZExtOrBitCast(ArgVal, ConvertType(ArgTy));
1271  Args.add(RValue::get(ArgVal), ArgTy);
1272  }
1273 
1274  const CGFunctionInfo &FI =
1275  CGM.getTypes().arrangeBuiltinFunctionCall(Ctx.VoidTy, Args);
1276  llvm::Function *F = CodeGenFunction(CGM).generateBuiltinOSLogHelperFunction(
1277  Layout, BufAddr.getAlignment());
1278  EmitCall(FI, CGCallee::forDirect(F), ReturnValueSlot(), Args);
1279 
1280  // Push a clang.arc.use cleanup for each object in RetainableOperands. The
1281  // cleanup will cause the use to appear after the final log call, keeping
1282  // the object valid while it’s held in the log buffer. Note that if there’s
1283  // a release cleanup on the object, it will already be active; since
1284  // cleanups are emitted in reverse order, the use will occur before the
1285  // object is released.
1286  if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
1287  CGM.getCodeGenOpts().OptimizationLevel != 0)
1288  for (llvm::Value *Object : RetainableOperands)
1289  pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), Object);
1290 
1291  return RValue::get(BufAddr.getPointer());
1292 }
1293 
1294 /// Determine if a binop is a checked mixed-sign multiply we can specialize.
1295 static bool isSpecialMixedSignMultiply(unsigned BuiltinID,
1296  WidthAndSignedness Op1Info,
1297  WidthAndSignedness Op2Info,
1298  WidthAndSignedness ResultInfo) {
1299  return BuiltinID == Builtin::BI__builtin_mul_overflow &&
1300  std::max(Op1Info.Width, Op2Info.Width) >= ResultInfo.Width &&
1301  Op1Info.Signed != Op2Info.Signed;
1302 }
1303 
1304 /// Emit a checked mixed-sign multiply. This is a cheaper specialization of
1305 /// the generic checked-binop irgen.
1306 static RValue
 1307 EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1,
 1308  WidthAndSignedness Op1Info, const clang::Expr *Op2,
1309  WidthAndSignedness Op2Info,
1310  const clang::Expr *ResultArg, QualType ResultQTy,
1311  WidthAndSignedness ResultInfo) {
1312  assert(isSpecialMixedSignMultiply(Builtin::BI__builtin_mul_overflow, Op1Info,
1313  Op2Info, ResultInfo) &&
1314  "Not a mixed-sign multipliction we can specialize");
1315 
1316  // Emit the signed and unsigned operands.
1317  const clang::Expr *SignedOp = Op1Info.Signed ? Op1 : Op2;
1318  const clang::Expr *UnsignedOp = Op1Info.Signed ? Op2 : Op1;
1319  llvm::Value *Signed = CGF.EmitScalarExpr(SignedOp);
1320  llvm::Value *Unsigned = CGF.EmitScalarExpr(UnsignedOp);
1321  unsigned SignedOpWidth = Op1Info.Signed ? Op1Info.Width : Op2Info.Width;
1322  unsigned UnsignedOpWidth = Op1Info.Signed ? Op2Info.Width : Op1Info.Width;
1323 
1324  // One of the operands may be smaller than the other. If so, [s|z]ext it.
1325  if (SignedOpWidth < UnsignedOpWidth)
1326  Signed = CGF.Builder.CreateSExt(Signed, Unsigned->getType(), "op.sext");
1327  if (UnsignedOpWidth < SignedOpWidth)
1328  Unsigned = CGF.Builder.CreateZExt(Unsigned, Signed->getType(), "op.zext");
1329 
1330  llvm::Type *OpTy = Signed->getType();
1331  llvm::Value *Zero = llvm::Constant::getNullValue(OpTy);
1332  Address ResultPtr = CGF.EmitPointerWithAlignment(ResultArg);
1333  llvm::Type *ResTy = ResultPtr.getElementType();
1334  unsigned OpWidth = std::max(Op1Info.Width, Op2Info.Width);
1335 
1336  // Take the absolute value of the signed operand.
1337  llvm::Value *IsNegative = CGF.Builder.CreateICmpSLT(Signed, Zero);
1338  llvm::Value *AbsOfNegative = CGF.Builder.CreateSub(Zero, Signed);
1339  llvm::Value *AbsSigned =
1340  CGF.Builder.CreateSelect(IsNegative, AbsOfNegative, Signed);
1341 
1342  // Perform a checked unsigned multiplication.
1343  llvm::Value *UnsignedOverflow;
1344  llvm::Value *UnsignedResult =
1345  EmitOverflowIntrinsic(CGF, llvm::Intrinsic::umul_with_overflow, AbsSigned,
1346  Unsigned, UnsignedOverflow);
1347 
1348  llvm::Value *Overflow, *Result;
1349  if (ResultInfo.Signed) {
1350  // Signed overflow occurs if the result is greater than INT_MAX or lesser
1351  // than INT_MIN, i.e when |Result| > (INT_MAX + IsNegative).
1352  auto IntMax =
1353  llvm::APInt::getSignedMaxValue(ResultInfo.Width).zextOrSelf(OpWidth);
1354  llvm::Value *MaxResult =
1355  CGF.Builder.CreateAdd(llvm::ConstantInt::get(OpTy, IntMax),
1356  CGF.Builder.CreateZExt(IsNegative, OpTy));
1357  llvm::Value *SignedOverflow =
1358  CGF.Builder.CreateICmpUGT(UnsignedResult, MaxResult);
1359  Overflow = CGF.Builder.CreateOr(UnsignedOverflow, SignedOverflow);
1360 
1361  // Prepare the signed result (possibly by negating it).
1362  llvm::Value *NegativeResult = CGF.Builder.CreateNeg(UnsignedResult);
1363  llvm::Value *SignedResult =
1364  CGF.Builder.CreateSelect(IsNegative, NegativeResult, UnsignedResult);
1365  Result = CGF.Builder.CreateTrunc(SignedResult, ResTy);
1366  } else {
1367  // Unsigned overflow occurs if the result is < 0 or greater than UINT_MAX.
1368  llvm::Value *Underflow = CGF.Builder.CreateAnd(
1369  IsNegative, CGF.Builder.CreateIsNotNull(UnsignedResult));
1370  Overflow = CGF.Builder.CreateOr(UnsignedOverflow, Underflow);
1371  if (ResultInfo.Width < OpWidth) {
1372  auto IntMax =
1373  llvm::APInt::getMaxValue(ResultInfo.Width).zext(OpWidth);
1374  llvm::Value *TruncOverflow = CGF.Builder.CreateICmpUGT(
1375  UnsignedResult, llvm::ConstantInt::get(OpTy, IntMax));
1376  Overflow = CGF.Builder.CreateOr(Overflow, TruncOverflow);
1377  }
1378 
1379  // Negate the product if it would be negative in infinite precision.
1380  Result = CGF.Builder.CreateSelect(
1381  IsNegative, CGF.Builder.CreateNeg(UnsignedResult), UnsignedResult);
1382 
1383  Result = CGF.Builder.CreateTrunc(Result, ResTy);
1384  }
1385  assert(Overflow && Result && "Missing overflow or result");
1386 
1387  bool isVolatile =
1388  ResultArg->getType()->getPointeeType().isVolatileQualified();
1389  CGF.Builder.CreateStore(CGF.EmitToMemory(Result, ResultQTy), ResultPtr,
1390  isVolatile);
1391  return RValue::get(Overflow);
1392 }
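
For illustration, a call that takes this specialized path (mixed signedness, result no wider than the widest operand); a minimal sketch, not part of this file:

  #include <stdio.h>

  int main(void) {
    int s = -7;
    unsigned u = 3u;
    int result;
    // |s| * u is computed with umul.with.overflow, then re-negated and
    // range-checked against the signed result type, as in the code above.
    if (__builtin_mul_overflow(s, u, &result))
      printf("overflowed\n");
    else
      printf("result=%d\n", result);   // result=-21
    return 0;
  }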
1393 
 1394 static llvm::Value *dumpRecord(CodeGenFunction &CGF, QualType RType,
 1395  Value *&RecordPtr, CharUnits Align,
1396  llvm::FunctionCallee Func, int Lvl) {
1397  const auto *RT = RType->getAs<RecordType>();
1398  ASTContext &Context = CGF.getContext();
1399  RecordDecl *RD = RT->getDecl()->getDefinition();
1400  std::string Pad = std::string(Lvl * 4, ' ');
1401 
1402  Value *GString =
1403  CGF.Builder.CreateGlobalStringPtr(RType.getAsString() + " {\n");
1404  Value *Res = CGF.Builder.CreateCall(Func, {GString});
1405 
1406  static llvm::DenseMap<QualType, const char *> Types;
1407  if (Types.empty()) {
1408  Types[Context.CharTy] = "%c";
1409  Types[Context.BoolTy] = "%d";
1410  Types[Context.SignedCharTy] = "%hhd";
1411  Types[Context.UnsignedCharTy] = "%hhu";
1412  Types[Context.IntTy] = "%d";
1413  Types[Context.UnsignedIntTy] = "%u";
1414  Types[Context.LongTy] = "%ld";
1415  Types[Context.UnsignedLongTy] = "%lu";
1416  Types[Context.LongLongTy] = "%lld";
1417  Types[Context.UnsignedLongLongTy] = "%llu";
1418  Types[Context.ShortTy] = "%hd";
1419  Types[Context.UnsignedShortTy] = "%hu";
1420  Types[Context.VoidPtrTy] = "%p";
1421  Types[Context.FloatTy] = "%f";
1422  Types[Context.DoubleTy] = "%f";
1423  Types[Context.LongDoubleTy] = "%Lf";
1424  Types[Context.getPointerType(Context.CharTy)] = "%s";
1425  Types[Context.getPointerType(Context.getConstType(Context.CharTy))] = "%s";
1426  }
1427 
1428  for (const auto *FD : RD->fields()) {
1429  Value *FieldPtr = RecordPtr;
1430  if (RD->isUnion())
1431  FieldPtr = CGF.Builder.CreatePointerCast(
1432  FieldPtr, CGF.ConvertType(Context.getPointerType(FD->getType())));
1433  else
1434  FieldPtr = CGF.Builder.CreateStructGEP(CGF.ConvertType(RType), FieldPtr,
1435  FD->getFieldIndex());
1436 
1437  GString = CGF.Builder.CreateGlobalStringPtr(
1438  llvm::Twine(Pad)
1439  .concat(FD->getType().getAsString())
1440  .concat(llvm::Twine(' '))
1441  .concat(FD->getNameAsString())
1442  .concat(" : ")
1443  .str());
1444  Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
1445  Res = CGF.Builder.CreateAdd(Res, TmpRes);
1446 
1447  QualType CanonicalType =
1448  FD->getType().getUnqualifiedType().getCanonicalType();
1449 
1450  // If the field itself has record type, dump it recursively.
1451  if (CanonicalType->isRecordType()) {
1452  Value *TmpRes =
1453  dumpRecord(CGF, CanonicalType, FieldPtr, Align, Func, Lvl + 1);
1454  Res = CGF.Builder.CreateAdd(TmpRes, Res);
1455  continue;
1456  }
1457 
1458  // Pick the printf format for the current field, defaulting to "%p".
1459  llvm::Twine Format = Types.find(CanonicalType) == Types.end()
1460  ? Types[Context.VoidPtrTy]
1461  : Types[CanonicalType];
1462 
1463  Address FieldAddress = Address(FieldPtr, Align);
1464  FieldPtr = CGF.Builder.CreateLoad(FieldAddress);
1465 
1466  // FIXME: Bit-fields are not handled here yet.
1467  GString = CGF.Builder.CreateGlobalStringPtr(
1468  Format.concat(llvm::Twine('\n')).str());
1469  TmpRes = CGF.Builder.CreateCall(Func, {GString, FieldPtr});
1470  Res = CGF.Builder.CreateAdd(Res, TmpRes);
1471  }
1472 
1473  GString = CGF.Builder.CreateGlobalStringPtr(Pad + "}\n");
1474  Value *TmpRes = CGF.Builder.CreateCall(Func, {GString});
1475  Res = CGF.Builder.CreateAdd(Res, TmpRes);
1476  return Res;
1477 }
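 // NOTE: dumpRecord implements __builtin_dump_struct (handled further below).
 // A rough usage sketch, assuming a printf-style callback:
 //
 //   struct Point { int x; float y; };
 //   struct Point p = {1, 2.0f};
 //   __builtin_dump_struct(&p, &printf);
 //   // prints something like:
 //   //   struct Point {
 //   //   int x : 1
 //   //   float y : 2.000000
 //   //   }
 //
 // Each field uses the format string from the Types map above, falling back
 // to "%p"; bit-fields are not yet handled (see the FIXME).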
1478 
1479 static bool
1480 TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty,
1481  llvm::SmallPtrSetImpl<const Decl *> &Seen) {
1482  if (const auto *Arr = Ctx.getAsArrayType(Ty))
1483  Ty = Ctx.getBaseElementType(Arr);
1484 
1485  const auto *Record = Ty->getAsCXXRecordDecl();
1486  if (!Record)
1487  return false;
1488 
1489  // We've already checked this type, or are in the process of checking it.
1490  if (!Seen.insert(Record).second)
1491  return false;
1492 
1493  assert(Record->hasDefinition() &&
1494  "Incomplete types should already be diagnosed");
1495 
1496  if (Record->isDynamicClass())
1497  return true;
1498 
1499  for (FieldDecl *F : Record->fields()) {
1500  if (TypeRequiresBuiltinLaunderImp(Ctx, F->getType(), Seen))
1501  return true;
1502  }
1503  return false;
1504 }
1505 
1506 /// Determine if the specified type requires laundering by checking if it is a
1507 /// dynamic class type or contains a subobject which is a dynamic class type.
1508 static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty) {
1509  if (!CGM.getCodeGenOpts().StrictVTablePointers)
1510  return false;
1511  llvm::SmallPtrSet<const Decl *, 16> Seen;
1512  return TypeRequiresBuiltinLaunderImp(CGM.getContext(), Ty, Seen);
1513 }
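 // NOTE: a rough sketch of the effect, assuming -fstrict-vtable-pointers:
 //
 //   struct Base { virtual ~Base(); int v; };
 //   Base *q = __builtin_launder(pb);  // pb points to a dynamic class:
 //                                     // lowered via @llvm.launder.invariant.group
 //   int  *r = __builtin_launder(pi);  // non-dynamic type: returned unchanged
 //
 // Without -fstrict-vtable-pointers, every __builtin_launder is a no-op here.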
1514 
1515 RValue CodeGenFunction::emitRotate(const CallExpr *E, bool IsRotateRight) {
1516  llvm::Value *Src = EmitScalarExpr(E->getArg(0));
1517  llvm::Value *ShiftAmt = EmitScalarExpr(E->getArg(1));
1518 
1519  // The builtin's shift arg may have a different type than the source arg and
1520  // result, but the LLVM intrinsic uses the same type for all values.
1521  llvm::Type *Ty = Src->getType();
1522  ShiftAmt = Builder.CreateIntCast(ShiftAmt, Ty, false);
1523 
1524  // Rotate is a special case of LLVM funnel shift - 1st 2 args are the same.
1525  unsigned IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
1526  Function *F = CGM.getIntrinsic(IID, Ty);
1527  return RValue::get(Builder.CreateCall(F, { Src, Src, ShiftAmt }));
1528 }
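 // NOTE: a sketch of the rotate-to-funnel-shift mapping above, e.g. for a
 // 32-bit rotate left:
 //
 //   __builtin_rotateleft32(x, n)
 //     ==>  %r = call i32 @llvm.fshl.i32(i32 %x, i32 %x, i32 %n)
 //
 // Rotate right uses @llvm.fshr with the same repeated first two operands.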
1529 
1530 RValue CodeGenFunction::EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID,
1531  const CallExpr *E,
1532  ReturnValueSlot ReturnValue) {
1533  const FunctionDecl *FD = GD.getDecl()->getAsFunction();
1534  // See if we can constant fold this builtin. If so, don't emit it at all.
1535  Expr::EvalResult Result;
1536  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
1537  !Result.hasSideEffects()) {
1538  if (Result.Val.isInt())
1539  return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
1540  Result.Val.getInt()));
1541  if (Result.Val.isFloat())
1542  return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
1543  Result.Val.getFloat()));
1544  }
1545 
1546  // There are LLVM math intrinsics/instructions corresponding to math library
1547  // functions, except that the LLVM op never sets errno while the math library
1548  // might. Also, math builtins have the same semantics as their math library
1549  // twins. Thus, we can transform math library and builtin calls to their
1550  // LLVM counterparts if the call is marked 'const' (known to never set errno).
1551  if (FD->hasAttr<ConstAttr>()) {
1552  switch (BuiltinID) {
1553  case Builtin::BIceil:
1554  case Builtin::BIceilf:
1555  case Builtin::BIceill:
1556  case Builtin::BI__builtin_ceil:
1557  case Builtin::BI__builtin_ceilf:
1558  case Builtin::BI__builtin_ceill:
1559  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
1560 
1561  case Builtin::BIcopysign:
1562  case Builtin::BIcopysignf:
1563  case Builtin::BIcopysignl:
1564  case Builtin::BI__builtin_copysign:
1565  case Builtin::BI__builtin_copysignf:
1566  case Builtin::BI__builtin_copysignl:
1567  case Builtin::BI__builtin_copysignf128:
1568  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
1569 
1570  case Builtin::BIcos:
1571  case Builtin::BIcosf:
1572  case Builtin::BIcosl:
1573  case Builtin::BI__builtin_cos:
1574  case Builtin::BI__builtin_cosf:
1575  case Builtin::BI__builtin_cosl:
1576  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::cos));
1577 
1578  case Builtin::BIexp:
1579  case Builtin::BIexpf:
1580  case Builtin::BIexpl:
1581  case Builtin::BI__builtin_exp:
1582  case Builtin::BI__builtin_expf:
1583  case Builtin::BI__builtin_expl:
1584  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp));
1585 
1586  case Builtin::BIexp2:
1587  case Builtin::BIexp2f:
1588  case Builtin::BIexp2l:
1589  case Builtin::BI__builtin_exp2:
1590  case Builtin::BI__builtin_exp2f:
1591  case Builtin::BI__builtin_exp2l:
1592  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::exp2));
1593 
1594  case Builtin::BIfabs:
1595  case Builtin::BIfabsf:
1596  case Builtin::BIfabsl:
1597  case Builtin::BI__builtin_fabs:
1598  case Builtin::BI__builtin_fabsf:
1599  case Builtin::BI__builtin_fabsl:
1600  case Builtin::BI__builtin_fabsf128:
1601  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
1602 
1603  case Builtin::BIfloor:
1604  case Builtin::BIfloorf:
1605  case Builtin::BIfloorl:
1606  case Builtin::BI__builtin_floor:
1607  case Builtin::BI__builtin_floorf:
1608  case Builtin::BI__builtin_floorl:
1609  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
1610 
1611  case Builtin::BIfma:
1612  case Builtin::BIfmaf:
1613  case Builtin::BIfmal:
1614  case Builtin::BI__builtin_fma:
1615  case Builtin::BI__builtin_fmaf:
1616  case Builtin::BI__builtin_fmal:
1617  return RValue::get(emitTernaryBuiltin(*this, E, Intrinsic::fma));
1618 
1619  case Builtin::BIfmax:
1620  case Builtin::BIfmaxf:
1621  case Builtin::BIfmaxl:
1622  case Builtin::BI__builtin_fmax:
1623  case Builtin::BI__builtin_fmaxf:
1624  case Builtin::BI__builtin_fmaxl:
1625  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
1626 
1627  case Builtin::BIfmin:
1628  case Builtin::BIfminf:
1629  case Builtin::BIfminl:
1630  case Builtin::BI__builtin_fmin:
1631  case Builtin::BI__builtin_fminf:
1632  case Builtin::BI__builtin_fminl:
1633  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
1634 
1635  // fmod() is a special-case. It maps to the frem instruction rather than an
1636  // LLVM intrinsic.
1637  case Builtin::BIfmod:
1638  case Builtin::BIfmodf:
1639  case Builtin::BIfmodl:
1640  case Builtin::BI__builtin_fmod:
1641  case Builtin::BI__builtin_fmodf:
1642  case Builtin::BI__builtin_fmodl: {
1643  Value *Arg1 = EmitScalarExpr(E->getArg(0));
1644  Value *Arg2 = EmitScalarExpr(E->getArg(1));
1645  return RValue::get(Builder.CreateFRem(Arg1, Arg2, "fmod"));
1646  }
1647 
1648  case Builtin::BIlog:
1649  case Builtin::BIlogf:
1650  case Builtin::BIlogl:
1651  case Builtin::BI__builtin_log:
1652  case Builtin::BI__builtin_logf:
1653  case Builtin::BI__builtin_logl:
1654  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log));
1655 
1656  case Builtin::BIlog10:
1657  case Builtin::BIlog10f:
1658  case Builtin::BIlog10l:
1659  case Builtin::BI__builtin_log10:
1660  case Builtin::BI__builtin_log10f:
1661  case Builtin::BI__builtin_log10l:
1662  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log10));
1663 
1664  case Builtin::BIlog2:
1665  case Builtin::BIlog2f:
1666  case Builtin::BIlog2l:
1667  case Builtin::BI__builtin_log2:
1668  case Builtin::BI__builtin_log2f:
1669  case Builtin::BI__builtin_log2l:
1670  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::log2));
1671 
1672  case Builtin::BInearbyint:
1673  case Builtin::BInearbyintf:
1674  case Builtin::BInearbyintl:
1675  case Builtin::BI__builtin_nearbyint:
1676  case Builtin::BI__builtin_nearbyintf:
1677  case Builtin::BI__builtin_nearbyintl:
1678  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
1679 
1680  case Builtin::BIpow:
1681  case Builtin::BIpowf:
1682  case Builtin::BIpowl:
1683  case Builtin::BI__builtin_pow:
1684  case Builtin::BI__builtin_powf:
1685  case Builtin::BI__builtin_powl:
1686  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::pow));
1687 
1688  case Builtin::BIrint:
1689  case Builtin::BIrintf:
1690  case Builtin::BIrintl:
1691  case Builtin::BI__builtin_rint:
1692  case Builtin::BI__builtin_rintf:
1693  case Builtin::BI__builtin_rintl:
1694  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
1695 
1696  case Builtin::BIround:
1697  case Builtin::BIroundf:
1698  case Builtin::BIroundl:
1699  case Builtin::BI__builtin_round:
1700  case Builtin::BI__builtin_roundf:
1701  case Builtin::BI__builtin_roundl:
1702  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
1703 
1704  case Builtin::BIsin:
1705  case Builtin::BIsinf:
1706  case Builtin::BIsinl:
1707  case Builtin::BI__builtin_sin:
1708  case Builtin::BI__builtin_sinf:
1709  case Builtin::BI__builtin_sinl:
1710  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sin));
1711 
1712  case Builtin::BIsqrt:
1713  case Builtin::BIsqrtf:
1714  case Builtin::BIsqrtl:
1715  case Builtin::BI__builtin_sqrt:
1716  case Builtin::BI__builtin_sqrtf:
1717  case Builtin::BI__builtin_sqrtl:
1718  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::sqrt));
1719 
1720  case Builtin::BItrunc:
1721  case Builtin::BItruncf:
1722  case Builtin::BItruncl:
1723  case Builtin::BI__builtin_trunc:
1724  case Builtin::BI__builtin_truncf:
1725  case Builtin::BI__builtin_truncl:
1726  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
1727 
1728  case Builtin::BIlround:
1729  case Builtin::BIlroundf:
1730  case Builtin::BIlroundl:
1731  case Builtin::BI__builtin_lround:
1732  case Builtin::BI__builtin_lroundf:
1733  case Builtin::BI__builtin_lroundl:
1734  return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::lround));
1735 
1736  case Builtin::BIllround:
1737  case Builtin::BIllroundf:
1738  case Builtin::BIllroundl:
1739  case Builtin::BI__builtin_llround:
1740  case Builtin::BI__builtin_llroundf:
1741  case Builtin::BI__builtin_llroundl:
1742  return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::llround));
1743 
1744  case Builtin::BIlrint:
1745  case Builtin::BIlrintf:
1746  case Builtin::BIlrintl:
1747  case Builtin::BI__builtin_lrint:
1748  case Builtin::BI__builtin_lrintf:
1749  case Builtin::BI__builtin_lrintl:
1750  return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::lrint));
1751 
1752  case Builtin::BIllrint:
1753  case Builtin::BIllrintf:
1754  case Builtin::BIllrintl:
1755  case Builtin::BI__builtin_llrint:
1756  case Builtin::BI__builtin_llrintf:
1757  case Builtin::BI__builtin_llrintl:
1758  return RValue::get(emitFPToIntRoundBuiltin(*this, E, Intrinsic::llrint));
1759 
1760  default:
1761  break;
1762  }
1763  }
1764 
1765  switch (BuiltinID) {
1766  default: break;
1767  case Builtin::BI__builtin___CFStringMakeConstantString:
1768  case Builtin::BI__builtin___NSStringMakeConstantString:
1769  return RValue::get(ConstantEmitter(*this).emitAbstract(E, E->getType()));
1770  case Builtin::BI__builtin_stdarg_start:
1771  case Builtin::BI__builtin_va_start:
1772  case Builtin::BI__va_start:
1773  case Builtin::BI__builtin_va_end:
1774  return RValue::get(
1775  EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
1776  ? EmitScalarExpr(E->getArg(0))
1777  : EmitVAListRef(E->getArg(0)).getPointer(),
1778  BuiltinID != Builtin::BI__builtin_va_end));
1779  case Builtin::BI__builtin_va_copy: {
1780  Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
1781  Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
1782 
1783  llvm::Type *Type = Int8PtrTy;
1784 
1785  DstPtr = Builder.CreateBitCast(DstPtr, Type);
1786  SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
1787  return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
1788  {DstPtr, SrcPtr}));
1789  }
1790  case Builtin::BI__builtin_abs:
1791  case Builtin::BI__builtin_labs:
1792  case Builtin::BI__builtin_llabs: {
1793  // X < 0 ? -X : X
1794  // The negation has 'nsw' because abs of INT_MIN is undefined.
1795  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1796  Value *NegOp = Builder.CreateNSWNeg(ArgValue, "neg");
1797  Constant *Zero = llvm::Constant::getNullValue(ArgValue->getType());
1798  Value *CmpResult = Builder.CreateICmpSLT(ArgValue, Zero, "abscond");
1799  Value *Result = Builder.CreateSelect(CmpResult, NegOp, ArgValue, "abs");
1800  return RValue::get(Result);
1801  }
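  // NOTE: for a 32-bit argument, the abs lowering above produces roughly:
  //
  //   %neg  = sub nsw i32 0, %x
  //   %cond = icmp slt i32 %x, 0
  //   %abs  = select i1 %cond, i32 %neg, i32 %x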
1802  case Builtin::BI__builtin_conj:
1803  case Builtin::BI__builtin_conjf:
1804  case Builtin::BI__builtin_conjl: {
1805  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1806  Value *Real = ComplexVal.first;
1807  Value *Imag = ComplexVal.second;
1808  Value *Zero =
1809  Imag->getType()->isFPOrFPVectorTy()
1810  ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
1811  : llvm::Constant::getNullValue(Imag->getType());
1812 
1813  Imag = Builder.CreateFSub(Zero, Imag, "sub");
1814  return RValue::getComplex(std::make_pair(Real, Imag));
1815  }
1816  case Builtin::BI__builtin_creal:
1817  case Builtin::BI__builtin_crealf:
1818  case Builtin::BI__builtin_creall:
1819  case Builtin::BIcreal:
1820  case Builtin::BIcrealf:
1821  case Builtin::BIcreall: {
1822  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1823  return RValue::get(ComplexVal.first);
1824  }
1825 
1826  case Builtin::BI__builtin_dump_struct: {
1827  llvm::Type *LLVMIntTy = getTypes().ConvertType(getContext().IntTy);
1828  llvm::FunctionType *LLVMFuncType = llvm::FunctionType::get(
1829  LLVMIntTy, {llvm::Type::getInt8PtrTy(getLLVMContext())}, true);
1830 
1831  Value *Func = EmitScalarExpr(E->getArg(1)->IgnoreImpCasts());
1832  CharUnits Arg0Align = EmitPointerWithAlignment(E->getArg(0)).getAlignment();
1833 
1834  const Expr *Arg0 = E->getArg(0)->IgnoreImpCasts();
1835  QualType Arg0Type = Arg0->getType()->getPointeeType();
1836 
1837  Value *RecordPtr = EmitScalarExpr(Arg0);
1838  Value *Res = dumpRecord(*this, Arg0Type, RecordPtr, Arg0Align,
1839  {LLVMFuncType, Func}, 0);
1840  return RValue::get(Res);
1841  }
1842 
1843  case Builtin::BI__builtin_preserve_access_index: {
1844  // Only enable the preserved access index region when debug info
1845  // is available, as debug info is needed to preserve the user-level
1846  // access pattern.
1847  if (!getDebugInfo()) {
1848  CGM.Error(E->getExprLoc(), "using builtin_preserve_access_index() without -g");
1849  return RValue::get(EmitScalarExpr(E->getArg(0)));
1850  }
1851 
1852  // Nested builtin_preserve_access_index() not supported
1853  if (IsInPreservedAIRegion) {
1854  CGM.Error(E->getExprLoc(), "nested builtin_preserve_access_index() not supported");
1855  return RValue::get(EmitScalarExpr(E->getArg(0)));
1856  }
1857 
1858  IsInPreservedAIRegion = true;
1859  Value *Res = EmitScalarExpr(E->getArg(0));
1860  IsInPreservedAIRegion = false;
1861  return RValue::get(Res);
1862  }
1863 
1864  case Builtin::BI__builtin_cimag:
1865  case Builtin::BI__builtin_cimagf:
1866  case Builtin::BI__builtin_cimagl:
1867  case Builtin::BIcimag:
1868  case Builtin::BIcimagf:
1869  case Builtin::BIcimagl: {
1870  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
1871  return RValue::get(ComplexVal.second);
1872  }
1873 
1874  case Builtin::BI__builtin_clrsb:
1875  case Builtin::BI__builtin_clrsbl:
1876  case Builtin::BI__builtin_clrsbll: {
1877  // clrsb(x) -> clz(x < 0 ? ~x : x) - 1
1878  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1879 
1880  llvm::Type *ArgType = ArgValue->getType();
1881  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1882 
1883  llvm::Type *ResultType = ConvertType(E->getType());
1884  Value *Zero = llvm::Constant::getNullValue(ArgType);
1885  Value *IsNeg = Builder.CreateICmpSLT(ArgValue, Zero, "isneg");
1886  Value *Inverse = Builder.CreateNot(ArgValue, "not");
1887  Value *Tmp = Builder.CreateSelect(IsNeg, Inverse, ArgValue);
1888  Value *Ctlz = Builder.CreateCall(F, {Tmp, Builder.getFalse()});
1889  Value *Result = Builder.CreateSub(Ctlz, llvm::ConstantInt::get(ArgType, 1));
1890  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1891  "cast");
1892  return RValue::get(Result);
1893  }
1894  case Builtin::BI__builtin_ctzs:
1895  case Builtin::BI__builtin_ctz:
1896  case Builtin::BI__builtin_ctzl:
1897  case Builtin::BI__builtin_ctzll: {
1898  Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CTZPassedZero);
1899 
1900  llvm::Type *ArgType = ArgValue->getType();
1901  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1902 
1903  llvm::Type *ResultType = ConvertType(E->getType());
1904  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1905  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1906  if (Result->getType() != ResultType)
1907  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1908  "cast");
1909  return RValue::get(Result);
1910  }
1911  case Builtin::BI__builtin_clzs:
1912  case Builtin::BI__builtin_clz:
1913  case Builtin::BI__builtin_clzl:
1914  case Builtin::BI__builtin_clzll: {
1915  Value *ArgValue = EmitCheckedArgForBuiltin(E->getArg(0), BCK_CLZPassedZero);
1916 
1917  llvm::Type *ArgType = ArgValue->getType();
1918  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1919 
1920  llvm::Type *ResultType = ConvertType(E->getType());
1921  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
1922  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
1923  if (Result->getType() != ResultType)
1924  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1925  "cast");
1926  return RValue::get(Result);
1927  }
1928  case Builtin::BI__builtin_ffs:
1929  case Builtin::BI__builtin_ffsl:
1930  case Builtin::BI__builtin_ffsll: {
1931  // ffs(x) -> x ? cttz(x) + 1 : 0
1932  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1933 
1934  llvm::Type *ArgType = ArgValue->getType();
1935  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
1936 
1937  llvm::Type *ResultType = ConvertType(E->getType());
1938  Value *Tmp =
1939  Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
1940  llvm::ConstantInt::get(ArgType, 1));
1941  Value *Zero = llvm::Constant::getNullValue(ArgType);
1942  Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
1943  Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
1944  if (Result->getType() != ResultType)
1945  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1946  "cast");
1947  return RValue::get(Result);
1948  }
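  // NOTE: for a 32-bit argument, the ffs lowering above produces roughly:
  //
  //   %ct  = call i32 @llvm.cttz.i32(i32 %x, i1 true)
  //   %inc = add i32 %ct, 1
  //   %z   = icmp eq i32 %x, 0
  //   %ffs = select i1 %z, i32 0, i32 %inc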
1949  case Builtin::BI__builtin_parity:
1950  case Builtin::BI__builtin_parityl:
1951  case Builtin::BI__builtin_parityll: {
1952  // parity(x) -> ctpop(x) & 1
1953  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1954 
1955  llvm::Type *ArgType = ArgValue->getType();
1956  Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1957 
1958  llvm::Type *ResultType = ConvertType(E->getType());
1959  Value *Tmp = Builder.CreateCall(F, ArgValue);
1960  Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
1961  if (Result->getType() != ResultType)
1962  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1963  "cast");
1964  return RValue::get(Result);
1965  }
1966  case Builtin::BI__lzcnt16:
1967  case Builtin::BI__lzcnt:
1968  case Builtin::BI__lzcnt64: {
1969  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1970 
1971  llvm::Type *ArgType = ArgValue->getType();
1972  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
1973 
1974  llvm::Type *ResultType = ConvertType(E->getType());
1975  Value *Result = Builder.CreateCall(F, {ArgValue, Builder.getFalse()});
1976  if (Result->getType() != ResultType)
1977  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1978  "cast");
1979  return RValue::get(Result);
1980  }
1981  case Builtin::BI__popcnt16:
1982  case Builtin::BI__popcnt:
1983  case Builtin::BI__popcnt64:
1984  case Builtin::BI__builtin_popcount:
1985  case Builtin::BI__builtin_popcountl:
1986  case Builtin::BI__builtin_popcountll: {
1987  Value *ArgValue = EmitScalarExpr(E->getArg(0));
1988 
1989  llvm::Type *ArgType = ArgValue->getType();
1990  Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
1991 
1992  llvm::Type *ResultType = ConvertType(E->getType());
1993  Value *Result = Builder.CreateCall(F, ArgValue);
1994  if (Result->getType() != ResultType)
1995  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
1996  "cast");
1997  return RValue::get(Result);
1998  }
1999  case Builtin::BI__builtin_unpredictable: {
2000  // Always return the argument of __builtin_unpredictable. LLVM does not
2001  // handle this builtin. Metadata for this builtin should be added directly
2002  // to instructions such as branches or switches that use it.
2003  return RValue::get(EmitScalarExpr(E->getArg(0)));
2004  }
2005  case Builtin::BI__builtin_expect: {
2006  Value *ArgValue = EmitScalarExpr(E->getArg(0));
2007  llvm::Type *ArgType = ArgValue->getType();
2008 
2009  Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
2010  // Don't generate llvm.expect on -O0 as the backend won't use it for
2011  // anything.
2012  // Note, we still IRGen ExpectedValue because it could have side-effects.
2013  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
2014  return RValue::get(ArgValue);
2015 
2016  Function *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
2017  Value *Result =
2018  Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
2019  return RValue::get(Result);
2020  }
2021  case Builtin::BI__builtin_assume_aligned: {
2022  const Expr *Ptr = E->getArg(0);
2023  Value *PtrValue = EmitScalarExpr(Ptr);
2024  Value *OffsetValue =
2025  (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
2026 
2027  Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
2028  ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
2029  unsigned Alignment = (unsigned)AlignmentCI->getZExtValue();
2030 
2031  EmitAlignmentAssumption(PtrValue, Ptr,
2032  /*The expr loc is sufficient.*/ SourceLocation(),
2033  Alignment, OffsetValue);
2034  return RValue::get(PtrValue);
2035  }
2036  case Builtin::BI__assume:
2037  case Builtin::BI__builtin_assume: {
2038  if (E->getArg(0)->HasSideEffects(getContext()))
2039  return RValue::get(nullptr);
2040 
2041  Value *ArgValue = EmitScalarExpr(E->getArg(0));
2042  Function *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
2043  return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
2044  }
2045  case Builtin::BI__builtin_bswap16:
2046  case Builtin::BI__builtin_bswap32:
2047  case Builtin::BI__builtin_bswap64: {
2048  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
2049  }
2050  case Builtin::BI__builtin_bitreverse8:
2051  case Builtin::BI__builtin_bitreverse16:
2052  case Builtin::BI__builtin_bitreverse32:
2053  case Builtin::BI__builtin_bitreverse64: {
2054  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
2055  }
2056  case Builtin::BI__builtin_rotateleft8:
2057  case Builtin::BI__builtin_rotateleft16:
2058  case Builtin::BI__builtin_rotateleft32:
2059  case Builtin::BI__builtin_rotateleft64:
2060  case Builtin::BI_rotl8: // Microsoft variants of rotate left
2061  case Builtin::BI_rotl16:
2062  case Builtin::BI_rotl:
2063  case Builtin::BI_lrotl:
2064  case Builtin::BI_rotl64:
2065  return emitRotate(E, false);
2066 
2067  case Builtin::BI__builtin_rotateright8:
2068  case Builtin::BI__builtin_rotateright16:
2069  case Builtin::BI__builtin_rotateright32:
2070  case Builtin::BI__builtin_rotateright64:
2071  case Builtin::BI_rotr8: // Microsoft variants of rotate right
2072  case Builtin::BI_rotr16:
2073  case Builtin::BI_rotr:
2074  case Builtin::BI_lrotr:
2075  case Builtin::BI_rotr64:
2076  return emitRotate(E, true);
2077 
2078  case Builtin::BI__builtin_constant_p: {
2079  llvm::Type *ResultType = ConvertType(E->getType());
2080  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
2081  // At -O0, we don't perform inlining, so we don't need to delay the
2082  // processing.
2083  return RValue::get(ConstantInt::get(ResultType, 0));
2084 
2085  const Expr *Arg = E->getArg(0);
2086  QualType ArgType = Arg->getType();
2087  // FIXME: The allowance for Obj-C pointers and block pointers is historical
2088  // and likely a mistake.
2089  if (!ArgType->isIntegralOrEnumerationType() && !ArgType->isFloatingType() &&
2090  !ArgType->isObjCObjectPointerType() && !ArgType->isBlockPointerType())
2091  // Per the GCC documentation, only numeric constants are recognized after
2092  // inlining.
2093  return RValue::get(ConstantInt::get(ResultType, 0));
2094 
2095  if (Arg->HasSideEffects(getContext()))
2096  // The argument is unevaluated, so be conservative if it might have
2097  // side-effects.
2098  return RValue::get(ConstantInt::get(ResultType, 0));
2099 
2100  Value *ArgValue = EmitScalarExpr(Arg);
2101  if (ArgType->isObjCObjectPointerType()) {
2102  // Convert Objective-C objects to id because we cannot distinguish between
2103  // LLVM types for Obj-C classes as they are opaque.
2104  ArgType = CGM.getContext().getObjCIdType();
2105  ArgValue = Builder.CreateBitCast(ArgValue, ConvertType(ArgType));
2106  }
2107  Function *F =
2108  CGM.getIntrinsic(Intrinsic::is_constant, ConvertType(ArgType));
2109  Value *Result = Builder.CreateCall(F, ArgValue);
2110  if (Result->getType() != ResultType)
2111  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/false);
2112  return RValue::get(Result);
2113  }
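  // NOTE: a sketch of the IR for __builtin_constant_p(n) with an int argument:
  //
  //   %p = call i1 @llvm.is_constant.i32(i32 %n)
  //   %r = zext i1 %p to i32
  //
  // The optimizer folds @llvm.is_constant after inlining and constant
  // propagation; at -O0 the code above folds the builtin to 0 directly.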
2114  case Builtin::BI__builtin_dynamic_object_size:
2115  case Builtin::BI__builtin_object_size: {
2116  unsigned Type =
2117  E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
2118  auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
2119 
2120  // We pass this builtin onto the optimizer so that it can figure out the
2121  // object size in more complex cases.
2122  bool IsDynamic = BuiltinID == Builtin::BI__builtin_dynamic_object_size;
2123  return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
2124  /*EmittedE=*/nullptr, IsDynamic));
2125  }
2126  case Builtin::BI__builtin_prefetch: {
2127  Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
2128  // FIXME: Technically these constants should be of type 'int', yes?
2129  RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
2130  llvm::ConstantInt::get(Int32Ty, 0);
2131  Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
2132  llvm::ConstantInt::get(Int32Ty, 3);
2133  Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
2134  Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
2135  return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
2136  }
2137  case Builtin::BI__builtin_readcyclecounter: {
2138  Function *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
2139  return RValue::get(Builder.CreateCall(F));
2140  }
2141  case Builtin::BI__builtin___clear_cache: {
2142  Value *Begin = EmitScalarExpr(E->getArg(0));
2143  Value *End = EmitScalarExpr(E->getArg(1));
2144  Function *F = CGM.getIntrinsic(Intrinsic::clear_cache);
2145  return RValue::get(Builder.CreateCall(F, {Begin, End}));
2146  }
2147  case Builtin::BI__builtin_trap:
2148  return RValue::get(EmitTrapCall(Intrinsic::trap));
2149  case Builtin::BI__debugbreak:
2150  return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
2151  case Builtin::BI__builtin_unreachable: {
2152  EmitUnreachable(E->getExprLoc());
2153 
2154  // We do need to preserve an insertion point.
2155  EmitBlock(createBasicBlock("unreachable.cont"));
2156 
2157  return RValue::get(nullptr);
2158  }
2159 
2160  case Builtin::BI__builtin_powi:
2161  case Builtin::BI__builtin_powif:
2162  case Builtin::BI__builtin_powil: {
2163  Value *Base = EmitScalarExpr(E->getArg(0));
2164  Value *Exponent = EmitScalarExpr(E->getArg(1));
2165  llvm::Type *ArgType = Base->getType();
2166  Function *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
2167  return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
2168  }
2169 
2170  case Builtin::BI__builtin_isgreater:
2171  case Builtin::BI__builtin_isgreaterequal:
2172  case Builtin::BI__builtin_isless:
2173  case Builtin::BI__builtin_islessequal:
2174  case Builtin::BI__builtin_islessgreater:
2175  case Builtin::BI__builtin_isunordered: {
2176  // Ordered comparisons: we know the arguments to these are matching scalar
2177  // floating point values.
2178  Value *LHS = EmitScalarExpr(E->getArg(0));
2179  Value *RHS = EmitScalarExpr(E->getArg(1));
2180 
2181  switch (BuiltinID) {
2182  default: llvm_unreachable("Unknown ordered comparison");
2183  case Builtin::BI__builtin_isgreater:
2184  LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
2185  break;
2186  case Builtin::BI__builtin_isgreaterequal:
2187  LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
2188  break;
2189  case Builtin::BI__builtin_isless:
2190  LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
2191  break;
2192  case Builtin::BI__builtin_islessequal:
2193  LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
2194  break;
2195  case Builtin::BI__builtin_islessgreater:
2196  LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
2197  break;
2198  case Builtin::BI__builtin_isunordered:
2199  LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
2200  break;
2201  }
2202  // ZExt bool to int type.
2203  return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
2204  }
2205  case Builtin::BI__builtin_isnan: {
2206  Value *V = EmitScalarExpr(E->getArg(0));
2207  V = Builder.CreateFCmpUNO(V, V, "cmp");
2208  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
2209  }
2210 
2211  case Builtin::BIfinite:
2212  case Builtin::BI__finite:
2213  case Builtin::BIfinitef:
2214  case Builtin::BI__finitef:
2215  case Builtin::BIfinitel:
2216  case Builtin::BI__finitel:
2217  case Builtin::BI__builtin_isinf:
2218  case Builtin::BI__builtin_isfinite: {
2219  // isinf(x) --> fabs(x) == infinity
2220  // isfinite(x) --> fabs(x) != infinity
2221  // x != NaN via the ordered compare in either case.
2222  Value *V = EmitScalarExpr(E->getArg(0));
2223  Value *Fabs = EmitFAbs(*this, V);
2224  Constant *Infinity = ConstantFP::getInfinity(V->getType());
2225  CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
2226  ? CmpInst::FCMP_OEQ
2227  : CmpInst::FCMP_ONE;
2228  Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
2229  return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
2230  }
2231 
2232  case Builtin::BI__builtin_isinf_sign: {
2233  // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
2234  Value *Arg = EmitScalarExpr(E->getArg(0));
2235  Value *AbsArg = EmitFAbs(*this, Arg);
2236  Value *IsInf = Builder.CreateFCmpOEQ(
2237  AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
2238  Value *IsNeg = EmitSignBit(*this, Arg);
2239 
2240  llvm::Type *IntTy = ConvertType(E->getType());
2241  Value *Zero = Constant::getNullValue(IntTy);
2242  Value *One = ConstantInt::get(IntTy, 1);
2243  Value *NegativeOne = ConstantInt::get(IntTy, -1);
2244  Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
2245  Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
2246  return RValue::get(Result);
2247  }
2248 
2249  case Builtin::BI__builtin_isnormal: {
2250  // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
2251  Value *V = EmitScalarExpr(E->getArg(0));
2252  Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
2253 
2254  Value *Abs = EmitFAbs(*this, V);
2255  Value *IsLessThanInf =
2256  Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
2257  APFloat Smallest = APFloat::getSmallestNormalized(
2258  getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
2259  Value *IsNormal =
2260  Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
2261  "isnormal");
2262  V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
2263  V = Builder.CreateAnd(V, IsNormal, "and");
2264  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
2265  }
2266 
2267  case Builtin::BI__builtin_flt_rounds: {
2268  Function *F = CGM.getIntrinsic(Intrinsic::flt_rounds);
2269 
2270  llvm::Type *ResultType = ConvertType(E->getType());
2271  Value *Result = Builder.CreateCall(F);
2272  if (Result->getType() != ResultType)
2273  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
2274  "cast");
2275  return RValue::get(Result);
2276  }
2277 
2278  case Builtin::BI__builtin_fpclassify: {
2279  Value *V = EmitScalarExpr(E->getArg(5));
2280  llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
2281 
2282  // Create Result
2283  BasicBlock *Begin = Builder.GetInsertBlock();
2284  BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
2285  Builder.SetInsertPoint(End);
2286  PHINode *Result =
2287  Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
2288  "fpclassify_result");
2289 
2290  // if (V==0) return FP_ZERO
2291  Builder.SetInsertPoint(Begin);
2292  Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
2293  "iszero");
2294  Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
2295  BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
2296  Builder.CreateCondBr(IsZero, End, NotZero);
2297  Result->addIncoming(ZeroLiteral, Begin);
2298 
2299  // if (V != V) return FP_NAN
2300  Builder.SetInsertPoint(NotZero);
2301  Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
2302  Value *NanLiteral = EmitScalarExpr(E->getArg(0));
2303  BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
2304  Builder.CreateCondBr(IsNan, End, NotNan);
2305  Result->addIncoming(NanLiteral, NotZero);
2306 
2307  // if (fabs(V) == infinity) return FP_INFINITY
2308  Builder.SetInsertPoint(NotNan);
2309  Value *VAbs = EmitFAbs(*this, V);
2310  Value *IsInf =
2311  Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
2312  "isinf");
2313  Value *InfLiteral = EmitScalarExpr(E->getArg(1));
2314  BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
2315  Builder.CreateCondBr(IsInf, End, NotInf);
2316  Result->addIncoming(InfLiteral, NotNan);
2317 
2318  // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
2319  Builder.SetInsertPoint(NotInf);
2320  APFloat Smallest = APFloat::getSmallestNormalized(
2321  getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
2322  Value *IsNormal =
2323  Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
2324  "isnormal");
2325  Value *NormalResult =
2326  Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
2327  EmitScalarExpr(E->getArg(3)));
2328  Builder.CreateBr(End);
2329  Result->addIncoming(NormalResult, NotInf);
2330 
2331  // return Result
2332  Builder.SetInsertPoint(End);
2333  return RValue::get(Result);
2334  }
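  // NOTE: the control flow built above is roughly equivalent to:
  //
  //   __builtin_fpclassify(nan, inf, normal, subnormal, zero, x) ==
  //       (x == 0)                ? zero
  //     : (x != x)                ? nan
  //     : (fabs(x) == INFINITY)   ? inf
  //     : (fabs(x) >= MIN_NORMAL) ? normal
  //                               : subnormal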
2335 
2336  case Builtin::BIalloca:
2337  case Builtin::BI_alloca:
2338  case Builtin::BI__builtin_alloca: {
2339  Value *Size = EmitScalarExpr(E->getArg(0));
2340  const TargetInfo &TI = getContext().getTargetInfo();
2341  // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
2342  unsigned SuitableAlignmentInBytes =
2343  CGM.getContext()
2344  .toCharUnitsFromBits(TI.getSuitableAlign())
2345  .getQuantity();
2346  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
2347  AI->setAlignment(SuitableAlignmentInBytes);
2348  initializeAlloca(*this, AI, Size, SuitableAlignmentInBytes);
2349  return RValue::get(AI);
2350  }
2351 
2352  case Builtin::BI__builtin_alloca_with_align: {
2353  Value *Size = EmitScalarExpr(E->getArg(0));
2354  Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
2355  auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
2356  unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
2357  unsigned AlignmentInBytes =
2358  CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
2359  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
2360  AI->setAlignment(AlignmentInBytes);
2361  initializeAlloca(*this, AI, Size, AlignmentInBytes);
2362  return RValue::get(AI);
2363  }
2364 
2365  case Builtin::BIbzero:
2366  case Builtin::BI__builtin_bzero: {
2367  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2368  Value *SizeVal = EmitScalarExpr(E->getArg(1));
2369  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2370  E->getArg(0)->getExprLoc(), FD, 0);
2371  Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
2372  return RValue::get(nullptr);
2373  }
2374  case Builtin::BImemcpy:
2375  case Builtin::BI__builtin_memcpy: {
2376  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2377  Address Src = EmitPointerWithAlignment(E->getArg(1));
2378  Value *SizeVal = EmitScalarExpr(E->getArg(2));
2379  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2380  E->getArg(0)->getExprLoc(), FD, 0);
2381  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
2382  E->getArg(1)->getExprLoc(), FD, 1);
2383  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
2384  return RValue::get(Dest.getPointer());
2385  }
2386 
2387  case Builtin::BI__builtin_char_memchr:
2388  BuiltinID = Builtin::BI__builtin_memchr;
2389  break;
2390 
2391  case Builtin::BI__builtin___memcpy_chk: {
2392  // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
2393  Expr::EvalResult SizeResult, DstSizeResult;
2394  if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
2395  !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
2396  break;
2397  llvm::APSInt Size = SizeResult.Val.getInt();
2398  llvm::APSInt DstSize = DstSizeResult.Val.getInt();
2399  if (Size.ugt(DstSize))
2400  break;
2401  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2402  Address Src = EmitPointerWithAlignment(E->getArg(1));
2403  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
2404  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
2405  return RValue::get(Dest.getPointer());
2406  }
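  // NOTE: an illustration of the fold above, assuming constant sizes:
  //
  //   __builtin___memcpy_chk(dst, src, 16, 32);  // 16 <= 32: folded to a
  //                                              // plain llvm.memcpy
  //   __builtin___memcpy_chk(dst, src, 64, 32);  // not folded here; left to
  //                                              // the generic handling below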
2407 
2408  case Builtin::BI__builtin_objc_memmove_collectable: {
2409  Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
2410  Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
2411  Value *SizeVal = EmitScalarExpr(E->getArg(2));
2412  CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
2413  DestAddr, SrcAddr, SizeVal);
2414  return RValue::get(DestAddr.getPointer());
2415  }
2416 
2417  case Builtin::BI__builtin___memmove_chk: {
2418  // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
2419  Expr::EvalResult SizeResult, DstSizeResult;
2420  if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
2421  !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
2422  break;
2423  llvm::APSInt Size = SizeResult.Val.getInt();
2424  llvm::APSInt DstSize = DstSizeResult.Val.getInt();
2425  if (Size.ugt(DstSize))
2426  break;
2427  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2428  Address Src = EmitPointerWithAlignment(E->getArg(1));
2429  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
2430  Builder.CreateMemMove(Dest, Src, SizeVal, false);
2431  return RValue::get(Dest.getPointer());
2432  }
2433 
2434  case Builtin::BImemmove:
2435  case Builtin::BI__builtin_memmove: {
2436  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2437  Address Src = EmitPointerWithAlignment(E->getArg(1));
2438  Value *SizeVal = EmitScalarExpr(E->getArg(2));
2439  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2440  E->getArg(0)->getExprLoc(), FD, 0);
2441  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
2442  E->getArg(1)->getExprLoc(), FD, 1);
2443  Builder.CreateMemMove(Dest, Src, SizeVal, false);
2444  return RValue::get(Dest.getPointer());
2445  }
2446  case Builtin::BImemset:
2447  case Builtin::BI__builtin_memset: {
2448  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2449  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
2450  Builder.getInt8Ty());
2451  Value *SizeVal = EmitScalarExpr(E->getArg(2));
2452  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
2453  E->getArg(0)->getExprLoc(), FD, 0);
2454  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
2455  return RValue::get(Dest.getPointer());
2456  }
2457  case Builtin::BI__builtin___memset_chk: {
2458  // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
2459  Expr::EvalResult SizeResult, DstSizeResult;
2460  if (!E->getArg(2)->EvaluateAsInt(SizeResult, CGM.getContext()) ||
2461  !E->getArg(3)->EvaluateAsInt(DstSizeResult, CGM.getContext()))
2462  break;
2463  llvm::APSInt Size = SizeResult.Val.getInt();
2464  llvm::APSInt DstSize = DstSizeResult.Val.getInt();
2465  if (Size.ugt(DstSize))
2466  break;
2467  Address Dest = EmitPointerWithAlignment(E->getArg(0));
2468  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
2469  Builder.getInt8Ty());
2470  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
2471  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
2472  return RValue::get(Dest.getPointer());
2473  }
2474  case Builtin::BI__builtin_wmemcmp: {
2475  // The MSVC runtime library does not provide a definition of wmemcmp, so we
2476  // need an inline implementation.
2477  if (!getTarget().getTriple().isOSMSVCRT())
2478  break;
2479 
2480  llvm::Type *WCharTy = ConvertType(getContext().WCharTy);
2481 
2482  Value *Dst = EmitScalarExpr(E->getArg(0));
2483  Value *Src = EmitScalarExpr(E->getArg(1));
2484  Value *Size = EmitScalarExpr(E->getArg(2));
2485 
2486  BasicBlock *Entry = Builder.GetInsertBlock();
2487  BasicBlock *CmpGT = createBasicBlock("wmemcmp.gt");
2488  BasicBlock *CmpLT = createBasicBlock("wmemcmp.lt");
2489  BasicBlock *Next = createBasicBlock("wmemcmp.next");
2490  BasicBlock *Exit = createBasicBlock("wmemcmp.exit");
2491  Value *SizeEq0 = Builder.CreateICmpEQ(Size, ConstantInt::get(SizeTy, 0));
2492  Builder.CreateCondBr(SizeEq0, Exit, CmpGT);
2493 
2494  EmitBlock(CmpGT);
2495  PHINode *DstPhi = Builder.CreatePHI(Dst->getType(), 2);
2496  DstPhi->addIncoming(Dst, Entry);
2497  PHINode *SrcPhi = Builder.CreatePHI(Src->getType(), 2);
2498  SrcPhi->addIncoming(Src, Entry);
2499  PHINode *SizePhi = Builder.CreatePHI(SizeTy, 2);
2500  SizePhi->addIncoming(Size, Entry);
2501  CharUnits WCharAlign =
2502  getContext().getTypeAlignInChars(getContext().WCharTy);
2503  Value *DstCh = Builder.CreateAlignedLoad(WCharTy, DstPhi, WCharAlign);
2504  Value *SrcCh = Builder.CreateAlignedLoad(WCharTy, SrcPhi, WCharAlign);
2505  Value *DstGtSrc = Builder.CreateICmpUGT(DstCh, SrcCh);
2506  Builder.CreateCondBr(DstGtSrc, Exit, CmpLT);
2507 
2508  EmitBlock(CmpLT);
2509  Value *DstLtSrc = Builder.CreateICmpULT(DstCh, SrcCh);
2510  Builder.CreateCondBr(DstLtSrc, Exit, Next);
2511 
2512  EmitBlock(Next);
2513  Value *NextDst = Builder.CreateConstInBoundsGEP1_32(WCharTy, DstPhi, 1);
2514  Value *NextSrc = Builder.CreateConstInBoundsGEP1_32(WCharTy, SrcPhi, 1);
2515  Value *NextSize = Builder.CreateSub(SizePhi, ConstantInt::get(SizeTy, 1));
2516  Value *NextSizeEq0 =
2517  Builder.CreateICmpEQ(NextSize, ConstantInt::get(SizeTy, 0));
2518  Builder.CreateCondBr(NextSizeEq0, Exit, CmpGT);
2519  DstPhi->addIncoming(NextDst, Next);
2520  SrcPhi->addIncoming(NextSrc, Next);
2521  SizePhi->addIncoming(NextSize, Next);
2522 
2523  EmitBlock(Exit);
2524  PHINode *Ret = Builder.CreatePHI(IntTy, 4);
2525  Ret->addIncoming(ConstantInt::get(IntTy, 0), Entry);
2526  Ret->addIncoming(ConstantInt::get(IntTy, 1), CmpGT);
2527  Ret->addIncoming(ConstantInt::get(IntTy, -1), CmpLT);
2528  Ret->addIncoming(ConstantInt::get(IntTy, 0), Next);
2529  return RValue::get(Ret);
2530  }
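  // NOTE: the blocks emitted above correspond roughly to this C loop
  // (elements compared as unsigned):
  //
  //   int wmemcmp(const wchar_t *a, const wchar_t *b, size_t n) {
  //     for (; n != 0; ++a, ++b, --n) {
  //       if (*a > *b) return 1;
  //       if (*a < *b) return -1;
  //     }
  //     return 0;
  //   }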
2531  case Builtin::BI__builtin_dwarf_cfa: {
2532  // The offset in bytes from the first argument to the CFA.
2533  //
2534  // Why on earth is this in the frontend? Is there any reason at
2535  // all that the backend can't reasonably determine this while
2536  // lowering llvm.eh.dwarf.cfa()?
2537  //
2538  // TODO: If there's a satisfactory reason, add a target hook for
2539  // this instead of hard-coding 0, which is correct for most targets.
2540  int32_t Offset = 0;
2541 
2542  Function *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
2543  return RValue::get(Builder.CreateCall(F,
2544  llvm::ConstantInt::get(Int32Ty, Offset)));
2545  }
2546  case Builtin::BI__builtin_return_address: {
2547  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
2548  getContext().UnsignedIntTy);
2549  Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
2550  return RValue::get(Builder.CreateCall(F, Depth));
2551  }
2552  case Builtin::BI_ReturnAddress: {
2553  Function *F = CGM.getIntrinsic(Intrinsic::returnaddress);
2554  return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
2555  }
2556  case Builtin::BI__builtin_frame_address: {
2557  Value *Depth = ConstantEmitter(*this).emitAbstract(E->getArg(0),
2558  getContext().UnsignedIntTy);
2559  Function *F = CGM.getIntrinsic(Intrinsic::frameaddress);
2560  return RValue::get(Builder.CreateCall(F, Depth));
2561  }
2562  case Builtin::BI__builtin_extract_return_addr: {
2563  Value *Address = EmitScalarExpr(E->getArg(0));
2564  Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
2565  return RValue::get(Result);
2566  }
2567  case Builtin::BI__builtin_frob_return_addr: {
2568  Value *Address = EmitScalarExpr(E->getArg(0));
2569  Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
2570  return RValue::get(Result);
2571  }
2572  case Builtin::BI__builtin_dwarf_sp_column: {
2573  llvm::IntegerType *Ty
2574  = cast<llvm::IntegerType>(ConvertType(E->getType()));
2575  int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
2576  if (Column == -1) {
2577  CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
2578  return RValue::get(llvm::UndefValue::get(Ty));
2579  }
2580  return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
2581  }
2582  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
2583  Value *Address = EmitScalarExpr(E->getArg(0));
2584  if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
2585  CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
2586  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
2587  }
2588  case Builtin::BI__builtin_eh_return: {
2589  Value *Int = EmitScalarExpr(E->getArg(0));
2590  Value *Ptr = EmitScalarExpr(E->getArg(1));
2591 
2592  llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
2593  assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
2594  "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
2595  Function *F =
2596  CGM.getIntrinsic(IntTy->getBitWidth() == 32 ? Intrinsic::eh_return_i32
2597  : Intrinsic::eh_return_i64);
2598  Builder.CreateCall(F, {Int, Ptr});
2599  Builder.CreateUnreachable();
2600 
2601  // We do need to preserve an insertion point.
2602  EmitBlock(createBasicBlock("builtin_eh_return.cont"));
2603 
2604  return RValue::get(nullptr);
2605  }
2606  case Builtin::BI__builtin_unwind_init: {
2607  Function *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
2608  return RValue::get(Builder.CreateCall(F));
2609  }
2610  case Builtin::BI__builtin_extend_pointer: {
2611  // Extends a pointer to the size of an _Unwind_Word, which is
2612  // uint64_t on all platforms. Generally this gets poked into a
2613  // register and eventually used as an address, so if the
2614  // addressing registers are wider than pointers and the platform
2615  // doesn't implicitly ignore high-order bits when doing
2616  // addressing, we need to make sure we zext / sext based on
2617  // the platform's expectations.
2618  //
2619  // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
2620 
2621  // Cast the pointer to intptr_t.
2622  Value *Ptr = EmitScalarExpr(E->getArg(0));
2623  Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
2624 
2625  // If that's 64 bits, we're done.
2626  if (IntPtrTy->getBitWidth() == 64)
2627  return RValue::get(Result);
2628 
2629  // Otherwise, ask the codegen data what to do.
2630  if (getTargetHooks().extendPointerWithSExt())
2631  return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
2632  else
2633  return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
2634  }
2635  case Builtin::BI__builtin_setjmp: {
2636  // Buffer is a void**.
2637  Address Buf = EmitPointerWithAlignment(E->getArg(0));
2638 
2639  // Store the frame pointer to the setjmp buffer.
2640  Value *FrameAddr =
2641  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2642  ConstantInt::get(Int32Ty, 0));
2643  Builder.CreateStore(FrameAddr, Buf);
2644 
2645  // Store the stack pointer to the setjmp buffer.
2646  Value *StackAddr =
2647  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
2648  Address StackSaveSlot = Builder.CreateConstInBoundsGEP(Buf, 2);
2649  Builder.CreateStore(StackAddr, StackSaveSlot);
2650 
2651  // Call LLVM's EH setjmp, which is lightweight.
2652  Function *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
2653  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
2654  return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
2655  }
2656  case Builtin::BI__builtin_longjmp: {
2657  Value *Buf = EmitScalarExpr(E->getArg(0));
2658  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
2659 
2660  // Call LLVM's EH longjmp, which is lightweight.
2661  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
2662 
2663  // longjmp doesn't return; mark this as unreachable.
2664  Builder.CreateUnreachable();
2665 
2666  // We do need to preserve an insertion point.
2667  EmitBlock(createBasicBlock("longjmp.cont"));
2668 
2669  return RValue::get(nullptr);
2670  }
2671  case Builtin::BI__builtin_launder: {
2672  const Expr *Arg = E->getArg(0);
2673  QualType ArgTy = Arg->getType()->getPointeeType();
2674  Value *Ptr = EmitScalarExpr(Arg);
2675  if (TypeRequiresBuiltinLaunder(CGM, ArgTy))
2676  Ptr = Builder.CreateLaunderInvariantGroup(Ptr);
2677 
2678  return RValue::get(Ptr);
2679  }
2680  case Builtin::BI__sync_fetch_and_add:
2681  case Builtin::BI__sync_fetch_and_sub:
2682  case Builtin::BI__sync_fetch_and_or:
2683  case Builtin::BI__sync_fetch_and_and:
2684  case Builtin::BI__sync_fetch_and_xor:
2685  case Builtin::BI__sync_fetch_and_nand:
2686  case Builtin::BI__sync_add_and_fetch:
2687  case Builtin::BI__sync_sub_and_fetch:
2688  case Builtin::BI__sync_and_and_fetch:
2689  case Builtin::BI__sync_or_and_fetch:
2690  case Builtin::BI__sync_xor_and_fetch:
2691  case Builtin::BI__sync_nand_and_fetch:
2692  case Builtin::BI__sync_val_compare_and_swap:
2693  case Builtin::BI__sync_bool_compare_and_swap:
2694  case Builtin::BI__sync_lock_test_and_set:
2695  case Builtin::BI__sync_lock_release:
2696  case Builtin::BI__sync_swap:
2697  llvm_unreachable("Shouldn't make it through sema");
2698  case Builtin::BI__sync_fetch_and_add_1:
2699  case Builtin::BI__sync_fetch_and_add_2:
2700  case Builtin::BI__sync_fetch_and_add_4:
2701  case Builtin::BI__sync_fetch_and_add_8:
2702  case Builtin::BI__sync_fetch_and_add_16:
2703  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
2704  case Builtin::BI__sync_fetch_and_sub_1:
2705  case Builtin::BI__sync_fetch_and_sub_2:
2706  case Builtin::BI__sync_fetch_and_sub_4:
2707  case Builtin::BI__sync_fetch_and_sub_8:
2708  case Builtin::BI__sync_fetch_and_sub_16:
2709  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
2710  case Builtin::BI__sync_fetch_and_or_1:
2711  case Builtin::BI__sync_fetch_and_or_2:
2712  case Builtin::BI__sync_fetch_and_or_4:
2713  case Builtin::BI__sync_fetch_and_or_8:
2714  case Builtin::BI__sync_fetch_and_or_16:
2715  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
2716  case Builtin::BI__sync_fetch_and_and_1:
2717  case Builtin::BI__sync_fetch_and_and_2:
2718  case Builtin::BI__sync_fetch_and_and_4:
2719  case Builtin::BI__sync_fetch_and_and_8:
2720  case Builtin::BI__sync_fetch_and_and_16:
2721  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
2722  case Builtin::BI__sync_fetch_and_xor_1:
2723  case Builtin::BI__sync_fetch_and_xor_2:
2724  case Builtin::BI__sync_fetch_and_xor_4:
2725  case Builtin::BI__sync_fetch_and_xor_8:
2726  case Builtin::BI__sync_fetch_and_xor_16:
2727  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
2728  case Builtin::BI__sync_fetch_and_nand_1:
2729  case Builtin::BI__sync_fetch_and_nand_2:
2730  case Builtin::BI__sync_fetch_and_nand_4:
2731  case Builtin::BI__sync_fetch_and_nand_8:
2732  case Builtin::BI__sync_fetch_and_nand_16:
2733  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
2734 
2735  // Clang extensions: not overloaded yet.
2736  case Builtin::BI__sync_fetch_and_min:
2737  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
2738  case Builtin::BI__sync_fetch_and_max:
2739  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
2740  case Builtin::BI__sync_fetch_and_umin:
2741  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
2742  case Builtin::BI__sync_fetch_and_umax:
2743  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
2744 
2745  case Builtin::BI__sync_add_and_fetch_1:
2746  case Builtin::BI__sync_add_and_fetch_2:
2747  case Builtin::BI__sync_add_and_fetch_4:
2748  case Builtin::BI__sync_add_and_fetch_8:
2749  case Builtin::BI__sync_add_and_fetch_16:
2750  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
2751  llvm::Instruction::Add);
2752  case Builtin::BI__sync_sub_and_fetch_1:
2753  case Builtin::BI__sync_sub_and_fetch_2:
2754  case Builtin::BI__sync_sub_and_fetch_4:
2755  case Builtin::BI__sync_sub_and_fetch_8:
2756  case Builtin::BI__sync_sub_and_fetch_16:
2757  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
2758  llvm::Instruction::Sub);
2759  case Builtin::BI__sync_and_and_fetch_1:
2760  case Builtin::BI__sync_and_and_fetch_2:
2761  case Builtin::BI__sync_and_and_fetch_4:
2762  case Builtin::BI__sync_and_and_fetch_8:
2763  case Builtin::BI__sync_and_and_fetch_16:
2764  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
2765  llvm::Instruction::And);
2766  case Builtin::BI__sync_or_and_fetch_1:
2767  case Builtin::BI__sync_or_and_fetch_2:
2768  case Builtin::BI__sync_or_and_fetch_4:
2769  case Builtin::BI__sync_or_and_fetch_8:
2770  case Builtin::BI__sync_or_and_fetch_16:
2771  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
2772  llvm::Instruction::Or);
2773  case Builtin::BI__sync_xor_and_fetch_1:
2774  case Builtin::BI__sync_xor_and_fetch_2:
2775  case Builtin::BI__sync_xor_and_fetch_4:
2776  case Builtin::BI__sync_xor_and_fetch_8:
2777  case Builtin::BI__sync_xor_and_fetch_16:
2778  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
2779  llvm::Instruction::Xor);
2780  case Builtin::BI__sync_nand_and_fetch_1:
2781  case Builtin::BI__sync_nand_and_fetch_2:
2782  case Builtin::BI__sync_nand_and_fetch_4:
2783  case Builtin::BI__sync_nand_and_fetch_8:
2784  case Builtin::BI__sync_nand_and_fetch_16:
2785  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
2786  llvm::Instruction::And, true);
2787 
2788  case Builtin::BI__sync_val_compare_and_swap_1:
2789  case Builtin::BI__sync_val_compare_and_swap_2:
2790  case Builtin::BI__sync_val_compare_and_swap_4:
2791  case Builtin::BI__sync_val_compare_and_swap_8:
2792  case Builtin::BI__sync_val_compare_and_swap_16:
2793  return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
2794 
2795  case Builtin::BI__sync_bool_compare_and_swap_1:
2796  case Builtin::BI__sync_bool_compare_and_swap_2:
2797  case Builtin::BI__sync_bool_compare_and_swap_4:
2798  case Builtin::BI__sync_bool_compare_and_swap_8:
2799  case Builtin::BI__sync_bool_compare_and_swap_16:
2800  return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
2801 
2802  case Builtin::BI__sync_swap_1:
2803  case Builtin::BI__sync_swap_2:
2804  case Builtin::BI__sync_swap_4:
2805  case Builtin::BI__sync_swap_8:
2806  case Builtin::BI__sync_swap_16:
2807  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2808 
2809  case Builtin::BI__sync_lock_test_and_set_1:
2810  case Builtin::BI__sync_lock_test_and_set_2:
2811  case Builtin::BI__sync_lock_test_and_set_4:
2812  case Builtin::BI__sync_lock_test_and_set_8:
2813  case Builtin::BI__sync_lock_test_and_set_16:
2814  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
2815 
2816  case Builtin::BI__sync_lock_release_1:
2817  case Builtin::BI__sync_lock_release_2:
2818  case Builtin::BI__sync_lock_release_4:
2819  case Builtin::BI__sync_lock_release_8:
2820  case Builtin::BI__sync_lock_release_16: {
2821  Value *Ptr = EmitScalarExpr(E->getArg(0));
2822  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
2823  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
2824  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
2825  StoreSize.getQuantity() * 8);
2826  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
2827  llvm::StoreInst *Store =
2828  Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
2829  StoreSize);
2830  Store->setAtomic(llvm::AtomicOrdering::Release);
2831  return RValue::get(nullptr);
2832  }
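  // For illustration, given the release store built above, a call such as
  //   long l; __sync_lock_release(&l);
  // is expected to lower roughly to (sketch; width/alignment follow the pointee):
  //   store atomic i64 0, i64* %l release, align 8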
2833 
2834  case Builtin::BI__sync_synchronize: {
2835  // We assume this is supposed to correspond to a C++0x-style
2836  // sequentially-consistent fence (i.e. this is only usable for
2837  // synchronization, not device I/O or anything like that). This intrinsic
2838  // is really badly designed in the sense that in theory, there isn't
2839  // any way to safely use it... but in practice, it mostly works
2840  // to use it with non-atomic loads and stores to get acquire/release
2841  // semantics.
2842  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
2843  return RValue::get(nullptr);
2844  }
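  // For illustration, __sync_synchronize() therefore lowers to a single
  // sequentially-consistent fence:
  //   fence seq_cst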
2845 
2846  case Builtin::BI__builtin_nontemporal_load:
2847  return RValue::get(EmitNontemporalLoad(*this, E));
2848  case Builtin::BI__builtin_nontemporal_store:
2849  return RValue::get(EmitNontemporalStore(*this, E));
2850  case Builtin::BI__c11_atomic_is_lock_free:
2851  case Builtin::BI__atomic_is_lock_free: {
2852  // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
2853  // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
2854  // _Atomic(T) is always properly-aligned.
2855  const char *LibCallName = "__atomic_is_lock_free";
2856  CallArgList Args;
2857  Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
2858  getContext().getSizeType());
2859  if (BuiltinID == Builtin::BI__atomic_is_lock_free)
2860  Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
2861  getContext().VoidPtrTy);
2862  else
2863  Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
2864  getContext().VoidPtrTy);
2865  const CGFunctionInfo &FuncInfo =
2866  CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
2867  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
2868  llvm::FunctionCallee Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
2869  return EmitCall(FuncInfo, CGCallee::forDirect(Func),
2870  ReturnValueSlot(), Args);
2871  }
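  // For illustration, on a typical 64-bit target both builtins end up as the
  // same library call (a sketch; value names are invented):
  //   __c11_atomic_is_lock_free(sizeof(T))  ->  call i1 @__atomic_is_lock_free(i64 %size, i8* null)
  //   __atomic_is_lock_free(sizeof(T), p)   ->  call i1 @__atomic_is_lock_free(i64 %size, i8* %p)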
2872 
2873  case Builtin::BI__atomic_test_and_set: {
2874  // Look at the argument type to determine whether this is a volatile
2875  // operation. The parameter type is always volatile.
2876  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2877  bool Volatile =
2878  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2879 
2880  Value *Ptr = EmitScalarExpr(E->getArg(0));
2881  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
2882  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2883  Value *NewVal = Builder.getInt8(1);
2884  Value *Order = EmitScalarExpr(E->getArg(1));
2885  if (isa<llvm::ConstantInt>(Order)) {
2886  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2887  AtomicRMWInst *Result = nullptr;
2888  switch (ord) {
2889  case 0: // memory_order_relaxed
2890  default: // invalid order
2891  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2892  llvm::AtomicOrdering::Monotonic);
2893  break;
2894  case 1: // memory_order_consume
2895  case 2: // memory_order_acquire
2896  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2897  llvm::AtomicOrdering::Acquire);
2898  break;
2899  case 3: // memory_order_release
2900  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2901  llvm::AtomicOrdering::Release);
2902  break;
2903  case 4: // memory_order_acq_rel
2904 
2905  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2906  llvm::AtomicOrdering::AcquireRelease);
2907  break;
2908  case 5: // memory_order_seq_cst
2909  Result = Builder.CreateAtomicRMW(
2910  llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
2911  llvm::AtomicOrdering::SequentiallyConsistent);
2912  break;
2913  }
2914  Result->setVolatile(Volatile);
2915  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2916  }
2917 
2918  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2919 
2920  llvm::BasicBlock *BBs[5] = {
2921  createBasicBlock("monotonic", CurFn),
2922  createBasicBlock("acquire", CurFn),
2923  createBasicBlock("release", CurFn),
2924  createBasicBlock("acqrel", CurFn),
2925  createBasicBlock("seqcst", CurFn)
2926  };
2927  llvm::AtomicOrdering Orders[5] = {
2928  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
2929  llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
2930  llvm::AtomicOrdering::SequentiallyConsistent};
2931 
2932  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2933  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2934 
2935  Builder.SetInsertPoint(ContBB);
2936  PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
2937 
2938  for (unsigned i = 0; i < 5; ++i) {
2939  Builder.SetInsertPoint(BBs[i]);
2940  AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
2941  Ptr, NewVal, Orders[i]);
2942  RMW->setVolatile(Volatile);
2943  Result->addIncoming(RMW, BBs[i]);
2944  Builder.CreateBr(ContBB);
2945  }
2946 
2947  SI->addCase(Builder.getInt32(0), BBs[0]);
2948  SI->addCase(Builder.getInt32(1), BBs[1]);
2949  SI->addCase(Builder.getInt32(2), BBs[1]);
2950  SI->addCase(Builder.getInt32(3), BBs[2]);
2951  SI->addCase(Builder.getInt32(4), BBs[3]);
2952  SI->addCase(Builder.getInt32(5), BBs[4]);
2953 
2954  Builder.SetInsertPoint(ContBB);
2955  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
2956  }
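  // For illustration: with a constant ordering the code above emits a single
  // exchange, e.g. __atomic_test_and_set(p, __ATOMIC_ACQUIRE) becomes roughly
  //   %old    = atomicrmw xchg i8* %p, i8 1 acquire
  //   %tobool = icmp ne i8 %old, 0
  // With a runtime ordering it instead emits the switch over the five
  // orderings shown above, each arm performing the same exchange.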
2957 
2958  case Builtin::BI__atomic_clear: {
2959  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
2960  bool Volatile =
2961  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
2962 
2963  Address Ptr = EmitPointerWithAlignment(E->getArg(0));
2964  unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
2965  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
2966  Value *NewVal = Builder.getInt8(0);
2967  Value *Order = EmitScalarExpr(E->getArg(1));
2968  if (isa<llvm::ConstantInt>(Order)) {
2969  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
2970  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
2971  switch (ord) {
2972  case 0: // memory_order_relaxed
2973  default: // invalid order
2974  Store->setOrdering(llvm::AtomicOrdering::Monotonic);
2975  break;
2976  case 3: // memory_order_release
2977  Store->setOrdering(llvm::AtomicOrdering::Release);
2978  break;
2979  case 5: // memory_order_seq_cst
2980  Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
2981  break;
2982  }
2983  return RValue::get(nullptr);
2984  }
2985 
2986  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
2987 
2988  llvm::BasicBlock *BBs[3] = {
2989  createBasicBlock("monotonic", CurFn),
2990  createBasicBlock("release", CurFn),
2991  createBasicBlock("seqcst", CurFn)
2992  };
2993  llvm::AtomicOrdering Orders[3] = {
2994  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
2995  llvm::AtomicOrdering::SequentiallyConsistent};
2996 
2997  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
2998  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
2999 
3000  for (unsigned i = 0; i < 3; ++i) {
3001  Builder.SetInsertPoint(BBs[i]);
3002  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
3003  Store->setOrdering(Orders[i]);
3004  Builder.CreateBr(ContBB);
3005  }
3006 
3007  SI->addCase(Builder.getInt32(0), BBs[0]);
3008  SI->addCase(Builder.getInt32(3), BBs[1]);
3009  SI->addCase(Builder.getInt32(5), BBs[2]);
3010 
3011  Builder.SetInsertPoint(ContBB);
3012  return RValue::get(nullptr);
3013  }
3014 
3015  case Builtin::BI__atomic_thread_fence:
3016  case Builtin::BI__atomic_signal_fence:
3017  case Builtin::BI__c11_atomic_thread_fence:
3018  case Builtin::BI__c11_atomic_signal_fence: {
3019  llvm::SyncScope::ID SSID;
3020  if (BuiltinID == Builtin::BI__atomic_signal_fence ||
3021  BuiltinID == Builtin::BI__c11_atomic_signal_fence)
3022  SSID = llvm::SyncScope::SingleThread;
3023  else
3024  SSID = llvm::SyncScope::System;
3025  Value *Order = EmitScalarExpr(E->getArg(0));
3026  if (isa<llvm::ConstantInt>(Order)) {
3027  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
3028  switch (ord) {
3029  case 0: // memory_order_relaxed
3030  default: // invalid order
3031  break;
3032  case 1: // memory_order_consume
3033  case 2: // memory_order_acquire
3034  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
3035  break;
3036  case 3: // memory_order_release
3037  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
3038  break;
3039  case 4: // memory_order_acq_rel
3040  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
3041  break;
3042  case 5: // memory_order_seq_cst
3043  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
3044  break;
3045  }
3046  return RValue::get(nullptr);
3047  }
3048 
3049  llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
3050  AcquireBB = createBasicBlock("acquire", CurFn);
3051  ReleaseBB = createBasicBlock("release", CurFn);
3052  AcqRelBB = createBasicBlock("acqrel", CurFn);
3053  SeqCstBB = createBasicBlock("seqcst", CurFn);
3054  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
3055 
3056  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
3057  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
3058 
3059  Builder.SetInsertPoint(AcquireBB);
3060  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
3061  Builder.CreateBr(ContBB);
3062  SI->addCase(Builder.getInt32(1), AcquireBB);
3063  SI->addCase(Builder.getInt32(2), AcquireBB);
3064 
3065  Builder.SetInsertPoint(ReleaseBB);
3066  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
3067  Builder.CreateBr(ContBB);
3068  SI->addCase(Builder.getInt32(3), ReleaseBB);
3069 
3070  Builder.SetInsertPoint(AcqRelBB);
3071  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
3072  Builder.CreateBr(ContBB);
3073  SI->addCase(Builder.getInt32(4), AcqRelBB);
3074 
3075  Builder.SetInsertPoint(SeqCstBB);
3076  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
3077  Builder.CreateBr(ContBB);
3078  SI->addCase(Builder.getInt32(5), SeqCstBB);
3079 
3080  Builder.SetInsertPoint(ContBB);
3081  return RValue::get(nullptr);
3082  }
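  // For illustration, with a constant ordering these builtins collapse to a
  // single fence instruction (sketch):
  //   __atomic_thread_fence(__ATOMIC_ACQ_REL)  ->  fence acq_rel
  //   __atomic_signal_fence(__ATOMIC_SEQ_CST)  ->  fence syncscope("singlethread") seq_cst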
3083 
3084  case Builtin::BI__builtin_signbit:
3085  case Builtin::BI__builtin_signbitf:
3086  case Builtin::BI__builtin_signbitl: {
3087  return RValue::get(
3088  Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
3089  ConvertType(E->getType())));
3090  }
3091  case Builtin::BI__annotation: {
3092  // Re-encode each wide string to UTF8 and make an MDString.
3093  SmallVector<Metadata *, 1> Strings;
3094  for (const Expr *Arg : E->arguments()) {
3095  const auto *Str = cast<StringLiteral>(Arg->IgnoreParenCasts());
3096  assert(Str->getCharByteWidth() == 2);
3097  StringRef WideBytes = Str->getBytes();
3098  std::string StrUtf8;
3099  if (!convertUTF16ToUTF8String(
3100  makeArrayRef(WideBytes.data(), WideBytes.size()), StrUtf8)) {
3101  CGM.ErrorUnsupported(E, "non-UTF16 __annotation argument");
3102  continue;
3103  }
3104  Strings.push_back(llvm::MDString::get(getLLVMContext(), StrUtf8));
3105  }
3106 
3107  // Build an MDTuple of MDStrings and emit the intrinsic call.
3108  llvm::Function *F =
3109  CGM.getIntrinsic(llvm::Intrinsic::codeview_annotation, {});
3110  MDTuple *StrTuple = MDTuple::get(getLLVMContext(), Strings);
3111  Builder.CreateCall(F, MetadataAsValue::get(getLLVMContext(), StrTuple));
3112  return RValue::getIgnored();
3113  }
3114  case Builtin::BI__builtin_annotation: {
3115  llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
3116  llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
3117  AnnVal->getType());
3118 
3119  // Get the annotation string, looking through casts. Sema requires this to
3120  // be a non-wide string literal, possibly cast, so the cast<> below is safe.
3121  const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
3122  StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
3123  return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
3124  }
3125  case Builtin::BI__builtin_addcb:
3126  case Builtin::BI__builtin_addcs:
3127  case Builtin::BI__builtin_addc:
3128  case Builtin::BI__builtin_addcl:
3129  case Builtin::BI__builtin_addcll:
3130  case Builtin::BI__builtin_subcb:
3131  case Builtin::BI__builtin_subcs:
3132  case Builtin::BI__builtin_subc:
3133  case Builtin::BI__builtin_subcl:
3134  case Builtin::BI__builtin_subcll: {
3135 
3136  // We translate all of these builtins from expressions of the form:
3137  // int x = ..., y = ..., carryin = ..., carryout, result;
3138  // result = __builtin_addc(x, y, carryin, &carryout);
3139  //
3140  // to LLVM IR of the form:
3141  //
3142  // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
3143  // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
3144  // %carry1 = extractvalue {i32, i1} %tmp1, 1
3145  // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
3146  // i32 %carryin)
3147  // %result = extractvalue {i32, i1} %tmp2, 0
3148  // %carry2 = extractvalue {i32, i1} %tmp2, 1
3149  // %tmp3 = or i1 %carry1, %carry2
3150  // %tmp4 = zext i1 %tmp3 to i32
3151  // store i32 %tmp4, i32* %carryout
3152 
3153  // Scalarize our inputs.
3154  llvm::Value *X = EmitScalarExpr(E->getArg(0));
3155  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
3156  llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
3157  Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
3158 
3159  // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
3160  llvm::Intrinsic::ID IntrinsicId;
3161  switch (BuiltinID) {
3162  default: llvm_unreachable("Unknown multiprecision builtin id.");
3163  case Builtin::BI__builtin_addcb:
3164  case Builtin::BI__builtin_addcs:
3165  case Builtin::BI__builtin_addc:
3166  case Builtin::BI__builtin_addcl:
3167  case Builtin::BI__builtin_addcll:
3168  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
3169  break;
3170  case Builtin::BI__builtin_subcb:
3171  case Builtin::BI__builtin_subcs:
3172  case Builtin::BI__builtin_subc:
3173  case Builtin::BI__builtin_subcl:
3174  case Builtin::BI__builtin_subcll:
3175  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
3176  break;
3177  }
3178 
3179  // Construct our resulting LLVM IR expression.
3180  llvm::Value *Carry1;
3181  llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
3182  X, Y, Carry1);
3183  llvm::Value *Carry2;
3184  llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
3185  Sum1, Carryin, Carry2);
3186  llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
3187  X->getType());
3188  Builder.CreateStore(CarryOut, CarryOutPtr);
3189  return RValue::get(Sum2);
3190  }
3191 
3192  case Builtin::BI__builtin_add_overflow:
3193  case Builtin::BI__builtin_sub_overflow:
3194  case Builtin::BI__builtin_mul_overflow: {
3195  const clang::Expr *LeftArg = E->getArg(0);
3196  const clang::Expr *RightArg = E->getArg(1);
3197  const clang::Expr *ResultArg = E->getArg(2);
3198 
3199  clang::QualType ResultQTy =
3200  ResultArg->getType()->castAs<PointerType>()->getPointeeType();
3201 
3202  WidthAndSignedness LeftInfo =
3203  getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
3204  WidthAndSignedness RightInfo =
3205  getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
3206  WidthAndSignedness ResultInfo =
3207  getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
3208 
3209  // Handle mixed-sign multiplication as a special case, because adding
3210  // runtime or backend support for our generic irgen would be too expensive.
3211  if (isSpecialMixedSignMultiply(BuiltinID, LeftInfo, RightInfo, ResultInfo))
3212  return EmitCheckedMixedSignMultiply(*this, LeftArg, LeftInfo, RightArg,
3213  RightInfo, ResultArg, ResultQTy,
3214  ResultInfo);
3215 
3216  WidthAndSignedness EncompassingInfo =
3217  EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
3218 
3219  llvm::Type *EncompassingLLVMTy =
3220  llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
3221 
3222  llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
3223 
3224  llvm::Intrinsic::ID IntrinsicId;
3225  switch (BuiltinID) {
3226  default:
3227  llvm_unreachable("Unknown overflow builtin id.");
3228  case Builtin::BI__builtin_add_overflow:
3229  IntrinsicId = EncompassingInfo.Signed
3230  ? llvm::Intrinsic::sadd_with_overflow
3231  : llvm::Intrinsic::uadd_with_overflow;
3232  break;
3233  case Builtin::BI__builtin_sub_overflow:
3234  IntrinsicId = EncompassingInfo.Signed
3235  ? llvm::Intrinsic::ssub_with_overflow
3236  : llvm::Intrinsic::usub_with_overflow;
3237  break;
3238  case Builtin::BI__builtin_mul_overflow:
3239  IntrinsicId = EncompassingInfo.Signed
3240  ? llvm::Intrinsic::smul_with_overflow
3241  : llvm::Intrinsic::umul_with_overflow;
3242  break;
3243  }
3244 
3245  llvm::Value *Left = EmitScalarExpr(LeftArg);
3246  llvm::Value *Right = EmitScalarExpr(RightArg);
3247  Address ResultPtr = EmitPointerWithAlignment(ResultArg);
3248 
3249  // Extend each operand to the encompassing type.
3250  Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
3251  Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
3252 
3253  // Perform the operation on the extended values.
3254  llvm::Value *Overflow, *Result;
3255  Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
3256 
3257  if (EncompassingInfo.Width > ResultInfo.Width) {
3258  // The encompassing type is wider than the result type, so we need to
3259  // truncate it.
3260  llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
3261 
3262  // To see if the truncation caused an overflow, we will extend
3263  // the result and then compare it to the original result.
3264  llvm::Value *ResultTruncExt = Builder.CreateIntCast(
3265  ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
3266  llvm::Value *TruncationOverflow =
3267  Builder.CreateICmpNE(Result, ResultTruncExt);
3268 
3269  Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
3270  Result = ResultTrunc;
3271  }
3272 
3273  // Finally, store the result using the pointer.
3274  bool isVolatile =
3275  ResultArg->getType()->getPointeeType().isVolatileQualified();
3276  Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
3277 
3278  return RValue::get(Overflow);
3279  }
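  // For illustration, mixed widths exercise the truncation check above; a call
  // __builtin_add_overflow(a, b, &c) with 'long' operands and an 'int' result
  // is expected to lower roughly to (sketch, signed case):
  //   %s     = call {i64, i1} @llvm.sadd.with.overflow.i64(i64 %a, i64 %b)
  //   %sum   = extractvalue {i64, i1} %s, 0
  //   %ovf   = extractvalue {i64, i1} %s, 1
  //   %trunc = trunc i64 %sum to i32          ; stored through &c
  //   %ext   = sext i32 %trunc to i64
  //   %tovf  = icmp ne i64 %sum, %ext
  //   %ret   = or i1 %ovf, %tovf              ; returned overflow flag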
3280 
3281  case Builtin::BI__builtin_uadd_overflow:
3282  case Builtin::BI__builtin_uaddl_overflow:
3283  case Builtin::BI__builtin_uaddll_overflow:
3284  case Builtin::BI__builtin_usub_overflow:
3285  case Builtin::BI__builtin_usubl_overflow:
3286  case Builtin::BI__builtin_usubll_overflow:
3287  case Builtin::BI__builtin_umul_overflow:
3288  case Builtin::BI__builtin_umull_overflow:
3289  case Builtin::BI__builtin_umulll_overflow:
3290  case Builtin::BI__builtin_sadd_overflow:
3291  case Builtin::BI__builtin_saddl_overflow:
3292  case Builtin::BI__builtin_saddll_overflow:
3293  case Builtin::BI__builtin_ssub_overflow:
3294  case Builtin::BI__builtin_ssubl_overflow:
3295  case Builtin::BI__builtin_ssubll_overflow:
3296  case Builtin::BI__builtin_smul_overflow:
3297  case Builtin::BI__builtin_smull_overflow:
3298  case Builtin::BI__builtin_smulll_overflow: {
3299 
3300  // We translate all of these builtins directly to the relevant llvm IR node.
3301 
3302  // Scalarize our inputs.
3303  llvm::Value *X = EmitScalarExpr(E->getArg(0));
3304  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
3305  Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
3306 
3307  // Decide which of the overflow intrinsics we are lowering to:
3308  llvm::Intrinsic::ID IntrinsicId;
3309  switch (BuiltinID) {
3310  default: llvm_unreachable("Unknown overflow builtin id.");
3311  case Builtin::BI__builtin_uadd_overflow:
3312  case Builtin::BI__builtin_uaddl_overflow:
3313  case Builtin::BI__builtin_uaddll_overflow:
3314  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
3315  break;
3316  case Builtin::BI__builtin_usub_overflow:
3317  case Builtin::BI__builtin_usubl_overflow:
3318  case Builtin::BI__builtin_usubll_overflow:
3319  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
3320  break;
3321  case Builtin::BI__builtin_umul_overflow:
3322  case Builtin::BI__builtin_umull_overflow:
3323  case Builtin::BI__builtin_umulll_overflow:
3324  IntrinsicId = llvm::Intrinsic::umul_with_overflow;
3325  break;
3326  case Builtin::BI__builtin_sadd_overflow:
3327  case Builtin::BI__builtin_saddl_overflow:
3328  case Builtin::BI__builtin_saddll_overflow:
3329  IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
3330  break;
3331  case Builtin::BI__builtin_ssub_overflow:
3332  case Builtin::BI__builtin_ssubl_overflow:
3333  case Builtin::BI__builtin_ssubll_overflow:
3334  IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
3335  break;
3336  case Builtin::BI__builtin_smul_overflow:
3337  case Builtin::BI__builtin_smull_overflow:
3338  case Builtin::BI__builtin_smulll_overflow:
3339  IntrinsicId = llvm::Intrinsic::smul_with_overflow;
3340  break;
3341  }
3342 
3343 
3344  llvm::Value *Carry;
3345  llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
3346  Builder.CreateStore(Sum, SumOutPtr);
3347 
3348  return RValue::get(Carry);
3349  }
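  // For illustration, the fixed-width forms map directly onto one intrinsic;
  // __builtin_uadd_overflow(x, y, &sum) is expected to become roughly:
  //   %r   = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
  //   %sum = extractvalue {i32, i1} %r, 0     ; stored through the out pointer
  //   %c   = extractvalue {i32, i1} %r, 1     ; returned as the carry flag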
3350  case Builtin::BI__builtin_addressof:
3351  return RValue::get(EmitLValue(E->getArg(0)).getPointer());
3352  case Builtin::BI__builtin_operator_new:
3353  return EmitBuiltinNewDeleteCall(
3354  E->getCallee()->getType()->castAs<FunctionProtoType>(), E, false);
3355  case Builtin::BI__builtin_operator_delete:
3356  return EmitBuiltinNewDeleteCall(
3357  E->getCallee()->getType()->castAs<FunctionProtoType>(), E, true);
3358 
3359  case Builtin::BI__noop:
3360  // __noop always evaluates to an integer literal zero.
3361  return RValue::get(ConstantInt::get(IntTy, 0));
3362  case Builtin::BI__builtin_call_with_static_chain: {
3363  const CallExpr *Call = cast<CallExpr>(E->getArg(0));
3364  const Expr *Chain = E->getArg(1);
3365  return EmitCall(Call->getCallee()->getType(),
3366  EmitCallee(Call->getCallee()), Call, ReturnValue,
3367  EmitScalarExpr(Chain));
3368  }
3369  case Builtin::BI_InterlockedExchange8:
3370  case Builtin::BI_InterlockedExchange16:
3371  case Builtin::BI_InterlockedExchange:
3372  case Builtin::BI_InterlockedExchangePointer:
3373  return RValue::get(
3374  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
3375  case Builtin::BI_InterlockedCompareExchangePointer:
3376  case Builtin::BI_InterlockedCompareExchangePointer_nf: {
3377  llvm::Type *RTy;
3378  llvm::IntegerType *IntType =
3379  IntegerType::get(getLLVMContext(),
3380  getContext().getTypeSize(E->getType()));
3381  llvm::Type *IntPtrType = IntType->getPointerTo();
3382 
3383  llvm::Value *Destination =
3384  Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
3385 
3386  llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
3387  RTy = Exchange->getType();
3388  Exchange = Builder.CreatePtrToInt(Exchange, IntType);
3389 
3390  llvm::Value *Comparand =
3391  Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
3392 
3393  auto Ordering =
3394  BuiltinID == Builtin::BI_InterlockedCompareExchangePointer_nf ?
3395  AtomicOrdering::Monotonic : AtomicOrdering::SequentiallyConsistent;
3396 
3397  auto Result = Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
3398  Ordering, Ordering);
3399  Result->setVolatile(true);
3400 
3401  return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
3402  0),
3403  RTy));
3404  }
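  // For illustration, on a 64-bit target the code above turns
  //   _InterlockedCompareExchangePointer(&p, ex, cmp)
  // into roughly (sketch; the result is cast back to the original pointer type):
  //   %pair = cmpxchg volatile i64* %dst, i64 %cmp, i64 %ex seq_cst seq_cst
  //   %old  = extractvalue { i64, i1 } %pair, 0
  //   %ret  = inttoptr i64 %old to i8*
  // The _nf ("no fence") variant uses monotonic ordering instead.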
3405  case Builtin::BI_InterlockedCompareExchange8:
3406  case Builtin::BI_InterlockedCompareExchange16:
3407  case Builtin::BI_InterlockedCompareExchange:
3408  case Builtin::BI_InterlockedCompareExchange64:
3409  return RValue::get(EmitAtomicCmpXchgForMSIntrin(*this, E));
3410  case Builtin::BI_InterlockedIncrement16:
3411  case Builtin::BI_InterlockedIncrement:
3412  return RValue::get(
3413  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
3414  case Builtin::BI_InterlockedDecrement16:
3415  case Builtin::BI_InterlockedDecrement:
3416  return RValue::get(
3417  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
3418  case Builtin::BI_InterlockedAnd8:
3419  case Builtin::BI_InterlockedAnd16:
3420  case Builtin::BI_InterlockedAnd:
3421  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
3422  case Builtin::BI_InterlockedExchangeAdd8:
3423  case Builtin::BI_InterlockedExchangeAdd16:
3424  case Builtin::BI_InterlockedExchangeAdd:
3425  return RValue::get(
3426  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
3427  case Builtin::BI_InterlockedExchangeSub8:
3428  case Builtin::BI_InterlockedExchangeSub16:
3429  case Builtin::BI_InterlockedExchangeSub:
3430  return RValue::get(
3431  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
3432  case Builtin::BI_InterlockedOr8:
3433  case Builtin::BI_InterlockedOr16:
3434  case Builtin::BI_InterlockedOr:
3435  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
3436  case Builtin::BI_InterlockedXor8:
3437  case Builtin::BI_InterlockedXor16:
3438  case Builtin::BI_InterlockedXor:
3439  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
3440 
3441  case Builtin::BI_bittest64:
3442  case Builtin::BI_bittest:
3443  case Builtin::BI_bittestandcomplement64:
3444  case Builtin::BI_bittestandcomplement:
3445  case Builtin::BI_bittestandreset64:
3446  case Builtin::BI_bittestandreset:
3447  case Builtin::BI_bittestandset64:
3448  case Builtin::BI_bittestandset:
3449  case Builtin::BI_interlockedbittestandreset:
3450  case Builtin::BI_interlockedbittestandreset64:
3451  case Builtin::BI_interlockedbittestandset64:
3452  case Builtin::BI_interlockedbittestandset:
3453  case Builtin::BI_interlockedbittestandset_acq:
3454  case Builtin::BI_interlockedbittestandset_rel:
3455  case Builtin::BI_interlockedbittestandset_nf:
3456  case Builtin::BI_interlockedbittestandreset_acq:
3457  case Builtin::BI_interlockedbittestandreset_rel:
3458  case Builtin::BI_interlockedbittestandreset_nf:
3459  return RValue::get(EmitBitTestIntrinsic(*this, BuiltinID, E));
3460 
3461  // These builtins exist to emit regular volatile loads and stores not
3462  // affected by the -fms-volatile setting.
3463  case Builtin::BI__iso_volatile_load8:
3464  case Builtin::BI__iso_volatile_load16:
3465  case Builtin::BI__iso_volatile_load32:
3466  case Builtin::BI__iso_volatile_load64:
3467  return RValue::get(EmitISOVolatileLoad(*this, E));
3468  case Builtin::BI__iso_volatile_store8:
3469  case Builtin::BI__iso_volatile_store16:
3470  case Builtin::BI__iso_volatile_store32:
3471  case Builtin::BI__iso_volatile_store64:
3472  return RValue::get(EmitISOVolatileStore(*this, E));
3473 
3474  case Builtin::BI__exception_code:
3475  case Builtin::BI_exception_code:
3476  return RValue::get(EmitSEHExceptionCode());
3477  case Builtin::BI__exception_info:
3478  case Builtin::BI_exception_info:
3479  return RValue::get(EmitSEHExceptionInfo());
3480  case Builtin::BI__abnormal_termination:
3481  case Builtin::BI_abnormal_termination:
3482  return RValue::get(EmitSEHAbnormalTermination());
3483  case Builtin::BI_setjmpex:
3484  if (getTarget().getTriple().isOSMSVCRT())
3485  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
3486  break;
3487  case Builtin::BI_setjmp:
3488  if (getTarget().getTriple().isOSMSVCRT()) {
3489  if (getTarget().getTriple().getArch() == llvm::Triple::x86)
3490  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp3, E);
3491  else if (getTarget().getTriple().getArch() == llvm::Triple::aarch64)
3492  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmpex, E);
3493  return EmitMSVCRTSetJmp(*this, MSVCSetJmpKind::_setjmp, E);
3494  }
3495  break;
3496 
3497  case Builtin::BI__GetExceptionInfo: {
3498  if (llvm::GlobalVariable *GV =
3499  CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
3500  return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
3501  break;
3502  }
3503 
3504  case Builtin::BI__fastfail:
3505  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
3506 
3507  case Builtin::BI__builtin_coro_size: {
3508  auto & Context = getContext();
3509  auto SizeTy = Context.getSizeType();
3510  auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
3511  Function *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
3512  return RValue::get(Builder.CreateCall(F));
3513  }
3514 
3515  case Builtin::BI__builtin_coro_id:
3516  return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
3517  case Builtin::BI__builtin_coro_promise:
3518  return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
3519  case Builtin::BI__builtin_coro_resume:
3520  return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
3521  case Builtin::BI__builtin_coro_frame:
3522  return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
3523  case Builtin::BI__builtin_coro_noop:
3524  return EmitCoroutineIntrinsic(E, Intrinsic::coro_noop);
3525  case Builtin::BI__builtin_coro_free:
3526  return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
3527  case Builtin::BI__builtin_coro_destroy:
3528  return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
3529  case Builtin::BI__builtin_coro_done:
3530  return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
3531  case Builtin::BI__builtin_coro_alloc:
3532  return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
3533  case Builtin::BI__builtin_coro_begin:
3534  return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
3535  case Builtin::BI__builtin_coro_end:
3536  return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
3537  case Builtin::BI__builtin_coro_suspend:
3538  return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
3539  case Builtin::BI__builtin_coro_param:
3540  return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
3541 
3542  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
3543  case Builtin::BIread_pipe:
3544  case Builtin::BIwrite_pipe: {
3545  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
3546  *Arg1 = EmitScalarExpr(E->getArg(1));
3547  CGOpenCLRuntime OpenCLRT(CGM);
3548  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3549  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3550 
3551  // Type of the generic packet parameter.
3552  unsigned GenericAS =
3553  getContext().getTargetAddressSpace(LangAS::opencl_generic);
3554  llvm::Type *I8PTy = llvm::PointerType::get(
3555  llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
3556 
3557  // Determine which overloaded version we should generate the call for.
3558  if (2U == E->getNumArgs()) {
3559  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
3560  : "__write_pipe_2";
3561  // Creating a generic function type to be able to call with any builtin or
3562  // user defined type.
3563  llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
3564  llvm::FunctionType *FTy = llvm::FunctionType::get(
3565  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3566  Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
3567  return RValue::get(
3568  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3569  {Arg0, BCast, PacketSize, PacketAlign}));
3570  } else {
3571  assert(4 == E->getNumArgs() &&
3572  "Illegal number of parameters to pipe function");
3573  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
3574  : "__write_pipe_4";
3575 
3576  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
3577  Int32Ty, Int32Ty};
3578  Value *Arg2 = EmitScalarExpr(E->getArg(2)),
3579  *Arg3 = EmitScalarExpr(E->getArg(3));
3580  llvm::FunctionType *FTy = llvm::FunctionType::get(
3581  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3582  Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
3583  // We know the third argument is an integer type, but we may need to cast
3584  // it to i32.
3585  if (Arg2->getType() != Int32Ty)
3586  Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
3587  return RValue::get(Builder.CreateCall(
3588  CGM.CreateRuntimeFunction(FTy, Name),
3589  {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
3590  }
3591  }
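  // For illustration, the two-argument form read_pipe(p, &val) is expected to
  // become a call to the runtime entry point declared above, roughly (names
  // and the exact pipe type are illustrative):
  //   call i32 @__read_pipe_2(%opencl.pipe_ro_t* %p, i8 addrspace(4)* %valcast,
  //                           i32 %packet_size, i32 %packet_align)
  // where %valcast is the destination pointer cast to the generic address space.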
3592  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe reserve read and write
3593  // functions
3594  case Builtin::BIreserve_read_pipe:
3595  case Builtin::BIreserve_write_pipe:
3596  case Builtin::BIwork_group_reserve_read_pipe:
3597  case Builtin::BIwork_group_reserve_write_pipe:
3598  case Builtin::BIsub_group_reserve_read_pipe:
3599  case Builtin::BIsub_group_reserve_write_pipe: {
3600  // Composing the mangled name for the function.
3601  const char *Name;
3602  if (BuiltinID == Builtin::BIreserve_read_pipe)
3603  Name = "__reserve_read_pipe";
3604  else if (BuiltinID == Builtin::BIreserve_write_pipe)
3605  Name = "__reserve_write_pipe";
3606  else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
3607  Name = "__work_group_reserve_read_pipe";
3608  else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
3609  Name = "__work_group_reserve_write_pipe";
3610  else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
3611  Name = "__sub_group_reserve_read_pipe";
3612  else
3613  Name = "__sub_group_reserve_write_pipe";
3614 
3615  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
3616  *Arg1 = EmitScalarExpr(E->getArg(1));
3617  llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
3618  CGOpenCLRuntime OpenCLRT(CGM);
3619  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3620  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3621 
3622  // Building the generic function prototype.
3623  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
3624  llvm::FunctionType *FTy = llvm::FunctionType::get(
3625  ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3626  // We know the second argument is an integer type, but we may need to cast
3627  // it to i32.
3628  if (Arg1->getType() != Int32Ty)
3629  Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
3630  return RValue::get(
3631  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3632  {Arg0, Arg1, PacketSize, PacketAlign}));
3633  }
3634  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
3635  // functions
3636  case Builtin::BIcommit_read_pipe:
3637  case Builtin::BIcommit_write_pipe:
3638  case Builtin::BIwork_group_commit_read_pipe:
3639  case Builtin::BIwork_group_commit_write_pipe:
3640  case Builtin::BIsub_group_commit_read_pipe:
3641  case Builtin::BIsub_group_commit_write_pipe: {
3642  const char *Name;
3643  if (BuiltinID == Builtin::BIcommit_read_pipe)
3644  Name = "__commit_read_pipe";
3645  else if (BuiltinID == Builtin::BIcommit_write_pipe)
3646  Name = "__commit_write_pipe";
3647  else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
3648  Name = "__work_group_commit_read_pipe";
3649  else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
3650  Name = "__work_group_commit_write_pipe";
3651  else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
3652  Name = "__sub_group_commit_read_pipe";
3653  else
3654  Name = "__sub_group_commit_write_pipe";
3655 
3656  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
3657  *Arg1 = EmitScalarExpr(E->getArg(1));
3658  CGOpenCLRuntime OpenCLRT(CGM);
3659  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3660  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3661 
3662  // Building the generic function prototype.
3663  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
3664  llvm::FunctionType *FTy =
3665  llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
3666  llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3667 
3668  return RValue::get(
3669  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3670  {Arg0, Arg1, PacketSize, PacketAlign}));
3671  }
3672  // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
3673  case Builtin::BIget_pipe_num_packets:
3674  case Builtin::BIget_pipe_max_packets: {
3675  const char *BaseName;
3676  const PipeType *PipeTy = E->getArg(0)->getType()->getAs<PipeType>();
3677  if (BuiltinID == Builtin::BIget_pipe_num_packets)
3678  BaseName = "__get_pipe_num_packets";
3679  else
3680  BaseName = "__get_pipe_max_packets";
3681  auto Name = std::string(BaseName) +
3682  std::string(PipeTy->isReadOnly() ? "_ro" : "_wo");
3683 
3684  // Building the generic function prototype.
3685  Value *Arg0 = EmitScalarExpr(E->getArg(0));
3686  CGOpenCLRuntime OpenCLRT(CGM);
3687  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
3688  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
3689  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
3690  llvm::FunctionType *FTy = llvm::FunctionType::get(
3691  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3692 
3693  return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3694  {Arg0, PacketSize, PacketAlign}));
3695  }
3696 
3697  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
3698  case Builtin::BIto_global:
3699  case Builtin::BIto_local:
3700  case Builtin::BIto_private: {
3701  auto Arg0 = EmitScalarExpr(E->getArg(0));
3702  auto NewArgT = llvm::PointerType::get(Int8Ty,
3703  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
3704  auto NewRetT = llvm::PointerType::get(Int8Ty,
3705  CGM.getContext().getTargetAddressSpace(
3706  E->getType()->getPointeeType().getAddressSpace()));
3707  auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
3708  llvm::Value *NewArg;
3709  if (Arg0->getType()->getPointerAddressSpace() !=
3710  NewArgT->getPointerAddressSpace())
3711  NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
3712  else
3713  NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
3714  auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
3715  auto NewCall =
3716  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
3717  return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
3718  ConvertType(E->getType())));
3719  }
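  // For illustration, to_global(p) is expected to lower roughly to (sketch;
  // address-space numbers are target dependent):
  //   %g = addrspacecast i32* %p to i8 addrspace(4)*
  //   %r = call i8 addrspace(1)* @__to_global(i8 addrspace(4)* %g)
  // followed by a pointer cast of %r back to the source element type.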
3720 
3721  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
3722  // It contains four different overload formats specified in Table 6.13.17.1.
3723  case Builtin::BIenqueue_kernel: {
3724  StringRef Name; // Generated function call name
3725  unsigned NumArgs = E->getNumArgs();
3726 
3727  llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
3728  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3729  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3730 
3731  llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
3732  llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
3733  LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
3734  llvm::Value *Range = NDRangeL.getAddress().getPointer();
3735  llvm::Type *RangeTy = NDRangeL.getAddress().getType();
3736 
3737  if (NumArgs == 4) {
3738  // The most basic form of the call with parameters:
3739  // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
3740  Name = "__enqueue_kernel_basic";
3741  llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy,
3742  GenericVoidPtrTy};
3743  llvm::FunctionType *FTy = llvm::FunctionType::get(
3744  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3745 
3746  auto Info =
3747  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
3748  llvm::Value *Kernel =
3749  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3750  llvm::Value *Block =
3751  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3752 
3753  AttrBuilder B;
3754  B.addByValAttr(NDRangeL.getAddress().getElementType());
3755  llvm::AttributeList ByValAttrSet =
3756  llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
3757 
3758  auto RTCall =
3759  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
3760  {Queue, Flags, Range, Kernel, Block});
3761  RTCall->setAttributes(ByValAttrSet);
3762  return RValue::get(RTCall);
3763  }
3764  assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
3765 
3766  // Create a temporary array to hold the sizes of local pointer arguments
3767  // for the block. \p First is the position of the first size argument.
3768  auto CreateArrayForSizeVar = [=](unsigned First)
3769  -> std::tuple<llvm::Value *, llvm::Value *, llvm::Value *> {
3770  llvm::APInt ArraySize(32, NumArgs - First);
3771  QualType SizeArrayTy = getContext().getConstantArrayType(
3772  getContext().getSizeType(), ArraySize, ArrayType::Normal,
3773  /*IndexTypeQuals=*/0);
3774  auto Tmp = CreateMemTemp(SizeArrayTy, "block_sizes");
3775  llvm::Value *TmpPtr = Tmp.getPointer();
3776  llvm::Value *TmpSize = EmitLifetimeStart(
3777  CGM.getDataLayout().getTypeAllocSize(Tmp.getElementType()), TmpPtr);
3778  llvm::Value *ElemPtr;
3779  // Each of the following arguments specifies the size of the corresponding
3780  // argument passed to the enqueued block.
3781  auto *Zero = llvm::ConstantInt::get(IntTy, 0);
3782  for (unsigned I = First; I < NumArgs; ++I) {
3783  auto *Index = llvm::ConstantInt::get(IntTy, I - First);
3784  auto *GEP = Builder.CreateGEP(TmpPtr, {Zero, Index});
3785  if (I == First)
3786  ElemPtr = GEP;
3787  auto *V =
3788  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy);
3789  Builder.CreateAlignedStore(
3790  V, GEP, CGM.getDataLayout().getPrefTypeAlignment(SizeTy));
3791  }
3792  return std::tie(ElemPtr, TmpSize, TmpPtr);
3793  };
3794 
3795  // Could have events and/or varargs.
3796  if (E->getArg(3)->getType()->isBlockPointerType()) {
3797  // No events passed, but has variadic arguments.
3798  Name = "__enqueue_kernel_varargs";
3799  auto Info =
3800  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(3));
3801  llvm::Value *Kernel =
3802  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3803  auto *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3804  llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
3805  std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(4);
3806 
3807  // Create a vector of the arguments, as well as a constant value to
3808  // express to the runtime the number of variadic arguments.
3809  std::vector<llvm::Value *> Args = {
3810  Queue, Flags, Range,
3811  Kernel, Block, ConstantInt::get(IntTy, NumArgs - 4),
3812  ElemPtr};
3813  std::vector<llvm::Type *> ArgTys = {
3814  QueueTy, IntTy, RangeTy, GenericVoidPtrTy,
3815  GenericVoidPtrTy, IntTy, ElemPtr->getType()};
3816 
3817  llvm::FunctionType *FTy = llvm::FunctionType::get(
3818  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3819  auto Call =
3820  RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3821  llvm::ArrayRef<llvm::Value *>(Args)));
3822  if (TmpSize)
3823  EmitLifetimeEnd(TmpSize, TmpPtr);
3824  return Call;
3825  }
3826  // Any calls now have event arguments passed.
3827  if (NumArgs >= 7) {
3828  llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
3829  llvm::PointerType *EventPtrTy = EventTy->getPointerTo(
3830  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
3831 
3832  llvm::Value *NumEvents =
3833  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
3834 
3835  // Since SemaOpenCLBuiltinEnqueueKernel allows fifth and sixth arguments
3836  // to be a null pointer constant (including `0` literal), we can take it
3837  // into account and emit null pointer directly.
3838  llvm::Value *EventWaitList = nullptr;
3839  if (E->getArg(4)->isNullPointerConstant(
3840  getContext(), Expr::NPC_ValueDependentIsNotNull)) {
3841  EventWaitList = llvm::ConstantPointerNull::get(EventPtrTy);
3842  } else {
3843  EventWaitList = E->getArg(4)->getType()->isArrayType()
3844  ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
3845  : EmitScalarExpr(E->getArg(4));
3846  // Convert to generic address space.
3847  EventWaitList = Builder.CreatePointerCast(EventWaitList, EventPtrTy);
3848  }
3849  llvm::Value *EventRet = nullptr;
3850  if (E->getArg(5)->isNullPointerConstant(
3851  getContext(), Expr::NPC_ValueDependentIsNotNull)) {
3852  EventRet = llvm::ConstantPointerNull::get(EventPtrTy);
3853  } else {
3854  EventRet =
3855  Builder.CreatePointerCast(EmitScalarExpr(E->getArg(5)), EventPtrTy);
3856  }
3857 
3858  auto Info =
3859  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(6));
3860  llvm::Value *Kernel =
3861  Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3862  llvm::Value *Block =
3863  Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3864 
3865  std::vector<llvm::Type *> ArgTys = {
3866  QueueTy, Int32Ty, RangeTy, Int32Ty,
3867  EventPtrTy, EventPtrTy, GenericVoidPtrTy, GenericVoidPtrTy};
3868 
3869  std::vector<llvm::Value *> Args = {Queue, Flags, Range,
3870  NumEvents, EventWaitList, EventRet,
3871  Kernel, Block};
3872 
3873  if (NumArgs == 7) {
3874  // Has events but no variadics.
3875  Name = "__enqueue_kernel_basic_events";
3876  llvm::FunctionType *FTy = llvm::FunctionType::get(
3877  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3878  return RValue::get(
3879  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3880  llvm::ArrayRef<llvm::Value *>(Args)));
3881  }
3882  // Has event info and variadics
3883  // Pass the number of variadics to the runtime function too.
3884  Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
3885  ArgTys.push_back(Int32Ty);
3886  Name = "__enqueue_kernel_events_varargs";
3887 
3888  llvm::Value *ElemPtr, *TmpSize, *TmpPtr;
3889  std::tie(ElemPtr, TmpSize, TmpPtr) = CreateArrayForSizeVar(7);
3890  Args.push_back(ElemPtr);
3891  ArgTys.push_back(ElemPtr->getType());
3892 
3893  llvm::FunctionType *FTy = llvm::FunctionType::get(
3894  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
3895  auto Call =
3896  RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
3897  llvm::ArrayRef<llvm::Value *>(Args)));
3898  if (TmpSize)
3899  EmitLifetimeEnd(TmpSize, TmpPtr);
3900  return Call;
3901  }
3902  LLVM_FALLTHROUGH;
3903  }
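  // For illustration, the simplest four-argument overload
  //   enqueue_kernel(q, flags, ndrange, ^{ ... });
  // is expected to become a call to the first runtime entry point above,
  // roughly (sketch; types and address spaces are target dependent):
  //   call i32 @__enqueue_kernel_basic(%opencl.queue_t* %q, i32 %flags,
  //            %struct.ndrange_t* byval %nd, i8 addrspace(4)* %invoke,
  //            i8 addrspace(4)* %block)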
3904  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
3905  // parameter.
3906  case Builtin::BIget_kernel_work_group_size: {
3907  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3908  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3909  auto Info =
3910  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3911  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3912  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3913  return RValue::get(Builder.CreateCall(
3914  CGM.CreateRuntimeFunction(
3915  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3916  false),
3917  "__get_kernel_work_group_size_impl"),
3918  {Kernel, Arg}));
3919  }
3920  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
3921  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3922  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3923  auto Info =
3924  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(0));
3925  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3926  Value *Arg = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3927  return RValue::get(Builder.CreateCall(
3928  CGM.CreateRuntimeFunction(
3929  llvm::FunctionType::get(IntTy, {GenericVoidPtrTy, GenericVoidPtrTy},
3930  false),
3931  "__get_kernel_preferred_work_group_size_multiple_impl"),
3932  {Kernel, Arg}));
3933  }
3934  case Builtin::BIget_kernel_max_sub_group_size_for_ndrange:
3935  case Builtin::BIget_kernel_sub_group_count_for_ndrange: {
3936  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
3937  getContext().getTargetAddressSpace(LangAS::opencl_generic));
3938  LValue NDRangeL = EmitAggExprToLValue(E->getArg(0));
3939  llvm::Value *NDRange = NDRangeL.getAddress().getPointer();
3940  auto Info =
3941  CGM.getOpenCLRuntime().emitOpenCLEnqueuedBlock(*this, E->getArg(1));
3942  Value *Kernel = Builder.CreatePointerCast(Info.Kernel, GenericVoidPtrTy);
3943  Value *Block = Builder.CreatePointerCast(Info.BlockArg, GenericVoidPtrTy);
3944  const char *Name =
3945  BuiltinID == Builtin::BIget_kernel_max_sub_group_size_for_ndrange
3946  ? "__get_kernel_max_sub_group_size_for_ndrange_impl"
3947  : "__get_kernel_sub_group_count_for_ndrange_impl";
3948  return RValue::get(Builder.CreateCall(
3949  CGM.CreateRuntimeFunction(
3950  llvm::FunctionType::get(
3951  IntTy, {NDRange->getType(), GenericVoidPtrTy, GenericVoidPtrTy},
3952  false),
3953  Name),
3954  {NDRange, Kernel, Block}));
3955  }
3956 
3957  case Builtin::BI__builtin_store_half:
3958  case Builtin::BI__builtin_store_halff: {
3959  Value *Val = EmitScalarExpr(E->getArg(0));
3960  Address Address = EmitPointerWithAlignment(E->getArg(1));
3961  Value *HalfVal = Builder.CreateFPTrunc(Val, Builder.getHalfTy());
3962  return RValue::get(Builder.CreateStore(HalfVal, Address));
3963  }
3964  case Builtin::BI__builtin_load_half: {
3965  Address Address = EmitPointerWithAlignment(E->getArg(0));
3966  Value *HalfVal = Builder.CreateLoad(Address);
3967  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getDoubleTy()));
3968  }
3969  case Builtin::BI__builtin_load_halff: {
3970  Address Address = EmitPointerWithAlignment(E->getArg(0));
3971  Value *HalfVal = Builder.CreateLoad(Address);
3972  return RValue::get(Builder.CreateFPExt(HalfVal, Builder.getFloatTy()));
3973  }
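  // For illustration, these builtins pair an fptrunc/fpext with a plain
  // load/store of a half value, roughly:
  //   __builtin_store_halff(f, p):  %h = fptrunc float %f to half
  //                                 store half %h, half* %p
  //   __builtin_load_halff(p):      %h = load half, half* %p
  //                                 %r = fpext half %h to float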
3974  case Builtin::BIprintf:
3975  if (getTarget().getTriple().isNVPTX())
3976  return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
3977  break;
3978  case Builtin::BI__builtin_canonicalize:
3979  case Builtin::BI__builtin_canonicalizef:
3980  case Builtin::BI__builtin_canonicalizel:
3981  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
3982 
3983  case Builtin::BI__builtin_thread_pointer: {
3984  if (!getContext().getTargetInfo().isTLSSupported())
3985  CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
3986  // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
3987  break;
3988  }
3989  case Builtin::BI__builtin_os_log_format:
3990  return emitBuiltinOSLogFormat(*E);
3991 
3992  case Builtin::BI__xray_customevent: {
3993  if (!ShouldXRayInstrumentFunction())
3994  return RValue::getIgnored();
3995 
3996  if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
3997  XRayInstrKind::Custom))
3998  return RValue::getIgnored();
3999 
4000  if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
4001  if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayCustomEvents())
4002  return RValue::getIgnored();
4003 
4004  Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
4005  auto FTy = F->getFunctionType();
4006  auto Arg0 = E->getArg(0);
4007  auto Arg0Val = EmitScalarExpr(Arg0);
4008  auto Arg0Ty = Arg0->getType();
4009  auto PTy0 = FTy->getParamType(0);
4010  if (PTy0 != Arg0Val->getType()) {
4011  if (Arg0Ty->isArrayType())
4012  Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
4013  else
4014  Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
4015  }
4016  auto Arg1 = EmitScalarExpr(E->getArg(1));
4017  auto PTy1 = FTy->getParamType(1);
4018  if (PTy1 != Arg1->getType())
4019  Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
4020  return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
4021  }
4022 
4023  case Builtin::BI__xray_typedevent: {
4024  // TODO: There should be a way to always emit events even if the current
4025  // function is not instrumented. Losing events in a stream can cripple
4026  // a trace.
4027  if (!ShouldXRayInstrumentFunction())
4028  return RValue::getIgnored();
4029 
4030  if (!CGM.getCodeGenOpts().XRayInstrumentationBundle.has(
4031  XRayInstrKind::Typed))
4032  return RValue::getIgnored();
4033 
4034  if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>())
4035  if (XRayAttr->neverXRayInstrument() && !AlwaysEmitXRayTypedEvents())
4036  return RValue::getIgnored();
4037 
4038  Function *F = CGM.getIntrinsic(Intrinsic::xray_typedevent);
4039  auto FTy = F->getFunctionType();
4040  auto Arg0 = EmitScalarExpr(E->getArg(0));
4041  auto PTy0 = FTy->getParamType(0);
4042  if (PTy0 != Arg0->getType())
4043  Arg0 = Builder.CreateTruncOrBitCast(Arg0, PTy0);
4044  auto Arg1 = E->getArg(1);
4045  auto Arg1Val = EmitScalarExpr(Arg1);
4046  auto Arg1Ty = Arg1->getType();
4047  auto PTy1 = FTy->getParamType(1);
4048  if (PTy1 != Arg1Val->getType()) {
4049  if (Arg1Ty->isArrayType())
4050  Arg1Val = EmitArrayToPointerDecay(Arg1).getPointer();
4051  else
4052  Arg1Val = Builder.CreatePointerCast(Arg1Val, PTy1);
4053  }
4054  auto Arg2 = EmitScalarExpr(E->getArg(2));
4055  auto PTy2 = FTy->getParamType(2);
4056  if (PTy2 != Arg2->getType())
4057  Arg2 = Builder.CreateTruncOrBitCast(Arg2, PTy2);
4058  return RValue::get(Builder.CreateCall(F, {Arg0, Arg1Val, Arg2}));
4059  }
4060 
4061  case Builtin::BI__builtin_ms_va_start:
4062  case Builtin::BI__builtin_ms_va_end:
4063  return RValue::get(
4064  EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
4065  BuiltinID == Builtin::BI__builtin_ms_va_start));
4066 
4067  case Builtin::BI__builtin_ms_va_copy: {
4068  // Lower this manually. We can't reliably determine whether or not any
4069  // given va_copy() is for a Win64 va_list from the calling convention
4070  // alone, because it's legal to do this from a System V ABI function.
4071  // With opaque pointer types, we won't have enough information in LLVM
4072  // IR to determine this from the argument types, either. Best to do it
4073  // now, while we have enough information.
4074  Address DestAddr = EmitMSVAListRef(E->getArg(0));
4075  Address SrcAddr = EmitMSVAListRef(E->getArg(1));
4076 
4077  llvm::Type *BPP = Int8PtrPtrTy;
4078 
4079  DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
4080  DestAddr.getAlignment());
4081  SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
4082  SrcAddr.getAlignment());
4083 
4084  Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
4085  return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
4086  }
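  // For illustration, because a Win64 va_list is just a char*, the copy above
  // amounts to one pointer load and one store, roughly:
  //   %ap.val = load i8*, i8** %ap
  //   store i8* %ap.val, i8** %cp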
4087  }
4088 
4089  // If this is an alias for a lib function (e.g. __builtin_sin), emit
4090  // the call using the normal call path, but using the unmangled
4091  // version of the function name.
4092  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
4093  return emitLibraryCall(*this, FD, E,
4094  CGM.getBuiltinLibFunction(FD, BuiltinID));
4095 
4096  // If this is a predefined lib function (e.g. malloc), emit the call
4097  // using exactly the normal call path.
4098  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
4099  return emitLibraryCall(*this, FD, E,
4100  cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
4101 
4102  // Check that a call to a target specific builtin has the correct target
4103  // features.
4104  // This is done down here to avoid checking non-target-specific builtins;
4105  // however, if generic builtins start to require generic target features
4106  // then we can move this up to the beginning of the function.
4107  checkTargetFeatures(E, FD);
4108 
4109  if (unsigned VectorWidth = getContext().BuiltinInfo.getRequiredVectorWidth(BuiltinID))
4110  LargestVectorWidth = std::max(LargestVectorWidth, VectorWidth);
4111 
4112  // See if we have a target specific intrinsic.
4113  const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
4114  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
4115  StringRef Prefix =
4116  llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
4117  if (!Prefix.empty()) {
4118  IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
4119  // NOTE: we don't need to perform a compatibility flag check here since the
4120  // intrinsics are declared in Builtins*.def via LANGBUILTIN, which gates the
4121  // MS builtins on ALL_MS_LANGUAGES, so they are filtered earlier.
4122  if (IntrinsicID == Intrinsic::not_intrinsic)
4123  IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
4124  }
4125 
4126  if (IntrinsicID != Intrinsic::not_intrinsic) {
4127  SmallVector<Value*, 16> Args;
4128 
4129  // Find out if any arguments are required to be integer constant
4130  // expressions.
4131  unsigned ICEArguments = 0;
4132  ASTContext::GetBuiltinTypeError Error;
4133  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4134  assert(Error == ASTContext::GE_None && "Should not codegen an error");
4135 
4136  Function *F = CGM.getIntrinsic(IntrinsicID);
4137  llvm::FunctionType *FTy = F->getFunctionType();
4138 
4139  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
4140  Value *ArgValue;
4141  // If this is a normal argument, just emit it as a scalar.
4142  if ((ICEArguments & (1 << i)) == 0) {
4143  ArgValue = EmitScalarExpr(E->getArg(i));
4144  } else {
4145  // If this is required to be a constant, constant fold it so that we
4146  // know that the generated intrinsic gets a ConstantInt.
4147  llvm::APSInt Result;
4148  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4149  assert(IsConst && "Constant arg isn't actually constant?");
4150  (void)IsConst;
4151  ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
4152  }
4153 
4154  // If the intrinsic arg type is different from the builtin arg type
4155  // we need to do a bit cast.
4156  llvm::Type *PTy = FTy->getParamType(i);
4157  if (PTy != ArgValue->getType()) {
4158  // XXX - vector of pointers?
4159  if (auto *PtrTy = dyn_cast<llvm::PointerType>(PTy)) {
4160  if (PtrTy->getAddressSpace() !=
4161  ArgValue->getType()->getPointerAddressSpace()) {
4162  ArgValue = Builder.CreateAddrSpaceCast(
4163  ArgValue,
4164  ArgValue->getType()->getPointerTo(PtrTy->getAddressSpace()));
4165  }
4166  }
4167 
4168  assert(ArgValue->getType()->canLosslesslyBitCastTo(PTy) &&
4169  "Must be able to losslessly bit cast to param");
4170  ArgValue = Builder.CreateBitCast(ArgValue, PTy);
4171  }
4172 
4173  Args.push_back(ArgValue);
4174  }
4175 
4176  Value *V = Builder.CreateCall(F, Args);
4177  QualType BuiltinRetType = E->getType();
4178 
4179  llvm::Type *RetTy = VoidTy;
4180  if (!BuiltinRetType->isVoidType())
4181  RetTy = ConvertType(BuiltinRetType);
4182 
4183  if (RetTy != V->getType()) {
4184  // XXX - vector of pointers?
4185  if (auto *PtrTy = dyn_cast<llvm::PointerType>(RetTy)) {
4186  if (PtrTy->getAddressSpace() != V->getType()->getPointerAddressSpace()) {
4187  V = Builder.CreateAddrSpaceCast(
4188  V, V->getType()->getPointerTo(PtrTy->getAddressSpace()));
4189  }
4190  }
4191 
4192  assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
4193  "Must be able to losslessly bit cast result type");
4194  V = Builder.CreateBitCast(V, RetTy);
4195  }
4196 
4197  return RValue::get(V);
4198  }
4199 
4200  // See if we have a target specific builtin that needs to be lowered.
4201  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
4202  return RValue::get(V);
4203 
4204  ErrorUnsupported(E, "builtin function");
4205 
4206  // Unknown builtin, for now just dump it out and return undef.
4207  return GetUndefRValue(E->getType());
4208 }
4209 
4210 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
4211  unsigned BuiltinID, const CallExpr *E,
4212  llvm::Triple::ArchType Arch) {
4213  switch (Arch) {
4214  case llvm::Triple::arm:
4215  case llvm::Triple::armeb:
4216  case llvm::Triple::thumb:
4217  case llvm::Triple::thumbeb:
4218  return CGF->EmitARMBuiltinExpr(BuiltinID, E, Arch);
4219  case llvm::Triple::aarch64:
4220  case llvm::Triple::aarch64_be:
4221  return CGF->EmitAArch64BuiltinExpr(BuiltinID, E, Arch);
4222  case llvm::Triple::x86:
4223  case llvm::Triple::x86_64:
4224  return CGF->EmitX86BuiltinExpr(BuiltinID, E);
4225  case llvm::Triple::ppc:
4226  case llvm::Triple::ppc64:
4227  case llvm::Triple::ppc64le:
4228  return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
4229  case llvm::Triple::r600:
4230  case llvm::Triple::amdgcn:
4231  return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
4232  case llvm::Triple::systemz:
4233  return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
4234  case llvm::Triple::nvptx:
4235  case llvm::Triple::nvptx64:
4236  return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
4237  case llvm::Triple::wasm32:
4238  case llvm::Triple::wasm64:
4239  return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
4240  case llvm::Triple::hexagon:
4241  return CGF->EmitHexagonBuiltinExpr(BuiltinID, E);
4242  default:
4243  return nullptr;
4244  }
4245 }
4246 
4247 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
4248  const CallExpr *E) {
4249  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
4250  assert(getContext().getAuxTargetInfo() && "Missing aux target info");
4251  return EmitTargetArchBuiltinExpr(
4252  this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
4253  getContext().getAuxTargetInfo()->getTriple().getArch());
4254  }
4255 
4256  return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
4257  getTarget().getTriple().getArch());
4258 }
4259 
4260 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
4261  NeonTypeFlags TypeFlags,
4262  bool HasLegalHalfType=true,
4263  bool V1Ty=false) {
4264  int IsQuad = TypeFlags.isQuad();
4265  switch (TypeFlags.getEltType()) {
4266  case NeonTypeFlags::Int8:
4267  case NeonTypeFlags::Poly8:
4268  return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
4269  case NeonTypeFlags::Int16:
4270  case NeonTypeFlags::Poly16:
4271  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
4272  case NeonTypeFlags::Float16:
4273  if (HasLegalHalfType)
4274  return llvm::VectorType::get(CGF->HalfTy, V1Ty ? 1 : (4 << IsQuad));
4275  else
4276  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
4277  case NeonTypeFlags::Int32:
4278  return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
4279  case NeonTypeFlags::Int64:
4280  case NeonTypeFlags::Poly64:
4281  return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
4282  case NeonTypeFlags::Poly128:
4283  // FIXME: i128 and f128 aren't fully supported in Clang and LLVM;
4284  // a lot of the i128 and f128 API is still missing,
4285  // so we use v16i8 to represent poly128 and rely on pattern matching.
4286  return llvm::VectorType::get(CGF->Int8Ty, 16);
4287  case NeonTypeFlags::Float32:
4288  return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
4289  case NeonTypeFlags::Float64:
4290  return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
4291  }
4292  llvm_unreachable("Unknown vector element type!");
4293 }
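  // For example, Int32 flags with the quad bit set map to <4 x i32>, and the
  // same flags without the quad bit map to <2 x i32>.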
4294 
4295 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
4296  NeonTypeFlags IntTypeFlags) {
4297  int IsQuad = IntTypeFlags.isQuad();
4298  switch (IntTypeFlags.getEltType()) {
4299  case NeonTypeFlags::Int16:
4300  return llvm::VectorType::get(CGF->HalfTy, (4 << IsQuad));
4301  case NeonTypeFlags::Int32:
4302  return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
4303  case NeonTypeFlags::Int64:
4304  return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
4305  default:
4306  llvm_unreachable("Type can't be converted to floating-point!");
4307  }
4308 }
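  // GetFloatNeonType returns the floating-point vector with the same lane
  // count, e.g. the quad Int32 flags above map to <4 x float> here.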
4309 
4310 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
4311  unsigned nElts = V->getType()->getVectorNumElements();
4312  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
4313  return Builder.CreateShuffleVector(V, V, SV, "lane");
4314 }
4315 
4316 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
4317  const char *name,
4318  unsigned shift, bool rightshift) {
4319  unsigned j = 0;
4320  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
4321  ai != ae; ++ai, ++j)
4322  if (shift > 0 && shift == j)
4323  Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
4324  else
4325  Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
4326 
4327  return Builder.CreateCall(F, Ops, name);
4328 }
4329 
4330 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
4331  bool neg) {
4332  int SV = cast<ConstantInt>(V)->getSExtValue();
4333  return ConstantInt::get(Ty, neg ? -SV : SV);
4334 }
4335 
4336 // Right-shift a vector by a constant.
4337 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
4338  llvm::Type *Ty, bool usgn,
4339  const char *name) {
4340  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
4341 
4342  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
4343  int EltSize = VTy->getScalarSizeInBits();
4344 
4345  Vec = Builder.CreateBitCast(Vec, Ty);
4346 
4347  // lshr/ashr are undefined when the shift amount is equal to the vector
4348  // element size.
4349  if (ShiftAmt == EltSize) {
4350  if (usgn) {
4351  // Right-shifting an unsigned value by its size yields 0.
4352  return llvm::ConstantAggregateZero::get(VTy);
4353  } else {
4354  // Right-shifting a signed value by its size is equivalent
4355  // to a shift of size-1.
4356  --ShiftAmt;
4357  Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
4358  }
4359  }
4360 
4361  Shift = EmitNeonShiftVector(Shift, Ty, false);
4362  if (usgn)
4363  return Builder.CreateLShr(Vec, Shift, name);
4364  else
4365  return Builder.CreateAShr(Vec, Shift, name);
4366 }
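  // For example, a signed right shift of a <4 x i32> vector by 32 is emitted
  // as an ashr by 31, while the equivalent unsigned shift folds to an
  // all-zeros vector, since lshr/ashr by the full element width is undefined.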
4367 
4368 enum {
4369  AddRetType = (1 << 0),
4370  Add1ArgType = (1 << 1),
4371  Add2ArgTypes = (1 << 2),
4372 
4373  VectorizeRetType = (1 << 3),
4374  VectorizeArgTypes = (1 << 4),
4375 
4376  InventFloatType = (1 << 5),
4377  UnsignedAlts = (1 << 6),
4378 
4379  Use64BitVectors = (1 << 7),
4380  Use128BitVectors = (1 << 8),
4381 
4382  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
4383  VectorRet = AddRetType | VectorizeRetType,
4384  VectorRetGetArgs01 =
4385  AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
4386  FpCmpzModifiers =
4387  AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
4388 };
4389 
4390 namespace {
4391 struct NeonIntrinsicInfo {
4392  const char *NameHint;
4393  unsigned BuiltinID;
4394  unsigned LLVMIntrinsic;
4395  unsigned AltLLVMIntrinsic;
4396  unsigned TypeModifier;
4397 
4398  bool operator<(unsigned RHSBuiltinID) const {
4399  return BuiltinID < RHSBuiltinID;
4400  }
4401  bool operator<(const NeonIntrinsicInfo &TE) const {
4402  return BuiltinID < TE.BuiltinID;
4403  }
4404 };
4405 } // end anonymous namespace
4406 
4407 #define NEONMAP0(NameBase) \
4408  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
4409 
4410 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
4411  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
4412  Intrinsic::LLVMIntrinsic, 0, TypeModifier }
4413 
4414 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
4415  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
4416  Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
4417  TypeModifier }
4418 
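  // For example, NEONMAP1(vabs_v, arm_neon_vabs, 0) expands to the table entry
  // { "vabs_v", NEON::BI__builtin_neon_vabs_v, Intrinsic::arm_neon_vabs, 0, 0 }.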
4419 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
4420  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
4421  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
4422  NEONMAP1(vabs_v, arm_neon_vabs, 0),
4423  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
4424  NEONMAP0(vaddhn_v),
4425  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
4426  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
4427  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
4428  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
4429  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
4430  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
4431  NEONMAP1(vcage_v, arm_neon_vacge, 0),
4432  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
4433  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
4434  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
4435  NEONMAP1(vcale_v, arm_neon_vacge, 0),
4436  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
4437  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
4438  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
4439  NEONMAP0(vceqz_v),
4440  NEONMAP0(vceqzq_v),
4441  NEONMAP0(vcgez_v),
4442  NEONMAP0(vcgezq_v),
4443  NEONMAP0(vcgtz_v),
4444  NEONMAP0(vcgtzq_v),
4445  NEONMAP0(vclez_v),
4446  NEONMAP0(vclezq_v),
4447  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
4448  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
4449  NEONMAP0(vcltz_v),
4450  NEONMAP0(vcltzq_v),
4451  NEONMAP1(vclz_v, ctlz, Add1ArgType),
4452  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
4453  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
4454  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
4455  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
4456  NEONMAP0(vcvt_f16_v),
4457  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
4458  NEONMAP0(vcvt_f32_v),
4459  NEONMAP2(vcvt_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
4460  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
4461  NEONMAP1(vcvt_n_s16_v, arm_neon_vcvtfp2fxs, 0),
4462  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
4463  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
4464  NEONMAP1(vcvt_n_u16_v, arm_neon_vcvtfp2fxu, 0),
4465  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
4466  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
4467  NEONMAP0(vcvt_s16_v),
4468  NEONMAP0(vcvt_s32_v),
4469  NEONMAP0(vcvt_s64_v),
4470  NEONMAP0(vcvt_u16_v),
4471  NEONMAP0(vcvt_u32_v),
4472  NEONMAP0(vcvt_u64_v),
4473  NEONMAP1(vcvta_s16_v, arm_neon_vcvtas, 0),
4474  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
4475  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
4476  NEONMAP1(vcvta_u16_v, arm_neon_vcvtau, 0),
4477  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
4478  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
4479  NEONMAP1(vcvtaq_s16_v, arm_neon_vcvtas, 0),
4480  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
4481  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
4482  NEONMAP1(vcvtaq_u16_v, arm_neon_vcvtau, 0),
4483  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
4484  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
4485  NEONMAP1(vcvtm_s16_v, arm_neon_vcvtms, 0),
4486  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
4487  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
4488  NEONMAP1(vcvtm_u16_v, arm_neon_vcvtmu, 0),
4489  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
4490  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
4491  NEONMAP1(vcvtmq_s16_v, arm_neon_vcvtms, 0),
4492  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
4493  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
4494  NEONMAP1(vcvtmq_u16_v, arm_neon_vcvtmu, 0),
4495  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
4496  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
4497  NEONMAP1(vcvtn_s16_v, arm_neon_vcvtns, 0),
4498  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
4499  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
4500  NEONMAP1(vcvtn_u16_v, arm_neon_vcvtnu, 0),
4501  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
4502  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
4503  NEONMAP1(vcvtnq_s16_v, arm_neon_vcvtns, 0),
4504  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
4505  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
4506  NEONMAP1(vcvtnq_u16_v, arm_neon_vcvtnu, 0),
4507  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
4508  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
4509  NEONMAP1(vcvtp_s16_v, arm_neon_vcvtps, 0),
4510  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
4511  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
4512  NEONMAP1(vcvtp_u16_v, arm_neon_vcvtpu, 0),
4513  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
4514  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
4515  NEONMAP1(vcvtpq_s16_v, arm_neon_vcvtps, 0),
4516  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
4517  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
4518  NEONMAP1(vcvtpq_u16_v, arm_neon_vcvtpu, 0),
4519  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
4520  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
4521  NEONMAP0(vcvtq_f16_v),
4522  NEONMAP0(vcvtq_f32_v),
4523  NEONMAP2(vcvtq_n_f16_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
4524  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
4525  NEONMAP1(vcvtq_n_s16_v, arm_neon_vcvtfp2fxs, 0),
4526  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
4527  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
4528  NEONMAP1(vcvtq_n_u16_v, arm_neon_vcvtfp2fxu, 0),
4529  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
4530  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
4531  NEONMAP0(vcvtq_s16_v),
4532  NEONMAP0(vcvtq_s32_v),
4533  NEONMAP0(vcvtq_s64_v),
4534  NEONMAP0(vcvtq_u16_v),
4535  NEONMAP0(vcvtq_u32_v),
4536  NEONMAP0(vcvtq_u64_v),
4537  NEONMAP2(vdot_v, arm_neon_udot, arm_neon_sdot, 0),
4538  NEONMAP2(vdotq_v, arm_neon_udot, arm_neon_sdot, 0),
4539  NEONMAP0(vext_v),
4540  NEONMAP0(vextq_v),
4541  NEONMAP0(vfma_v),
4542  NEONMAP0(vfmaq_v),
4543  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
4544  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
4545  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
4546  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
4547  NEONMAP0(vld1_dup_v),
4548  NEONMAP1(vld1_v, arm_neon_vld1, 0),
4549  NEONMAP1(vld1_x2_v, arm_neon_vld1x2, 0),
4550  NEONMAP1(vld1_x3_v, arm_neon_vld1x3, 0),
4551  NEONMAP1(vld1_x4_v, arm_neon_vld1x4, 0),
4552  NEONMAP0(vld1q_dup_v),
4553  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
4554  NEONMAP1(vld1q_x2_v, arm_neon_vld1x2, 0),
4555  NEONMAP1(vld1q_x3_v, arm_neon_vld1x3, 0),
4556  NEONMAP1(vld1q_x4_v, arm_neon_vld1x4, 0),
4557  NEONMAP1(vld2_dup_v, arm_neon_vld2dup, 0),
4558  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
4559  NEONMAP1(vld2_v, arm_neon_vld2, 0),
4560  NEONMAP1(vld2q_dup_v, arm_neon_vld2dup, 0),
4561  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
4562  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
4563  NEONMAP1(vld3_dup_v, arm_neon_vld3dup, 0),
4564  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
4565  NEONMAP1(vld3_v, arm_neon_vld3, 0),
4566  NEONMAP1(vld3q_dup_v, arm_neon_vld3dup, 0),
4567  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
4568  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
4569  NEONMAP1(vld4_dup_v, arm_neon_vld4dup, 0),
4570  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
4571  NEONMAP1(vld4_v, arm_neon_vld4, 0),
4572  NEONMAP1(vld4q_dup_v, arm_neon_vld4dup, 0),
4573  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
4574  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
4575  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
4576  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
4577  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
4578  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
4579  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
4580  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
4581  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
4582  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
4583  NEONMAP0(vmovl_v),
4584  NEONMAP0(vmovn_v),
4585  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
4586  NEONMAP0(vmull_v),
4587  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
4588  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
4589  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
4590  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
4591  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
4592  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
4593  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
4594  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
4595  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
4596  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
4597  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
4598  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
4599  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
4600  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
4601  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
4602  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
4603  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
4604  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
4605  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
4606  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
4607  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
4608  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
4609  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
4610  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
4611  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
4612  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
4613  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
4614  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
4615  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
4616  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
4617  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
4618  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
4619  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
4620  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
4621  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
4622  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
4623  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
4624  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
4625  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
4626  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
4627  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
4628  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
4629  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
4630  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
4631  NEONMAP0(vrndi_v),
4632  NEONMAP0(vrndiq_v),
4633  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
4634  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
4635  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
4636  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
4637  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
4638  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
4639  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
4640  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
4641  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
4642  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
4643  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
4644  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
4645  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
4646  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
4647  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
4648  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
4649  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
4650  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
4651  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
4652  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
4653  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
4654  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
4655  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
4656  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
4657  NEONMAP0(vshl_n_v),
4658  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
4659  NEONMAP0(vshll_n_v),
4660  NEONMAP0(vshlq_n_v),
4661  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
4662  NEONMAP0(vshr_n_v),
4663  NEONMAP0(vshrn_n_v),
4664  NEONMAP0(vshrq_n_v),
4665  NEONMAP1(vst1_v, arm_neon_vst1, 0),
4666  NEONMAP1(vst1_x2_v, arm_neon_vst1x2, 0),
4667  NEONMAP1(vst1_x3_v, arm_neon_vst1x3, 0),
4668  NEONMAP1(vst1_x4_v, arm_neon_vst1x4, 0),
4669  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
4670  NEONMAP1(vst1q_x2_v, arm_neon_vst1x2, 0),
4671  NEONMAP1(vst1q_x3_v, arm_neon_vst1x3, 0),
4672  NEONMAP1(vst1q_x4_v, arm_neon_vst1x4, 0),
4673  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
4674  NEONMAP1(vst2_v, arm_neon_vst2, 0),
4675  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
4676  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
4677  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
4678  NEONMAP1(vst3_v, arm_neon_vst3, 0),
4679  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
4680  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
4681  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
4682  NEONMAP1(vst4_v, arm_neon_vst4, 0),
4683  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
4684  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
4685  NEONMAP0(vsubhn_v),
4686  NEONMAP0(vtrn_v),
4687  NEONMAP0(vtrnq_v),
4688  NEONMAP0(vtst_v),
4689  NEONMAP0(vtstq_v),
4690  NEONMAP0(vuzp_v),
4691  NEONMAP0(vuzpq_v),
4692  NEONMAP0(vzip_v),
4693  NEONMAP0(vzipq_v)
4694 };
4695 
4696 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
4697  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
4698  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
4699  NEONMAP0(vaddhn_v),
4700  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
4701  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
4702  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
4703  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
4704  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
4705  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
4706  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
4707  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
4708  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
4709  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
4710  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
4711  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
4712  NEONMAP0(vceqz_v),
4713  NEONMAP0(vceqzq_v),
4714  NEONMAP0(vcgez_v),
4715  NEONMAP0(vcgezq_v),
4716  NEONMAP0(vcgtz_v),
4717  NEONMAP0(vcgtzq_v),
4718  NEONMAP0(vclez_v),
4719  NEONMAP0(vclezq_v),
4720  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
4721  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
4722  NEONMAP0(vcltz_v),
4723  NEONMAP0(vcltzq_v),
4724  NEONMAP1(vclz_v, ctlz, Add1ArgType),
4725  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
4726  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
4727  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
4728  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
4729  NEONMAP0(vcvt_f16_v),
4730  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
4731  NEONMAP0(vcvt_f32_v),
4732  NEONMAP2(vcvt_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4733  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4734  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4735  NEONMAP1(vcvt_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
4736  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
4737  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
4738  NEONMAP1(vcvt_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
4739  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
4740  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
4741  NEONMAP0(vcvtq_f16_v),
4742  NEONMAP0(vcvtq_f32_v),
4743  NEONMAP2(vcvtq_n_f16_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4744  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4745  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
4746  NEONMAP1(vcvtq_n_s16_v, aarch64_neon_vcvtfp2fxs, 0),
4747  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
4748  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
4749  NEONMAP1(vcvtq_n_u16_v, aarch64_neon_vcvtfp2fxu, 0),
4750  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
4751  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
4752  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
4753  NEONMAP2(vdot_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
4754  NEONMAP2(vdotq_v, aarch64_neon_udot, aarch64_neon_sdot, 0),
4755  NEONMAP0(vext_v),
4756  NEONMAP0(vextq_v),
4757  NEONMAP0(vfma_v),
4758  NEONMAP0(vfmaq_v),
4759  NEONMAP1(vfmlal_high_v, aarch64_neon_fmlal2, 0),
4760  NEONMAP1(vfmlal_low_v, aarch64_neon_fmlal, 0),
4761  NEONMAP1(vfmlalq_high_v, aarch64_neon_fmlal2, 0),
4762  NEONMAP1(vfmlalq_low_v, aarch64_neon_fmlal, 0),
4763  NEONMAP1(vfmlsl_high_v, aarch64_neon_fmlsl2, 0),
4764  NEONMAP1(vfmlsl_low_v, aarch64_neon_fmlsl, 0),
4765  NEONMAP1(vfmlslq_high_v, aarch64_neon_fmlsl2, 0),
4766  NEONMAP1(vfmlslq_low_v, aarch64_neon_fmlsl, 0),
4767  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
4768  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
4769  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
4770  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
4771  NEONMAP1(vld1_x2_v, aarch64_neon_ld1x2, 0),
4772  NEONMAP1(vld1_x3_v, aarch64_neon_ld1x3, 0),
4773  NEONMAP1(vld1_x4_v, aarch64_neon_ld1x4, 0),
4774  NEONMAP1(vld1q_x2_v, aarch64_neon_ld1x2, 0),
4775  NEONMAP1(vld1q_x3_v, aarch64_neon_ld1x3, 0),
4776  NEONMAP1(vld1q_x4_v, aarch64_neon_ld1x4, 0),
4777  NEONMAP0(vmovl_v),
4778  NEONMAP0(vmovn_v),
4779  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
4780  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
4781  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
4782  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
4783  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
4784  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
4785  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
4786  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
4787  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
4788  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
4789  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
4790  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
4791  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
4792  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
4793  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
4794  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
4795  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
4796  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
4797  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
4798  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
4799  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
4800  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
4801  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
4802  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
4803  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
4804  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
4805  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
4806  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
4807  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
4808  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
4809  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
4810  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
4811  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
4812  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
4813  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
4814  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
4815  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
4816  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
4817  NEONMAP0(vrndi_v),
4818  NEONMAP0(vrndiq_v),
4819  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
4820  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
4821  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
4822  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
4823  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
4824  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
4825  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
4826  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
4827  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
4828  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
4829  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
4830  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
4831  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
4832  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
4833  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
4834  NEONMAP0(vshl_n_v),
4835  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
4836  NEONMAP0(vshll_n_v),
4837  NEONMAP0(vshlq_n_v),
4838  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
4839  NEONMAP0(vshr_n_v),
4840  NEONMAP0(vshrn_n_v),
4841  NEONMAP0(vshrq_n_v),
4842  NEONMAP1(vst1_x2_v, aarch64_neon_st1x2, 0),
4843  NEONMAP1(vst1_x3_v, aarch64_neon_st1x3, 0),
4844  NEONMAP1(vst1_x4_v, aarch64_neon_st1x4, 0),
4845  NEONMAP1(vst1q_x2_v, aarch64_neon_st1x2, 0),
4846  NEONMAP1(vst1q_x3_v, aarch64_neon_st1x3, 0),
4847  NEONMAP1(vst1q_x4_v, aarch64_neon_st1x4, 0),
4848  NEONMAP0(vsubhn_v),
4849  NEONMAP0(vtst_v),
4850  NEONMAP0(vtstq_v),
4851 };
4852 
4853 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
4854  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
4855  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
4856  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
4857  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
4858  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
4859  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
4860  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
4861  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
4862  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
4863  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4864  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
4865  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
4866  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
4867  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
4868  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4869  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4870  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
4871  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
4872  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
4873  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
4874  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
4875  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
4876  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
4877  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
4878  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4879  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4880  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
4881  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
4882  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4883  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4884  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4885  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4886  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4887  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4888  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
4889  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
4890  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4891  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4892  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
4893  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
4894  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4895  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4896  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
4897  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
4898  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
4899  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
4900  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
4901  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
4902  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
4903  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4904  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4905  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4906  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4907  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
4908  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
4909  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4910  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4911  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
4912  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
4913  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4914  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4915  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4916  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4917  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
4918  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
4919  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4920  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
4921  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
4922  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
4923  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
4924  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
4925  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
4926  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4927  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
4928  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4929  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
4930  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4931  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
4932  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4933  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
4934  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
4935  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
4936  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
4937  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
4938  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
4939  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
4940  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
4941  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
4942  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
4943  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
4944  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
4945  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
4946  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
4947  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
4948  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
4949  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
4950  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
4951  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
4952  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
4953  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
4954  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
4955  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
4956  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
4957  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
4958  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
4959  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
4960  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
4961  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
4962  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
4963  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
4964  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
4965  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
4966  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
4967  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
4968  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
4969  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
4970  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
4971  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
4972  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
4973  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
4974  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
4975  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
4976  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
4977  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
4978  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
4979  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
4980  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
4981  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
4982  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
4983  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
4984  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4985  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4986  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4987  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4988  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
4989  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
4990  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4991  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4992  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
4993  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
4994  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
4995  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
4996  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
4997  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
4998  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
4999  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
5000  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
5001  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
5002  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
5003  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
5004  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
5005  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
5006  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
5007  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
5008  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
5009  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
5010  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
5011  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
5012  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
5013  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
5014  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
5015  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
5016  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
5017  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
5018  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
5019  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
5020  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
5021  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
5022  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
5023  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
5024  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
5025  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
5026  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
5027  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
5028  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
5029  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
5030  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
5031  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
5032  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
5033  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
5034  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
5035  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
5036  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
5037  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
5038  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
5039  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
5040  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
5041  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
5042  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
5043  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
5044  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
5045  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
5046  // FP16 scalar intrinsics go here.
5047  NEONMAP1(vabdh_f16, aarch64_sisd_fabd, Add1ArgType),
5048  NEONMAP1(vcvtah_s32_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
5049  NEONMAP1(vcvtah_s64_f16, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
5050  NEONMAP1(vcvtah_u32_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
5051  NEONMAP1(vcvtah_u64_f16, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
5052  NEONMAP1(vcvth_n_f16_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
5053  NEONMAP1(vcvth_n_f16_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
5054  NEONMAP1(vcvth_n_f16_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
5055  NEONMAP1(vcvth_n_f16_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
5056  NEONMAP1(vcvth_n_s32_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
5057  NEONMAP1(vcvth_n_s64_f16, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
5058  NEONMAP1(vcvth_n_u32_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
5059  NEONMAP1(vcvth_n_u64_f16, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
5060  NEONMAP1(vcvtmh_s32_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
5061  NEONMAP1(vcvtmh_s64_f16, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
5062  NEONMAP1(vcvtmh_u32_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
5063  NEONMAP1(vcvtmh_u64_f16, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
5064  NEONMAP1(vcvtnh_s32_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
5065  NEONMAP1(vcvtnh_s64_f16, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
5066  NEONMAP1(vcvtnh_u32_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
5067  NEONMAP1(vcvtnh_u64_f16, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
5068  NEONMAP1(vcvtph_s32_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
5069  NEONMAP1(vcvtph_s64_f16, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
5070  NEONMAP1(vcvtph_u32_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
5071  NEONMAP1(vcvtph_u64_f16, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
5072  NEONMAP1(vmulxh_f16, aarch64_neon_fmulx, Add1ArgType),
5073  NEONMAP1(vrecpeh_f16, aarch64_neon_frecpe, Add1ArgType),
5074  NEONMAP1(vrecpxh_f16, aarch64_neon_frecpx, Add1ArgType),
5075  NEONMAP1(vrsqrteh_f16, aarch64_neon_frsqrte, Add1ArgType),
5076  NEONMAP1(vrsqrtsh_f16, aarch64_neon_frsqrts, Add1ArgType),
5077 };
5078 
5079 #undef NEONMAP0
5080 #undef NEONMAP1
5081 #undef NEONMAP2
5082 
5083 static bool NEONSIMDIntrinsicsProvenSorted = false;
5084 
5085 static bool AArch64SIMDIntrinsicsProvenSorted = false;
5086 static bool AArch64SISDIntrinsicsProvenSorted = false;
5087 
5088 
5089 static const NeonIntrinsicInfo *
5090 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
5091  unsigned BuiltinID, bool &MapProvenSorted) {
5092 
5093 #ifndef NDEBUG
5094  if (!MapProvenSorted) {
5095  assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
5096  MapProvenSorted = true;
5097  }
5098 #endif
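  // The intrinsic maps must stay sorted by BuiltinID: the one-time
  // std::is_sorted check above verifies this in asserts builds so that the
  // lower_bound lookup below remains valid.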
5099 
5100  const NeonIntrinsicInfo *Builtin = llvm::lower_bound(IntrinsicMap, BuiltinID);
5101 
5102  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
5103  return Builtin;
5104 
5105  return nullptr;
5106 }
5107 
5108 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
5109  unsigned Modifier,
5110  llvm::Type *ArgType,
5111  const CallExpr *E) {
5112  int VectorSize = 0;
5113  if (Modifier & Use64BitVectors)
5114  VectorSize = 64;
5115  else if (Modifier & Use128BitVectors)
5116  VectorSize = 128;
5117 
5118  // Return type.
5119  SmallVector<llvm::Type *, 3> Tys;
5120  if (Modifier & AddRetType) {
5121  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
5122  if (Modifier & VectorizeRetType)
5123  Ty = llvm::VectorType::get(
5124  Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
5125 
5126  Tys.push_back(Ty);
5127  }
5128 
5129  // Arguments.
5130  if (Modifier & VectorizeArgTypes) {
5131  int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
5132  ArgType = llvm::VectorType::get(ArgType, Elts);
5133  }
5134 
5135  if (Modifier & (Add1ArgType | Add2ArgTypes))
5136  Tys.push_back(ArgType);
5137 
5138  if (Modifier & Add2ArgTypes)
5139  Tys.push_back(ArgType);
5140 
5141  if (Modifier & InventFloatType)
5142  Tys.push_back(FloatTy);
5143 
5144  return CGM.getIntrinsic(IntrinsicID, Tys);
5145 }
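  // For example, a modifier of AddRetType | Add1ArgType produces
  // Tys = { return type, argument type }, i.e. the intrinsic is overloaded on
  // both its result and its first argument; the Vectorize* and Use*BitVectors
  // bits widen those types to the requested vector width first.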
5146 
5147 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
5148  const NeonIntrinsicInfo &SISDInfo,
5149  SmallVectorImpl<Value *> &Ops,
5150  const CallExpr *E) {
5151  unsigned BuiltinID = SISDInfo.BuiltinID;
5152  unsigned int Int = SISDInfo.LLVMIntrinsic;
5153  unsigned Modifier = SISDInfo.TypeModifier;
5154  const char *s = SISDInfo.NameHint;
5155 
5156  switch (BuiltinID) {
5157  case NEON::BI__builtin_neon_vcled_s64:
5158  case NEON::BI__builtin_neon_vcled_u64:
5159  case NEON::BI__builtin_neon_vcles_f32:
5160  case NEON::BI__builtin_neon_vcled_f64:
5161  case NEON::BI__builtin_neon_vcltd_s64:
5162  case NEON::BI__builtin_neon_vcltd_u64:
5163  case NEON::BI__builtin_neon_vclts_f32:
5164  case NEON::BI__builtin_neon_vcltd_f64:
5165  case NEON::BI__builtin_neon_vcales_f32:
5166  case NEON::BI__builtin_neon_vcaled_f64:
5167  case NEON::BI__builtin_neon_vcalts_f32:
5168  case NEON::BI__builtin_neon_vcaltd_f64:
5169  // Only one direction of comparison actually exists: cmle is really a cmge
5170  // with swapped operands. The table gives us the right intrinsic, but we
5171  // still need to do the swap here.
5172  std::swap(Ops[0], Ops[1]);
5173  break;
5174  }
5175 
5176  assert(Int && "Generic code assumes a valid intrinsic");
5177 
5178  // Determine the type(s) of this overloaded AArch64 intrinsic.
5179  const Expr *Arg = E->getArg(0);
5180  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
5181  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
5182 
5183  int j = 0;
5184  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
5185  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
5186  ai != ae; ++ai, ++j) {
5187  llvm::Type *ArgTy = ai->getType();
5188  if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
5189  ArgTy->getPrimitiveSizeInBits())
5190  continue;
5191 
5192  assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
5193  // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
5194  // it before inserting.
5195  Ops[j] =
5196  CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
5197  Ops[j] =
5198  CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
5199  }
5200 
5201  Value *Result = CGF.EmitNeonCall(F, Ops, s);
5202  llvm::Type *ResultType = CGF.ConvertType(E->getType());
5203  if (ResultType->getPrimitiveSizeInBits() <
5204  Result->getType()->getPrimitiveSizeInBits())
5205  return CGF.Builder.CreateExtractElement(Result, C0);
5206 
5207  return CGF.Builder.CreateBitCast(Result, ResultType, s);
5208 }
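  // For example, vqaddb_s8 is emitted by inserting its i8 operands into lane 0
  // of <8 x i8> vectors, calling the aarch64.neon.sqadd intrinsic on those,
  // and extracting lane 0 of the result.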
5209 
5210 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
5211  unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
5212  const char *NameHint, unsigned Modifier, const CallExpr *E,
5213  SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1,
5214  llvm::Triple::ArchType Arch) {
5215  // Get the last argument, which specifies the vector type.
5216  llvm::APSInt NeonTypeConst;
5217  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5218  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
5219  return nullptr;
5220 
5221  // Determine the type of this overloaded NEON intrinsic.
5222  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
5223  bool Usgn = Type.isUnsigned();
5224  bool Quad = Type.isQuad();
5225  const bool HasLegalHalfType = getTarget().hasLegalHalfType();
5226 
5227  llvm::VectorType *VTy = GetNeonType(this, Type, HasLegalHalfType);
5228  llvm::Type *Ty = VTy;
5229  if (!Ty)
5230  return nullptr;
5231 
5232  auto getAlignmentValue32 = [&](Address addr) -> Value* {
5233  return Builder.getInt32(addr.getAlignment().getQuantity());
5234  };
5235 
5236  unsigned Int = LLVMIntrinsic;
5237  if ((Modifier & UnsignedAlts) && !Usgn)
5238  Int = AltLLVMIntrinsic;
5239 
5240  switch (BuiltinID) {
5241  default: break;
5242  case NEON::BI__builtin_neon_vpadd_v:
5243  case NEON::BI__builtin_neon_vpaddq_v:
5244  // We don't allow fp/int overloading of intrinsics.
5245  if (VTy->getElementType()->isFloatingPointTy() &&
5246  Int == Intrinsic::aarch64_neon_addp)
5247  Int = Intrinsic::aarch64_neon_faddp;
5248  break;
5249  case NEON::BI__builtin_neon_vabs_v:
5250  case NEON::BI__builtin_neon_vabsq_v:
5251  if (VTy->getElementType()->isFloatingPointTy())
5252  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
5253  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
5254  case NEON::BI__builtin_neon_vaddhn_v: {
5255  llvm::VectorType *SrcTy =
5256  llvm::VectorType::getExtendedElementVectorType(VTy);
5257 
5258  // %sum = add <4 x i32> %lhs, %rhs
5259  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5260  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
5261  Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
5262 
5263  // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
5264  Constant *ShiftAmt =
5265  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
5266  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
5267 
5268  // %res = trunc <4 x i32> %high to <4 x i16>
5269  return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
5270  }
5271  case NEON::BI__builtin_neon_vcale_v:
5272  case NEON::BI__builtin_neon_vcaleq_v:
5273  case NEON::BI__builtin_neon_vcalt_v:
5274  case NEON::BI__builtin_neon_vcaltq_v:
5275  std::swap(Ops[0], Ops[1]);
5276  LLVM_FALLTHROUGH;
5277  case NEON::BI__builtin_neon_vcage_v:
5278  case NEON::BI__builtin_neon_vcageq_v:
5279  case NEON::BI__builtin_neon_vcagt_v:
5280  case NEON::BI__builtin_neon_vcagtq_v: {
5281  llvm::Type *Ty;
5282  switch (VTy->getScalarSizeInBits()) {
5283  default: llvm_unreachable("unexpected type");
5284  case 32:
5285  Ty = FloatTy;
5286  break;
5287  case 64:
5288  Ty = DoubleTy;
5289  break;
5290  case 16:
5291  Ty = HalfTy;
5292  break;
5293  }
5294  llvm::Type *VecFlt = llvm::VectorType::get(Ty, VTy->getNumElements());
5295  llvm::Type *Tys[] = { VTy, VecFlt };
5296  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5297  return EmitNeonCall(F, Ops, NameHint);
5298  }
5299  case NEON::BI__builtin_neon_vceqz_v:
5300  case NEON::BI__builtin_neon_vceqzq_v:
5301  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
5302  ICmpInst::ICMP_EQ, "vceqz");
5303  case NEON::BI__builtin_neon_vcgez_v:
5304  case NEON::BI__builtin_neon_vcgezq_v:
5305  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
5306  ICmpInst::ICMP_SGE, "vcgez");
5307  case NEON::BI__builtin_neon_vclez_v:
5308  case NEON::BI__builtin_neon_vclezq_v:
5309  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
5310  ICmpInst::ICMP_SLE, "vclez");
5311  case NEON::BI__builtin_neon_vcgtz_v:
5312  case NEON::BI__builtin_neon_vcgtzq_v:
5313  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
5314  ICmpInst::ICMP_SGT, "vcgtz");
5315  case NEON::BI__builtin_neon_vcltz_v:
5316  case NEON::BI__builtin_neon_vcltzq_v:
5317  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
5318  ICmpInst::ICMP_SLT, "vcltz");
5319  case NEON::BI__builtin_neon_vclz_v:
5320  case NEON::BI__builtin_neon_vclzq_v:
5321  // We generate a target-independent intrinsic, which needs a second argument
5322  // indicating whether clz of zero is undefined; on ARM it is not.
5323  Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
5324  break;
5325  case NEON::BI__builtin_neon_vcvt_f32_v:
5326  case NEON::BI__builtin_neon_vcvtq_f32_v:
5327  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5328  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad),
5329  HasLegalHalfType);
5330  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5331  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5332  case NEON::BI__builtin_neon_vcvt_f16_v:
5333  case NEON::BI__builtin_neon_vcvtq_f16_v:
5334  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5335  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float16, false, Quad),
5336  HasLegalHalfType);
5337  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
5338  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
5339  case NEON::BI__builtin_neon_vcvt_n_f16_v:
5340  case NEON::BI__builtin_neon_vcvt_n_f32_v:
5341  case NEON::BI__builtin_neon_vcvt_n_f64_v:
5342  case NEON::BI__builtin_neon_vcvtq_n_f16_v:
5343  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
5344  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
5345  llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
5346  Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
5347  Function *F = CGM.getIntrinsic(Int, Tys);
5348  return EmitNeonCall(F, Ops, "vcvt_n");
5349  }
5350  case NEON::BI__builtin_neon_vcvt_n_s16_v:
5351  case NEON::BI__builtin_neon_vcvt_n_s32_v:
5352  case NEON::BI__builtin_neon_vcvt_n_u16_v:
5353  case NEON::BI__builtin_neon_vcvt_n_u32_v:
5354  case NEON::BI__builtin_neon_vcvt_n_s64_v:
5355  case NEON::BI__builtin_neon_vcvt_n_u64_v:
5356  case NEON::BI__builtin_neon_vcvtq_n_s16_v:
5357  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
5358  case NEON::BI__builtin_neon_vcvtq_n_u16_v:
5359  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
5360  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
5361  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
5362  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5363  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5364  return EmitNeonCall(F, Ops, "vcvt_n");
5365  }
5366  case NEON::BI__builtin_neon_vcvt_s32_v:
5367  case NEON::BI__builtin_neon_vcvt_u32_v:
5368  case NEON::BI__builtin_neon_vcvt_s64_v:
5369  case NEON::BI__builtin_neon_vcvt_u64_v:
5370  case NEON::BI__builtin_neon_vcvt_s16_v:
5371  case NEON::BI__builtin_neon_vcvt_u16_v:
5372  case NEON::BI__builtin_neon_vcvtq_s32_v:
5373  case NEON::BI__builtin_neon_vcvtq_u32_v:
5374  case NEON::BI__builtin_neon_vcvtq_s64_v:
5375  case NEON::BI__builtin_neon_vcvtq_u64_v:
5376  case NEON::BI__builtin_neon_vcvtq_s16_v:
5377  case NEON::BI__builtin_neon_vcvtq_u16_v: {
5378  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
5379  return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
5380  : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
5381  }
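  // For example (lane types chosen for illustration): vcvt_f32_s32 on a
  // <2 x i32> input and vcvt_s32_f32 on a <2 x float> input lower to plain
  // IR conversions:
  //   %vcvt = sitofp <2 x i32> %a to <2 x float>
  //   %vcvt = fptosi <2 x float> %a to <2 x i32>
  // The unsigned variants use uitofp/fptoui instead.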
5382  case NEON::BI__builtin_neon_vcvta_s16_v:
5383  case NEON::BI__builtin_neon_vcvta_s32_v:
5384  case NEON::BI__builtin_neon_vcvta_s64_v:
5385  case NEON::BI__builtin_neon_vcvta_u16_v:
5386  case NEON::BI__builtin_neon_vcvta_u32_v:
5387  case NEON::BI__builtin_neon_vcvta_u64_v:
5388  case NEON::BI__builtin_neon_vcvtaq_s16_v:
5389  case NEON::BI__builtin_neon_vcvtaq_s32_v:
5390  case NEON::BI__builtin_neon_vcvtaq_s64_v:
5391  case NEON::BI__builtin_neon_vcvtaq_u16_v:
5392  case NEON::BI__builtin_neon_vcvtaq_u32_v:
5393  case NEON::BI__builtin_neon_vcvtaq_u64_v:
5394  case NEON::BI__builtin_neon_vcvtn_s16_v:
5395  case NEON::BI__builtin_neon_vcvtn_s32_v:
5396  case NEON::BI__builtin_neon_vcvtn_s64_v:
5397  case NEON::BI__builtin_neon_vcvtn_u16_v:
5398  case NEON::BI__builtin_neon_vcvtn_u32_v:
5399  case NEON::BI__builtin_neon_vcvtn_u64_v:
5400  case NEON::BI__builtin_neon_vcvtnq_s16_v:
5401  case NEON::BI__builtin_neon_vcvtnq_s32_v:
5402  case NEON::BI__builtin_neon_vcvtnq_s64_v:
5403  case NEON::BI__builtin_neon_vcvtnq_u16_v:
5404  case NEON::BI__builtin_neon_vcvtnq_u32_v:
5405  case NEON::BI__builtin_neon_vcvtnq_u64_v:
5406  case NEON::BI__builtin_neon_vcvtp_s16_v:
5407  case NEON::BI__builtin_neon_vcvtp_s32_v:
5408  case NEON::BI__builtin_neon_vcvtp_s64_v:
5409  case NEON::BI__builtin_neon_vcvtp_u16_v:
5410  case NEON::BI__builtin_neon_vcvtp_u32_v:
5411  case NEON::BI__builtin_neon_vcvtp_u64_v:
5412  case NEON::BI__builtin_neon_vcvtpq_s16_v:
5413  case NEON::BI__builtin_neon_vcvtpq_s32_v:
5414  case NEON::BI__builtin_neon_vcvtpq_s64_v:
5415  case NEON::BI__builtin_neon_vcvtpq_u16_v:
5416  case NEON::BI__builtin_neon_vcvtpq_u32_v:
5417  case NEON::BI__builtin_neon_vcvtpq_u64_v:
5418  case NEON::BI__builtin_neon_vcvtm_s16_v:
5419  case NEON::BI__builtin_neon_vcvtm_s32_v:
5420  case NEON::BI__builtin_neon_vcvtm_s64_v:
5421  case NEON::BI__builtin_neon_vcvtm_u16_v:
5422  case NEON::BI__builtin_neon_vcvtm_u32_v:
5423  case NEON::BI__builtin_neon_vcvtm_u64_v:
5424  case NEON::BI__builtin_neon_vcvtmq_s16_v:
5425  case NEON::BI__builtin_neon_vcvtmq_s32_v:
5426  case NEON::BI__builtin_neon_vcvtmq_s64_v:
5427  case NEON::BI__builtin_neon_vcvtmq_u16_v:
5428  case NEON::BI__builtin_neon_vcvtmq_u32_v:
5429  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
5430  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
5431  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
5432  }
5433  case NEON::BI__builtin_neon_vext_v:
5434  case NEON::BI__builtin_neon_vextq_v: {
5435  int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
5436  SmallVector<uint32_t, 16> Indices;
5437  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
5438  Indices.push_back(i+CV);
5439 
5440  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5441  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5442  return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
5443  }
5444  case NEON::BI__builtin_neon_vfma_v:
5445  case NEON::BI__builtin_neon_vfmaq_v: {
5446  Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
5447  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5448  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5449  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5450 
5451  // The NEON intrinsic puts the accumulator first, unlike LLVM's fma intrinsic.
5452  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
5453  }
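  // For example (illustrative): vfmaq_f32(acc, a, b) computes acc + a*b, so
  // after the operand swap above it becomes
  //   call <4 x float> @llvm.fma.v4f32(<4 x float> %a, <4 x float> %b,
  //                                    <4 x float> %acc)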
5454  case NEON::BI__builtin_neon_vld1_v:
5455  case NEON::BI__builtin_neon_vld1q_v: {
5456  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5457  Ops.push_back(getAlignmentValue32(PtrOp0));
5458  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
5459  }
5460  case NEON::BI__builtin_neon_vld1_x2_v:
5461  case NEON::BI__builtin_neon_vld1q_x2_v:
5462  case NEON::BI__builtin_neon_vld1_x3_v:
5463  case NEON::BI__builtin_neon_vld1q_x3_v:
5464  case NEON::BI__builtin_neon_vld1_x4_v:
5465  case NEON::BI__builtin_neon_vld1q_x4_v: {
5466  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5467  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
5468  llvm::Type *Tys[2] = { VTy, PTy };
5469  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5470  Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
5471  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5472  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5473  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5474  }
5475  case NEON::BI__builtin_neon_vld2_v:
5476  case NEON::BI__builtin_neon_vld2q_v:
5477  case NEON::BI__builtin_neon_vld3_v:
5478  case NEON::BI__builtin_neon_vld3q_v:
5479  case NEON::BI__builtin_neon_vld4_v:
5480  case NEON::BI__builtin_neon_vld4q_v:
5481  case NEON::BI__builtin_neon_vld2_dup_v:
5482  case NEON::BI__builtin_neon_vld2q_dup_v:
5483  case NEON::BI__builtin_neon_vld3_dup_v:
5484  case NEON::BI__builtin_neon_vld3q_dup_v:
5485  case NEON::BI__builtin_neon_vld4_dup_v:
5486  case NEON::BI__builtin_neon_vld4q_dup_v: {
5487  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5488  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5489  Value *Align = getAlignmentValue32(PtrOp1);
5490  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
5491  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5492  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5493  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5494  }
5495  case NEON::BI__builtin_neon_vld1_dup_v:
5496  case NEON::BI__builtin_neon_vld1q_dup_v: {
5497  Value *V = UndefValue::get(Ty);
5498  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
5499  PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
5500  LoadInst *Ld = Builder.CreateLoad(PtrOp0);
5501  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5502  Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
5503  return EmitNeonSplat(Ops[0], CI);
5504  }
5505  case NEON::BI__builtin_neon_vld2_lane_v:
5506  case NEON::BI__builtin_neon_vld2q_lane_v:
5507  case NEON::BI__builtin_neon_vld3_lane_v:
5508  case NEON::BI__builtin_neon_vld3q_lane_v:
5509  case NEON::BI__builtin_neon_vld4_lane_v:
5510  case NEON::BI__builtin_neon_vld4q_lane_v: {
5511  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5512  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
5513  for (unsigned I = 2; I < Ops.size() - 1; ++I)
5514  Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
5515  Ops.push_back(getAlignmentValue32(PtrOp1));
5516  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
5517  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5518  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5519  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5520  }
5521  case NEON::BI__builtin_neon_vmovl_v: {
5522  llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
5523  Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
5524  if (Usgn)
5525  return Builder.CreateZExt(Ops[0], Ty, "vmovl");
5526  return Builder.CreateSExt(Ops[0], Ty, "vmovl");
5527  }
5528  case NEON::BI__builtin_neon_vmovn_v: {
5529  llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
5530  Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
5531  return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
5532  }
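  // For example (lane types chosen for illustration), the widening/narrowing
  // moves are plain integer conversions:
  //   vmovl_s16: %vmovl = sext  <4 x i16> %a to <4 x i32>
  //   vmovl_u16: %vmovl = zext  <4 x i16> %a to <4 x i32>
  //   vmovn_s32: %vmovn = trunc <4 x i32> %a to <4 x i16>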
5533  case NEON::BI__builtin_neon_vmull_v:
5534  // FIXME: the integer vmull operations could be emitted in terms of pure
5535  // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
5536  // hoisting the exts outside loops; until GlobalISel comes along and can
5537  // see through such movement, this leads to bad CodeGen. So we need an
5538  // intrinsic for now.
5539  Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
5540  Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
5541  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
5542  case NEON::BI__builtin_neon_vpadal_v:
5543  case NEON::BI__builtin_neon_vpadalq_v: {
5544  // The source operand type has twice as many elements of half the size.
5545  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
5546  llvm::Type *EltTy =
5547  llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
5548  llvm::Type *NarrowTy =
5549  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
5550  llvm::Type *Tys[2] = { Ty, NarrowTy };
5551  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
5552  }
5553  case NEON::BI__builtin_neon_vpaddl_v:
5554  case NEON::BI__builtin_neon_vpaddlq_v: {
5555  // The source operand type has twice as many elements of half the size.
5556  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
5557  llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
5558  llvm::Type *NarrowTy =
5559  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
5560  llvm::Type *Tys[2] = { Ty, NarrowTy };
5561  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
5562  }
5563  case NEON::BI__builtin_neon_vqdmlal_v:
5564  case NEON::BI__builtin_neon_vqdmlsl_v: {
5565  SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
5566  Ops[1] =
5567  EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
5568  Ops.resize(2);
5569  return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
5570  }
5571  case NEON::BI__builtin_neon_vqshl_n_v:
5572  case NEON::BI__builtin_neon_vqshlq_n_v:
5573  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
5574  1, false);
5575  case NEON::BI__builtin_neon_vqshlu_n_v:
5576  case NEON::BI__builtin_neon_vqshluq_n_v:
5577  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
5578  1, false);
5579  case NEON::BI__builtin_neon_vrecpe_v:
5580  case NEON::BI__builtin_neon_vrecpeq_v:
5581  case NEON::BI__builtin_neon_vrsqrte_v:
5582  case NEON::BI__builtin_neon_vrsqrteq_v:
5583  Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
5584  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
5585  case NEON::BI__builtin_neon_vrndi_v:
5586  case NEON::BI__builtin_neon_vrndiq_v:
5587  Int = Intrinsic::nearbyint;
5588  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
5589  case NEON::BI__builtin_neon_vrshr_n_v:
5590  case NEON::BI__builtin_neon_vrshrq_n_v:
5591  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
5592  1, true);
5593  case NEON::BI__builtin_neon_vshl_n_v:
5594  case NEON::BI__builtin_neon_vshlq_n_v:
5595  Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
5596  return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
5597  "vshl_n");
5598  case NEON::BI__builtin_neon_vshll_n_v: {
5599  llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
5600  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5601  if (Usgn)
5602  Ops[0] = Builder.CreateZExt(Ops[0], VTy);
5603  else
5604  Ops[0] = Builder.CreateSExt(Ops[0], VTy);
5605  Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
5606  return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
5607  }
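  // For example (illustrative): vshll_n_s16(a, 3) widens and then shifts:
  //   %ext     = sext <4 x i16> %a to <4 x i32>
  //   %vshll_n = shl  <4 x i32> %ext, <i32 3, i32 3, i32 3, i32 3>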
5608  case NEON::BI__builtin_neon_vshrn_n_v: {
5609  llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
5610  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5611  Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
5612  if (Usgn)
5613  Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
5614  else
5615  Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
5616  return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
5617  }
5618  case NEON::BI__builtin_neon_vshr_n_v:
5619  case NEON::BI__builtin_neon_vshrq_n_v:
5620  return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
5621  case NEON::BI__builtin_neon_vst1_v:
5622  case NEON::BI__builtin_neon_vst1q_v:
5623  case NEON::BI__builtin_neon_vst2_v:
5624  case NEON::BI__builtin_neon_vst2q_v:
5625  case NEON::BI__builtin_neon_vst3_v:
5626  case NEON::BI__builtin_neon_vst3q_v:
5627  case NEON::BI__builtin_neon_vst4_v:
5628  case NEON::BI__builtin_neon_vst4q_v:
5629  case NEON::BI__builtin_neon_vst2_lane_v:
5630  case NEON::BI__builtin_neon_vst2q_lane_v:
5631  case NEON::BI__builtin_neon_vst3_lane_v:
5632  case NEON::BI__builtin_neon_vst3q_lane_v:
5633  case NEON::BI__builtin_neon_vst4_lane_v:
5634  case NEON::BI__builtin_neon_vst4q_lane_v: {
5635  llvm::Type *Tys[] = {Int8PtrTy, Ty};
5636  Ops.push_back(getAlignmentValue32(PtrOp0));
5637  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
5638  }
5639  case NEON::BI__builtin_neon_vst1_x2_v:
5640  case NEON::BI__builtin_neon_vst1q_x2_v:
5641  case NEON::BI__builtin_neon_vst1_x3_v:
5642  case NEON::BI__builtin_neon_vst1q_x3_v:
5643  case NEON::BI__builtin_neon_vst1_x4_v:
5644  case NEON::BI__builtin_neon_vst1q_x4_v: {
5645  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
5646  // TODO: Currently in AArch32 mode the pointer operand comes first, whereas
5647  // in AArch64 it comes last. We may want to standardize on one or the other.
5648  if (Arch == llvm::Triple::aarch64 || Arch == llvm::Triple::aarch64_be) {
5649  llvm::Type *Tys[2] = { VTy, PTy };
5650  std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
5651  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
5652  }
5653  llvm::Type *Tys[2] = { PTy, VTy };
5654  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "");
5655  }
5656  case NEON::BI__builtin_neon_vsubhn_v: {
5657  llvm::VectorType *SrcTy =
5658  llvm::VectorType::getExtendedElementVectorType(VTy);
5659 
5660  // %diff = sub <4 x i32> %lhs, %rhs
5661  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
5662  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
5663  Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
5664 
5665  // %high = lshr <4 x i32> %diff, <i32 16, i32 16, i32 16, i32 16>
5666  Constant *ShiftAmt =
5667  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
5668  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
5669 
5670  // %res = trunc <4 x i32> %high to <4 x i16>
5671  return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
5672  }
5673  case NEON::BI__builtin_neon_vtrn_v:
5674  case NEON::BI__builtin_neon_vtrnq_v: {
5675  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5676  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5677  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5678  Value *SV = nullptr;
5679 
5680  for (unsigned vi = 0; vi != 2; ++vi) {
5681  SmallVector<uint32_t, 16> Indices;
5682  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
5683  Indices.push_back(i+vi);
5684  Indices.push_back(i+e+vi);
5685  }
5686  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5687  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
5688  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5689  }
5690  return SV;
5691  }
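  // For example (illustrative): for vtrn_s32, where VTy is <2 x i32>, the loop
  // above emits two shuffles with masks <0, 2> and <1, 3> and stores each
  // result into consecutive <2 x i32> slots of the sret pointer in Ops[0].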
5692  case NEON::BI__builtin_neon_vtst_v:
5693  case NEON::BI__builtin_neon_vtstq_v: {
5694  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5695  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5696  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5697  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5698  ConstantAggregateZero::get(Ty));
5699  return Builder.CreateSExt(Ops[0], Ty, "vtst");
5700  }
5701  case NEON::BI__builtin_neon_vuzp_v:
5702  case NEON::BI__builtin_neon_vuzpq_v: {
5703  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5704  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5705  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5706  Value *SV = nullptr;
5707 
5708  for (unsigned vi = 0; vi != 2; ++vi) {
5709  SmallVector<uint32_t, 16> Indices;
5710  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
5711  Indices.push_back(2*i+vi);
5712 
5713  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5714  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
5715  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5716  }
5717  return SV;
5718  }
5719  case NEON::BI__builtin_neon_vzip_v:
5720  case NEON::BI__builtin_neon_vzipq_v: {
5721  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
5722  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5723  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
5724  Value *SV = nullptr;
5725 
5726  for (unsigned vi = 0; vi != 2; ++vi) {
5727  SmallVector<uint32_t, 16> Indices;
5728  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
5729  Indices.push_back((i + vi*e) >> 1);
5730  Indices.push_back(((i + vi*e) >> 1)+e);
5731  }
5732  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
5733  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
5734  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
5735  }
5736  return SV;
5737  }
5738  case NEON::BI__builtin_neon_vdot_v:
5739  case NEON::BI__builtin_neon_vdotq_v: {
5740  llvm::Type *InputTy =
5741  llvm::VectorType::get(Int8Ty, Ty->getPrimitiveSizeInBits() / 8);
5742  llvm::Type *Tys[2] = { Ty, InputTy };
5743  Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
5744  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vdot");
5745  }
5746  case NEON::BI__builtin_neon_vfmlal_low_v:
5747  case NEON::BI__builtin_neon_vfmlalq_low_v: {
5748  llvm::Type *InputTy =
5749  llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5750  llvm::Type *Tys[2] = { Ty, InputTy };
5751  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_low");
5752  }
5753  case NEON::BI__builtin_neon_vfmlsl_low_v:
5754  case NEON::BI__builtin_neon_vfmlslq_low_v: {
5755  llvm::Type *InputTy =
5756  llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5757  llvm::Type *Tys[2] = { Ty, InputTy };
5758  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_low");
5759  }
5760  case NEON::BI__builtin_neon_vfmlal_high_v:
5761  case NEON::BI__builtin_neon_vfmlalq_high_v: {
5762  llvm::Type *InputTy =
5763  llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5764  llvm::Type *Tys[2] = { Ty, InputTy };
5765  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlal_high");
5766  }
5767  case NEON::BI__builtin_neon_vfmlsl_high_v:
5768  case NEON::BI__builtin_neon_vfmlslq_high_v: {
5769  llvm::Type *InputTy =
5770  llvm::VectorType::get(HalfTy, Ty->getPrimitiveSizeInBits() / 16);
5771  llvm::Type *Tys[2] = { Ty, InputTy };
5772  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vfmlsl_high");
5773  }
5774  }
5775 
5776  assert(Int && "Expected valid intrinsic number");
5777 
5778  // Determine the type(s) of this overloaded NEON intrinsic.
5779  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
5780 
5781  Value *Result = EmitNeonCall(F, Ops, NameHint);
5782  llvm::Type *ResultType = ConvertType(E->getType());
5783  // Cast a one-element vector result from the AArch64 intrinsic back to
5784  // the scalar type expected by the builtin.
5785  return Builder.CreateBitCast(Result, ResultType, NameHint);
5786 }
5787 
5788 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
5789  Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
5790  const CmpInst::Predicate Ip, const Twine &Name) {
5791  llvm::Type *OTy = Op->getType();
5792 
5793  // FIXME: this is utterly horrific. We should not be looking at previous
5794  // codegen context to find out what needs doing. Unfortunately TableGen
5795  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
5796  // (etc).
5797  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
5798  OTy = BI->getOperand(0)->getType();
5799 
5800  Op = Builder.CreateBitCast(Op, OTy);
5801  if (OTy->getScalarType()->isFloatingPointTy()) {
5802  Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
5803  } else {
5804  Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
5805  }
5806  return Builder.CreateSExt(Op, Ty, Name);
5807 }
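// For example (illustrative): vceqz_f32 on a <2 x float> operand becomes
//   %cmp   = fcmp oeq <2 x float> %a, zeroinitializer
//   %vceqz = sext <2 x i1> %cmp to <2 x i32>
// giving an all-ones lane where the comparison holds and zero elsewhere.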
5808 
5809 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
5810  Value *ExtOp, Value *IndexOp,
5811  llvm::Type *ResTy, unsigned IntID,
5812  const char *Name) {
5813  SmallVector<Value *, 2> TblOps;
5814  if (ExtOp)
5815  TblOps.push_back(ExtOp);
5816 
5817  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
5818  SmallVector<uint32_t, 16> Indices;
5819  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
5820  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
5821  Indices.push_back(2*i);
5822  Indices.push_back(2*i+1);
5823  }
5824 
5825  int PairPos = 0, End = Ops.size() - 1;
5826  while (PairPos < End) {
5827  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
5828  Ops[PairPos+1], Indices,
5829  Name));
5830  PairPos += 2;
5831  }
5832 
5833  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
5834  // of the last 128-bit lookup table with zero.
5835  if (PairPos == End) {
5836  Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
5837  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
5838  ZeroTbl, Indices, Name));
5839  }
5840 
5841  Function *TblF;
5842  TblOps.push_back(IndexOp);
5843  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
5844 
5845  return CGF.EmitNeonCall(TblF, TblOps, Name);
5846 }
5847 
5848 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
5849  unsigned Value;
5850  switch (BuiltinID) {
5851  default:
5852  return nullptr;
5853  case ARM::BI__builtin_arm_nop:
5854  Value = 0;
5855  break;
5856  case ARM::BI__builtin_arm_yield:
5857  case ARM::BI__yield:
5858  Value = 1;
5859  break;
5860  case ARM::BI__builtin_arm_wfe:
5861  case ARM::BI__wfe:
5862  Value = 2;
5863  break;
5864  case ARM::BI__builtin_arm_wfi:
5865  case ARM::BI__wfi:
5866  Value = 3;
5867  break;
5868  case ARM::BI__builtin_arm_sev:
5869  case ARM::BI__sev:
5870  Value = 4;
5871  break;
5872  case ARM::BI__builtin_arm_sevl:
5873  case ARM::BI__sevl:
5874  Value = 5;
5875  break;
5876  }
5877 
5878  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
5879  llvm::ConstantInt::get(Int32Ty, Value));
5880 }
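// For example (illustrative): __builtin_arm_wfi() maps to hint value 3 above
// and is emitted as
//   call void @llvm.arm.hint(i32 3)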
5881 
5882 // Generates the IR for the read/write special register builtin.
5883 // ValueType is the type of the value that is to be written or read;
5884 // RegisterType is the type of the register being written to or read from.
5885 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
5886  const CallExpr *E,
5887  llvm::Type *RegisterType,
5888  llvm::Type *ValueType,
5889  bool IsRead,
5890  StringRef SysReg = "") {
5891  // The read and write register intrinsics only support 32-bit and 64-bit operations.
5892  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
5893  && "Unsupported size for register.");
5894 
5895  CodeGen::CGBuilderTy &Builder = CGF.Builder;
5896  CodeGen::CodeGenModule &CGM = CGF.CGM;
5897  LLVMContext &Context = CGM.getLLVMContext();
5898 
5899  if (SysReg.empty()) {
5900  const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
5901  SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
5902  }
5903 
5904  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
5905  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
5906  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
5907 
5908  llvm::Type *Types[] = { RegisterType };
5909 
5910  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
5911  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
5912  && "Can't fit 64-bit value in 32-bit register");
5913 
5914  if (IsRead) {
5915  llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
5916  llvm::Value *Call = Builder.CreateCall(F, Metadata);
5917 
5918  if (MixedTypes)
5919  // Read into 64 bit register and then truncate result to 32 bit.
5920  return Builder.CreateTrunc(Call, ValueType);
5921 
5922  if (ValueType->isPointerTy())
5923  // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
5924  return Builder.CreateIntToPtr(Call, ValueType);
5925 
5926  return Call;
5927  }
5928 
5929  llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
5930  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
5931  if (MixedTypes) {
5932  // Extend 32 bit write value to 64 bit to pass to write.
5933  ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
5934  return Builder.CreateCall(F, { Metadata, ArgValue });
5935  }
5936 
5937  if (ValueType->isPointerTy()) {
5938  // Have VoidPtrTy ArgValue but want to return an i32/i64.
5939  ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
5940  return Builder.CreateCall(F, { Metadata, ArgValue });
5941  }
5942 
5943  return Builder.CreateCall(F, { Metadata, ArgValue });
5944 }
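// For example (register name chosen for illustration): __builtin_arm_rsr("cpsr")
// reads a 32-bit register and is emitted as
//   %0 = call i32 @llvm.read_register.i32(metadata !0)
// where the register is named by metadata: !0 = !{!"cpsr"}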
5945 
5946 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
5947 /// argument that specifies the vector type.
5948 static bool HasExtraNeonArgument(unsigned BuiltinID) {
5949  switch (BuiltinID) {
5950  default: break;
5951  case NEON::BI__builtin_neon_vget_lane_i8:
5952  case NEON::BI__builtin_neon_vget_lane_i16:
5953  case NEON::BI__builtin_neon_vget_lane_i32:
5954  case NEON::BI__builtin_neon_vget_lane_i64:
5955  case NEON::BI__builtin_neon_vget_lane_f32:
5956  case NEON::BI__builtin_neon_vgetq_lane_i8:
5957  case NEON::BI__builtin_neon_vgetq_lane_i16:
5958  case NEON::BI__builtin_neon_vgetq_lane_i32:
5959  case NEON::BI__builtin_neon_vgetq_lane_i64:
5960  case NEON::BI__builtin_neon_vgetq_lane_f32:
5961  case NEON::BI__builtin_neon_vset_lane_i8:
5962  case NEON::BI__builtin_neon_vset_lane_i16:
5963  case NEON::BI__builtin_neon_vset_lane_i32:
5964  case NEON::BI__builtin_neon_vset_lane_i64:
5965  case NEON::BI__builtin_neon_vset_lane_f32:
5966  case NEON::BI__builtin_neon_vsetq_lane_i8:
5967  case NEON::BI__builtin_neon_vsetq_lane_i16:
5968  case NEON::BI__builtin_neon_vsetq_lane_i32:
5969  case NEON::BI__builtin_neon_vsetq_lane_i64:
5970  case NEON::BI__builtin_neon_vsetq_lane_f32:
5971  case NEON::BI__builtin_neon_vsha1h_u32:
5972  case NEON::BI__builtin_neon_vsha1cq_u32:
5973  case NEON::BI__builtin_neon_vsha1pq_u32:
5974  case NEON::BI__builtin_neon_vsha1mq_u32:
5975  case clang::ARM::BI_MoveToCoprocessor:
5976  case clang::ARM::BI_MoveToCoprocessor2:
5977  return false;
5978  }
5979  return true;
5980 }
5981 
5982 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
5983  const CallExpr *E,
5984  llvm::Triple::ArchType Arch) {
5985  if (auto Hint = GetValueForARMHint(BuiltinID))
5986  return Hint;
5987 
5988  if (BuiltinID == ARM::BI__emit) {
5989  bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
5990  llvm::FunctionType *FTy =
5991  llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
5992 
5993  Expr::EvalResult Result;
5994  if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
5995  llvm_unreachable("Sema will ensure that the parameter is constant");
5996 
5997  llvm::APSInt Value = Result.Val.getInt();
5998  uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
5999 
6000  llvm::InlineAsm *Emit =
6001  IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
6002  /*hasSideEffects=*/true)
6003  : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
6004  /*hasSideEffects=*/true);
6005 
6006  return Builder.CreateCall(Emit);
6007  }
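  // For example (opcode chosen for illustration): in Thumb mode, __emit(0xBF00)
  // is emitted as a side-effecting inline-asm call that places the raw
  // encoding directly in the instruction stream:
  //   call void asm sideeffect ".inst.n 0xBF00", ""()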
6008 
6009  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
6010  Value *Option = EmitScalarExpr(E->getArg(0));
6011  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
6012  }
6013 
6014  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
6015  Value *Address = EmitScalarExpr(E->getArg(0));
6016  Value *RW = EmitScalarExpr(E->getArg(1));
6017  Value *IsData = EmitScalarExpr(E->getArg(2));
6018 
6019  // Locality is not supported on the ARM target.
6020  Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
6021 
6022  Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
6023  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
6024  }
6025 
6026  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
6027  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
6028  return Builder.CreateCall(
6029  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
6030  }
6031 
6032  if (BuiltinID == ARM::BI__clear_cache) {
6033  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
6034  const FunctionDecl *FD = E->getDirectCallee();
6035  Value *Ops[2];
6036  for (unsigned i = 0; i < 2; i++)
6037  Ops[i] = EmitScalarExpr(E->getArg(i));
6038  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
6039  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
6040  StringRef Name = FD->getName();
6041  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
6042  }
6043 
6044  if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
6045  BuiltinID == ARM::BI__builtin_arm_mcrr2) {
6046  Function *F;
6047 
6048  switch (BuiltinID) {
6049  default: llvm_unreachable("unexpected builtin");
6050  case ARM::BI__builtin_arm_mcrr:
6051  F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
6052  break;
6053  case ARM::BI__builtin_arm_mcrr2:
6054  F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
6055  break;
6056  }
6057 
6058  // The MCRR{2} instruction has 5 operands, but the builtin takes only 4
6059  // because Rt and Rt2 are packed into a single unsigned 64-bit integer
6060  // in the builtin's signature, whereas the LLVM intrinsic (like the
6061  // instruction itself) takes them as two separate 32-bit integers.
6062  // Split the 64-bit argument into its low and high halves before
6063  // calling the intrinsic.
6064 
6065  Value *Coproc = EmitScalarExpr(E->getArg(0));
6066  Value *Opc1 = EmitScalarExpr(E->getArg(1));
6067  Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
6068  Value *CRm = EmitScalarExpr(E->getArg(3));
6069 
6070  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
6071  Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
6072  Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
6073  Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
6074 
6075  return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
6076  }
6077 
6078  if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
6079  BuiltinID == ARM::BI__builtin_arm_mrrc2) {
6080  Function *F;
6081 
6082  switch (BuiltinID) {
6083  default: llvm_unreachable("unexpected builtin");
6084  case ARM::BI__builtin_arm_mrrc:
6085  F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
6086  break;
6087  case ARM::BI__builtin_arm_mrrc2:
6088  F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
6089  break;
6090  }
6091 
6092  Value *Coproc = EmitScalarExpr(E->getArg(0));
6093  Value *Opc1 = EmitScalarExpr(E->getArg(1));
6094  Value *CRm = EmitScalarExpr(E->getArg(2));
6095  Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
6096 
6097  // The intrinsic returns the two 32-bit halves; recombine them into the
6098  // unsigned 64-bit integer result of the builtin.
6099 
6100  Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
6101  Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
6102  Rt = Builder.CreateZExt(Rt, Int64Ty);
6103  Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
6104 
6105  Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
6106  RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
6107  RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
6108 
6109  return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
6110  }
6111 
6112  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
6113  ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
6114  BuiltinID == ARM::BI__builtin_arm_ldaex) &&
6115  getContext().getTypeSize(E->getType()) == 64) ||
6116  BuiltinID == ARM::BI__ldrexd) {
6117  Function *F;
6118 
6119  switch (BuiltinID) {
6120  default: llvm_unreachable("unexpected builtin");
6121  case ARM::BI__builtin_arm_ldaex:
6122  F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
6123  break;
6124  case ARM::BI__builtin_arm_ldrexd:
6125  case ARM::BI__builtin_arm_ldrex:
6126  case ARM::BI__ldrexd:
6127  F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
6128  break;
6129  }
6130 
6131  Value *LdPtr = EmitScalarExpr(E->getArg(0));
6132  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
6133  "ldrexd");
6134 
6135  Value *Val0 = Builder.CreateExtractValue(Val, 1);
6136  Value *Val1 = Builder.CreateExtractValue(Val, 0);
6137  Val0 = Builder.CreateZExt(Val0, Int64Ty);
6138  Val1 = Builder.CreateZExt(Val1, Int64Ty);
6139 
6140  Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
6141  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
6142  Val = Builder.CreateOr(Val, Val1);
6143  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
6144  }
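  // For example (illustrative): a 64-bit __builtin_arm_ldrex yields an
  // { i32, i32 } pair from @llvm.arm.ldrexd; the halves are zero-extended to
  // i64 and recombined as (Val0 << 32) | Val1, per the extraction order above.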
6145 
6146  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
6147  BuiltinID == ARM::BI__builtin_arm_ldaex) {
6148  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
6149 
6150  QualType Ty = E->getType();
6151  llvm::Type *RealResTy = ConvertType(Ty);
6152  llvm::Type *PtrTy = llvm::IntegerType::get(
6153  getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
6154  LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
6155 
6156  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
6157  ? Intrinsic::arm_ldaex
6158  : Intrinsic::arm_ldrex,
6159  PtrTy);
6160  Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
6161 
6162  if (RealResTy->isPointerTy())
6163  return Builder.CreateIntToPtr(Val, RealResTy);
6164  else {
6165  llvm::Type *IntResTy = llvm::IntegerType::get(
6166  getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
6167  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
6168  return Builder.CreateBitCast(Val, RealResTy);
6169  }
6170  }
6171 
6172  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
6173  ((BuiltinID == ARM::BI__builtin_arm_stlex ||
6174  BuiltinID == ARM::BI__builtin_arm_strex) &&
6175  getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
6176  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
6177  ? Intrinsic::arm_stlexd
6178  : Intrinsic::arm_strexd);
6179  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
6180 
6181  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
6182  Value *Val = EmitScalarExpr(E->getArg(0));
6183  Builder.CreateStore(Val, Tmp);
6184 
6185  Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
6186  Val = Builder.CreateLoad(LdPtr);
6187 
6188  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
6189  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
6190  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
6191  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
6192  }
6193 
6194  if (BuiltinID == ARM::BI__builtin_arm_strex ||
6195  BuiltinID == ARM::BI__builtin_arm_stlex) {
6196  Value *StoreVal = EmitScalarExpr(E->getArg(0));
6197  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
6198 
6199  QualType Ty = E->getArg(0)->getType();
6200  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
6201  getContext().getTypeSize(Ty));
6202  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
6203 
6204  if (StoreVal->getType()->isPointerTy())
6205  StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
6206  else {
6207  llvm::Type *IntTy = llvm::IntegerType::get(
6208  getLLVMContext(),
6209  CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
6210  StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
6211  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
6212  }
6213 
6214  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
6215  ? Intrinsic::arm_stlex
6216  : Intrinsic::arm_strex,
6217  StoreAddr->getType());
6218  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
6219  }
6220 
6221  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
6222  Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
6223  return Builder.CreateCall(F);
6224  }
6225 
6226  // CRC32
6227  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
6228  switch (BuiltinID) {
6229  case ARM::BI__builtin_arm_crc32b:
6230  CRCIntrinsicID = Intrinsic::arm_crc32b; break;
6231  case ARM::BI__builtin_arm_crc32cb:
6232  CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
6233  case ARM::BI__builtin_arm_crc32h:
6234  CRCIntrinsicID = Intrinsic::arm_crc32h; break;
6235  case ARM::BI__builtin_arm_crc32ch:
6236  CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
6237  case ARM::BI__builtin_arm_crc32w:
6238  case ARM::BI__builtin_arm_crc32d:
6239  CRCIntrinsicID = Intrinsic::arm_crc32w; break;
6240  case ARM::BI__builtin_arm_crc32cw:
6241  case ARM::BI__builtin_arm_crc32cd:
6242  CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
6243  }
6244 
6245  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
6246  Value *Arg0 = EmitScalarExpr(E->getArg(0));
6247  Value *Arg1 = EmitScalarExpr(E->getArg(1));
6248 
6249  // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
6250  // intrinsics, hence we need different codegen for these cases.
6251  if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
6252  BuiltinID == ARM::BI__builtin_arm_crc32cd) {
6253  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
6254  Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
6255  Value *Arg1b = Builder.CreateLShr(Arg1, C1);
6256  Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
6257 
6258  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
6259  Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
6260  return Builder.CreateCall(F, {Res, Arg1b});
6261  } else {
6262  Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
6263 
6264  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
6265  return Builder.CreateCall(F, {Arg0, Arg1});
6266  }
6267  }
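  // For example (illustrative): __builtin_arm_crc32d(crc, data) becomes
  //   %lo  = trunc i64 %data to i32
  //   %sh  = lshr i64 %data, 32
  //   %hi  = trunc i64 %sh to i32
  //   %t   = call i32 @llvm.arm.crc32w(i32 %crc, i32 %lo)
  //   %res = call i32 @llvm.arm.crc32w(i32 %t, i32 %hi)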
6268 
6269  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
6270  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
6271  BuiltinID == ARM::BI__builtin_arm_rsrp ||
6272  BuiltinID == ARM::BI__builtin_arm_wsr ||
6273  BuiltinID == ARM::BI__builtin_arm_wsr64 ||
6274  BuiltinID == ARM::BI__builtin_arm_wsrp) {
6275 
6276  bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
6277  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
6278  BuiltinID == ARM::BI__builtin_arm_rsrp;
6279 
6280  bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
6281  BuiltinID == ARM::BI__builtin_arm_wsrp;
6282 
6283  bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
6284  BuiltinID == ARM::BI__builtin_arm_wsr64;
6285 
6286  llvm::Type *ValueType;
6287  llvm::Type *RegisterType;
6288  if (IsPointerBuiltin) {
6289  ValueType = VoidPtrTy;
6290  RegisterType = Int32Ty;
6291  } else if (Is64Bit) {
6292  ValueType = RegisterType = Int64Ty;
6293  } else {
6294  ValueType = RegisterType = Int32Ty;
6295  }
6296 
6297  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
6298  }
6299 
6300  // Find out if any arguments are required to be integer constant
6301  // expressions.
6302  unsigned ICEArguments = 0;
6303  ASTContext::GetBuiltinTypeError Error;
6304  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
6305  assert(Error == ASTContext::GE_None && "Should not codegen an error");
6306 
6307  auto getAlignmentValue32 = [&](Address addr) -> Value* {
6308  return Builder.getInt32(addr.getAlignment().getQuantity());
6309  };
6310 
6311  Address PtrOp0 = Address::invalid();
6312  Address PtrOp1 = Address::invalid();
6313  SmallVector<Value*, 4> Ops;
6314  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
6315  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
6316  for (unsigned i = 0, e = NumArgs; i != e; i++) {
6317  if (i == 0) {
6318  switch (BuiltinID) {
6319  case NEON::BI__builtin_neon_vld1_v:
6320  case NEON::BI__builtin_neon_vld1q_v:
6321  case NEON::BI__builtin_neon_vld1q_lane_v:
6322  case NEON::BI__builtin_neon_vld1_lane_v:
6323  case NEON::BI__builtin_neon_vld1_dup_v:
6324  case NEON::BI__builtin_neon_vld1q_dup_v:
6325  case NEON::BI__builtin_neon_vst1_v:
6326  case NEON::BI__builtin_neon_vst1q_v:
6327  case NEON::BI__builtin_neon_vst1q_lane_v:
6328  case NEON::BI__builtin_neon_vst1_lane_v:
6329  case NEON::BI__builtin_neon_vst2_v:
6330  case NEON::BI__builtin_neon_vst2q_v:
6331  case NEON::BI__builtin_neon_vst2_lane_v:
6332  case NEON::BI__builtin_neon_vst2q_lane_v:
6333  case NEON::BI__builtin_neon_vst3_v:
6334  case NEON::BI__builtin_neon_vst3q_v:
6335  case NEON::BI__builtin_neon_vst3_lane_v:
6336  case NEON::BI__builtin_neon_vst3q_lane_v:
6337  case NEON::BI__builtin_neon_vst4_v:
6338  case NEON::BI__builtin_neon_vst4q_v:
6339  case NEON::BI__builtin_neon_vst4_lane_v:
6340  case NEON::BI__builtin_neon_vst4q_lane_v:
6341  // Get the alignment for the argument in addition to the value;
6342  // we'll use it later.
6343  PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
6344  Ops.push_back(PtrOp0.getPointer());
6345  continue;
6346  }
6347  }
6348  if (i == 1) {
6349  switch (BuiltinID) {
6350  case NEON::BI__builtin_neon_vld2_v:
6351  case NEON::BI__builtin_neon_vld2q_v:
6352  case NEON::BI__builtin_neon_vld3_v:
6353  case NEON::BI__builtin_neon_vld3q_v:
6354  case NEON::BI__builtin_neon_vld4_v:
6355  case NEON::BI__builtin_neon_vld4q_v:
6356  case NEON::BI__builtin_neon_vld2_lane_v:
6357  case NEON::BI__builtin_neon_vld2q_lane_v:
6358  case NEON::BI__builtin_neon_vld3_lane_v:
6359  case NEON::BI__builtin_neon_vld3q_lane_v:
6360  case NEON::BI__builtin_neon_vld4_lane_v:
6361  case NEON::BI__builtin_neon_vld4q_lane_v:
6362  case NEON::BI__builtin_neon_vld2_dup_v:
6363  case NEON::BI__builtin_neon_vld2q_dup_v:
6364  case NEON::BI__builtin_neon_vld3_dup_v:
6365  case NEON::BI__builtin_neon_vld3q_dup_v:
6366  case NEON::BI__builtin_neon_vld4_dup_v:
6367  case NEON::BI__builtin_neon_vld4q_dup_v:
6368  // Get the alignment for the argument in addition to the value;
6369  // we'll use it later.
6370  PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
6371  Ops.push_back(PtrOp1.getPointer());
6372  continue;
6373  }
6374  }
6375 
6376  if ((ICEArguments & (1 << i)) == 0) {
6377  Ops.push_back(EmitScalarExpr(E->getArg(i)));
6378  } else {
6379  // If this is required to be a constant, constant fold it so that we know
6380  // that the generated intrinsic gets a ConstantInt.
6381  llvm::APSInt Result;
6382  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
6383  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
6384  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
6385  }
6386  }
6387 
6388  switch (BuiltinID) {
6389  default: break;
6390 
6391  case NEON::BI__builtin_neon_vget_lane_i8:
6392  case NEON::BI__builtin_neon_vget_lane_i16:
6393  case NEON::BI__builtin_neon_vget_lane_i32:
6394  case NEON::BI__builtin_neon_vget_lane_i64:
6395  case NEON::BI__builtin_neon_vget_lane_f32:
6396  case NEON::BI__builtin_neon_vgetq_lane_i8:
6397  case NEON::BI__builtin_neon_vgetq_lane_i16:
6398  case NEON::BI__builtin_neon_vgetq_lane_i32:
6399  case NEON::BI__builtin_neon_vgetq_lane_i64:
6400  case NEON::BI__builtin_neon_vgetq_lane_f32:
6401  return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
6402 
6403  case NEON::BI__builtin_neon_vrndns_f32: {
6404  Value *Arg = EmitScalarExpr(E->getArg(0));
6405  llvm::Type *Tys[] = {Arg->getType()};
6406  Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vrintn, Tys);
6407  return Builder.CreateCall(F, {Arg}, "vrndn"); }
6408 
6409  case NEON::BI__builtin_neon_vset_lane_i8:
6410  case NEON::BI__builtin_neon_vset_lane_i16:
6411  case NEON::BI__builtin_neon_vset_lane_i32:
6412  case NEON::BI__builtin_neon_vset_lane_i64:
6413  case NEON::BI__builtin_neon_vset_lane_f32:
6414  case NEON::BI__builtin_neon_vsetq_lane_i8:
6415  case NEON::BI__builtin_neon_vsetq_lane_i16:
6416  case NEON::BI__builtin_neon_vsetq_lane_i32:
6417  case NEON::BI__builtin_neon_vsetq_lane_i64:
6418  case NEON::BI__builtin_neon_vsetq_lane_f32:
6419  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
6420 
6421  case NEON::BI__builtin_neon_vsha1h_u32:
6422  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
6423  "vsha1h");
6424  case NEON::BI__builtin_neon_vsha1cq_u32:
6425  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
6426  "vsha1h");
6427  case NEON::BI__builtin_neon_vsha1pq_u32:
6428  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
6429  "vsha1h");
6430  case NEON::BI__builtin_neon_vsha1mq_u32:
6431  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
6432  "vsha1h");
6433 
6434  // The ARM _MoveToCoprocessor builtins put the input register value as
6435  // the first argument, but the LLVM intrinsic expects it as the third one.
6436  case ARM::BI_MoveToCoprocessor:
6437  case ARM::BI_MoveToCoprocessor2: {
6438  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
6439  Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
6440  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
6441  Ops[3], Ops[4], Ops[5]});
6442  }
6443  case ARM::BI_BitScanForward:
6444  case ARM::BI_BitScanForward64:
6445  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
6446  case ARM::BI_BitScanReverse:
6447  case ARM::BI_BitScanReverse64:
6448  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
6449 
6450  case ARM::BI_InterlockedAnd64:
6451  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
6452  case ARM::BI_InterlockedExchange64:
6453  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
6454  case ARM::BI_InterlockedExchangeAdd64:
6455  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
6456  case ARM::BI_InterlockedExchangeSub64:
6457  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
6458  case ARM::BI_InterlockedOr64:
6459  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
6460  case ARM::BI_InterlockedXor64:
6461  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
6462  case ARM::BI_InterlockedDecrement64:
6463  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
6464  case ARM::BI_InterlockedIncrement64:
6465  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
6466  case ARM::BI_InterlockedExchangeAdd8_acq:
6467  case ARM::BI_InterlockedExchangeAdd16_acq:
6468  case ARM::BI_InterlockedExchangeAdd_acq:
6469  case ARM::BI_InterlockedExchangeAdd64_acq:
6470  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
6471  case ARM::BI_InterlockedExchangeAdd8_rel:
6472  case ARM::BI_InterlockedExchangeAdd16_rel:
6473  case ARM::BI_InterlockedExchangeAdd_rel:
6474  case ARM::BI_InterlockedExchangeAdd64_rel:
6475  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
6476  case ARM::BI_InterlockedExchangeAdd8_nf:
6477  case ARM::BI_InterlockedExchangeAdd16_nf:
6478  case ARM::BI_InterlockedExchangeAdd_nf:
6479  case ARM::BI_InterlockedExchangeAdd64_nf:
6480  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
6481  case ARM::BI_InterlockedExchange8_acq:
6482  case ARM::BI_InterlockedExchange16_acq:
6483  case ARM::BI_InterlockedExchange_acq:
6484  case ARM::BI_InterlockedExchange64_acq:
6485  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
6486  case ARM::BI_InterlockedExchange8_rel:
6487  case ARM::BI_InterlockedExchange16_rel:
6488  case ARM::BI_InterlockedExchange_rel:
6489  case ARM::BI_InterlockedExchange64_rel:
6490  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
6491  case ARM::BI_InterlockedExchange8_nf:
6492  case ARM::BI_InterlockedExchange16_nf:
6493  case ARM::BI_InterlockedExchange_nf:
6494  case ARM::BI_InterlockedExchange64_nf:
6495  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
6496  case ARM::BI_InterlockedCompareExchange8_acq:
6497  case ARM::BI_InterlockedCompareExchange16_acq:
6498  case ARM::BI_InterlockedCompareExchange_acq:
6499  case ARM::BI_InterlockedCompareExchange64_acq:
6500  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
6501  case ARM::BI_InterlockedCompareExchange8_rel:
6502  case ARM::BI_InterlockedCompareExchange16_rel:
6503  case ARM::BI_InterlockedCompareExchange_rel:
6504  case ARM::BI_InterlockedCompareExchange64_rel:
6505  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
6506  case ARM::BI_InterlockedCompareExchange8_nf:
6507  case ARM::BI_InterlockedCompareExchange16_nf:
6508  case ARM::BI_InterlockedCompareExchange_nf:
6509  case ARM::BI_InterlockedCompareExchange64_nf:
6510  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
6511  case ARM::BI_InterlockedOr8_acq:
6512  case ARM::BI_InterlockedOr16_acq:
6513  case ARM::BI_InterlockedOr_acq:
6514  case ARM::BI_InterlockedOr64_acq:
6515  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
6516  case ARM::BI_InterlockedOr8_rel:
6517  case ARM::BI_InterlockedOr16_rel:
6518  case ARM::BI_InterlockedOr_rel:
6519  case ARM::BI_InterlockedOr64_rel:
6520  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
6521  case ARM::BI_InterlockedOr8_nf:
6522  case ARM::BI_InterlockedOr16_nf:
6523  case ARM::BI_InterlockedOr_nf:
6524  case ARM::BI_InterlockedOr64_nf:
6525  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
6526  case ARM::BI_InterlockedXor8_acq:
6527  case ARM::BI_InterlockedXor16_acq:
6528  case ARM::BI_InterlockedXor_acq:
6529  case ARM::BI_InterlockedXor64_acq:
6530  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
6531  case ARM::BI_InterlockedXor8_rel:
6532  case ARM::BI_InterlockedXor16_rel:
6533  case ARM::BI_InterlockedXor_rel:
6534  case ARM::BI_InterlockedXor64_rel:
6535  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
6536  case ARM::BI_InterlockedXor8_nf:
6537  case ARM::BI_InterlockedXor16_nf:
6538  case ARM::BI_InterlockedXor_nf:
6539  case ARM::BI_InterlockedXor64_nf:
6540  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
6541  case ARM::BI_InterlockedAnd8_acq:
6542  case ARM::BI_InterlockedAnd16_acq:
6543  case ARM::BI_InterlockedAnd_acq:
6544  case ARM::BI_InterlockedAnd64_acq:
6545  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
6546  case ARM::BI_InterlockedAnd8_rel:
6547  case ARM::BI_InterlockedAnd16_rel:
6548  case ARM::BI_InterlockedAnd_rel:
6549  case ARM::BI_InterlockedAnd64_rel:
6550  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
6551  case ARM::BI_InterlockedAnd8_nf:
6552  case ARM::BI_InterlockedAnd16_nf:
6553  case ARM::BI_InterlockedAnd_nf:
6554  case ARM::BI_InterlockedAnd64_nf:
6555  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
6556  case ARM::BI_InterlockedIncrement16_acq:
6557  case ARM::BI_InterlockedIncrement_acq:
6558  case ARM::BI_InterlockedIncrement64_acq:
6559  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
6560  case ARM::BI_InterlockedIncrement16_rel:
6561  case ARM::BI_InterlockedIncrement_rel:
6562  case ARM::BI_InterlockedIncrement64_rel:
6563  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
6564  case ARM::BI_InterlockedIncrement16_nf:
6565  case ARM::BI_InterlockedIncrement_nf:
6566  case ARM::BI_InterlockedIncrement64_nf:
6567  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
6568  case ARM::BI_InterlockedDecrement16_acq:
6569  case ARM::BI_InterlockedDecrement_acq:
6570  case ARM::BI_InterlockedDecrement64_acq:
6571  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
6572  case ARM::BI_InterlockedDecrement16_rel:
6573  case ARM::BI_InterlockedDecrement_rel:
6574  case ARM::BI_InterlockedDecrement64_rel:
6575  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
6576  case ARM::BI_InterlockedDecrement16_nf:
6577  case ARM::BI_InterlockedDecrement_nf:
6578  case ARM::BI_InterlockedDecrement64_nf:
6579  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
6580  }
6581 
6582  // Get the last argument, which specifies the vector type.
6583  assert(HasExtraArg);
6584  llvm::APSInt Result;
6585  const Expr *Arg = E->getArg(E->getNumArgs()-1);
6586  if (!Arg->isIntegerConstantExpr(Result, getContext()))
6587  return nullptr;
6588 
6589  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
6590  BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
6591  // Determine the overloaded type of this builtin.
6592  llvm::Type *Ty;
6593  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
6594  Ty = FloatTy;
6595  else
6596  Ty = DoubleTy;
6597 
6598  // Determine whether this is an unsigned conversion or not.
6599  bool usgn = Result.getZExtValue() == 1;
6600  unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
6601 
6602  // Call the appropriate intrinsic.
6603  Function *F = CGM.getIntrinsic(Int, Ty);
6604  return Builder.CreateCall(F, Ops, "vcvtr");
6605  }
6606 
6607  // Determine the type of this overloaded NEON intrinsic.
6608  NeonTypeFlags Type(Result.getZExtValue());
6609  bool usgn = Type.isUnsigned();
6610  bool rightShift = false;
6611 
6612  llvm::VectorType *VTy = GetNeonType(this, Type,
6613  getTarget().hasLegalHalfType());
6614  llvm::Type *Ty = VTy;
6615  if (!Ty)
6616  return nullptr;
6617 
6618  // Many NEON builtins have identical semantics and uses in ARM and
6619  // AArch64. Emit these in a single function.
6620  auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
6621  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
6622  IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
6623  if (Builtin)
6624  return EmitCommonNeonBuiltinExpr(
6625  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6626  Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1, Arch);
6627 
6628  unsigned Int;
6629  switch (BuiltinID) {
6630  default: return nullptr;
6631  case NEON::BI__builtin_neon_vld1q_lane_v:
6632  // Handle 64-bit integer elements as a special case. Use shuffles of
6633  // one-element vectors to avoid poor code for i64 in the backend.
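  // The lowering below keeps the untouched lane by shuffling it out, loads
  // the replacement element as a <1 x i64> through arm.neon.vld1, and then
  // shuffles the two single-element results back into a <2 x i64>.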
6634  if (VTy->getElementType()->isIntegerTy(64)) {
6635  // Extract the other lane.
6636  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6637  uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
6638  Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
6639  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
6640  // Load the value as a one-element vector.
6641  Ty = llvm::VectorType::get(VTy->getElementType(), 1);
6642  llvm::Type *Tys[] = {Ty, Int8PtrTy};
6643  Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
6644  Value *Align = getAlignmentValue32(PtrOp0);
6645  Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
6646  // Combine them.
6647  uint32_t Indices[] = {1 - Lane, Lane};
6648  SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
6649  return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
6650  }
6651  LLVM_FALLTHROUGH;
6652  case NEON::BI__builtin_neon_vld1_lane_v: {
6653  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6654  PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
6655  Value *Ld = Builder.CreateLoad(PtrOp0);
6656  return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
6657  }
6658  case NEON::BI__builtin_neon_vqrshrn_n_v:
6659  Int =
6660  usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
6661  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
6662  1, true);
6663  case NEON::BI__builtin_neon_vqrshrun_n_v:
6664  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
6665  Ops, "vqrshrun_n", 1, true);
6666  case NEON::BI__builtin_neon_vqshrn_n_v:
6667  Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
6668  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
6669  1, true);
6670  case NEON::BI__builtin_neon_vqshrun_n_v:
6671  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
6672  Ops, "vqshrun_n", 1, true);
6673  case NEON::BI__builtin_neon_vrecpe_v:
6674  case NEON::BI__builtin_neon_vrecpeq_v:
6675  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
6676  Ops, "vrecpe");
6677  case NEON::BI__builtin_neon_vrshrn_n_v:
6678  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
6679  Ops, "vrshrn_n", 1, true);
6680  case NEON::BI__builtin_neon_vrsra_n_v:
6681  case NEON::BI__builtin_neon_vrsraq_n_v:
6682  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6683  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6684  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
6685  Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
6686  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
6687  return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
6688  case NEON::BI__builtin_neon_vsri_n_v:
6689  case NEON::BI__builtin_neon_vsriq_n_v:
6690  rightShift = true;
6691  LLVM_FALLTHROUGH;
6692  case NEON::BI__builtin_neon_vsli_n_v:
6693  case NEON::BI__builtin_neon_vsliq_n_v:
6694  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
6695  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
6696  Ops, "vsli_n");
6697  case NEON::BI__builtin_neon_vsra_n_v:
6698  case NEON::BI__builtin_neon_vsraq_n_v:
6699  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6700  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6701  return Builder.CreateAdd(Ops[0], Ops[1]);
6702  case NEON::BI__builtin_neon_vst1q_lane_v:
6703  // Handle 64-bit integer elements as a special case. Use a shuffle to get
6704  // a one-element vector and avoid poor code for i64 in the backend.
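  // The shuffle below extracts the selected lane into a <1 x i64> vector,
  // which is then stored through the arm.neon.vst1 intrinsic.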
6705  if (VTy->getElementType()->isIntegerTy(64)) {
6706  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6707  Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
6708  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
6709  Ops[2] = getAlignmentValue32(PtrOp0);
6710  llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
6711  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
6712  Tys), Ops);
6713  }
6714  LLVM_FALLTHROUGH;
6715  case NEON::BI__builtin_neon_vst1_lane_v: {
6716  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6717  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6718  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6719  auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
6720  return St;
6721  }
6722  case NEON::BI__builtin_neon_vtbl1_v:
6723  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
6724  Ops, "vtbl1");
6725  case NEON::BI__builtin_neon_vtbl2_v:
6726  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
6727  Ops, "vtbl2");
6728  case NEON::BI__builtin_neon_vtbl3_v:
6729  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
6730  Ops, "vtbl3");
6731  case NEON::BI__builtin_neon_vtbl4_v:
6732  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
6733  Ops, "vtbl4");
6734  case NEON::BI__builtin_neon_vtbx1_v:
6735  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
6736  Ops, "vtbx1");
6737  case NEON::BI__builtin_neon_vtbx2_v:
6738  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
6739  Ops, "vtbx2");
6740  case NEON::BI__builtin_neon_vtbx3_v:
6741  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
6742  Ops, "vtbx3");
6743  case NEON::BI__builtin_neon_vtbx4_v:
6744  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
6745  Ops, "vtbx4");
6746  }
6747 }
6748 
6749 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
 6750  const CallExpr *E,
 6751  SmallVectorImpl<Value *> &Ops,
 6752  llvm::Triple::ArchType Arch) {
6753  unsigned int Int = 0;
6754  const char *s = nullptr;
6755 
6756  switch (BuiltinID) {
6757  default:
6758  return nullptr;
6759  case NEON::BI__builtin_neon_vtbl1_v:
6760  case NEON::BI__builtin_neon_vqtbl1_v:
6761  case NEON::BI__builtin_neon_vqtbl1q_v:
6762  case NEON::BI__builtin_neon_vtbl2_v:
6763  case NEON::BI__builtin_neon_vqtbl2_v:
6764  case NEON::BI__builtin_neon_vqtbl2q_v:
6765  case NEON::BI__builtin_neon_vtbl3_v:
6766  case NEON::BI__builtin_neon_vqtbl3_v:
6767  case NEON::BI__builtin_neon_vqtbl3q_v:
6768  case NEON::BI__builtin_neon_vtbl4_v:
6769  case NEON::BI__builtin_neon_vqtbl4_v:
6770  case NEON::BI__builtin_neon_vqtbl4q_v:
6771  break;
6772  case NEON::BI__builtin_neon_vtbx1_v:
6773  case NEON::BI__builtin_neon_vqtbx1_v:
6774  case NEON::BI__builtin_neon_vqtbx1q_v:
6775  case NEON::BI__builtin_neon_vtbx2_v:
6776  case NEON::BI__builtin_neon_vqtbx2_v:
6777  case NEON::BI__builtin_neon_vqtbx2q_v:
6778  case NEON::BI__builtin_neon_vtbx3_v:
6779  case NEON::BI__builtin_neon_vqtbx3_v:
6780  case NEON::BI__builtin_neon_vqtbx3q_v:
6781  case NEON::BI__builtin_neon_vtbx4_v:
6782  case NEON::BI__builtin_neon_vqtbx4_v:
6783  case NEON::BI__builtin_neon_vqtbx4q_v:
6784  break;
6785  }
6786 
6787  assert(E->getNumArgs() >= 3);
6788 
6789  // Get the last argument, which specifies the vector type.
6790  llvm::APSInt Result;
6791  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
6792  if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
6793  return nullptr;
6794 
6795  // Determine the type of this overloaded NEON intrinsic.
6796  NeonTypeFlags Type(Result.getZExtValue());
6797  llvm::VectorType *Ty = GetNeonType(&CGF, Type);
6798  if (!Ty)
6799  return nullptr;
6800 
6801  CodeGen::CGBuilderTy &Builder = CGF.Builder;
6802 
 6803  // AArch64 scalar builtins are not overloaded; they do not have an extra
 6804  // argument that specifies the vector type, so each case is handled here.
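  // The 64-bit vtbl/vtbx forms below go through packTBLDVectorList, which
  // packs the 64-bit table vectors into the 128-bit table operands expected
  // by the AArch64 TBL/TBX intrinsics.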
6805  switch (BuiltinID) {
6806  case NEON::BI__builtin_neon_vtbl1_v: {
6807  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
6808  Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
6809  "vtbl1");
6810  }
6811  case NEON::BI__builtin_neon_vtbl2_v: {
6812  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
6813  Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
6814  "vtbl1");
6815  }
6816  case NEON::BI__builtin_neon_vtbl3_v: {
6817  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
6818  Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
6819  "vtbl2");
6820  }
6821  case NEON::BI__builtin_neon_vtbl4_v: {
6822  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
6823  Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
6824  "vtbl2");
6825  }
6826  case NEON::BI__builtin_neon_vtbx1_v: {
6827  Value *TblRes =
6828  packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
6829  Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
6830 
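  // vtbx1 semantics: indices >= 8 are out of range for the single 8-byte
  // table, so those lanes keep the destination element (Ops[0]) while the
  // in-range lanes take the TBL result computed above.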
6831  llvm::Constant *EightV = ConstantInt::get(Ty, 8);
6832  Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
6833  CmpRes = Builder.CreateSExt(CmpRes, Ty);
6834 
6835  Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
6836  Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
6837  return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
6838  }
6839  case NEON::BI__builtin_neon_vtbx2_v: {
6840  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
6841  Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
6842  "vtbx1");
6843  }
6844  case NEON::BI__builtin_neon_vtbx3_v: {
6845  Value *TblRes =
6846  packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
6847  Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
6848 
6849  llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
6850  Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
6851  TwentyFourV);
6852  CmpRes = Builder.CreateSExt(CmpRes, Ty);
6853 
6854  Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
6855  Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
6856  return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
6857  }
6858  case NEON::BI__builtin_neon_vtbx4_v: {
6859  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
6860  Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
6861  "vtbx2");
6862  }
6863  case NEON::BI__builtin_neon_vqtbl1_v:
6864  case NEON::BI__builtin_neon_vqtbl1q_v:
6865  Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
6866  case NEON::BI__builtin_neon_vqtbl2_v:
6867  case NEON::BI__builtin_neon_vqtbl2q_v: {
6868  Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
6869  case NEON::BI__builtin_neon_vqtbl3_v:
6870  case NEON::BI__builtin_neon_vqtbl3q_v:
6871  Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
6872  case NEON::BI__builtin_neon_vqtbl4_v:
6873  case NEON::BI__builtin_neon_vqtbl4q_v:
6874  Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
6875  case NEON::BI__builtin_neon_vqtbx1_v:
6876  case NEON::BI__builtin_neon_vqtbx1q_v:
6877  Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
6878  case NEON::BI__builtin_neon_vqtbx2_v:
6879  case NEON::BI__builtin_neon_vqtbx2q_v:
6880  Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
6881  case NEON::BI__builtin_neon_vqtbx3_v:
6882  case NEON::BI__builtin_neon_vqtbx3q_v:
6883  Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
6884  case NEON::BI__builtin_neon_vqtbx4_v:
6885  case NEON::BI__builtin_neon_vqtbx4q_v:
6886  Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
6887  }
6888  }
6889 
6890  if (!Int)
6891  return nullptr;
6892 
6893  Function *F = CGF.CGM.getIntrinsic(Int, Ty);
6894  return CGF.EmitNeonCall(F, Ops, s);
6895 }
6896 
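// Place a 16-bit scalar in lane 0 of an undef <4 x i16> vector so that the
// scalar NEON builtins below can be emitted through the vector intrinsics.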
 6897 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
 6898  llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
6899  Op = Builder.CreateBitCast(Op, Int16Ty);
6900  Value *V = UndefValue::get(VTy);
6901  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
6902  Op = Builder.CreateInsertElement(V, Op, CI);
6903  return Op;
6904 }
6905 
 6906 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
 6907  const CallExpr *E,
6908  llvm::Triple::ArchType Arch) {
6909  unsigned HintID = static_cast<unsigned>(-1);
6910  switch (BuiltinID) {
6911  default: break;
6912  case AArch64::BI__builtin_arm_nop:
6913  HintID = 0;
6914  break;
6915  case AArch64::BI__builtin_arm_yield:
6916  case AArch64::BI__yield:
6917  HintID = 1;
6918  break;
6919  case AArch64::BI__builtin_arm_wfe:
6920  case AArch64::BI__wfe:
6921  HintID = 2;
6922  break;
6923  case AArch64::BI__builtin_arm_wfi:
6924  case AArch64::BI__wfi:
6925  HintID = 3;
6926  break;
6927  case AArch64::BI__builtin_arm_sev:
6928  case AArch64::BI__sev:
6929  HintID = 4;
6930  break;
6931  case AArch64::BI__builtin_arm_sevl:
6932  case AArch64::BI__sevl:
6933  HintID = 5;
6934  break;
6935  }
6936 
6937  if (HintID != static_cast<unsigned>(-1)) {
6938  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
6939  return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
6940  }
6941 
6942  if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
6943  Value *Address = EmitScalarExpr(E->getArg(0));
6944  Value *RW = EmitScalarExpr(E->getArg(1));
6945  Value *CacheLevel = EmitScalarExpr(E->getArg(2));
6946  Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
6947  Value *IsData = EmitScalarExpr(E->getArg(4));
6948 
6949  Value *Locality = nullptr;
6950  if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
 6951  // Temporal fetch: convert the cache level to prefetch locality (3 - level).
6952  Locality = llvm::ConstantInt::get(Int32Ty,
6953  -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
6954  } else {
6955  // Streaming fetch.
6956  Locality = llvm::ConstantInt::get(Int32Ty, 0);
6957  }
6958 
6959  // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
6960  // PLDL3STRM or PLDL2STRM.
6961  Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
6962  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
6963  }
6964 
6965  if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
6966  assert((getContext().getTypeSize(E->getType()) == 32) &&
6967  "rbit of unusual size!");
6968  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
6969  return Builder.CreateCall(
6970  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
6971  }
6972  if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
6973  assert((getContext().getTypeSize(E->getType()) == 64) &&
6974  "rbit of unusual size!");
6975  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
6976  return Builder.CreateCall(
6977  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
6978  }
6979 
6980  if (BuiltinID == AArch64::BI__builtin_arm_jcvt) {
6981  assert((getContext().getTypeSize(E->getType()) == 32) &&
6982  "__jcvt of unusual size!");
6983  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
6984  return Builder.CreateCall(
6985  CGM.getIntrinsic(Intrinsic::aarch64_fjcvtzs), Arg);
6986  }
6987 
6988  if (BuiltinID == AArch64::BI__clear_cache) {
6989  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
6990  const FunctionDecl *FD = E->getDirectCallee();
6991  Value *Ops[2];
6992  for (unsigned i = 0; i < 2; i++)
6993  Ops[i] = EmitScalarExpr(E->getArg(i));
6994  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
6995  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
6996  StringRef Name = FD->getName();
6997  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
6998  }
6999 
7000  if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
7001  BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
7002  getContext().getTypeSize(E->getType()) == 128) {
7003  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
7004  ? Intrinsic::aarch64_ldaxp
7005  : Intrinsic::aarch64_ldxp);
7006 
7007  Value *LdPtr = EmitScalarExpr(E->getArg(0));
7008  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
7009  "ldxp");
7010 
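  // Recombine the two 64-bit halves returned by ldxp/ldaxp into one i128
  // value: element 1 becomes the high 64 bits and element 0 the low 64 bits.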
7011  Value *Val0 = Builder.CreateExtractValue(Val, 1);
7012  Value *Val1 = Builder.CreateExtractValue(Val, 0);
7013  llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
7014  Val0 = Builder.CreateZExt(Val0, Int128Ty);
7015  Val1 = Builder.CreateZExt(Val1, Int128Ty);
7016 
7017  Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
7018  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
7019  Val = Builder.CreateOr(Val, Val1);
7020  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
7021  } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
7022  BuiltinID == AArch64::BI__builtin_arm_ldaex) {
7023  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
7024 
7025  QualType Ty = E->getType();
7026  llvm::Type *RealResTy = ConvertType(Ty);
7027  llvm::Type *PtrTy = llvm::IntegerType::get(
7028  getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
7029  LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
7030 
7031  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
7032  ? Intrinsic::aarch64_ldaxr
7033  : Intrinsic::aarch64_ldxr,
7034  PtrTy);
7035  Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
7036 
7037  if (RealResTy->isPointerTy())
7038  return Builder.CreateIntToPtr(Val, RealResTy);
7039 
7040  llvm::Type *IntResTy = llvm::IntegerType::get(
7041  getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
7042  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
7043  return Builder.CreateBitCast(Val, RealResTy);
7044  }
7045 
7046  if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
7047  BuiltinID == AArch64::BI__builtin_arm_stlex) &&
7048  getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
7049  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
7050  ? Intrinsic::aarch64_stlxp
7051  : Intrinsic::aarch64_stxp);
7052  llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
7053 
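  // Spill the 128-bit value to a temporary and reload it as a pair of i64
  // halves, which stlxp/stxp take as two separate operands.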
7054  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7055  EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
7056 
7057  Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
7058  llvm::Value *Val = Builder.CreateLoad(Tmp);
7059 
7060  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
7061  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
7062  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
7063  Int8PtrTy);
7064  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
7065  }
7066 
7067  if (BuiltinID == AArch64::BI__builtin_arm_strex ||
7068  BuiltinID == AArch64::BI__builtin_arm_stlex) {
7069  Value *StoreVal = EmitScalarExpr(E->getArg(0));
7070  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
7071 
7072  QualType Ty = E->getArg(0)->getType();
7073  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
7074  getContext().getTypeSize(Ty));
7075  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
7076 
7077  if (StoreVal->getType()->isPointerTy())
7078  StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
7079  else {
7080  llvm::Type *IntTy = llvm::IntegerType::get(
7081  getLLVMContext(),
7082  CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
7083  StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
7084  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
7085  }
7086 
7087  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
7088  ? Intrinsic::aarch64_stlxr
7089  : Intrinsic::aarch64_stxr,
7090  StoreAddr->getType());
7091  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
7092  }
7093 
7094  if (BuiltinID == AArch64::BI__getReg) {
7095  Expr::EvalResult Result;
7096  if (!E->getArg(0)->EvaluateAsInt(Result, CGM.getContext()))
7097  llvm_unreachable("Sema will ensure that the parameter is constant");
7098 
7099  llvm::APSInt Value = Result.Val.getInt();
7100  LLVMContext &Context = CGM.getLLVMContext();
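  // Register number 31 names the stack pointer; any other value selects the
  // corresponding general-purpose register x<N>.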
7101  std::string Reg = Value == 31 ? "sp" : "x" + Value.toString(10);
7102 
7103  llvm::Metadata *Ops[] = {llvm::MDString::get(Context, Reg)};
7104  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
7105  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
7106 
7107  llvm::Function *F =
7108  CGM.getIntrinsic(llvm::Intrinsic::read_register, {Int64Ty});
7109  return Builder.CreateCall(F, Metadata);
7110  }
7111 
7112  if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
7113  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
7114  return Builder.CreateCall(F);
7115  }
7116 
7117  if (BuiltinID == AArch64::BI_ReadWriteBarrier)
7118  return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
7119  llvm::SyncScope::SingleThread);
7120 
7121  // CRC32
7122  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
7123  switch (BuiltinID) {
7124  case AArch64::BI__builtin_arm_crc32b:
7125  CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
7126  case AArch64::BI__builtin_arm_crc32cb:
7127  CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
7128  case AArch64::BI__builtin_arm_crc32h:
7129  CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
7130  case AArch64::BI__builtin_arm_crc32ch:
7131  CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
7132  case AArch64::BI__builtin_arm_crc32w:
7133  CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
7134  case AArch64::BI__builtin_arm_crc32cw:
7135  CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
7136  case AArch64::BI__builtin_arm_crc32d:
7137  CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
7138  case AArch64::BI__builtin_arm_crc32cd:
7139  CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
7140  }
7141 
7142  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
7143  Value *Arg0 = EmitScalarExpr(E->getArg(0));
7144  Value *Arg1 = EmitScalarExpr(E->getArg(1));
7145  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
7146 
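  // The width of the data operand varies across the crc32 builtins, so widen
  // or bitcast it to the type of the intrinsic's second parameter.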
7147  llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
7148  Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
7149 
7150  return Builder.CreateCall(F, {Arg0, Arg1});
7151  }
7152 
7153  // Memory Tagging Extensions (MTE) Intrinsics
7154  Intrinsic::ID MTEIntrinsicID = Intrinsic::not_intrinsic;
7155  switch (BuiltinID) {
7156  case AArch64::BI__builtin_arm_irg:
7157  MTEIntrinsicID = Intrinsic::aarch64_irg; break;
7158  case AArch64::BI__builtin_arm_addg:
7159  MTEIntrinsicID = Intrinsic::aarch64_addg; break;
7160  case AArch64::BI__builtin_arm_gmi:
7161  MTEIntrinsicID = Intrinsic::aarch64_gmi; break;
7162  case AArch64::BI__builtin_arm_ldg:
7163  MTEIntrinsicID = Intrinsic::aarch64_ldg; break;
7164  case AArch64::BI__builtin_arm_stg:
7165  MTEIntrinsicID = Intrinsic::aarch64_stg; break;
7166  case AArch64::BI__builtin_arm_subp:
7167  MTEIntrinsicID = Intrinsic::aarch64_subp; break;
7168  }
7169 
7170  if (MTEIntrinsicID != Intrinsic::not_intrinsic) {
7171  llvm::Type *T = ConvertType(E->getType());
7172 
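  // irg: cast the pointer operand to i8*, widen the exclusion mask to i64,
  // and cast the returned tagged pointer back to the original pointer type.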
7173  if (MTEIntrinsicID == Intrinsic::aarch64_irg) {
7174  Value *Pointer = EmitScalarExpr(E->getArg(0));
7175  Value *Mask = EmitScalarExpr(E->getArg(1));
7176 
7177  Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
7178  Mask = Builder.CreateZExt(Mask, Int64Ty);
7179  Value *RV = Builder.CreateCall(
7180  CGM.getIntrinsic(MTEIntrinsicID), {Pointer, Mask});
7181  return Builder.CreatePointerCast(RV, T);
7182  }
7183  if (MTEIntrinsicID == Intrinsic::aarch64_addg) {
7184  Value *Pointer = EmitScalarExpr(E->getArg(0));
7185  Value *TagOffset = EmitScalarExpr(E->getArg(1));
7186 
7187  Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
7188  TagOffset = Builder.CreateZExt(TagOffset, Int64Ty);
7189  Value *RV = Builder.CreateCall(
7190  CGM.getIntrinsic(MTEIntrinsicID), {Pointer, TagOffset});
7191  return Builder.CreatePointerCast(RV, T);
7192  }
7193  if (MTEIntrinsicID == Intrinsic::aarch64_gmi) {
7194  Value *Pointer = EmitScalarExpr(E->getArg(0));
7195  Value *ExcludedMask = EmitScalarExpr(E->getArg(1));
7196 
7197  ExcludedMask = Builder.CreateZExt(ExcludedMask, Int64Ty);
7198  Pointer = Builder.CreatePointerCast(Pointer, Int8PtrTy);
7199  return Builder.CreateCall(
7200  CGM.getIntrinsic(MTEIntrinsicID), {Pointer, ExcludedMask});
7201  }
 7202  // Although it is possible to supply a different return
 7203  // address (first arg) to this intrinsic, for now we make the
 7204  // returned address the same as the input address.
7205  if (MTEIntrinsicID == Intrinsic::aarch64_ldg) {
7206  Value *TagAddress = EmitScalarExpr(E->getArg(0));
7207  TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
7208  Value *RV = Builder.CreateCall(
7209  CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
7210  return Builder.CreatePointerCast(RV, T);
7211  }
 7212  // Although it is possible to supply a different tag (to set)
 7213  // to this intrinsic (as first arg), for now we supply
 7214  // the tag carried by the input address argument (the common use case).
7215  if (MTEIntrinsicID == Intrinsic::aarch64_stg) {
7216  Value *TagAddress = EmitScalarExpr(E->getArg(0));
7217  TagAddress = Builder.CreatePointerCast(TagAddress, Int8PtrTy);
7218  return Builder.CreateCall(
7219  CGM.getIntrinsic(MTEIntrinsicID), {TagAddress, TagAddress});
7220  }
7221  if (MTEIntrinsicID == Intrinsic::aarch64_subp) {
7222  Value *PointerA = EmitScalarExpr(E->getArg(0));
7223  Value *PointerB = EmitScalarExpr(E->getArg(1));
7224  PointerA = Builder.CreatePointerCast(PointerA, Int8PtrTy);
7225  PointerB = Builder.CreatePointerCast(PointerB, Int8PtrTy);
7226  return Builder.CreateCall(
7227  CGM.getIntrinsic(MTEIntrinsicID), {PointerA, PointerB});
7228  }
7229  }
7230 
7231  if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
7232  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
7233  BuiltinID == AArch64::BI__builtin_arm_rsrp ||
7234  BuiltinID == AArch64::BI__builtin_arm_wsr ||
7235  BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
7236  BuiltinID == AArch64::BI__builtin_arm_wsrp) {
7237 
7238  bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
7239  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
7240  BuiltinID == AArch64::BI__builtin_arm_rsrp;
7241 
7242  bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
7243  BuiltinID == AArch64::BI__builtin_arm_wsrp;
7244 
7245  bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
7246  BuiltinID != AArch64::BI__builtin_arm_wsr;
7247 
7248  llvm::Type *ValueType;
7249  llvm::Type *RegisterType = Int64Ty;
7250  if (IsPointerBuiltin) {
7251  ValueType = VoidPtrTy;
7252  } else if (Is64Bit) {
7253  ValueType = Int64Ty;
7254  } else {
7255  ValueType = Int32Ty;
7256  }
7257 
7258  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
7259  }
7260 
7261  if (BuiltinID == AArch64::BI_ReadStatusReg ||
7262  BuiltinID == AArch64::BI_WriteStatusReg) {
7263  LLVMContext &Context = CGM.getLLVMContext();
7264 
7265  unsigned SysReg =
7266  E->getArg(0)->EvaluateKnownConstInt(getContext()).getZExtValue();
7267 
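  // The status-register argument packs the MSR/MRS operands into one
  // integer; decode it into the "op0:op1:CRn:CRm:op2" string form, where
  // op0 is 2 plus bit 14 and the remaining fields occupy bits 13:11, 10:7,
  // 6:3 and 2:0 respectively.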
7268  std::string SysRegStr;
7269  llvm::raw_string_ostream(SysRegStr) <<
7270  ((1 << 1) | ((SysReg >> 14) & 1)) << ":" <<
7271  ((SysReg >> 11) & 7) << ":" <<
7272  ((SysReg >> 7) & 15) << ":" <<
7273  ((SysReg >> 3) & 15) << ":" <<
7274  ( SysReg & 7);
7275 
7276  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysRegStr) };
7277  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
7278  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
7279 
7280  llvm::Type *RegisterType = Int64Ty;
7281  llvm::Type *Types[] = { RegisterType };
7282 
7283  if (BuiltinID == AArch64::BI_ReadStatusReg) {
7284  llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
7285 
7286  return Builder.CreateCall(F, Metadata);
7287  }
7288 
7289  llvm::Function *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
7290  llvm::Value *ArgValue = EmitScalarExpr(E->getArg(1));
7291 
7292  return Builder.CreateCall(F, { Metadata, ArgValue });
7293  }
7294 
7295  if (BuiltinID == AArch64::BI_AddressOfReturnAddress) {
7296  llvm::Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
7297  return Builder.CreateCall(F);
7298  }
7299 
7300  if (BuiltinID == AArch64::BI__builtin_sponentry) {
7301  llvm::Function *F = CGM.getIntrinsic(Intrinsic::sponentry);
7302  return Builder.CreateCall(F);
7303  }
7304 
7305  // Find out if any arguments are required to be integer constant
7306  // expressions.
7307  unsigned ICEArguments = 0;
 7308  ASTContext::GetBuiltinTypeError Error;
 7309  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7310  assert(Error == ASTContext::GE_None && "Should not codegen an error");
7311 
 7312  llvm::SmallVector<Value*, 4> Ops;
 7313  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
7314  if ((ICEArguments & (1 << i)) == 0) {
7315  Ops.push_back(EmitScalarExpr(E->getArg(i)));
7316  } else {
7317  // If this is required to be a constant, constant fold it so that we know
7318  // that the generated intrinsic gets a ConstantInt.
7319  llvm::APSInt Result;
7320  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7321  assert(IsConst && "Constant arg isn't actually constant?");
7322  (void)IsConst;
7323  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7324  }
7325  }
7326 
7327  auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
7328  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
7329  SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
7330 
7331  if (Builtin) {
7332  Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
7333  Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
7334  assert(Result && "SISD intrinsic should have been handled");
7335  return Result;
7336  }
7337 
7338  llvm::APSInt Result;
7339  const Expr *Arg = E->getArg(E->getNumArgs()-1);
7340  NeonTypeFlags Type(0);
7341  if (Arg->isIntegerConstantExpr(Result, getContext()))
7342  // Determine the type of this overloaded NEON intrinsic.
7343  Type = NeonTypeFlags(Result.getZExtValue());
7344 
7345  bool usgn = Type.isUnsigned();
7346  bool quad = Type.isQuad();
7347 
7348  // Handle non-overloaded intrinsics first.
7349  switch (BuiltinID) {
7350  default: break;
7351  case NEON::BI__builtin_neon_vabsh_f16:
7352  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7353  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, HalfTy), Ops, "vabs");
7354  case NEON::BI__builtin_neon_vldrq_p128: {
7355  llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
7356  llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
7357  Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
7358  return Builder.CreateAlignedLoad(Int128Ty, Ptr,
 7359  CharUnits::fromQuantity(16));
 7360  }
7361  case NEON::BI__builtin_neon_vstrq_p128: {
7362  llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
7363  Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
7364  return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
7365  }
7366  case NEON::BI__builtin_neon_vcvts_u32_f32:
7367  case NEON::BI__builtin_neon_vcvtd_u64_f64:
7368  usgn = true;
7369  LLVM_FALLTHROUGH;
7370  case NEON::BI__builtin_neon_vcvts_s32_f32:
7371  case NEON::BI__builtin_neon_vcvtd_s64_f64: {
7372  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7373  bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
7374  llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
7375  llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
7376  Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
7377  if (usgn)
7378  return Builder.CreateFPToUI(Ops[0], InTy);
7379  return Builder.CreateFPToSI(Ops[0], InTy);
7380  }
7381  case NEON::BI__builtin_neon_vcvts_f32_u32:
7382  case NEON::BI__builtin_neon_vcvtd_f64_u64:
7383  usgn = true;
7384  LLVM_FALLTHROUGH;
7385  case NEON::BI__builtin_neon_vcvts_f32_s32:
7386  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
7387  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7388  bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
7389  llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
7390  llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
7391  Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
7392  if (usgn)
7393  return Builder.CreateUIToFP(Ops[0], FTy);
7394  return Builder.CreateSIToFP(Ops[0], FTy);
7395  }
7396  case NEON::BI__builtin_neon_vcvth_f16_u16:
7397  case NEON::BI__builtin_neon_vcvth_f16_u32:
7398  case NEON::BI__builtin_neon_vcvth_f16_u64:
7399  usgn = true;
7400  LLVM_FALLTHROUGH;
7401  case NEON::BI__builtin_neon_vcvth_f16_s16:
7402  case NEON::BI__builtin_neon_vcvth_f16_s32:
7403  case NEON::BI__builtin_neon_vcvth_f16_s64: {
7404  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7405  llvm::Type *FTy = HalfTy;
7406  llvm::Type *InTy;
7407  if (Ops[0]->getType()->getPrimitiveSizeInBits() == 64)
7408  InTy = Int64Ty;
7409  else if (Ops[0]->getType()->getPrimitiveSizeInBits() == 32)
7410  InTy = Int32Ty;
7411  else
7412  InTy = Int16Ty;
7413  Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
7414  if (usgn)
7415  return Builder.CreateUIToFP(Ops[0], FTy);
7416  return Builder.CreateSIToFP(Ops[0], FTy);
7417  }
7418  case NEON::BI__builtin_neon_vcvth_u16_f16:
7419  usgn = true;
7420  LLVM_FALLTHROUGH;
7421  case NEON::BI__builtin_neon_vcvth_s16_f16: {
7422  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7423  Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7424  if (usgn)
7425  return Builder.CreateFPToUI(Ops[0], Int16Ty);
7426  return Builder.CreateFPToSI(Ops[0], Int16Ty);
7427  }
7428  case NEON::BI__builtin_neon_vcvth_u32_f16:
7429  usgn = true;
7430  LLVM_FALLTHROUGH;
7431  case NEON::BI__builtin_neon_vcvth_s32_f16: {
7432  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7433  Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7434  if (usgn)
7435  return Builder.CreateFPToUI(Ops[0], Int32Ty);
7436  return Builder.CreateFPToSI(Ops[0], Int32Ty);
7437  }
7438  case NEON::BI__builtin_neon_vcvth_u64_f16:
7439  usgn = true;
7440  LLVM_FALLTHROUGH;
7441  case NEON::BI__builtin_neon_vcvth_s64_f16: {
7442  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7443  Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7444  if (usgn)
7445  return Builder.CreateFPToUI(Ops[0], Int64Ty);
7446  return Builder.CreateFPToSI(Ops[0], Int64Ty);
7447  }
7448  case NEON::BI__builtin_neon_vcvtah_u16_f16:
7449  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
7450  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
7451  case NEON::BI__builtin_neon_vcvtph_u16_f16:
7452  case NEON::BI__builtin_neon_vcvtah_s16_f16:
7453  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
7454  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
7455  case NEON::BI__builtin_neon_vcvtph_s16_f16: {
7456  unsigned Int;
7457  llvm::Type* InTy = Int32Ty;
7458  llvm::Type* FTy = HalfTy;
7459  llvm::Type *Tys[2] = {InTy, FTy};
7460  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7461  switch (BuiltinID) {
7462  default: llvm_unreachable("missing builtin ID in switch!");
7463  case NEON::BI__builtin_neon_vcvtah_u16_f16:
7464  Int = Intrinsic::aarch64_neon_fcvtau; break;
7465  case NEON::BI__builtin_neon_vcvtmh_u16_f16:
7466  Int = Intrinsic::aarch64_neon_fcvtmu; break;
7467  case NEON::BI__builtin_neon_vcvtnh_u16_f16:
7468  Int = Intrinsic::aarch64_neon_fcvtnu; break;
7469  case NEON::BI__builtin_neon_vcvtph_u16_f16:
7470  Int = Intrinsic::aarch64_neon_fcvtpu; break;
7471  case NEON::BI__builtin_neon_vcvtah_s16_f16:
7472  Int = Intrinsic::aarch64_neon_fcvtas; break;
7473  case NEON::BI__builtin_neon_vcvtmh_s16_f16:
7474  Int = Intrinsic::aarch64_neon_fcvtms; break;
7475  case NEON::BI__builtin_neon_vcvtnh_s16_f16:
7476  Int = Intrinsic::aarch64_neon_fcvtns; break;
7477  case NEON::BI__builtin_neon_vcvtph_s16_f16:
7478  Int = Intrinsic::aarch64_neon_fcvtps; break;
7479  }
7480  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvt");
7481  return Builder.CreateTrunc(Ops[0], Int16Ty);
7482  }
7483  case NEON::BI__builtin_neon_vcaleh_f16:
7484  case NEON::BI__builtin_neon_vcalth_f16:
7485  case NEON::BI__builtin_neon_vcageh_f16:
7486  case NEON::BI__builtin_neon_vcagth_f16: {
7487  unsigned Int;
7488  llvm::Type* InTy = Int32Ty;
7489  llvm::Type* FTy = HalfTy;
7490  llvm::Type *Tys[2] = {InTy, FTy};
7491  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7492  switch (BuiltinID) {
7493  default: llvm_unreachable("missing builtin ID in switch!");
7494  case NEON::BI__builtin_neon_vcageh_f16:
7495  Int = Intrinsic::aarch64_neon_facge; break;
7496  case NEON::BI__builtin_neon_vcagth_f16:
7497  Int = Intrinsic::aarch64_neon_facgt; break;
7498  case NEON::BI__builtin_neon_vcaleh_f16:
7499  Int = Intrinsic::aarch64_neon_facge; std::swap(Ops[0], Ops[1]); break;
7500  case NEON::BI__builtin_neon_vcalth_f16:
7501  Int = Intrinsic::aarch64_neon_facgt; std::swap(Ops[0], Ops[1]); break;
7502  }
7503  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "facg");
7504  return Builder.CreateTrunc(Ops[0], Int16Ty);
7505  }
7506  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
7507  case NEON::BI__builtin_neon_vcvth_n_u16_f16: {
7508  unsigned Int;
7509  llvm::Type* InTy = Int32Ty;
7510  llvm::Type* FTy = HalfTy;
7511  llvm::Type *Tys[2] = {InTy, FTy};
7512  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7513  switch (BuiltinID) {
7514  default: llvm_unreachable("missing builtin ID in switch!");
7515  case NEON::BI__builtin_neon_vcvth_n_s16_f16:
7516  Int = Intrinsic::aarch64_neon_vcvtfp2fxs; break;
7517  case NEON::BI__builtin_neon_vcvth_n_u16_f16:
7518  Int = Intrinsic::aarch64_neon_vcvtfp2fxu; break;
7519  }
7520  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
7521  return Builder.CreateTrunc(Ops[0], Int16Ty);
7522  }
7523  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
7524  case NEON::BI__builtin_neon_vcvth_n_f16_u16: {
7525  unsigned Int;
7526  llvm::Type* FTy = HalfTy;
7527  llvm::Type* InTy = Int32Ty;
7528  llvm::Type *Tys[2] = {FTy, InTy};
7529  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7530  switch (BuiltinID) {
7531  default: llvm_unreachable("missing builtin ID in switch!");
7532  case NEON::BI__builtin_neon_vcvth_n_f16_s16:
7533  Int = Intrinsic::aarch64_neon_vcvtfxs2fp;
7534  Ops[0] = Builder.CreateSExt(Ops[0], InTy, "sext");
7535  break;
7536  case NEON::BI__builtin_neon_vcvth_n_f16_u16:
7537  Int = Intrinsic::aarch64_neon_vcvtfxu2fp;
7538  Ops[0] = Builder.CreateZExt(Ops[0], InTy);
7539  break;
7540  }
7541  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "fcvth_n");
7542  }
7543  case NEON::BI__builtin_neon_vpaddd_s64: {
7544  llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
7545  Value *Vec = EmitScalarExpr(E->getArg(0));
 7546  // The vector is v2i64, so make sure it's bitcast to that.
7547  Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
7548  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
7549  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
7550  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
7551  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
 7552  // Pairwise addition of a v2i64 into a scalar i64.
7553  return Builder.CreateAdd(Op0, Op1, "vpaddd");
7554  }
7555  case NEON::BI__builtin_neon_vpaddd_f64: {
7556  llvm::Type *Ty =
7557  llvm::VectorType::get(DoubleTy, 2);
7558  Value *Vec = EmitScalarExpr(E->getArg(0));
7559  // The vector is v2f64, so make sure it's bitcast to that.
7560  Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
7561  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
7562  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
7563  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
7564  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
7565  // Pairwise addition of a v2f64 into a scalar f64.
7566  return Builder.CreateFAdd(Op0, Op1, "vpaddd");
7567  }
7568  case NEON::BI__builtin_neon_vpadds_f32: {
7569  llvm::Type *Ty =
7570  llvm::VectorType::get(FloatTy, 2);
7571  Value *Vec = EmitScalarExpr(E->getArg(0));
7572  // The vector is v2f32, so make sure it's bitcast to that.
7573  Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
7574  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
7575  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
7576  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
7577  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
7578  // Pairwise addition of a v2f32 into a scalar f32.
7579  return Builder.CreateFAdd(Op0, Op1, "vpaddd");
7580  }
7581  case NEON::BI__builtin_neon_vceqzd_s64:
7582  case NEON::BI__builtin_neon_vceqzd_f64:
7583  case NEON::BI__builtin_neon_vceqzs_f32:
7584  case NEON::BI__builtin_neon_vceqzh_f16:
7585  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7586  return EmitAArch64CompareBuiltinExpr(
7587  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7588  ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
7589  case NEON::BI__builtin_neon_vcgezd_s64:
7590  case NEON::BI__builtin_neon_vcgezd_f64:
7591  case NEON::BI__builtin_neon_vcgezs_f32:
7592  case NEON::BI__builtin_neon_vcgezh_f16:
7593  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7594  return EmitAArch64CompareBuiltinExpr(
7595  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7596  ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
7597  case NEON::BI__builtin_neon_vclezd_s64:
7598  case NEON::BI__builtin_neon_vclezd_f64:
7599  case NEON::BI__builtin_neon_vclezs_f32:
7600  case NEON::BI__builtin_neon_vclezh_f16:
7601  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7602  return EmitAArch64CompareBuiltinExpr(
7603  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7604  ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
7605  case NEON::BI__builtin_neon_vcgtzd_s64:
7606  case NEON::BI__builtin_neon_vcgtzd_f64:
7607  case NEON::BI__builtin_neon_vcgtzs_f32:
7608  case NEON::BI__builtin_neon_vcgtzh_f16:
7609  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7610  return EmitAArch64CompareBuiltinExpr(
7611  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7612  ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
7613  case NEON::BI__builtin_neon_vcltzd_s64:
7614  case NEON::BI__builtin_neon_vcltzd_f64:
7615  case NEON::BI__builtin_neon_vcltzs_f32:
7616  case NEON::BI__builtin_neon_vcltzh_f16:
7617  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7618  return EmitAArch64CompareBuiltinExpr(
7619  Ops[0], ConvertType(E->getCallReturnType(getContext())),
7620  ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
7621 
7622  case NEON::BI__builtin_neon_vceqzd_u64: {
7623  Ops.push_back(EmitScalarExpr(E->getArg(0)));
7624  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
7625  Ops[0] =
7626  Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
7627  return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
7628  }
7629  case NEON::BI__builtin_neon_vceqd_f64:
7630  case NEON::BI__builtin_neon_vcled_f64:
7631  case NEON::BI__builtin_neon_vcltd_f64:
7632  case NEON::BI__builtin_neon_vcged_f64:
7633  case NEON::BI__builtin_neon_vcgtd_f64: {
7634  llvm::CmpInst::Predicate P;
7635  switch (BuiltinID) {
7636  default: llvm_unreachable("missing builtin ID in switch!");
7637  case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
7638  case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
7639  case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
7640  case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
7641  case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
7642  }
7643  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7644  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
7645  Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
7646  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
7647  return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
7648  }
7649  case NEON::BI__builtin_neon_vceqs_f32:
7650  case NEON::BI__builtin_neon_vcles_f32:
7651  case NEON::BI__builtin_neon_vclts_f32:
7652  case NEON::BI__builtin_neon_vcges_f32:
7653  case NEON::BI__builtin_neon_vcgts_f32: {
7654  llvm::CmpInst::Predicate P;
7655  switch (BuiltinID) {
7656  default: llvm_unreachable("missing builtin ID in switch!");
7657  case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
7658  case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
7659  case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
7660  case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
7661  case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
7662  }
7663  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7664  Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
7665  Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
7666  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
7667  return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
7668  }
7669  case NEON::BI__builtin_neon_vceqh_f16:
7670  case NEON::BI__builtin_neon_vcleh_f16:
7671  case NEON::BI__builtin_neon_vclth_f16:
7672  case NEON::BI__builtin_neon_vcgeh_f16:
7673  case NEON::BI__builtin_neon_vcgth_f16: {
7674  llvm::CmpInst::Predicate P;
7675  switch (BuiltinID) {
7676  default: llvm_unreachable("missing builtin ID in switch!");
7677  case NEON::BI__builtin_neon_vceqh_f16: P = llvm::FCmpInst::FCMP_OEQ; break;
7678  case NEON::BI__builtin_neon_vcleh_f16: P = llvm::FCmpInst::FCMP_OLE; break;
7679  case NEON::BI__builtin_neon_vclth_f16: P = llvm::FCmpInst::FCMP_OLT; break;
7680  case NEON::BI__builtin_neon_vcgeh_f16: P = llvm::FCmpInst::FCMP_OGE; break;
7681  case NEON::BI__builtin_neon_vcgth_f16: P = llvm::FCmpInst::FCMP_OGT; break;
7682  }
7683  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7684  Ops[0] = Builder.CreateBitCast(Ops[0], HalfTy);
7685  Ops[1] = Builder.CreateBitCast(Ops[1], HalfTy);
7686  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
7687  return Builder.CreateSExt(Ops[0], Int16Ty, "vcmpd");
7688  }
7689  case NEON::BI__builtin_neon_vceqd_s64:
7690  case NEON::BI__builtin_neon_vceqd_u64:
7691  case NEON::BI__builtin_neon_vcgtd_s64:
7692  case NEON::BI__builtin_neon_vcgtd_u64:
7693  case NEON::BI__builtin_neon_vcltd_s64:
7694  case NEON::BI__builtin_neon_vcltd_u64:
7695  case NEON::BI__builtin_neon_vcged_u64:
7696  case NEON::BI__builtin_neon_vcged_s64:
7697  case NEON::BI__builtin_neon_vcled_u64:
7698  case NEON::BI__builtin_neon_vcled_s64: {
7699  llvm::CmpInst::Predicate P;
7700  switch (BuiltinID) {
7701  default: llvm_unreachable("missing builtin ID in switch!");
7702  case NEON::BI__builtin_neon_vceqd_s64:
7703  case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
7704  case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
7705  case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
7706  case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
7707  case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
7708  case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
7709  case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
7710  case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
7711  case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
7712  }
7713  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7714  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
7715  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
7716  Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
7717  return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
7718  }
7719  case NEON::BI__builtin_neon_vtstd_s64:
7720  case NEON::BI__builtin_neon_vtstd_u64: {
7721  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7722  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
7723  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
7724  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
7725  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
7726  llvm::Constant::getNullValue(Int64Ty));
7727  return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
7728  }
7729  case NEON::BI__builtin_neon_vset_lane_i8:
7730  case NEON::BI__builtin_neon_vset_lane_i16:
7731  case NEON::BI__builtin_neon_vset_lane_i32:
7732  case NEON::BI__builtin_neon_vset_lane_i64:
7733  case NEON::BI__builtin_neon_vset_lane_f32:
7734  case NEON::BI__builtin_neon_vsetq_lane_i8:
7735  case NEON::BI__builtin_neon_vsetq_lane_i16:
7736  case NEON::BI__builtin_neon_vsetq_lane_i32:
7737  case NEON::BI__builtin_neon_vsetq_lane_i64:
7738  case NEON::BI__builtin_neon_vsetq_lane_f32:
7739  Ops.push_back(EmitScalarExpr(E->getArg(2)));
7740  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
7741  case NEON::BI__builtin_neon_vset_lane_f64:
7742  // The vector type needs a cast for the v1f64 variant.
7743  Ops[1] = Builder.CreateBitCast(Ops[1],
7744  llvm::VectorType::get(DoubleTy, 1));
7745  Ops.push_back(EmitScalarExpr(E->getArg(2)));
7746  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
7747  case NEON::BI__builtin_neon_vsetq_lane_f64:
7748  // The vector type needs a cast for the v2f64 variant.
7749  Ops[1] = Builder.CreateBitCast(Ops[1],
7750  llvm::VectorType::get(DoubleTy, 2));
7751  Ops.push_back(EmitScalarExpr(E->getArg(2)));
7752  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
7753 
7754  case NEON::BI__builtin_neon_vget_lane_i8:
7755  case NEON::BI__builtin_neon_vdupb_lane_i8:
7756  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
7757  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7758  "vget_lane");
7759  case NEON::BI__builtin_neon_vgetq_lane_i8:
7760  case NEON::BI__builtin_neon_vdupb_laneq_i8:
7761  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
7762  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7763  "vgetq_lane");
7764  case NEON::BI__builtin_neon_vget_lane_i16:
7765  case NEON::BI__builtin_neon_vduph_lane_i16:
7766  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
7767  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7768  "vget_lane");
7769  case NEON::BI__builtin_neon_vgetq_lane_i16:
7770  case NEON::BI__builtin_neon_vduph_laneq_i16:
7771  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
7772  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7773  "vgetq_lane");
7774  case NEON::BI__builtin_neon_vget_lane_i32:
7775  case NEON::BI__builtin_neon_vdups_lane_i32:
7776  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
7777  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7778  "vget_lane");
7779  case NEON::BI__builtin_neon_vdups_lane_f32:
7780  Ops[0] = Builder.CreateBitCast(Ops[0],
7781  llvm::VectorType::get(FloatTy, 2));
7782  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7783  "vdups_lane");
7784  case NEON::BI__builtin_neon_vgetq_lane_i32:
7785  case NEON::BI__builtin_neon_vdups_laneq_i32:
7786  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
7787  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7788  "vgetq_lane");
7789  case NEON::BI__builtin_neon_vget_lane_i64:
7790  case NEON::BI__builtin_neon_vdupd_lane_i64:
7791  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
7792  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7793  "vget_lane");
7794  case NEON::BI__builtin_neon_vdupd_lane_f64:
7795  Ops[0] = Builder.CreateBitCast(Ops[0],
7796  llvm::VectorType::get(DoubleTy, 1));
7797  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7798  "vdupd_lane");
7799  case NEON::BI__builtin_neon_vgetq_lane_i64:
7800  case NEON::BI__builtin_neon_vdupd_laneq_i64:
7801  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
7802  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7803  "vgetq_lane");
7804  case NEON::BI__builtin_neon_vget_lane_f32:
7805  Ops[0] = Builder.CreateBitCast(Ops[0],
7806  llvm::VectorType::get(FloatTy, 2));
7807  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7808  "vget_lane");
7809  case NEON::BI__builtin_neon_vget_lane_f64:
7810  Ops[0] = Builder.CreateBitCast(Ops[0],
7811  llvm::VectorType::get(DoubleTy, 1));
7812  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7813  "vget_lane");
7814  case NEON::BI__builtin_neon_vgetq_lane_f32:
7815  case NEON::BI__builtin_neon_vdups_laneq_f32:
7816  Ops[0] = Builder.CreateBitCast(Ops[0],
7817  llvm::VectorType::get(FloatTy, 4));
7818  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7819  "vgetq_lane");
7820  case NEON::BI__builtin_neon_vgetq_lane_f64:
7821  case NEON::BI__builtin_neon_vdupd_laneq_f64:
7822  Ops[0] = Builder.CreateBitCast(Ops[0],
7823  llvm::VectorType::get(DoubleTy, 2));
7824  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
7825  "vgetq_lane");
7826  case NEON::BI__builtin_neon_vaddh_f16:
7827  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7828  return Builder.CreateFAdd(Ops[0], Ops[1], "vaddh");
7829  case NEON::BI__builtin_neon_vsubh_f16:
7830  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7831  return Builder.CreateFSub(Ops[0], Ops[1], "vsubh");
7832  case NEON::BI__builtin_neon_vmulh_f16:
7833  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7834  return Builder.CreateFMul(Ops[0], Ops[1], "vmulh");
7835  case NEON::BI__builtin_neon_vdivh_f16:
7836  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7837  return Builder.CreateFDiv(Ops[0], Ops[1], "vdivh");
7838  case NEON::BI__builtin_neon_vfmah_f16: {
7839  Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
7840  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7841  return Builder.CreateCall(F,
7842  {EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)), Ops[0]});
7843  }
7844  case NEON::BI__builtin_neon_vfmsh_f16: {
7845  Function *F = CGM.getIntrinsic(Intrinsic::fma, HalfTy);
7846  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(HalfTy);
7847  Value* Sub = Builder.CreateFSub(Zero, EmitScalarExpr(E->getArg(1)), "vsubh");
7848  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
7849  return Builder.CreateCall(F, {Sub, EmitScalarExpr(E->getArg(2)), Ops[0]});
7850  }
7851  case NEON::BI__builtin_neon_vaddd_s64:
7852  case NEON::BI__builtin_neon_vaddd_u64:
7853  return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
7854  case NEON::BI__builtin_neon_vsubd_s64:
7855  case NEON::BI__builtin_neon_vsubd_u64:
7856  return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
7857  case NEON::BI__builtin_neon_vqdmlalh_s16:
7858  case NEON::BI__builtin_neon_vqdmlslh_s16: {
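  // No scalar sqdmull exists for 16-bit operands, so wrap both scalars in
  // <4 x i16> vectors, multiply with the vector sqdmull, and take lane 0 of
  // the <4 x i32> result before the saturating accumulate.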
7859  SmallVector<Value *, 2> ProductOps;
7860  ProductOps.push_back(vectorWrapScalar16(Ops[1]));
7861  ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
7862  llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
7863  Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
7864  ProductOps, "vqdmlXl");
7865  Constant *CI = ConstantInt::get(SizeTy, 0);
7866  Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
7867 
7868  unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
7869  ? Intrinsic::aarch64_neon_sqadd
7870  : Intrinsic::aarch64_neon_sqsub;
7871  return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
7872  }
7873  case NEON::BI__builtin_neon_vqshlud_n_s64: {
7874  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7875  Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
7876  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
7877  Ops, "vqshlu_n");
7878  }
7879  case NEON::BI__builtin_neon_vqshld_n_u64:
7880  case NEON::BI__builtin_neon_vqshld_n_s64: {
7881  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
7882  ? Intrinsic::aarch64_neon_uqshl
7883  : Intrinsic::aarch64_neon_sqshl;
7884  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7885  Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
7886  return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
7887  }
7888  case NEON::BI__builtin_neon_vrshrd_n_u64:
7889  case NEON::BI__builtin_neon_vrshrd_n_s64: {
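  // The rounding right shift is emitted as urshl/srshl with a negated
  // shift amount.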
7890  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
7891  ? Intrinsic::aarch64_neon_urshl
7892  : Intrinsic::aarch64_neon_srshl;
7893  Ops.push_back(EmitScalarExpr(E->getArg(1)));
7894  int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
7895  Ops[1] = ConstantInt::get(Int64Ty, -SV);
7896  return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
7897  }
7898  case NEON::BI__builtin_neon_vrsrad_n_u64:
7899  case NEON::BI__builtin_neon_vrsrad_n_s64: {
7900  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
7901  ? Intrinsic::aarch64_neon_urshl
7902  : Intrinsic::aarch64_neon_srshl;
7903  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
7904  Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
7905  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
7906  {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
7907  return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
7908  }
7909  case NEON::BI__builtin_neon_vshld_n_s64:
7910  case NEON::BI__builtin_neon_vshld_n_u64: {
7911  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7912  return Builder.CreateShl(
7913  Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
7914  }
7915  case NEON::BI__builtin_neon_vshrd_n_s64: {
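  // Shifting an i64 by 64 is poison in LLVM IR; clamping the amount to 63
  // yields the same all-sign-bits result for an arithmetic shift.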
7916  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7917  return Builder.CreateAShr(
7918  Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
7919  Amt->getZExtValue())),
7920  "shrd_n");
7921  }
7922  case NEON::BI__builtin_neon_vshrd_n_u64: {
7923  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
7924  uint64_t ShiftAmt = Amt->getZExtValue();
7925  // Right-shifting an unsigned value by its size yields 0.
7926  if (ShiftAmt == 64)
7927  return ConstantInt::get(Int64Ty, 0);
7928  return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
7929  "shrd_n");
7930  }
7931  case NEON::BI__builtin_neon_vsrad_n_s64: {
7932  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
7933  Ops[1] = Builder.CreateAShr(
7934  Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
7935  Amt->getZExtValue())),
7936  "shrd_n");
7937  return Builder.CreateAdd(Ops[0], Ops[1]);
7938  }
7939  case NEON::BI__builtin_neon_vsrad_n_u64: {
7940  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
7941  uint64_t ShiftAmt = Amt->getZExtValue();
7942  // Right-shifting an unsigned value by its size yields 0.
7943  // As Op + 0 = Op, return Ops[0] directly.
7944  if (ShiftAmt == 64)
7945  return Ops[0];
7946  Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
7947  "shrd_n");
7948  return Builder.CreateAdd(Ops[0], Ops[1]);
7949  }
7950  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
7951  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
7952  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
7953  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
7954  Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
7955  "lane");
7956  SmallVector<Value *, 2> ProductOps;
7957  ProductOps.push_back(vectorWrapScalar16(Ops[1]));
7958  ProductOps.push_back(vectorWrapScalar16(Ops[2]));
7959  llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
7960  Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
7961  ProductOps, "vqdmlXl");
7962  Constant *CI = ConstantInt::get(SizeTy, 0);
7963  Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
7964  Ops.pop_back();
7965 
7966  unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
7967  BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
7968  ? Intrinsic::aarch64_neon_sqadd
7969  : Intrinsic::aarch64_neon_sqsub;
7970  return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
7971  }
7972  case NEON::BI__builtin_neon_vqdmlals_s32:
7973  case NEON::BI__builtin_neon_vqdmlsls_s32: {
7974  SmallVector<Value *, 2> ProductOps;
7975  ProductOps.push_back(Ops[1]);
7976  ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
7977  Ops[1] =
7978  EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
7979  ProductOps, "vqdmlXl");
7980 
7981  unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
7982  ? Intrinsic::aarch64_neon_sqadd
7983  : Intrinsic::aarch64_neon_sqsub;
7984  return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
7985  }
7986  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
7987  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
7988  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
7989  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
7990  Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
7991  "lane");
7992  SmallVector<Value *, 2> ProductOps;
7993  ProductOps.push_back(Ops[1]);
7994  ProductOps.push_back(Ops[2]);
7995  Ops[1] =
7996  EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
7997  ProductOps, "vqdmlXl");
7998  Ops.pop_back();
7999 
8000  unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
8001  BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
8002  ? Intrinsic::aarch64_neon_sqadd
8003  : Intrinsic::aarch64_neon_sqsub;
8004  return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
8005  }
8006  case NEON::BI__builtin_neon_vduph_lane_f16: {
8007  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
8008  "vget_lane");
8009  }
8010  case NEON::BI__builtin_neon_vduph_laneq_f16: {
8011  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
8012  "vgetq_lane");
8013  }
8014  case AArch64::BI_BitScanForward:
8015  case AArch64::BI_BitScanForward64:
8016  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8017  case AArch64::BI_BitScanReverse:
8018  case AArch64::BI_BitScanReverse64:
8019  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8020  case AArch64::BI_InterlockedAnd64:
8021  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8022  case AArch64::BI_InterlockedExchange64:
8023  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8024  case AArch64::BI_InterlockedExchangeAdd64:
8025  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8026  case AArch64::BI_InterlockedExchangeSub64:
8027  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8028  case AArch64::BI_InterlockedOr64:
8029  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8030  case AArch64::BI_InterlockedXor64:
8031  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8032  case AArch64::BI_InterlockedDecrement64:
8033  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8034  case AArch64::BI_InterlockedIncrement64:
8035  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8036  case AArch64::BI_InterlockedExchangeAdd8_acq:
8037  case AArch64::BI_InterlockedExchangeAdd16_acq:
8038  case AArch64::BI_InterlockedExchangeAdd_acq:
8039  case AArch64::BI_InterlockedExchangeAdd64_acq:
8040  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_acq, E);
8041  case AArch64::BI_InterlockedExchangeAdd8_rel:
8042  case AArch64::BI_InterlockedExchangeAdd16_rel:
8043  case AArch64::BI_InterlockedExchangeAdd_rel:
8044  case AArch64::BI_InterlockedExchangeAdd64_rel:
8045  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_rel, E);
8046  case AArch64::BI_InterlockedExchangeAdd8_nf:
8047  case AArch64::BI_InterlockedExchangeAdd16_nf:
8048  case AArch64::BI_InterlockedExchangeAdd_nf:
8049  case AArch64::BI_InterlockedExchangeAdd64_nf:
8050  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd_nf, E);
8051  case AArch64::BI_InterlockedExchange8_acq:
8052  case AArch64::BI_InterlockedExchange16_acq:
8053  case AArch64::BI_InterlockedExchange_acq:
8054  case AArch64::BI_InterlockedExchange64_acq:
8055  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_acq, E);
8056  case AArch64::BI_InterlockedExchange8_rel:
8057  case AArch64::BI_InterlockedExchange16_rel:
8058  case AArch64::BI_InterlockedExchange_rel:
8059  case AArch64::BI_InterlockedExchange64_rel:
8060  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_rel, E);
8061  case AArch64::BI_InterlockedExchange8_nf:
8062  case AArch64::BI_InterlockedExchange16_nf:
8063  case AArch64::BI_InterlockedExchange_nf:
8064  case AArch64::BI_InterlockedExchange64_nf:
8065  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange_nf, E);
8066  case AArch64::BI_InterlockedCompareExchange8_acq:
8067  case AArch64::BI_InterlockedCompareExchange16_acq:
8068  case AArch64::BI_InterlockedCompareExchange_acq:
8069  case AArch64::BI_InterlockedCompareExchange64_acq:
8070  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_acq, E);
8071  case AArch64::BI_InterlockedCompareExchange8_rel:
8072  case AArch64::BI_InterlockedCompareExchange16_rel:
8073  case AArch64::BI_InterlockedCompareExchange_rel:
8074  case AArch64::BI_InterlockedCompareExchange64_rel:
8075  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_rel, E);
8076  case AArch64::BI_InterlockedCompareExchange8_nf:
8077  case AArch64::BI_InterlockedCompareExchange16_nf:
8078  case AArch64::BI_InterlockedCompareExchange_nf:
8079  case AArch64::BI_InterlockedCompareExchange64_nf:
8080  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedCompareExchange_nf, E);
8081  case AArch64::BI_InterlockedOr8_acq:
8082  case AArch64::BI_InterlockedOr16_acq:
8083  case AArch64::BI_InterlockedOr_acq:
8084  case AArch64::BI_InterlockedOr64_acq:
8085  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_acq, E);
8086  case AArch64::BI_InterlockedOr8_rel:
8087  case AArch64::BI_InterlockedOr16_rel:
8088  case AArch64::BI_InterlockedOr_rel:
8089  case AArch64::BI_InterlockedOr64_rel:
8090  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_rel, E);
8091  case AArch64::BI_InterlockedOr8_nf:
8092  case AArch64::BI_InterlockedOr16_nf:
8093  case AArch64::BI_InterlockedOr_nf:
8094  case AArch64::BI_InterlockedOr64_nf:
8095  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr_nf, E);
8096  case AArch64::BI_InterlockedXor8_acq:
8097  case AArch64::BI_InterlockedXor16_acq:
8098  case AArch64::BI_InterlockedXor_acq:
8099  case AArch64::BI_InterlockedXor64_acq:
8100  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_acq, E);
8101  case AArch64::BI_InterlockedXor8_rel:
8102  case AArch64::BI_InterlockedXor16_rel:
8103  case AArch64::BI_InterlockedXor_rel:
8104  case AArch64::BI_InterlockedXor64_rel:
8105  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_rel, E);
8106  case AArch64::BI_InterlockedXor8_nf:
8107  case AArch64::BI_InterlockedXor16_nf:
8108  case AArch64::BI_InterlockedXor_nf:
8109  case AArch64::BI_InterlockedXor64_nf:
8110  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor_nf, E);
8111  case AArch64::BI_InterlockedAnd8_acq:
8112  case AArch64::BI_InterlockedAnd16_acq:
8113  case AArch64::BI_InterlockedAnd_acq:
8114  case AArch64::BI_InterlockedAnd64_acq:
8115  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_acq, E);
8116  case AArch64::BI_InterlockedAnd8_rel:
8117  case AArch64::BI_InterlockedAnd16_rel:
8118  case AArch64::BI_InterlockedAnd_rel:
8119  case AArch64::BI_InterlockedAnd64_rel:
8120  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_rel, E);
8121  case AArch64::BI_InterlockedAnd8_nf:
8122  case AArch64::BI_InterlockedAnd16_nf:
8123  case AArch64::BI_InterlockedAnd_nf:
8124  case AArch64::BI_InterlockedAnd64_nf:
8125  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd_nf, E);
8126  case AArch64::BI_InterlockedIncrement16_acq:
8127  case AArch64::BI_InterlockedIncrement_acq:
8128  case AArch64::BI_InterlockedIncrement64_acq:
8129  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_acq, E);
8130  case AArch64::BI_InterlockedIncrement16_rel:
8131  case AArch64::BI_InterlockedIncrement_rel:
8132  case AArch64::BI_InterlockedIncrement64_rel:
8133  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_rel, E);
8134  case AArch64::BI_InterlockedIncrement16_nf:
8135  case AArch64::BI_InterlockedIncrement_nf:
8136  case AArch64::BI_InterlockedIncrement64_nf:
8137  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement_nf, E);
8138  case AArch64::BI_InterlockedDecrement16_acq:
8139  case AArch64::BI_InterlockedDecrement_acq:
8140  case AArch64::BI_InterlockedDecrement64_acq:
8141  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_acq, E);
8142  case AArch64::BI_InterlockedDecrement16_rel:
8143  case AArch64::BI_InterlockedDecrement_rel:
8144  case AArch64::BI_InterlockedDecrement64_rel:
8145  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_rel, E);
8146  case AArch64::BI_InterlockedDecrement16_nf:
8147  case AArch64::BI_InterlockedDecrement_nf:
8148  case AArch64::BI_InterlockedDecrement64_nf:
8149  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement_nf, E);
8150 
8151  case AArch64::BI_InterlockedAdd: {
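  // atomicrmw returns the value held in memory before the operation, but
  // _InterlockedAdd returns the new value, so add the operand once more.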
8152  Value *Arg0 = EmitScalarExpr(E->getArg(0));
8153  Value *Arg1 = EmitScalarExpr(E->getArg(1));
8154  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
8155  AtomicRMWInst::Add, Arg0, Arg1,
8156  llvm::AtomicOrdering::SequentiallyConsistent);
8157  return Builder.CreateAdd(RMWI, Arg1);
8158  }
8159  }
8160 
8161  llvm::VectorType *VTy = GetNeonType(this, Type);
8162  llvm::Type *Ty = VTy;
8163  if (!Ty)
8164  return nullptr;
8165 
8166  // Not all intrinsics handled by the common case work for AArch64 yet, so only
8167  // defer to common code if it's been added to our special map.
8168  Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
8169  AArch64SIMDIntrinsicsProvenSorted);
8170 
8171  if (Builtin)
8172  return EmitCommonNeonBuiltinExpr(
8173  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
8174  Builtin->NameHint, Builtin->TypeModifier, E, Ops,
8175  /*never use addresses*/ Address::invalid(), Address::invalid(), Arch);
8176 
8177  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops, Arch))
8178  return V;
8179 
8180  unsigned Int;
8181  switch (BuiltinID) {
8182  default: return nullptr;
8183  case NEON::BI__builtin_neon_vbsl_v:
8184  case NEON::BI__builtin_neon_vbslq_v: {
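  // Bitwise select has no single LLVM intrinsic; expand it on the integer
  // vector type as (mask & a) | (~mask & b) and bitcast back to the result.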
8185  llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
8186  Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
8187  Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
8188  Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
8189 
8190  Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
8191  Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
8192  Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
8193  return Builder.CreateBitCast(Ops[0], Ty);
8194  }
8195  case NEON::BI__builtin_neon_vfma_lane_v:
8196  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
8197  // The ARM builtins (and instructions) have the addend as the first
8198  // operand, but the 'fma' intrinsics have it last. Swap it around here.
8199  Value *Addend = Ops[0];
8200  Value *Multiplicand = Ops[1];
8201  Value *LaneSource = Ops[2];
8202  Ops[0] = Multiplicand;
8203  Ops[1] = LaneSource;
8204  Ops[2] = Addend;
8205 
8206  // Now adjust things to handle the lane access.
8207  llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
8208  llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
8209  VTy;
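  // For vfmaq_lane the lane source is a 64-bit vector, so bitcast it to the
  // narrower type before splatting the requested lane to the full width.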
8210  llvm::Constant *cst = cast<Constant>(Ops[3]);
8211  Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
8212  Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
8213  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
8214 
8215  Ops.pop_back();
8216  Int = Intrinsic::fma;
8217  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
8218  }
8219  case NEON::BI__builtin_neon_vfma_laneq_v: {
8220  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
8221  // v1f64 fma should be mapped to Neon scalar f64 fma
8222  if (VTy && VTy->getElementType() == DoubleTy) {
8223  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
8224  Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
8225  llvm::Type *VTy = GetNeonType(this,
8226  NeonTypeFlags(NeonTypeFlags::Float64, false, true));
8227  Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
8228  Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
8229  Function *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
8230  Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
8231  return Builder.CreateBitCast(Result, Ty);
8232  }
8233  Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
8234  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8235  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8236 
8237  llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
8238  VTy->getNumElements() * 2);
8239  Ops[2] = Builder.CreateBitCast(Ops[2], STy);
8240  Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
8241  cast<ConstantInt>(Ops[3]));
8242  Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
8243 
8244  return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
8245  }
8246  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
8247  Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
8248  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8249  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8250 
8251  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
8252  Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
8253  return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
8254  }
8255  case NEON::BI__builtin_neon_vfmah_lane_f16:
8256  case NEON::BI__builtin_neon_vfmas_lane_f32:
8257  case NEON::BI__builtin_neon_vfmah_laneq_f16:
8258  case NEON::BI__builtin_neon_vfmas_laneq_f32:
8259  case NEON::BI__builtin_neon_vfmad_lane_f64:
8260  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
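  // Extract the requested lane to a scalar and call llvm.fma directly, with
  // the accumulator (Ops[0]) moved to the last operand.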
8261  Ops.push_back(EmitScalarExpr(E->getArg(3)));
8262  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
8263  Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
8264  Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
8265  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
8266  }
8267  case NEON::BI__builtin_neon_vmull_v:
8268  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
8269  Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
8270  if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
8271  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
8272  case NEON::BI__builtin_neon_vmax_v:
8273  case NEON::BI__builtin_neon_vmaxq_v:
8274  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
8275  Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
8276  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
8277  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
8278  case NEON::BI__builtin_neon_vmaxh_f16: {
8279  Ops.push_back(EmitScalarExpr(E->getArg(1)));
8280  Int = Intrinsic::aarch64_neon_fmax;
8281  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmax");
8282  }
8283  case NEON::BI__builtin_neon_vmin_v:
8284  case NEON::BI__builtin_neon_vminq_v:
8285  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
8286  Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
8287  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
8288  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
8289  case NEON::BI__builtin_neon_vminh_f16: {
8290  Ops.push_back(EmitScalarExpr(E->getArg(1)));
8291  Int = Intrinsic::aarch64_neon_fmin;
8292  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmin");
8293  }
8294  case NEON::BI__builtin_neon_vabd_v:
8295  case NEON::BI__builtin_neon_vabdq_v:
8296  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
8297  Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
8298  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
8299  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
8300  case NEON::BI__builtin_neon_vpadal_v:
8301  case NEON::BI__builtin_neon_vpadalq_v: {
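  // Pairwise add-and-accumulate long: the widening pairwise add of the
  // half-width source elements uses [us]addlp; the accumulation is then an
  // ordinary vector add.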
8302  unsigned ArgElts = VTy->getNumElements();
8303  llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
8304  unsigned BitWidth = EltTy->getBitWidth();
8305  llvm::Type *ArgTy = llvm::VectorType::get(
8306  llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
8307  llvm::Type* Tys[2] = { VTy, ArgTy };
8308  Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
8309  SmallVector<llvm::Value*, 1> TmpOps;
8310  TmpOps.push_back(Ops[1]);
8311  Function *F = CGM.getIntrinsic(Int, Tys);
8312  llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
8313  llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
8314  return Builder.CreateAdd(tmp, addend);
8315  }
8316  case NEON::BI__builtin_neon_vpmin_v:
8317  case NEON::BI__builtin_neon_vpminq_v:
8318  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
8319  Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
8320  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
8321  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
8322  case NEON::BI__builtin_neon_vpmax_v:
8323  case NEON::BI__builtin_neon_vpmaxq_v:
8324  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
8325  Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
8326  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
8327  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
8328  case NEON::BI__builtin_neon_vminnm_v:
8329  case NEON::BI__builtin_neon_vminnmq_v:
8330  Int = Intrinsic::aarch64_neon_fminnm;
8331  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
8332  case NEON::BI__builtin_neon_vminnmh_f16:
8333  Ops.push_back(EmitScalarExpr(E->getArg(1)));
8334  Int = Intrinsic::aarch64_neon_fminnm;
8335  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vminnm");
8336  case NEON::BI__builtin_neon_vmaxnm_v:
8337  case NEON::BI__builtin_neon_vmaxnmq_v:
8338  Int = Intrinsic::aarch64_neon_fmaxnm;
8339  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
8340  case NEON::BI__builtin_neon_vmaxnmh_f16:
8341  Ops.push_back(EmitScalarExpr(E->getArg(1)));
8342  Int = Intrinsic::aarch64_neon_fmaxnm;
8343  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmaxnm");
8344  case NEON::BI__builtin_neon_vrecpss_f32: {
8345  Ops.push_back(EmitScalarExpr(E->getArg(1)));
8346  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
8347  Ops, "vrecps");
8348  }
8349  case NEON::BI__builtin_neon_vrecpsd_f64:
8350  Ops.push_back(EmitScalarExpr(E->getArg(1)));
8351  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
8352  Ops, "vrecps");
8353  case NEON::BI__builtin_neon_vrecpsh_f16:
8354  Ops.push_back(EmitScalarExpr(E->getArg(1)));
8355  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, HalfTy),
8356  Ops, "vrecps");
8357  case NEON::BI__builtin_neon_vqshrun_n_v:
8358  Int = Intrinsic::aarch64_neon_sqshrun;
8359  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
8360  case NEON::BI__builtin_neon_vqrshrun_n_v:
8361  Int = Intrinsic::aarch64_neon_sqrshrun;
8362  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
8363  case NEON::BI__builtin_neon_vqshrn_n_v:
8364  Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
8365  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
8366  case NEON::BI__builtin_neon_vrshrn_n_v:
8367  Int = Intrinsic::aarch64_neon_rshrn;
8368  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
8369  case NEON::BI__builtin_neon_vqrshrn_n_v:
8370  Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
8371  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
8372  case NEON::BI__builtin_neon_vrndah_f16: {
8373  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8374  Int = Intrinsic::round;
8375  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrnda");
8376  }
8377  case NEON::BI__builtin_neon_vrnda_v:
8378  case NEON::BI__builtin_neon_vrndaq_v: {
8379  Int = Intrinsic::round;
8380  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
8381  }
8382  case NEON::BI__builtin_neon_vrndih_f16: {
8383  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8384  Int = Intrinsic::nearbyint;
8385  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndi");
8386  }
8387  case NEON::BI__builtin_neon_vrndmh_f16: {
8388  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8389  Int = Intrinsic::floor;
8390  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndm");
8391  }
8392  case NEON::BI__builtin_neon_vrndm_v:
8393  case NEON::BI__builtin_neon_vrndmq_v: {
8394  Int = Intrinsic::floor;
8395  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
8396  }
8397  case NEON::BI__builtin_neon_vrndnh_f16: {
8398  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8399  Int = Intrinsic::aarch64_neon_frintn;
8400  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndn");
8401  }
8402  case NEON::BI__builtin_neon_vrndn_v:
8403  case NEON::BI__builtin_neon_vrndnq_v: {
8404  Int = Intrinsic::aarch64_neon_frintn;
8405  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
8406  }
8407  case NEON::BI__builtin_neon_vrndns_f32: {
8408  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8409  Int = Intrinsic::aarch64_neon_frintn;
8410  return EmitNeonCall(CGM.getIntrinsic(Int, FloatTy), Ops, "vrndn");
8411  }
8412  case NEON::BI__builtin_neon_vrndph_f16: {
8413  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8414  Int = Intrinsic::ceil;
8415  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndp");
8416  }
8417  case NEON::BI__builtin_neon_vrndp_v:
8418  case NEON::BI__builtin_neon_vrndpq_v: {
8419  Int = Intrinsic::ceil;
8420  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
8421  }
8422  case NEON::BI__builtin_neon_vrndxh_f16: {
8423  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8424  Int = Intrinsic::rint;
8425  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndx");
8426  }
8427  case NEON::BI__builtin_neon_vrndx_v:
8428  case NEON::BI__builtin_neon_vrndxq_v: {
8429  Int = Intrinsic::rint;
8430  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
8431  }
8432  case NEON::BI__builtin_neon_vrndh_f16: {
8433  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8434  Int = Intrinsic::trunc;
8435  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vrndz");
8436  }
8437  case NEON::BI__builtin_neon_vrnd_v:
8438  case NEON::BI__builtin_neon_vrndq_v: {
8439  Int = Intrinsic::trunc;
8440  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
8441  }
8442  case NEON::BI__builtin_neon_vcvt_f64_v:
8443  case NEON::BI__builtin_neon_vcvtq_f64_v:
8444  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8445  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
8446  return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
8447  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
8448  case NEON::BI__builtin_neon_vcvt_f64_f32: {
8449  assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
8450  "unexpected vcvt_f64_f32 builtin");
8451  NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
8452  Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
8453 
8454  return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
8455  }
8456  case NEON::BI__builtin_neon_vcvt_f32_f64: {
8457  assert(Type.getEltType() == NeonTypeFlags::Float32 &&
8458  "unexpected vcvt_f32_f64 builtin");
8459  NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
8460  Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
8461 
8462  return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
8463  }
8464  case NEON::BI__builtin_neon_vcvt_s32_v:
8465  case NEON::BI__builtin_neon_vcvt_u32_v:
8466  case NEON::BI__builtin_neon_vcvt_s64_v:
8467  case NEON::BI__builtin_neon_vcvt_u64_v:
8468  case NEON::BI__builtin_neon_vcvt_s16_v:
8469  case NEON::BI__builtin_neon_vcvt_u16_v:
8470  case NEON::BI__builtin_neon_vcvtq_s32_v:
8471  case NEON::BI__builtin_neon_vcvtq_u32_v:
8472  case NEON::BI__builtin_neon_vcvtq_s64_v:
8473  case NEON::BI__builtin_neon_vcvtq_u64_v:
8474  case NEON::BI__builtin_neon_vcvtq_s16_v:
8475  case NEON::BI__builtin_neon_vcvtq_u16_v: {
8476  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
8477  if (usgn)
8478  return Builder.CreateFPToUI(Ops[0], Ty);
8479  return Builder.CreateFPToSI(Ops[0], Ty);
8480  }
8481  case NEON::BI__builtin_neon_vcvta_s16_v:
8482  case NEON::BI__builtin_neon_vcvta_u16_v:
8483  case NEON::BI__builtin_neon_vcvta_s32_v:
8484  case NEON::BI__builtin_neon_vcvtaq_s16_v:
8485  case NEON::BI__builtin_neon_vcvtaq_s32_v:
8486  case NEON::BI__builtin_neon_vcvta_u32_v:
8487  case NEON::BI__builtin_neon_vcvtaq_u16_v:
8488  case NEON::BI__builtin_neon_vcvtaq_u32_v:
8489  case NEON::BI__builtin_neon_vcvta_s64_v:
8490  case NEON::BI__builtin_neon_vcvtaq_s64_v:
8491  case NEON::BI__builtin_neon_vcvta_u64_v:
8492  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
8493  Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
8494  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8495  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
8496  }
8497  case NEON::BI__builtin_neon_vcvtm_s16_v:
8498  case NEON::BI__builtin_neon_vcvtm_s32_v:
8499  case NEON::BI__builtin_neon_vcvtmq_s16_v:
8500  case NEON::BI__builtin_neon_vcvtmq_s32_v:
8501  case NEON::BI__builtin_neon_vcvtm_u16_v:
8502  case NEON::BI__builtin_neon_vcvtm_u32_v:
8503  case NEON::BI__builtin_neon_vcvtmq_u16_v:
8504  case NEON::BI__builtin_neon_vcvtmq_u32_v:
8505  case NEON::BI__builtin_neon_vcvtm_s64_v:
8506  case NEON::BI__builtin_neon_vcvtmq_s64_v:
8507  case NEON::BI__builtin_neon_vcvtm_u64_v:
8508  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
8509  Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
8510  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8511  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
8512  }
8513  case NEON::BI__builtin_neon_vcvtn_s16_v:
8514  case NEON::BI__builtin_neon_vcvtn_s32_v:
8515  case NEON::BI__builtin_neon_vcvtnq_s16_v:
8516  case NEON::BI__builtin_neon_vcvtnq_s32_v:
8517  case NEON::BI__builtin_neon_vcvtn_u16_v:
8518  case NEON::BI__builtin_neon_vcvtn_u32_v:
8519  case NEON::BI__builtin_neon_vcvtnq_u16_v:
8520  case NEON::BI__builtin_neon_vcvtnq_u32_v:
8521  case NEON::BI__builtin_neon_vcvtn_s64_v:
8522  case NEON::BI__builtin_neon_vcvtnq_s64_v:
8523  case NEON::BI__builtin_neon_vcvtn_u64_v:
8524  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
8525  Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
8526  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8527  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
8528  }
8529  case NEON::BI__builtin_neon_vcvtp_s16_v:
8530  case NEON::BI__builtin_neon_vcvtp_s32_v:
8531  case NEON::BI__builtin_neon_vcvtpq_s16_v:
8532  case NEON::BI__builtin_neon_vcvtpq_s32_v:
8533  case NEON::BI__builtin_neon_vcvtp_u16_v:
8534  case NEON::BI__builtin_neon_vcvtp_u32_v:
8535  case NEON::BI__builtin_neon_vcvtpq_u16_v:
8536  case NEON::BI__builtin_neon_vcvtpq_u32_v:
8537  case NEON::BI__builtin_neon_vcvtp_s64_v:
8538  case NEON::BI__builtin_neon_vcvtpq_s64_v:
8539  case NEON::BI__builtin_neon_vcvtp_u64_v:
8540  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
8541  Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
8542  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
8543  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
8544  }
8545  case NEON::BI__builtin_neon_vmulx_v:
8546  case NEON::BI__builtin_neon_vmulxq_v: {
8547  Int = Intrinsic::aarch64_neon_fmulx;
8548  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
8549  }
8550  case NEON::BI__builtin_neon_vmulxh_lane_f16:
8551  case NEON::BI__builtin_neon_vmulxh_laneq_f16: {
8552  // vmulx_lane should be mapped to Neon scalar mulx after
8553  // extracting the scalar element
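  // The lane index is only needed to pick the scalar; pop it so the fmulx
  // intrinsic sees just the two f16 operands.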
8554  Ops.push_back(EmitScalarExpr(E->getArg(2)));
8555  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
8556  Ops.pop_back();
8557  Int = Intrinsic::aarch64_neon_fmulx;
8558  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vmulx");
8559  }
8560  case NEON::BI__builtin_neon_vmul_lane_v:
8561  case NEON::BI__builtin_neon_vmul_laneq_v: {
8562  // v1f64 vmul_lane should be mapped to Neon scalar mul lane
8563  bool Quad = false;
8564  if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
8565  Quad = true;
8566  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
8567  llvm::Type *VTy = GetNeonType(this,
8568  NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
8569  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8570  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
8571  Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
8572  return Builder.CreateBitCast(Result, Ty);
8573  }
8574  case NEON::BI__builtin_neon_vnegd_s64:
8575  return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
8576  case NEON::BI__builtin_neon_vnegh_f16:
8577  return Builder.CreateFNeg(EmitScalarExpr(E->getArg(0)), "vnegh");
8578  case NEON::BI__builtin_neon_vpmaxnm_v:
8579  case NEON::BI__builtin_neon_vpmaxnmq_v: {
8580  Int = Intrinsic::aarch64_neon_fmaxnmp;
8581  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
8582  }
8583  case NEON::BI__builtin_neon_vpminnm_v:
8584  case NEON::BI__builtin_neon_vpminnmq_v: {
8585  Int = Intrinsic::aarch64_neon_fminnmp;
8586  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
8587  }
8588  case NEON::BI__builtin_neon_vsqrth_f16: {
8589  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8590  Int = Intrinsic::sqrt;
8591  return EmitNeonCall(CGM.getIntrinsic(Int, HalfTy), Ops, "vsqrt");
8592  }
8593  case NEON::BI__builtin_neon_vsqrt_v:
8594  case NEON::BI__builtin_neon_vsqrtq_v: {
8595  Int = Intrinsic::sqrt;
8596  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8597  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
8598  }
8599  case NEON::BI__builtin_neon_vrbit_v:
8600  case NEON::BI__builtin_neon_vrbitq_v: {
8601  Int = Intrinsic::aarch64_neon_rbit;
8602  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
8603  }
8604  case NEON::BI__builtin_neon_vaddv_u8:
8605  // FIXME: These are handled by the AArch64 scalar code.
8606  usgn = true;
8607  LLVM_FALLTHROUGH;
8608  case NEON::BI__builtin_neon_vaddv_s8: {
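  // The across-vector add intrinsic is overloaded with an i32 result, so the
  // sum is truncated back to the 8-bit element type.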
8609  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8610  Ty = Int32Ty;
8611  VTy = llvm::VectorType::get(Int8Ty, 8);
8612  llvm::Type *Tys[2] = { Ty, VTy };
8613  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8614  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8615  return Builder.CreateTrunc(Ops[0], Int8Ty);
8616  }
8617  case NEON::BI__builtin_neon_vaddv_u16:
8618  usgn = true;
8619  LLVM_FALLTHROUGH;
8620  case NEON::BI__builtin_neon_vaddv_s16: {
8621  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8622  Ty = Int32Ty;
8623  VTy = llvm::VectorType::get(Int16Ty, 4);
8624  llvm::Type *Tys[2] = { Ty, VTy };
8625  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8626  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8627  return Builder.CreateTrunc(Ops[0], Int16Ty);
8628  }
8629  case NEON::BI__builtin_neon_vaddvq_u8:
8630  usgn = true;
8631  LLVM_FALLTHROUGH;
8632  case NEON::BI__builtin_neon_vaddvq_s8: {
8633  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8634  Ty = Int32Ty;
8635  VTy = llvm::VectorType::get(Int8Ty, 16);
8636  llvm::Type *Tys[2] = { Ty, VTy };
8637  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8638  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8639  return Builder.CreateTrunc(Ops[0], Int8Ty);
8640  }
8641  case NEON::BI__builtin_neon_vaddvq_u16:
8642  usgn = true;
8643  LLVM_FALLTHROUGH;
8644  case NEON::BI__builtin_neon_vaddvq_s16: {
8645  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
8646  Ty = Int32Ty;
8647  VTy = llvm::VectorType::get(Int16Ty, 8);
8648  llvm::Type *Tys[2] = { Ty, VTy };
8649  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8650  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
8651  return Builder.CreateTrunc(Ops[0], Int16Ty);
8652  }
8653  case NEON::BI__builtin_neon_vmaxv_u8: {
8654  Int = Intrinsic::aarch64_neon_umaxv;
8655  Ty = Int32Ty;
8656  VTy = llvm::VectorType::get(Int8Ty, 8);
8657  llvm::Type *Tys[2] = { Ty, VTy };
8658  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8659  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8660  return Builder.CreateTrunc(Ops[0], Int8Ty);
8661  }
8662  case NEON::BI__builtin_neon_vmaxv_u16: {
8663  Int = Intrinsic::aarch64_neon_umaxv;
8664  Ty = Int32Ty;
8665  VTy = llvm::VectorType::get(Int16Ty, 4);
8666  llvm::Type *Tys[2] = { Ty, VTy };
8667  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8668  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8669  return Builder.CreateTrunc(Ops[0], Int16Ty);
8670  }
8671  case NEON::BI__builtin_neon_vmaxvq_u8: {
8672  Int = Intrinsic::aarch64_neon_umaxv;
8673  Ty = Int32Ty;
8674  VTy = llvm::VectorType::get(Int8Ty, 16);
8675  llvm::Type *Tys[2] = { Ty, VTy };
8676  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8677  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8678  return Builder.CreateTrunc(Ops[0], Int8Ty);
8679  }
8680  case NEON::BI__builtin_neon_vmaxvq_u16: {
8681  Int = Intrinsic::aarch64_neon_umaxv;
8682  Ty = Int32Ty;
8683  VTy = llvm::VectorType::get(Int16Ty, 8);
8684  llvm::Type *Tys[2] = { Ty, VTy };
8685  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8686  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8687  return Builder.CreateTrunc(Ops[0], Int16Ty);
8688  }
8689  case NEON::BI__builtin_neon_vmaxv_s8: {
8690  Int = Intrinsic::aarch64_neon_smaxv;
8691  Ty = Int32Ty;
8692  VTy = llvm::VectorType::get(Int8Ty, 8);
8693  llvm::Type *Tys[2] = { Ty, VTy };
8694  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8695  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8696  return Builder.CreateTrunc(Ops[0], Int8Ty);
8697  }
8698  case NEON::BI__builtin_neon_vmaxv_s16: {
8699  Int = Intrinsic::aarch64_neon_smaxv;
8700  Ty = Int32Ty;
8701  VTy = llvm::VectorType::get(Int16Ty, 4);
8702  llvm::Type *Tys[2] = { Ty, VTy };
8703  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8704  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8705  return Builder.CreateTrunc(Ops[0], Int16Ty);
8706  }
8707  case NEON::BI__builtin_neon_vmaxvq_s8: {
8708  Int = Intrinsic::aarch64_neon_smaxv;
8709  Ty = Int32Ty;
8710  VTy = llvm::VectorType::get(Int8Ty, 16);
8711  llvm::Type *Tys[2] = { Ty, VTy };
8712  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8713  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8714  return Builder.CreateTrunc(Ops[0], Int8Ty);
8715  }
8716  case NEON::BI__builtin_neon_vmaxvq_s16: {
8717  Int = Intrinsic::aarch64_neon_smaxv;
8718  Ty = Int32Ty;
8719  VTy = llvm::VectorType::get(Int16Ty, 8);
8720  llvm::Type *Tys[2] = { Ty, VTy };
8721  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8722  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8723  return Builder.CreateTrunc(Ops[0], Int16Ty);
8724  }
8725  case NEON::BI__builtin_neon_vmaxv_f16: {
8726  Int = Intrinsic::aarch64_neon_fmaxv;
8727  Ty = HalfTy;
8728  VTy = llvm::VectorType::get(HalfTy, 4);
8729  llvm::Type *Tys[2] = { Ty, VTy };
8730  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8731  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8732  return Builder.CreateTrunc(Ops[0], HalfTy);
8733  }
8734  case NEON::BI__builtin_neon_vmaxvq_f16: {
8735  Int = Intrinsic::aarch64_neon_fmaxv;
8736  Ty = HalfTy;
8737  VTy = llvm::VectorType::get(HalfTy, 8);
8738  llvm::Type *Tys[2] = { Ty, VTy };
8739  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8740  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
8741  return Builder.CreateTrunc(Ops[0], HalfTy);
8742  }
8743  case NEON::BI__builtin_neon_vminv_u8: {
8744  Int = Intrinsic::aarch64_neon_uminv;
8745  Ty = Int32Ty;
8746  VTy = llvm::VectorType::get(Int8Ty, 8);
8747  llvm::Type *Tys[2] = { Ty, VTy };
8748  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8749  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8750  return Builder.CreateTrunc(Ops[0], Int8Ty);
8751  }
8752  case NEON::BI__builtin_neon_vminv_u16: {
8753  Int = Intrinsic::aarch64_neon_uminv;
8754  Ty = Int32Ty;
8755  VTy = llvm::VectorType::get(Int16Ty, 4);
8756  llvm::Type *Tys[2] = { Ty, VTy };
8757  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8758  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8759  return Builder.CreateTrunc(Ops[0], Int16Ty);
8760  }
8761  case NEON::BI__builtin_neon_vminvq_u8: {
8762  Int = Intrinsic::aarch64_neon_uminv;
8763  Ty = Int32Ty;
8764  VTy = llvm::VectorType::get(Int8Ty, 16);
8765  llvm::Type *Tys[2] = { Ty, VTy };
8766  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8767  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8768  return Builder.CreateTrunc(Ops[0], Int8Ty);
8769  }
8770  case NEON::BI__builtin_neon_vminvq_u16: {
8771  Int = Intrinsic::aarch64_neon_uminv;
8772  Ty = Int32Ty;
8773  VTy = llvm::VectorType::get(Int16Ty, 8);
8774  llvm::Type *Tys[2] = { Ty, VTy };
8775  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8776  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8777  return Builder.CreateTrunc(Ops[0], Int16Ty);
8778  }
8779  case NEON::BI__builtin_neon_vminv_s8: {
8780  Int = Intrinsic::aarch64_neon_sminv;
8781  Ty = Int32Ty;
8782  VTy = llvm::VectorType::get(Int8Ty, 8);
8783  llvm::Type *Tys[2] = { Ty, VTy };
8784  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8785  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8786  return Builder.CreateTrunc(Ops[0], Int8Ty);
8787  }
8788  case NEON::BI__builtin_neon_vminv_s16: {
8789  Int = Intrinsic::aarch64_neon_sminv;
8790  Ty = Int32Ty;
8791  VTy = llvm::VectorType::get(Int16Ty, 4);
8792  llvm::Type *Tys[2] = { Ty, VTy };
8793  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8794  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8795  return Builder.CreateTrunc(Ops[0], Int16Ty);
8796  }
8797  case NEON::BI__builtin_neon_vminvq_s8: {
8798  Int = Intrinsic::aarch64_neon_sminv;
8799  Ty = Int32Ty;
8800  VTy = llvm::VectorType::get(Int8Ty, 16);
8801  llvm::Type *Tys[2] = { Ty, VTy };
8802  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8803  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8804  return Builder.CreateTrunc(Ops[0], Int8Ty);
8805  }
8806  case NEON::BI__builtin_neon_vminvq_s16: {
8807  Int = Intrinsic::aarch64_neon_sminv;
8808  Ty = Int32Ty;
8809  VTy = llvm::VectorType::get(Int16Ty, 8);
8810  llvm::Type *Tys[2] = { Ty, VTy };
8811  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8812  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8813  return Builder.CreateTrunc(Ops[0], Int16Ty);
8814  }
8815  case NEON::BI__builtin_neon_vminv_f16: {
8816  Int = Intrinsic::aarch64_neon_fminv;
8817  Ty = HalfTy;
8818  VTy = llvm::VectorType::get(HalfTy, 4);
8819  llvm::Type *Tys[2] = { Ty, VTy };
8820  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8821  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8822  return Builder.CreateTrunc(Ops[0], HalfTy);
8823  }
8824  case NEON::BI__builtin_neon_vminvq_f16: {
8825  Int = Intrinsic::aarch64_neon_fminv;
8826  Ty = HalfTy;
8827  VTy = llvm::VectorType::get(HalfTy, 8);
8828  llvm::Type *Tys[2] = { Ty, VTy };
8829  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8830  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
8831  return Builder.CreateTrunc(Ops[0], HalfTy);
8832  }
8833  case NEON::BI__builtin_neon_vmaxnmv_f16: {
8834  Int = Intrinsic::aarch64_neon_fmaxnmv;
8835  Ty = HalfTy;
8836  VTy = llvm::VectorType::get(HalfTy, 4);
8837  llvm::Type *Tys[2] = { Ty, VTy };
8838  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8839  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
8840  return Builder.CreateTrunc(Ops[0], HalfTy);
8841  }
8842  case NEON::BI__builtin_neon_vmaxnmvq_f16: {
8843  Int = Intrinsic::aarch64_neon_fmaxnmv;
8844  Ty = HalfTy;
8845  VTy = llvm::VectorType::get(HalfTy, 8);
8846  llvm::Type *Tys[2] = { Ty, VTy };
8847  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8848  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxnmv");
8849  return Builder.CreateTrunc(Ops[0], HalfTy);
8850  }
8851  case NEON::BI__builtin_neon_vminnmv_f16: {
8852  Int = Intrinsic::aarch64_neon_fminnmv;
8853  Ty = HalfTy;
8854  VTy = llvm::VectorType::get(HalfTy, 4);
8855  llvm::Type *Tys[2] = { Ty, VTy };
8856  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8857  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
8858  return Builder.CreateTrunc(Ops[0], HalfTy);
8859  }
8860  case NEON::BI__builtin_neon_vminnmvq_f16: {
8861  Int = Intrinsic::aarch64_neon_fminnmv;
8862  Ty = HalfTy;
8863  VTy = llvm::VectorType::get(HalfTy, 8);
8864  llvm::Type *Tys[2] = { Ty, VTy };
8865  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8866  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminnmv");
8867  return Builder.CreateTrunc(Ops[0], HalfTy);
8868  }
8869  case NEON::BI__builtin_neon_vmul_n_f64: {
8870  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
8871  Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
8872  return Builder.CreateFMul(Ops[0], RHS);
8873  }
8874  case NEON::BI__builtin_neon_vaddlv_u8: {
8875  Int = Intrinsic::aarch64_neon_uaddlv;
8876  Ty = Int32Ty;
8877  VTy = llvm::VectorType::get(Int8Ty, 8);
8878  llvm::Type *Tys[2] = { Ty, VTy };
8879  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8880  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8881  return Builder.CreateTrunc(Ops[0], Int16Ty);
8882  }
8883  case NEON::BI__builtin_neon_vaddlv_u16: {
8884  Int = Intrinsic::aarch64_neon_uaddlv;
8885  Ty = Int32Ty;
8886  VTy = llvm::VectorType::get(Int16Ty, 4);
8887  llvm::Type *Tys[2] = { Ty, VTy };
8888  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8889  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8890  }
8891  case NEON::BI__builtin_neon_vaddlvq_u8: {
8892  Int = Intrinsic::aarch64_neon_uaddlv;
8893  Ty = Int32Ty;
8894  VTy = llvm::VectorType::get(Int8Ty, 16);
8895  llvm::Type *Tys[2] = { Ty, VTy };
8896  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8897  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8898  return Builder.CreateTrunc(Ops[0], Int16Ty);
8899  }
8900  case NEON::BI__builtin_neon_vaddlvq_u16: {
8901  Int = Intrinsic::aarch64_neon_uaddlv;
8902  Ty = Int32Ty;
8903  VTy = llvm::VectorType::get(Int16Ty, 8);
8904  llvm::Type *Tys[2] = { Ty, VTy };
8905  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8906  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8907  }
8908  case NEON::BI__builtin_neon_vaddlv_s8: {
8909  Int = Intrinsic::aarch64_neon_saddlv;
8910  Ty = Int32Ty;
8911  VTy = llvm::VectorType::get(Int8Ty, 8);
8912  llvm::Type *Tys[2] = { Ty, VTy };
8913  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8914  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8915  return Builder.CreateTrunc(Ops[0], Int16Ty);
8916  }
8917  case NEON::BI__builtin_neon_vaddlv_s16: {
8918  Int = Intrinsic::aarch64_neon_saddlv;
8919  Ty = Int32Ty;
8920  VTy = llvm::VectorType::get(Int16Ty, 4);
8921  llvm::Type *Tys[2] = { Ty, VTy };
8922  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8923  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8924  }
8925  case NEON::BI__builtin_neon_vaddlvq_s8: {
8926  Int = Intrinsic::aarch64_neon_saddlv;
8927  Ty = Int32Ty;
8928  VTy = llvm::VectorType::get(Int8Ty, 16);
8929  llvm::Type *Tys[2] = { Ty, VTy };
8930  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8931  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8932  return Builder.CreateTrunc(Ops[0], Int16Ty);
8933  }
8934  case NEON::BI__builtin_neon_vaddlvq_s16: {
8935  Int = Intrinsic::aarch64_neon_saddlv;
8936  Ty = Int32Ty;
8937  VTy = llvm::VectorType::get(Int16Ty, 8);
8938  llvm::Type *Tys[2] = { Ty, VTy };
8939  Ops.push_back(EmitScalarExpr(E->getArg(0)));
8940  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
8941  }
8942  case NEON::BI__builtin_neon_vsri_n_v:
8943  case NEON::BI__builtin_neon_vsriq_n_v: {
8944  Int = Intrinsic::aarch64_neon_vsri;
8945  llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
8946  return EmitNeonCall(Intrin, Ops, "vsri_n");
8947  }
8948  case NEON::BI__builtin_neon_vsli_n_v:
8949  case NEON::BI__builtin_neon_vsliq_n_v: {
8950  Int = Intrinsic::aarch64_neon_vsli;
8951  llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
8952  return EmitNeonCall(Intrin, Ops, "vsli_n");
8953  }
8954  case NEON::BI__builtin_neon_vsra_n_v:
8955  case NEON::BI__builtin_neon_vsraq_n_v:
8956  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8957  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
8958  return Builder.CreateAdd(Ops[0], Ops[1]);
8959  case NEON::BI__builtin_neon_vrsra_n_v:
8960  case NEON::BI__builtin_neon_vrsraq_n_v: {
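  // Rounding shift-right-and-accumulate: emit the rounding right shift as a
  // rounding shift left by the negated immediate, then add the accumulator.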
8961  Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
8962  SmallVector<llvm::Value*,2> TmpOps;
8963  TmpOps.push_back(Ops[1]);
8964  TmpOps.push_back(Ops[2]);
8965  Function* F = CGM.getIntrinsic(Int, Ty);
8966  llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
8967  Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
8968  return Builder.CreateAdd(Ops[0], tmp);
8969  }
8970  case NEON::BI__builtin_neon_vld1_v:
8971  case NEON::BI__builtin_neon_vld1q_v: {
8972  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
8973  auto Alignment = CharUnits::fromQuantity(
8974  BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
8975  return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
8976  }
8977  case NEON::BI__builtin_neon_vst1_v:
8978  case NEON::BI__builtin_neon_vst1q_v:
8979  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
8980  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
8981  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
8982  case NEON::BI__builtin_neon_vld1_lane_v:
8983  case NEON::BI__builtin_neon_vld1q_lane_v: {
8984  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
8985  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
8986  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8987  auto Alignment = CharUnits::fromQuantity(
8988  BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
8989  Ops[0] =
8990  Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
8991  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
8992  }
8993  case NEON::BI__builtin_neon_vld1_dup_v:
8994  case NEON::BI__builtin_neon_vld1q_dup_v: {
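  // Load a single element, insert it into lane 0 of an undef vector, and
  // splat it across all lanes.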
8995  Value *V = UndefValue::get(Ty);
8996  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
8997  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
8998  auto Alignment = CharUnits::fromQuantity(
8999  BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
9000  Ops[0] =
9001  Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
9002  llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
9003  Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
9004  return EmitNeonSplat(Ops[0], CI);
9005  }
9006  case NEON::BI__builtin_neon_vst1_lane_v:
9007  case NEON::BI__builtin_neon_vst1q_lane_v:
9008  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9009  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
9010  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
9011  return Builder.CreateDefaultAlignedStore(Ops[1],
9012  Builder.CreateBitCast(Ops[0], Ty));
9013  case NEON::BI__builtin_neon_vld2_v:
9014  case NEON::BI__builtin_neon_vld2q_v: {
9015  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
9016  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
9017  llvm::Type *Tys[2] = { VTy, PTy };
9018  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
9019  Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
9020  Ops[0] = Builder.CreateBitCast(Ops[0],
9021  llvm::PointerType::getUnqual(Ops[1]->getType()));
9022  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
9023  }
9024  case NEON::BI__builtin_neon_vld3_v:
9025  case NEON::BI__builtin_neon_vld3q_v: {
9026  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
9027  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
9028  llvm::Type *Tys[2] = { VTy, PTy };
9029  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
9030  Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
9031  Ops[0] = Builder.CreateBitCast(Ops[0],
9032  llvm::PointerType::getUnqual(Ops[1]->getType()));
9033  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
9034  }
9035  case NEON::BI__builtin_neon_vld4_v:
9036  case NEON::BI__builtin_neon_vld4q_v: {
9037  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
9038  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
9039  llvm::Type *Tys[2] = { VTy, PTy };
9040  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
9041  Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
9042  Ops[0] = Builder.CreateBitCast(Ops[0],
9043  llvm::PointerType::getUnqual(Ops[1]->getType()));
9044  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
9045  }
9046  case NEON::BI__builtin_neon_vld2_dup_v:
9047  case NEON::BI__builtin_neon_vld2q_dup_v: {
9048  llvm::Type *PTy =
9049  llvm::PointerType::getUnqual(VTy->getElementType());
9050  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
9051  llvm::Type *Tys[2] = { VTy, PTy };
9052  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
9053  Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
9054  Ops[0] = Builder.CreateBitCast(Ops[0],
9055  llvm::PointerType::getUnqual(Ops[1]->getType()));
9056  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
9057  }
9058  case NEON::BI__builtin_neon_vld3_dup_v:
9059  case NEON::BI__builtin_neon_vld3q_dup_v: {
9060  llvm::Type *PTy =
9061  llvm::PointerType::getUnqual(VTy->getElementType());
9062  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
9063  llvm::Type *Tys[2] = { VTy, PTy };
9064  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
9065  Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
9066  Ops[0] = Builder.CreateBitCast(Ops[0],
9067  llvm::PointerType::getUnqual(Ops[1]->getType()));
9068  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
9069  }
9070  case NEON::BI__builtin_neon_vld4_dup_v:
9071  case NEON::BI__builtin_neon_vld4q_dup_v: {
9072  llvm::Type *PTy =
9073  llvm::PointerType::getUnqual(VTy->getElementType());
9074  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
9075  llvm::Type *Tys[2] = { VTy, PTy };
9076  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
9077  Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
9078  Ops[0] = Builder.CreateBitCast(Ops[0],
9079  llvm::PointerType::getUnqual(Ops[1]->getType()));
9080  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
9081  }
9082  case NEON::BI__builtin_neon_vld2_lane_v:
9083  case NEON::BI__builtin_neon_vld2q_lane_v: {
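  // The ld2lane intrinsic takes its pointer operand last, so rotate the
  // pointer from Ops[1] to the end; the aggregate result is then stored
  // through the pointer in Ops[0].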
9084  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
9085  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
9086  Ops.push_back(Ops[1]);
9087  Ops.erase(Ops.begin()+1);
9088  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9089  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
9090  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
9091  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
9092  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
9093  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9094  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
9095  }
9096  case NEON::BI__builtin_neon_vld3_lane_v:
9097  case NEON::BI__builtin_neon_vld3q_lane_v: {
9098  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
9099  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
9100  Ops.push_back(Ops[1]);
9101  Ops.erase(Ops.begin()+1);
9102  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9103  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
9104  Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
9105  Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
9106  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
9107  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
9108  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9109  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
9110  }
9111  case NEON::BI__builtin_neon_vld4_lane_v:
9112  case NEON::BI__builtin_neon_vld4q_lane_v: {
9113  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
9114  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
9115  Ops.push_back(Ops[1]);
9116  Ops.erase(Ops.begin()+1);
9117  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9118  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
9119  Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
9120  Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
9121  Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
9122  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
9123  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
9124  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
9125  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
9126  }
9127  case NEON::BI__builtin_neon_vst2_v:
9128  case NEON::BI__builtin_neon_vst2q_v: {
9129  Ops.push_back(Ops[0]);
9130  Ops.erase(Ops.begin());
9131  llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
9132  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
9133  Ops, "");
9134  }
9135  case NEON::BI__builtin_neon_vst2_lane_v:
9136  case NEON::BI__builtin_neon_vst2q_lane_v: {
9137  Ops.push_back(Ops[0]);
9138  Ops.erase(Ops.begin());
9139  Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
9140  llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
9141  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
9142  Ops, "");
9143  }
9144  case NEON::BI__builtin_neon_vst3_v:
9145  case NEON::BI__builtin_neon_vst3q_v: {
9146  Ops.push_back(Ops[0]);
9147  Ops.erase(Ops.begin());
9148  llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
9149  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
9150  Ops, "");
9151  }
9152  case NEON::BI__builtin_neon_vst3_lane_v:
9153  case NEON::BI__builtin_neon_vst3q_lane_v: {
9154  Ops.push_back(Ops[0]);
9155  Ops.erase(Ops.begin());
9156  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
9157  llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
9158  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
9159  Ops, "");
9160  }
9161  case NEON::BI__builtin_neon_vst4_v:
9162  case NEON::BI__builtin_neon_vst4q_v: {
9163  Ops.push_back(Ops[0]);
9164  Ops.erase(Ops.begin());
9165  llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
9166  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
9167  Ops, "");
9168  }
9169  case NEON::BI__builtin_neon_vst4_lane_v:
9170  case NEON::BI__builtin_neon_vst4q_lane_v: {
9171  Ops.push_back(Ops[0]);
9172  Ops.erase(Ops.begin());
9173  Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
9174  llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
9175  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
9176  Ops, "");
9177  }
9178  case NEON::BI__builtin_neon_vtrn_v:
9179  case NEON::BI__builtin_neon_vtrnq_v: {
9180  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
9181  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9182  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
9183  Value *SV = nullptr;
9184 
9185  for (unsigned vi = 0; vi != 2; ++vi) {
9186  SmallVector<uint32_t, 16> Indices;
9187  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
9188  Indices.push_back(i+vi);
9189  Indices.push_back(i+e+vi);
9190  }
9191  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
9192  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
9193  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
9194  }
9195  return SV;
9196  }
9197  case NEON::BI__builtin_neon_vuzp_v:
9198  case NEON::BI__builtin_neon_vuzpq_v: {
9199  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
9200  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9201  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
9202  Value *SV = nullptr;
9203 
9204  for (unsigned vi = 0; vi != 2; ++vi) {
9205  SmallVector<uint32_t, 16> Indices;
9206  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
9207  Indices.push_back(2*i+vi);
9208 
9209  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
9210  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
9211  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
9212  }
9213  return SV;
9214  }
9215  case NEON::BI__builtin_neon_vzip_v:
9216  case NEON::BI__builtin_neon_vzipq_v: {
9217  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
9218  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
9219  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
9220  Value *SV = nullptr;
9221 
9222  for (unsigned vi = 0; vi != 2; ++vi) {
9223  SmallVector<uint32_t, 16> Indices;
9224  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
9225  Indices.push_back((i + vi*e) >> 1);
9226  Indices.push_back(((i + vi*e) >> 1)+e);
9227  }
9228  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
9229  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
9230  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
9231  }
9232  return SV;
9233  }
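// Illustrative index patterns for the vtrn/vuzp/vzip cases above, assuming a
// 4-element vector (e == 4) with shuffle inputs a = Ops[1] and b = Ops[2]:
//   vtrn: vi=0 -> {0,4,2,6} = {a0,b0,a2,b2},  vi=1 -> {1,5,3,7} = {a1,b1,a3,b3}
//   vuzp: vi=0 -> {0,2,4,6} = {a0,a2,b0,b2},  vi=1 -> {1,3,5,7} = {a1,a3,b1,b3}
//   vzip: vi=0 -> {0,4,1,5} = {a0,b0,a1,b1},  vi=1 -> {2,6,3,7} = {a2,b2,a3,b3}
// Each of the two shuffles is stored to the vi'th slot of the sret pointer in
// Ops[0].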
9234  case NEON::BI__builtin_neon_vqtbl1q_v: {
9235  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
9236  Ops, "vtbl1");
9237  }
9238  case NEON::BI__builtin_neon_vqtbl2q_v: {
9239  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
9240  Ops, "vtbl2");
9241  }
9242  case NEON::BI__builtin_neon_vqtbl3q_v: {
9243  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
9244  Ops, "vtbl3");
9245  }
9246  case NEON::BI__builtin_neon_vqtbl4q_v: {
9247  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
9248  Ops, "vtbl4");
9249  }
9250  case NEON::BI__builtin_neon_vqtbx1q_v: {
9251  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
9252  Ops, "vtbx1");
9253  }
9254  case NEON::BI__builtin_neon_vqtbx2q_v: {
9255  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
9256  Ops, "vtbx2");
9257  }
9258  case NEON::BI__builtin_neon_vqtbx3q_v: {
9259  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
9260  Ops, "vtbx3");
9261  }
9262  case NEON::BI__builtin_neon_vqtbx4q_v: {
9263  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
9264  Ops, "vtbx4");
9265  }
9266  case NEON::BI__builtin_neon_vsqadd_v:
9267  case NEON::BI__builtin_neon_vsqaddq_v: {
9268  Int = Intrinsic::aarch64_neon_usqadd;
9269  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
9270  }
9271  case NEON::BI__builtin_neon_vuqadd_v:
9272  case NEON::BI__builtin_neon_vuqaddq_v: {
9273  Int = Intrinsic::aarch64_neon_suqadd;
9274  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
9275  }
9276  }
9277 }
9278 
9279 llvm::Value *CodeGenFunction::
9280 BuildVector(ArrayRef<llvm::Value*> Ops) {
9281  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
9282  "Not a power-of-two sized vector!");
9283  bool AllConstants = true;
9284  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
9285  AllConstants &= isa<Constant>(Ops[i]);
9286 
9287  // If this is a constant vector, create a ConstantVector.
9288  if (AllConstants) {
9289  SmallVector<llvm::Constant*, 16> CstOps;
9290  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
9291  CstOps.push_back(cast<Constant>(Ops[i]));
9292  return llvm::ConstantVector::get(CstOps);
9293  }
9294 
9295  // Otherwise, insertelement the values to build the vector.
9296  Value *Result =
9297  llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
9298 
9299  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
9300  Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
9301 
9302  return Result;
9303 }
9304 
9305 // Convert the mask from an integer type to a vector of i1.
9306 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
9307  unsigned NumElts) {
9308 
9309  llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
9310  cast<IntegerType>(Mask->getType())->getBitWidth());
9311  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
9312 
9313  // If we have fewer than 8 elements, then the starting mask was an i8 and
9314  // we need to extract down to the right number of elements.
9315  if (NumElts < 8) {
9316  uint32_t Indices[4];
9317  for (unsigned i = 0; i != NumElts; ++i)
9318  Indices[i] = i;
9319  MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
9320  makeArrayRef(Indices, NumElts),
9321  "extract");
9322  }
9323  return MaskVec;
9324 }
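// Worked example (values assumed for illustration, little-endian x86): for an
// i8 mask 0b00001011 and NumElts == 4, the bitcast yields
// <8 x i1> {1,1,0,1,0,0,0,0} (element i comes from bit i), and the shuffle
// keeps only the first four lanes, giving <4 x i1> {1,1,0,1}.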
9325 
9326 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
9327  ArrayRef<Value *> Ops,
9328  unsigned Align) {
9329  // Cast the pointer to right type.
9330  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9331  llvm::PointerType::getUnqual(Ops[1]->getType()));
9332 
9333  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9334  Ops[1]->getType()->getVectorNumElements());
9335 
9336  return CGF.Builder.CreateMaskedStore(Ops[1], Ptr, Align, MaskVec);
9337 }
9338 
9339 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
9340  ArrayRef<Value *> Ops, unsigned Align) {
9341  // Cast the pointer to right type.
9342  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9343  llvm::PointerType::getUnqual(Ops[1]->getType()));
9344 
9345  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9346  Ops[1]->getType()->getVectorNumElements());
9347 
9348  return CGF.Builder.CreateMaskedLoad(Ptr, Align, MaskVec, Ops[1]);
9349 }
9350 
9351 static Value *EmitX86ExpandLoad(CodeGenFunction &CGF,
9352  ArrayRef<Value *> Ops) {
9353  llvm::Type *ResultTy = Ops[1]->getType();
9354  llvm::Type *PtrTy = ResultTy->getVectorElementType();
9355 
9356  // Cast the pointer to element type.
9357  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9358  llvm::PointerType::getUnqual(PtrTy));
9359 
9360  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9361  ResultTy->getVectorNumElements());
9362 
9363  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_expandload,
9364  ResultTy);
9365  return CGF.Builder.CreateCall(F, { Ptr, MaskVec, Ops[1] });
9366 }
9367 
9368 static Value *EmitX86CompressExpand(CodeGenFunction &CGF,
9369  ArrayRef<Value *> Ops,
9370  bool IsCompress) {
9371  llvm::Type *ResultTy = Ops[1]->getType();
9372 
9373  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9374  ResultTy->getVectorNumElements());
9375 
9376  Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
9377  : Intrinsic::x86_avx512_mask_expand;
9378  llvm::Function *F = CGF.CGM.getIntrinsic(IID, ResultTy);
9379  return CGF.Builder.CreateCall(F, { Ops[0], Ops[1], MaskVec });
9380 }
9381 
9382 static Value *EmitX86CompressStore(CodeGenFunction &CGF,
9383  ArrayRef<Value *> Ops) {
9384  llvm::Type *ResultTy = Ops[1]->getType();
9385  llvm::Type *PtrTy = ResultTy->getVectorElementType();
9386 
9387  // Cast the pointer to element type.
9388  Value *Ptr = CGF.Builder.CreateBitCast(Ops[0],
9389  llvm::PointerType::getUnqual(PtrTy));
9390 
9391  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
9392  ResultTy->getVectorNumElements());
9393 
9394  llvm::Function *F = CGF.CGM.getIntrinsic(Intrinsic::masked_compressstore,
9395  ResultTy);
9396  return CGF.Builder.CreateCall(F, { Ops[1], Ptr, MaskVec });
9397 }
9398 
9399 static Value *EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc,
9400  ArrayRef<Value *> Ops,
9401  bool InvertLHS = false) {
9402  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
9403  Value *LHS = getMaskVecValue(CGF, Ops[0], NumElts);
9404  Value *RHS = getMaskVecValue(CGF, Ops[1], NumElts);
9405 
9406  if (InvertLHS)
9407  LHS = CGF.Builder.CreateNot(LHS);
9408 
9409  return CGF.Builder.CreateBitCast(CGF.Builder.CreateBinOp(Opc, LHS, RHS),
9410  Ops[0]->getType());
9411 }
9412 
9413 static Value *EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1,
9414  Value *Amt, bool IsRight) {
9415  llvm::Type *Ty = Op0->getType();
9416 
9417  // The amount may be a scalar immediate, in which case create a splat vector.
9418  // Funnel shift amounts are treated modulo the element width, and the element
9419  // types are all power-of-2, so we only care about the lowest log2 bits anyway.
9420  if (Amt->getType() != Ty) {
9421  unsigned NumElts = Ty->getVectorNumElements();
9422  Amt = CGF.Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
9423  Amt = CGF.Builder.CreateVectorSplat(NumElts, Amt);
9424  }
9425 
9426  unsigned IID = IsRight ? Intrinsic::fshr : Intrinsic::fshl;
9427  Function *F = CGF.CGM.getIntrinsic(IID, Ty);
9428  return CGF.Builder.CreateCall(F, {Op0, Op1, Amt});
9429 }
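// Example of the funnel-shift semantics relied on here (values assumed for
// illustration): on i32 elements, llvm.fshl(X, Y, 8) == (X << 8) | (Y >> 24)
// and llvm.fshr(X, Y, 8) == (X << 24) | (Y >> 8); an amount of 40 behaves like
// 40 % 32 == 8 because the intrinsics take the shift amount modulo the width.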
9430 
9431 static Value *EmitX86vpcom(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
9432  bool IsSigned) {
9433  Value *Op0 = Ops[0];
9434  Value *Op1 = Ops[1];
9435  llvm::Type *Ty = Op0->getType();
9436  uint64_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
9437 
9438  CmpInst::Predicate Pred;
9439  switch (Imm) {
9440  case 0x0:
9441  Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
9442  break;
9443  case 0x1:
9444  Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
9445  break;
9446  case 0x2:
9447  Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
9448  break;
9449  case 0x3:
9450  Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
9451  break;
9452  case 0x4:
9453  Pred = ICmpInst::ICMP_EQ;
9454  break;
9455  case 0x5:
9456  Pred = ICmpInst::ICMP_NE;
9457  break;
9458  case 0x6:
9459  return llvm::Constant::getNullValue(Ty); // FALSE
9460  case 0x7:
9461  return llvm::Constant::getAllOnesValue(Ty); // TRUE
9462  default:
9463  llvm_unreachable("Unexpected XOP vpcom/vpcomu predicate");
9464  }
9465 
9466  Value *Cmp = CGF.Builder.CreateICmp(Pred, Op0, Op1);
9467  Value *Res = CGF.Builder.CreateSExt(Cmp, Ty);
9468  return Res;
9469 }
9470 
9471 static Value *EmitX86Select(CodeGenFunction &CGF,
9472  Value *Mask, Value *Op0, Value *Op1) {
9473 
9474  // If the mask is all ones just return first argument.
9475  if (const auto *C = dyn_cast<Constant>(Mask))
9476  if (C->isAllOnesValue())
9477  return Op0;
9478 
9479  Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
9480 
9481  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
9482 }
9483 
9484 static Value *EmitX86ScalarSelect(CodeGenFunction &CGF,
9485  Value *Mask, Value *Op0, Value *Op1) {
9486  // If the mask is all ones just return first argument.
9487  if (const auto *C = dyn_cast<Constant>(Mask))
9488  if (C->isAllOnesValue())
9489  return Op0;
9490 
9491  llvm::VectorType *MaskTy =
9492  llvm::VectorType::get(CGF.Builder.getInt1Ty(),
9493  Mask->getType()->getIntegerBitWidth());
9494  Mask = CGF.Builder.CreateBitCast(Mask, MaskTy);
9495  Mask = CGF.Builder.CreateExtractElement(Mask, (uint64_t)0);
9496  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
9497 }
9498 
9499 static Value *EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp,
9500  unsigned NumElts, Value *MaskIn) {
9501  if (MaskIn) {
9502  const auto *C = dyn_cast<Constant>(MaskIn);
9503  if (!C || !C->isAllOnesValue())
9504  Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, MaskIn, NumElts));
9505  }
9506 
9507  if (NumElts < 8) {
9508  uint32_t Indices[8];
9509  for (unsigned i = 0; i != NumElts; ++i)
9510  Indices[i] = i;
9511  for (unsigned i = NumElts; i != 8; ++i)
9512  Indices[i] = i % NumElts + NumElts;
9513  Cmp = CGF.Builder.CreateShuffleVector(
9514  Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
9515  }
9516 
9517  return CGF.Builder.CreateBitCast(Cmp,
9518  IntegerType::get(CGF.getLLVMContext(),
9519  std::max(NumElts, 8U)));
9520 }
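// Illustration, assuming NumElts == 4 and Cmp == <4 x i1> {1,0,1,1}: the
// shuffle pads the vector with lanes taken from the null vector, giving
// <8 x i1> {1,0,1,1,0,0,0,0}, which then bitcasts to the i8 mask value 0x0D.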
9521 
9522 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
9523  bool Signed, ArrayRef<Value *> Ops) {
9524  assert((Ops.size() == 2 || Ops.size() == 4) &&
9525  "Unexpected number of arguments");
9526  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
9527  Value *Cmp;
9528 
9529  if (CC == 3) {
9530  Cmp = Constant::getNullValue(
9531  llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
9532  } else if (CC == 7) {
9533  Cmp = Constant::getAllOnesValue(
9534  llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
9535  } else {
9536  ICmpInst::Predicate Pred;
9537  switch (CC) {
9538  default: llvm_unreachable("Unknown condition code");
9539  case 0: Pred = ICmpInst::ICMP_EQ; break;
9540  case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
9541  case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
9542  case 4: Pred = ICmpInst::ICMP_NE; break;
9543  case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
9544  case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
9545  }
9546  Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
9547  }
9548 
9549  Value *MaskIn = nullptr;
9550  if (Ops.size() == 4)
9551  MaskIn = Ops[3];
9552 
9553  return EmitX86MaskedCompareResult(CGF, Cmp, NumElts, MaskIn);
9554 }
9555 
9556 static Value *EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In) {
9557  Value *Zero = Constant::getNullValue(In->getType());
9558  return EmitX86MaskedCompare(CGF, 1, true, { In, Zero });
9559 }
9560 
9561 static Value *EmitX86ConvertIntToFp(CodeGenFunction &CGF,
9562  ArrayRef<Value *> Ops, bool IsSigned) {
9563  unsigned Rnd = cast<llvm::ConstantInt>(Ops[3])->getZExtValue();
9564  llvm::Type *Ty = Ops[1]->getType();
9565 
9566  Value *Res;
9567  if (Rnd != 4) {
9568  Intrinsic::ID IID = IsSigned ? Intrinsic::x86_avx512_sitofp_round
9569  : Intrinsic::x86_avx512_uitofp_round;
9570  Function *F = CGF.CGM.getIntrinsic(IID, { Ty, Ops[0]->getType() });
9571  Res = CGF.Builder.CreateCall(F, { Ops[0], Ops[3] });
9572  } else {
9573  Res = IsSigned ? CGF.Builder.CreateSIToFP(Ops[0], Ty)
9574  : CGF.Builder.CreateUIToFP(Ops[0], Ty);
9575  }
9576 
9577  return EmitX86Select(CGF, Ops[2], Res, Ops[1]);
9578 }
9579 
9580 static Value *EmitX86Abs(CodeGenFunction &CGF, ArrayRef<Value *> Ops) {
9581 
9582  llvm::Type *Ty = Ops[0]->getType();
9583  Value *Zero = llvm::Constant::getNullValue(Ty);
9584  Value *Sub = CGF.Builder.CreateSub(Zero, Ops[0]);
9585  Value *Cmp = CGF.Builder.CreateICmp(ICmpInst::ICMP_SGT, Ops[0], Zero);
9586  Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Sub);
9587  return Res;
9588 }
9589 
9590 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
9591  ArrayRef<Value *> Ops) {
9592  Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
9593  Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
9594 
9595  assert(Ops.size() == 2);
9596  return Res;
9597 }
9598 
9599 // Lowers X86 FMA intrinsics to IR.
9600 static Value *EmitX86FMAExpr(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
9601  unsigned BuiltinID, bool IsAddSub) {
9602 
9603  bool Subtract = false;
9604  Intrinsic::ID IID = Intrinsic::not_intrinsic;
9605  switch (BuiltinID) {
9606  default: break;
9607  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
9608  Subtract = true;
9609  LLVM_FALLTHROUGH;
9610  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
9611  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
9612  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
9613  IID = llvm::Intrinsic::x86_avx512_vfmadd_ps_512; break;
9614  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
9615  Subtract = true;
9616  LLVM_FALLTHROUGH;
9617  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
9618  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
9619  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
9620  IID = llvm::Intrinsic::x86_avx512_vfmadd_pd_512; break;
9621  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
9622  Subtract = true;
9623  LLVM_FALLTHROUGH;
9624  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
9625  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
9626  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
9627  IID = llvm::Intrinsic::x86_avx512_vfmaddsub_ps_512;
9628  break;
9629  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
9630  Subtract = true;
9631  LLVM_FALLTHROUGH;
9632  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
9633  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
9634  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
9635  IID = llvm::Intrinsic::x86_avx512_vfmaddsub_pd_512;
9636  break;
9637  }
9638 
9639  Value *A = Ops[0];
9640  Value *B = Ops[1];
9641  Value *C = Ops[2];
9642 
9643  if (Subtract)
9644  C = CGF.Builder.CreateFNeg(C);
9645 
9646  Value *Res;
9647 
9648  // Use the rounding intrinsic only if a mode other than _MM_FROUND_CUR_DIRECTION (4, i.e. no explicit rounding) was requested.
9649  if (IID != Intrinsic::not_intrinsic &&
9650  cast<llvm::ConstantInt>(Ops.back())->getZExtValue() != (uint64_t)4) {
9651  Function *Intr = CGF.CGM.getIntrinsic(IID);
9652  Res = CGF.Builder.CreateCall(Intr, {A, B, C, Ops.back() });
9653  } else {
9654  llvm::Type *Ty = A->getType();
9655  Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ty);
9656  Res = CGF.Builder.CreateCall(FMA, {A, B, C} );
9657 
9658  if (IsAddSub) {
9659  // Blend in a*b-c on the even lanes by shuffling with a second FMA whose C operand is negated.
9660  unsigned NumElts = Ty->getVectorNumElements();
9661  SmallVector<uint32_t, 16> Indices(NumElts);
9662  for (unsigned i = 0; i != NumElts; ++i)
9663  Indices[i] = i + (i % 2) * NumElts;
9664 
9665  Value *NegC = CGF.Builder.CreateFNeg(C);
9666  Value *FMSub = CGF.Builder.CreateCall(FMA, {A, B, NegC} );
9667  Res = CGF.Builder.CreateShuffleVector(FMSub, Res, Indices);
9668  }
9669  }
9670 
9671  // Handle any required masking.
9672  Value *MaskFalseVal = nullptr;
9673  switch (BuiltinID) {
9674  case clang::X86::BI__builtin_ia32_vfmaddps512_mask:
9675  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask:
9676  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask:
9677  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask:
9678  MaskFalseVal = Ops[0];
9679  break;
9680  case clang::X86::BI__builtin_ia32_vfmaddps512_maskz:
9681  case clang::X86::BI__builtin_ia32_vfmaddpd512_maskz:
9682  case clang::X86::BI__builtin_ia32_vfmaddsubps512_maskz:
9683  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
9684  MaskFalseVal = Constant::getNullValue(Ops[0]->getType());
9685  break;
9686  case clang::X86::BI__builtin_ia32_vfmsubps512_mask3:
9687  case clang::X86::BI__builtin_ia32_vfmaddps512_mask3:
9688  case clang::X86::BI__builtin_ia32_vfmsubpd512_mask3:
9689  case clang::X86::BI__builtin_ia32_vfmaddpd512_mask3:
9690  case clang::X86::BI__builtin_ia32_vfmsubaddps512_mask3:
9691  case clang::X86::BI__builtin_ia32_vfmaddsubps512_mask3:
9692  case clang::X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
9693  case clang::X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
9694  MaskFalseVal = Ops[2];
9695  break;
9696  }
9697 
9698  if (MaskFalseVal)
9699  return EmitX86Select(CGF, Ops[3], Res, MaskFalseVal);
9700 
9701  return Res;
9702 }
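// Add/sub blending illustration, assuming NumElts == 4: the loop above builds
// Indices == {0,5,2,7}, so the shuffle of (FMSub, Res) takes even lanes from
// a*b-c and odd lanes from a*b+c, which matches the vfmaddsub semantics of
// subtracting on even-indexed elements and adding on odd-indexed ones.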
9703 
9704 static Value *
9705 EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef<Value *> Ops,
9706  Value *Upper, bool ZeroMask = false, unsigned PTIdx = 0,
9707  bool NegAcc = false) {
9708  unsigned Rnd = 4;
9709  if (Ops.size() > 4)
9710  Rnd = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
9711 
9712  if (NegAcc)
9713  Ops[2] = CGF.Builder.CreateFNeg(Ops[2]);
9714 
9715  Ops[0] = CGF.Builder.CreateExtractElement(Ops[0], (uint64_t)0);
9716  Ops[1] = CGF.Builder.CreateExtractElement(Ops[1], (uint64_t)0);
9717  Ops[2] = CGF.Builder.CreateExtractElement(Ops[2], (uint64_t)0);
9718  Value *Res;
9719  if (Rnd != 4) {
9720  Intrinsic::ID IID = Ops[0]->getType()->getPrimitiveSizeInBits() == 32 ?
9721  Intrinsic::x86_avx512_vfmadd_f32 :
9722  Intrinsic::x86_avx512_vfmadd_f64;
9723  Res = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
9724  {Ops[0], Ops[1], Ops[2], Ops[4]});
9725  } else {
9726  Function *FMA = CGF.CGM.getIntrinsic(Intrinsic::fma, Ops[0]->getType());
9727  Res = CGF.Builder.CreateCall(FMA, Ops.slice(0, 3));
9728  }
9729  // If we have more than 3 arguments, we need to do masking.
9730  if (Ops.size() > 3) {
9731  Value *PassThru = ZeroMask ? Constant::getNullValue(Res->getType())
9732  : Ops[PTIdx];
9733 
9734  // If we negated the accumulator and it is also the PassThru value, we need to
9735  // bypass the negate. Conveniently, Upper should hold the same value in this
9736  // case.
9737  if (NegAcc && PTIdx == 2)
9738  PassThru = CGF.Builder.CreateExtractElement(Upper, (uint64_t)0);
9739 
9740  Res = EmitX86ScalarSelect(CGF, Ops[3], Res, PassThru);
9741  }
9742  return CGF.Builder.CreateInsertElement(Upper, Res, (uint64_t)0);
9743 }
9744 
9745 static Value *EmitX86Muldq(CodeGenFunction &CGF, bool IsSigned,
9746  ArrayRef<Value *> Ops) {
9747  llvm::Type *Ty = Ops[0]->getType();
9748  // Arguments have a vXi32 type so cast to vXi64.
9749  Ty = llvm::VectorType::get(CGF.Int64Ty,
9750  Ty->getPrimitiveSizeInBits() / 64);
9751  Value *LHS = CGF.Builder.CreateBitCast(Ops[0], Ty);
9752  Value *RHS = CGF.Builder.CreateBitCast(Ops[1], Ty);
9753 
9754  if (IsSigned) {
9755  // Shift left then arithmetic shift right.
9756  Constant *ShiftAmt = ConstantInt::get(Ty, 32);
9757  LHS = CGF.Builder.CreateShl(LHS, ShiftAmt);
9758  LHS = CGF.Builder.CreateAShr(LHS, ShiftAmt);
9759  RHS = CGF.Builder.CreateShl(RHS, ShiftAmt);
9760  RHS = CGF.Builder.CreateAShr(RHS, ShiftAmt);
9761  } else {
9762  // Clear the upper bits.
9763  Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
9764  LHS = CGF.Builder.CreateAnd(LHS, Mask);
9765  RHS = CGF.Builder.CreateAnd(RHS, Mask);
9766  }
9767 
9768  return CGF.Builder.CreateMul(LHS, RHS);
9769 }
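// Example of the widening multiply, assuming <4 x i32> inputs on little-endian
// x86: the bitcast to <2 x i64> leaves source elements 0 and 2 in the low
// halves of the i64 lanes; sign-extending (shl 32, ashr 32) or zero-extending
// (and 0xffffffff) those low 32 bits before the 64-bit multiply reproduces the
// pmuldq/pmuludq behavior of multiplying the even-indexed 32-bit elements.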
9770 
9771 // Emit a masked pternlog intrinsic. This only exists because the header has to
9772 // use a macro and we aren't able to pass the input argument to a pternlog
9773 // builtin and a select builtin without evaluating it twice.
9774 static Value *EmitX86Ternlog(CodeGenFunction &CGF, bool ZeroMask,
9775  ArrayRef<Value *> Ops) {
9776  llvm::Type *Ty = Ops[0]->getType();
9777 
9778  unsigned VecWidth = Ty->getPrimitiveSizeInBits();
9779  unsigned EltWidth = Ty->getScalarSizeInBits();
9780  Intrinsic::ID IID;
9781  if (VecWidth == 128 && EltWidth == 32)
9782  IID = Intrinsic::x86_avx512_pternlog_d_128;
9783  else if (VecWidth == 256 && EltWidth == 32)
9784  IID = Intrinsic::x86_avx512_pternlog_d_256;
9785  else if (VecWidth == 512 && EltWidth == 32)
9786  IID = Intrinsic::x86_avx512_pternlog_d_512;
9787  else if (VecWidth == 128 && EltWidth == 64)
9788  IID = Intrinsic::x86_avx512_pternlog_q_128;
9789  else if (VecWidth == 256 && EltWidth == 64)
9790  IID = Intrinsic::x86_avx512_pternlog_q_256;
9791  else if (VecWidth == 512 && EltWidth == 64)
9792  IID = Intrinsic::x86_avx512_pternlog_q_512;
9793  else
9794  llvm_unreachable("Unexpected intrinsic");
9795 
9796  Value *Ternlog = CGF.Builder.CreateCall(CGF.CGM.getIntrinsic(IID),
9797  Ops.drop_back());
9798  Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty) : Ops[0];
9799  return EmitX86Select(CGF, Ops[4], Ternlog, PassThru);
9800 }
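// The immediate in Ops[3] acts as a three-input truth table: assuming the
// usual vpternlog encoding where the first source supplies the most
// significant index bit, result bit j is bit ((a_j << 2) | (b_j << 1) | c_j)
// of the immediate. For example, an immediate of 0xCA computes the bitwise
// select a ? b : c (values shown for illustration only).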
9801 
9802 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
9803  llvm::Type *DstTy) {
9804  unsigned NumberOfElements = DstTy->getVectorNumElements();
9805  Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
9806  return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
9807 }
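// Illustration, assuming a cvtmask2w-style destination of <8 x i16> and an i8
// mask of 0b00000101 (little-endian lane/bit correspondence): the mask becomes
// <8 x i1> {1,0,1,0,0,0,0,0}, which sign-extends to
// <8 x i16> {-1, 0, -1, 0, 0, 0, 0, 0}.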
9808 
9809 // Emit addition or subtraction with signed/unsigned saturation.
9810 static Value *EmitX86AddSubSatExpr(CodeGenFunction &CGF,
9811  ArrayRef<Value *> Ops, bool IsSigned,
9812  bool IsAddition) {
9813  Intrinsic::ID IID =
9814  IsSigned ? (IsAddition ? Intrinsic::sadd_sat : Intrinsic::ssub_sat)
9815  : (IsAddition ? Intrinsic::uadd_sat : Intrinsic::usub_sat);
9816  llvm::Function *F = CGF.CGM.getIntrinsic(IID, Ops[0]->getType());
9817  return CGF.Builder.CreateCall(F, {Ops[0], Ops[1]});
9818 }
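// Saturating arithmetic examples (element-wise on the vector operands, shown
// here on scalars for illustration): llvm.sadd.sat.i8(100, 100) == 127 and
// llvm.usub.sat.i8(10, 20) == 0, which is the behavior the paddsb/psubusb
// family of instructions requires.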
9819 
9820 Value *CodeGenFunction::EmitX86CpuIs(const CallExpr *E) {
9821  const Expr *CPUExpr = E->getArg(0)->IgnoreParenCasts();
9822  StringRef CPUStr = cast<clang::StringLiteral>(CPUExpr)->getString();
9823  return EmitX86CpuIs(CPUStr);
9824 }
9825 
9826 // Convert a BF16 to a float.
9827 static Value *EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF,
9828  const CallExpr *E,
9829  ArrayRef<Value *> Ops) {
9830  llvm::Type *Int32Ty = CGF.Builder.getInt32Ty();
9831  Value *ZeroExt = CGF.Builder.CreateZExt(Ops[0], Int32Ty);
9832  Value *Shl = CGF.Builder.CreateShl(ZeroExt, 16);
9833  llvm::Type *ResultType = CGF.ConvertType(E->getType());
9834  Value *BitCast = CGF.Builder.CreateBitCast(Shl, ResultType);
9835  return BitCast;
9836 }
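// Bit-level example (values assumed for illustration): the bfloat16 pattern
// 0x3F80 zero-extends to 0x00003F80, shifts left by 16 to 0x3F800000, and
// bitcasts to the float 1.0f; a bfloat16 is simply the high 16 bits of an
// IEEE-754 single-precision value.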
9837 
9838 Value *CodeGenFunction::EmitX86CpuIs(StringRef CPUStr) {
9839 
9840  llvm::Type *Int32Ty = Builder.getInt32Ty();
9841 
9842  // Matching the struct layout from the compiler-rt/libgcc structure that is
9843  // filled in:
9844  // unsigned int __cpu_vendor;
9845  // unsigned int __cpu_type;
9846  // unsigned int __cpu_subtype;
9847  // unsigned int __cpu_features[1];
9848  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
9849  llvm::ArrayType::get(Int32Ty, 1));
9850 
9851  // Grab the global __cpu_model.
9852  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
9853  cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
9854 
9855  // Calculate the index needed to access the correct field based on the
9856  // range. Also adjust the expected value.
9857  unsigned Index;
9858  unsigned Value;
9859  std::tie(Index, Value) = StringSwitch<std::pair<unsigned, unsigned>>(CPUStr)
9860 #define X86_VENDOR(ENUM, STRING) \
9861  .Case(STRING, {0u, static_cast<unsigned>(llvm::X86::ENUM)})
9862 #define X86_CPU_TYPE_COMPAT_WITH_ALIAS(ARCHNAME, ENUM, STR, ALIAS) \
9863  .Cases(STR, ALIAS, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
9864 #define X86_CPU_TYPE_COMPAT(ARCHNAME, ENUM, STR) \
9865  .Case(STR, {1u, static_cast<unsigned>(llvm::X86::ENUM)})
9866 #define X86_CPU_SUBTYPE_COMPAT(ARCHNAME, ENUM, STR) \
9867  .Case(STR, {2u, static_cast<unsigned>(llvm::X86::ENUM)})
9868 #include "llvm/Support/X86TargetParser.def"
9869  .Default({0, 0});
9870  assert(Value != 0 && "Invalid CPUStr passed to CpuIs");
9871 
9872  // Grab the appropriate field from __cpu_model.
9873  llvm::Value *Idxs[] = {ConstantInt::get(Int32Ty, 0),
9874  ConstantInt::get(Int32Ty, Index)};
9875  llvm::Value *CpuValue = Builder.CreateGEP(STy, CpuModel, Idxs);
9876  CpuValue = Builder.CreateAlignedLoad(CpuValue, CharUnits::fromQuantity(4));
9877 
9878  // Check the value of the field against the requested value.
9879  return Builder.CreateICmpEQ(CpuValue,
9880  llvm::ConstantInt::get(Int32Ty, Value));
9881 }
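// For example (assuming the entries in llvm/Support/X86TargetParser.def),
// __builtin_cpu_is("intel") resolves to Index == 0 (__cpu_vendor) and compares
// that field against the VENDOR_INTEL enumerator, a CPU-type string selects
// Index == 1 (__cpu_type), and a subtype string such as "skylake" selects
// Index == 2 (__cpu_subtype).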
9882 
9883 Value *CodeGenFunction::EmitX86CpuSupports(const CallExpr *E) {
9884  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
9885  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
9886  return EmitX86CpuSupports(FeatureStr);
9887 }
9888 
9889 uint64_t
9890 CodeGenFunction::GetX86CpuSupportsMask(ArrayRef<StringRef> FeatureStrs) {
9891  // Processor features and mapping to processor feature value.
9892  uint64_t FeaturesMask = 0;
9893  for (const StringRef &FeatureStr : FeatureStrs) {
9894  unsigned Feature =
9895  StringSwitch<unsigned>(FeatureStr)
9896 #define X86_FEATURE_COMPAT(VAL, ENUM, STR) .Case(STR, VAL)
9897 #include "llvm/Support/X86TargetParser.def"
9898  ;
9899  FeaturesMask |= (1ULL << Feature);
9900  }
9901  return FeaturesMask;
9902 }
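// Sketch of the mapping, assuming the FEATURE_* values from
// llvm/Support/X86TargetParser.def: each requested feature string sets one bit,
// so e.g. {"cmov", "avx"} yields (1ULL << FEATURE_CMOV) | (1ULL << FEATURE_AVX).
// Bits 0-31 are later tested against __cpu_model.__cpu_features[0] and bits
// 32-63 against __cpu_features2 (see EmitX86CpuSupports below).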
9903 
9904 Value *CodeGenFunction::EmitX86CpuSupports(ArrayRef<StringRef> FeatureStrs) {
9905  return EmitX86CpuSupports(GetX86CpuSupportsMask(FeatureStrs));
9906 }
9907 
9908 llvm::Value *CodeGenFunction::EmitX86CpuSupports(uint64_t FeaturesMask) {
9909  uint32_t Features1 = Lo_32(FeaturesMask);
9910  uint32_t Features2 = Hi_32(FeaturesMask);
9911 
9912  Value *Result = Builder.getTrue();
9913 
9914  if (Features1 != 0) {
9915  // Matching the struct layout from the compiler-rt/libgcc structure that is
9916  // filled in:
9917  // unsigned int __cpu_vendor;
9918  // unsigned int __cpu_type;
9919  // unsigned int __cpu_subtype;
9920  // unsigned int __cpu_features[1];
9921  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
9922  llvm::ArrayType::get(Int32Ty, 1));
9923 
9924  // Grab the global __cpu_model.
9925  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
9926  cast<llvm::GlobalValue>(CpuModel)->setDSOLocal(true);
9927 
9928  // Grab the first (0th) element from the field __cpu_features off of the
9929  // global in the struct STy.
9930  Value *Idxs[] = {Builder.getInt32(0), Builder.getInt32(3),
9931  Builder.getInt32(0)};
9932  Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
9933  Value *Features =
9934  Builder.CreateAlignedLoad(CpuFeatures, CharUnits::fromQuantity(4));
9935 
9936  // Check the value of the bit corresponding to the feature requested.
9937  Value *Mask = Builder.getInt32(Features1);
9938  Value *Bitset = Builder.CreateAnd(Features, Mask);
9939  Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
9940  Result = Builder.CreateAnd(Result, Cmp);
9941  }
9942 
9943  if (Features2 != 0) {
9944  llvm::Constant *CpuFeatures2 = CGM.CreateRuntimeVariable(Int32Ty,
9945  "__cpu_features2");
9946  cast<llvm::GlobalValue>(CpuFeatures2)->setDSOLocal(true);
9947 
9948  Value *Features =
9949  Builder.CreateAlignedLoad(CpuFeatures2, CharUnits::fromQuantity(4));
9950 
9951  // Check the value of the bit corresponding to the feature requested.
9952  Value *Mask = Builder.getInt32(Features2);
9953  Value *Bitset = Builder.CreateAnd(Features, Mask);
9954  Value *Cmp = Builder.CreateICmpEQ(Bitset, Mask);
9955  Result = Builder.CreateAnd(Result, Cmp);
9956  }
9957 
9958  return Result;
9959 }
9960 
9961 Value *CodeGenFunction::EmitX86CpuInit() {
9962  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy,
9963  /*Variadic*/ false);
9964  llvm::FunctionCallee Func =
9965  CGM.CreateRuntimeFunction(FTy, "__cpu_indicator_init");
9966  cast<llvm::GlobalValue>(Func.getCallee())->setDSOLocal(true);
9967  cast<llvm::GlobalValue>(Func.getCallee())
9968  ->setDLLStorageClass(llvm::GlobalValue::DefaultStorageClass);
9969  return Builder.CreateCall(Func);
9970 }
9971 
9972 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
9973  const CallExpr *E) {
9974  if (BuiltinID == X86::BI__builtin_cpu_is)
9975  return EmitX86CpuIs(E);
9976  if (BuiltinID == X86::BI__builtin_cpu_supports)
9977  return EmitX86CpuSupports(E);
9978  if (BuiltinID == X86::BI__builtin_cpu_init)
9979  return EmitX86CpuInit();
9980 
9981  SmallVector<Value*, 4> Ops;
9982 
9983  // Find out if any arguments are required to be integer constant expressions.
9984  unsigned ICEArguments = 0;
9985  ASTContext::GetBuiltinTypeError Error;
9986  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
9987  assert(Error == ASTContext::GE_None && "Should not codegen an error");
9988 
9989  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
9990  // If this is a normal argument, just emit it as a scalar.
9991  if ((ICEArguments & (1 << i)) == 0) {
9992  Ops.push_back(EmitScalarExpr(E->getArg(i)));
9993  continue;
9994  }
9995 
9996  // If this is required to be a constant, constant fold it so that we know
9997  // that the generated intrinsic gets a ConstantInt.
9998  llvm::APSInt Result;
9999  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
10000  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
10001  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
10002  }
10003 
10004  // These exist so that the builtin that takes an immediate can be bounds
10005  // checked by clang to avoid passing bad immediates to the backend. Since
10006  // AVX has a larger immediate than SSE, we would need separate builtins to
10007  // do the different bounds checking. Rather than create a clang-specific
10008  // SSE-only builtin, this implements eight separate builtins to match the
10009  // gcc implementation.
10010  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
10011  Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
10012  llvm::Function *F = CGM.getIntrinsic(ID);
10013  return Builder.CreateCall(F, Ops);
10014  };
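// For instance, __builtin_ia32_cmpeqss is expected to lower through this
// helper as getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0): the comparison
// kind is appended as an extra i8 immediate operand before calling the
// intrinsic.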
10015 
10016  // For the vector forms of FP comparisons, translate the builtins directly to
10017  // IR.
10018  // TODO: The builtins could be removed if the SSE header files used vector
10019  // extension comparisons directly (vector ordered/unordered may need
10020  // additional support via __builtin_isnan()).
10021  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
10022  Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
10023  llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
10024  llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
10025  Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
10026  return Builder.CreateBitCast(Sext, FPVecTy);
10027  };
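// For example, __builtin_ia32_cmpltps would come through here as
// getVectorFCmpIR(CmpInst::FCMP_OLT): emit fcmp olt on the <4 x float>
// operands, sign-extend the <4 x i1> result to <4 x i32>, then bitcast back to
// <4 x float> so every lane is all-ones or all-zeros.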
10028 
10029  switch (BuiltinID) {
10030  default: return nullptr;
10031  case X86::BI_mm_prefetch: {
10032  Value *Address = Ops[0];
10033  ConstantInt *C = cast<ConstantInt>(Ops[1]);
10034  Value *RW = ConstantInt::get(Int32Ty, (C->getZExtValue() >> 2) & 0x1);
10035  Value *Locality = ConstantInt::get(Int32Ty, C->getZExtValue() & 0x3);
10036  Value *Data = ConstantInt::get(Int32Ty, 1);
10037  Function *F = CGM.getIntrinsic(Intrinsic::prefetch);
10038  return Builder.CreateCall(F, {Address, RW, Locality, Data});
10039  }
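// The hint operand is decoded as RW = bit 2 and Locality = bits 1:0, so with
// the usual xmmintrin.h values (assumed here for illustration) _MM_HINT_T0 (3)
// produces a read prefetch with maximal locality and _MM_HINT_ET0 (7) produces
// the corresponding write prefetch.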
10040  case X86::BI_mm_clflush: {
10041  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
10042  Ops[0]);
10043  }
10044  case X86::BI_mm_lfence: {
10045  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
10046  }
10047  case X86::BI_mm_mfence: {
10048  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
10049  }
10050  case X86::BI_mm_sfence: {
10051  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
10052  }
10053  case X86::BI_mm_pause: {
10054  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
10055  }
10056  case X86::BI__rdtsc: {
10057  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
10058  }
10059  case X86::BI__builtin_ia32_rdtscp: {
10060  Value *Call = Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtscp));
10061  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
10062  Ops[0]);
10063  return Builder.CreateExtractValue(Call, 0);
10064  }
10065  case X86::BI__builtin_ia32_lzcnt_u16:
10066  case X86::BI__builtin_ia32_lzcnt_u32:
10067  case X86::BI__builtin_ia32_lzcnt_u64: {
10068  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
10069  return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
10070  }
10071  case X86::BI__builtin_ia32_tzcnt_u16:
10072  case X86::BI__builtin_ia32_tzcnt_u32:
10073  case X86::BI__builtin_ia32_tzcnt_u64: {
10074  Function *F = CGM.getIntrinsic(Intrinsic::cttz, Ops[0]->getType());
10075  return Builder.CreateCall(F, {Ops[0], Builder.getInt1(false)});
10076  }
10077  case X86::BI__builtin_ia32_undef128:
10078  case X86::BI__builtin_ia32_undef256:
10079  case X86::BI__builtin_ia32_undef512:
10080  // The x86 definition of "undef" is not the same as the LLVM definition
10081  // (PR32176). We leave optimizing away an unnecessary zero constant to the
10082  // IR optimizer and backend.
10083  // TODO: If we had a "freeze" IR instruction to generate a fixed undef
10084  // value, we should use that here instead of a zero.
10085  return llvm::Constant::getNullValue(ConvertType(E->getType()));
10086  case X86::BI__builtin_ia32_vec_init_v8qi:
10087  case X86::BI__builtin_ia32_vec_init_v4hi:
10088  case X86::BI__builtin_ia32_vec_init_v2si:
10089  return Builder.CreateBitCast(BuildVector(Ops),
10090  llvm::Type::getX86_MMXTy(getLLVMContext()));
10091  case X86::BI__builtin_ia32_vec_ext_v2si:
10092  case X86::BI__builtin_ia32_vec_ext_v16qi:
10093  case X86::BI__builtin_ia32_vec_ext_v8hi:
10094  case X86::BI__builtin_ia32_vec_ext_v4si:
10095  case X86::BI__builtin_ia32_vec_ext_v4sf:
10096  case X86::BI__builtin_ia32_vec_ext_v2di:
10097  case X86::BI__builtin_ia32_vec_ext_v32qi:
10098  case X86::BI__builtin_ia32_vec_ext_v16hi:
10099  case X86::BI__builtin_ia32_vec_ext_v8si:
10100  case X86::BI__builtin_ia32_vec_ext_v4di: {
10101  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10102  uint64_t Index = cast<ConstantInt>(Ops[1])->getZExtValue();
10103  Index &= NumElts - 1;
10104  // These builtins exist so we can ensure the index is an ICE and in range.
10105  // Otherwise we could just do this in the header file.
10106  return Builder.CreateExtractElement(Ops[0], Index);
10107  }
10108  case X86::BI__builtin_ia32_vec_set_v16qi:
10109  case X86::BI__builtin_ia32_vec_set_v8hi:
10110  case X86::BI__builtin_ia32_vec_set_v4si:
10111  case X86::BI__builtin_ia32_vec_set_v2di:
10112  case X86::BI__builtin_ia32_vec_set_v32qi:
10113  case X86::BI__builtin_ia32_vec_set_v16hi:
10114  case X86::BI__builtin_ia32_vec_set_v8si:
10115  case X86::BI__builtin_ia32_vec_set_v4di: {
10116  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10117  unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
10118  Index &= NumElts - 1;
10119  // These builtins exist so we can ensure the index is an ICE and in range.
10120  // Otherwise we could just do this in the header file.
10121  return Builder.CreateInsertElement(Ops[0], Ops[1], Index);
10122  }
10123  case X86::BI_mm_setcsr:
10124  case X86::BI__builtin_ia32_ldmxcsr: {
10125  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
10126  Builder.CreateStore(Ops[0], Tmp);
10127  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
10128  Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
10129  }
10130  case X86::BI_mm_getcsr:
10131  case X86::BI__builtin_ia32_stmxcsr: {
10132  Address Tmp = CreateMemTemp(E->getType());
10133  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
10134  Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
10135  return Builder.CreateLoad(Tmp, "stmxcsr");
10136  }
10137  case X86::BI__builtin_ia32_xsave:
10138  case X86::BI__builtin_ia32_xsave64:
10139  case X86::BI__builtin_ia32_xrstor:
10140  case X86::BI__builtin_ia32_xrstor64:
10141  case X86::BI__builtin_ia32_xsaveopt:
10142  case X86::BI__builtin_ia32_xsaveopt64:
10143  case X86::BI__builtin_ia32_xrstors:
10144  case X86::BI__builtin_ia32_xrstors64:
10145  case X86::BI__builtin_ia32_xsavec:
10146  case X86::BI__builtin_ia32_xsavec64:
10147  case X86::BI__builtin_ia32_xsaves:
10148  case X86::BI__builtin_ia32_xsaves64:
10149  case X86::BI__builtin_ia32_xsetbv:
10150  case X86::BI_xsetbv: {
10151  Intrinsic::ID ID;
10152 #define INTRINSIC_X86_XSAVE_ID(NAME) \
10153  case X86::BI__builtin_ia32_##NAME: \
10154  ID = Intrinsic::x86_##NAME; \
10155  break
10156  switch (BuiltinID) {
10157  default: llvm_unreachable("Unsupported intrinsic!");
10158  INTRINSIC_X86_XSAVE_ID(xsave);
10159  INTRINSIC_X86_XSAVE_ID(xsave64);
10160  INTRINSIC_X86_XSAVE_ID(xrstor);
10161  INTRINSIC_X86_XSAVE_ID(xrstor64);
10162  INTRINSIC_X86_XSAVE_ID(xsaveopt);
10163  INTRINSIC_X86_XSAVE_ID(xsaveopt64);
10164  INTRINSIC_X86_XSAVE_ID(xrstors);
10165  INTRINSIC_X86_XSAVE_ID(xrstors64);
10166  INTRINSIC_X86_XSAVE_ID(xsavec);
10167  INTRINSIC_X86_XSAVE_ID(xsavec64);
10168  INTRINSIC_X86_XSAVE_ID(xsaves);
10169  INTRINSIC_X86_XSAVE_ID(xsaves64);
10170  INTRINSIC_X86_XSAVE_ID(xsetbv);
10171  case X86::BI_xsetbv:
10172  ID = Intrinsic::x86_xsetbv;
10173  break;
10174  }
10175 #undef INTRINSIC_X86_XSAVE_ID
10176  Value *Mhi = Builder.CreateTrunc(
10177  Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
10178  Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
10179  Ops[1] = Mhi;
10180  Ops.push_back(Mlo);
10181  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
10182  }
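// The 64-bit feature mask in Ops[1] is split so the intrinsic receives the
// high and low halves separately, mirroring how the XSAVE-family instructions
// take the mask in EDX:EAX; for example (value assumed), a mask of
// 0x700000003 becomes Mhi == 7 and Mlo == 3.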
10183  case X86::BI__builtin_ia32_xgetbv:
10184  case X86::BI_xgetbv:
10185  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_xgetbv), Ops);
10186  case X86::BI__builtin_ia32_storedqudi128_mask:
10187  case X86::BI__builtin_ia32_storedqusi128_mask:
10188  case X86::BI__builtin_ia32_storedquhi128_mask:
10189  case X86::BI__builtin_ia32_storedquqi128_mask:
10190  case X86::BI__builtin_ia32_storeupd128_mask:
10191  case X86::BI__builtin_ia32_storeups128_mask:
10192  case X86::BI__builtin_ia32_storedqudi256_mask:
10193  case X86::BI__builtin_ia32_storedqusi256_mask:
10194  case X86::BI__builtin_ia32_storedquhi256_mask:
10195  case X86::BI__builtin_ia32_storedquqi256_mask:
10196  case X86::BI__builtin_ia32_storeupd256_mask:
10197  case X86::BI__builtin_ia32_storeups256_mask:
10198  case X86::BI__builtin_ia32_storedqudi512_mask:
10199  case X86::BI__builtin_ia32_storedqusi512_mask:
10200  case X86::BI__builtin_ia32_storedquhi512_mask:
10201  case X86::BI__builtin_ia32_storedquqi512_mask:
10202  case X86::BI__builtin_ia32_storeupd512_mask:
10203  case X86::BI__builtin_ia32_storeups512_mask:
10204  return EmitX86MaskedStore(*this, Ops, 1);
10205 
10206  case X86::BI__builtin_ia32_storess128_mask:
10207  case X86::BI__builtin_ia32_storesd128_mask: {
10208  return EmitX86MaskedStore(*this, Ops, 1);
10209  }
10210  case X86::BI__builtin_ia32_vpopcntb_128:
10211  case X86::BI__builtin_ia32_vpopcntd_128:
10212  case X86::BI__builtin_ia32_vpopcntq_128:
10213  case X86::BI__builtin_ia32_vpopcntw_128:
10214  case X86::BI__builtin_ia32_vpopcntb_256:
10215  case X86::BI__builtin_ia32_vpopcntd_256:
10216  case X86::BI__builtin_ia32_vpopcntq_256:
10217  case X86::BI__builtin_ia32_vpopcntw_256:
10218  case X86::BI__builtin_ia32_vpopcntb_512:
10219  case X86::BI__builtin_ia32_vpopcntd_512:
10220  case X86::BI__builtin_ia32_vpopcntq_512:
10221  case X86::BI__builtin_ia32_vpopcntw_512: {
10222  llvm::Type *ResultType = ConvertType(E->getType());
10223  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
10224  return Builder.CreateCall(F, Ops);
10225  }
10226  case X86::BI__builtin_ia32_cvtmask2b128:
10227  case X86::BI__builtin_ia32_cvtmask2b256:
10228  case X86::BI__builtin_ia32_cvtmask2b512:
10229  case X86::BI__builtin_ia32_cvtmask2w128:
10230  case X86::BI__builtin_ia32_cvtmask2w256:
10231  case X86::BI__builtin_ia32_cvtmask2w512:
10232  case X86::BI__builtin_ia32_cvtmask2d128:
10233  case X86::BI__builtin_ia32_cvtmask2d256:
10234  case X86::BI__builtin_ia32_cvtmask2d512:
10235  case X86::BI__builtin_ia32_cvtmask2q128:
10236  case X86::BI__builtin_ia32_cvtmask2q256:
10237  case X86::BI__builtin_ia32_cvtmask2q512:
10238  return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
10239 
10240  case X86::BI__builtin_ia32_cvtb2mask128:
10241  case X86::BI__builtin_ia32_cvtb2mask256:
10242  case X86::BI__builtin_ia32_cvtb2mask512:
10243  case X86::BI__builtin_ia32_cvtw2mask128:
10244  case X86::BI__builtin_ia32_cvtw2mask256:
10245  case X86::BI__builtin_ia32_cvtw2mask512:
10246  case X86::BI__builtin_ia32_cvtd2mask128:
10247  case X86::BI__builtin_ia32_cvtd2mask256:
10248  case X86::BI__builtin_ia32_cvtd2mask512:
10249  case X86::BI__builtin_ia32_cvtq2mask128:
10250  case X86::BI__builtin_ia32_cvtq2mask256:
10251  case X86::BI__builtin_ia32_cvtq2mask512:
10252  return EmitX86ConvertToMask(*this, Ops[0]);
10253 
10254  case X86::BI__builtin_ia32_cvtdq2ps512_mask:
10255  case X86::BI__builtin_ia32_cvtqq2ps512_mask:
10256  case X86::BI__builtin_ia32_cvtqq2pd512_mask:
10257  return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/true);
10258  case X86::BI__builtin_ia32_cvtudq2ps512_mask:
10259  case X86::BI__builtin_ia32_cvtuqq2ps512_mask:
10260  case X86::BI__builtin_ia32_cvtuqq2pd512_mask:
10261  return EmitX86ConvertIntToFp(*this, Ops, /*IsSigned*/false);
10262 
10263  case X86::BI__builtin_ia32_vfmaddss3:
10264  case X86::BI__builtin_ia32_vfmaddsd3:
10265  case X86::BI__builtin_ia32_vfmaddss3_mask:
10266  case X86::BI__builtin_ia32_vfmaddsd3_mask:
10267  return EmitScalarFMAExpr(*this, Ops, Ops[0]);
10268  case X86::BI__builtin_ia32_vfmaddss:
10269  case X86::BI__builtin_ia32_vfmaddsd:
10270  return EmitScalarFMAExpr(*this, Ops,
10271  Constant::getNullValue(Ops[0]->getType()));
10272  case X86::BI__builtin_ia32_vfmaddss3_maskz:
10273  case X86::BI__builtin_ia32_vfmaddsd3_maskz:
10274  return EmitScalarFMAExpr(*this, Ops, Ops[0], /*ZeroMask*/true);
10275  case X86::BI__builtin_ia32_vfmaddss3_mask3:
10276  case X86::BI__builtin_ia32_vfmaddsd3_mask3:
10277  return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2);
10278  case X86::BI__builtin_ia32_vfmsubss3_mask3:
10279  case X86::BI__builtin_ia32_vfmsubsd3_mask3:
10280  return EmitScalarFMAExpr(*this, Ops, Ops[2], /*ZeroMask*/false, 2,
10281  /*NegAcc*/true);
10282  case X86::BI__builtin_ia32_vfmaddps:
10283  case X86::BI__builtin_ia32_vfmaddpd:
10284  case X86::BI__builtin_ia32_vfmaddps256:
10285  case X86::BI__builtin_ia32_vfmaddpd256:
10286  case X86::BI__builtin_ia32_vfmaddps512_mask:
10287  case X86::BI__builtin_ia32_vfmaddps512_maskz:
10288  case X86::BI__builtin_ia32_vfmaddps512_mask3:
10289  case X86::BI__builtin_ia32_vfmsubps512_mask3:
10290  case X86::BI__builtin_ia32_vfmaddpd512_mask:
10291  case X86::BI__builtin_ia32_vfmaddpd512_maskz:
10292  case X86::BI__builtin_ia32_vfmaddpd512_mask3:
10293  case X86::BI__builtin_ia32_vfmsubpd512_mask3:
10294  return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/false);
10295  case X86::BI__builtin_ia32_vfmaddsubps:
10296  case X86::BI__builtin_ia32_vfmaddsubpd:
10297  case X86::BI__builtin_ia32_vfmaddsubps256:
10298  case X86::BI__builtin_ia32_vfmaddsubpd256:
10299  case X86::BI__builtin_ia32_vfmaddsubps512_mask:
10300  case X86::BI__builtin_ia32_vfmaddsubps512_maskz:
10301  case X86::BI__builtin_ia32_vfmaddsubps512_mask3:
10302  case X86::BI__builtin_ia32_vfmsubaddps512_mask3:
10303  case X86::BI__builtin_ia32_vfmaddsubpd512_mask:
10304  case X86::BI__builtin_ia32_vfmaddsubpd512_maskz:
10305  case X86::BI__builtin_ia32_vfmaddsubpd512_mask3:
10306  case X86::BI__builtin_ia32_vfmsubaddpd512_mask3:
10307  return EmitX86FMAExpr(*this, Ops, BuiltinID, /*IsAddSub*/true);
10308 
10309  case X86::BI__builtin_ia32_movdqa32store128_mask:
10310  case X86::BI__builtin_ia32_movdqa64store128_mask:
10311  case X86::BI__builtin_ia32_storeaps128_mask:
10312  case X86::BI__builtin_ia32_storeapd128_mask:
10313  case X86::BI__builtin_ia32_movdqa32store256_mask:
10314  case X86::BI__builtin_ia32_movdqa64store256_mask:
10315  case X86::BI__builtin_ia32_storeaps256_mask:
10316  case X86::BI__builtin_ia32_storeapd256_mask:
10317  case X86::BI__builtin_ia32_movdqa32store512_mask:
10318  case X86::BI__builtin_ia32_movdqa64store512_mask:
10319  case X86::BI__builtin_ia32_storeaps512_mask:
10320  case X86::BI__builtin_ia32_storeapd512_mask: {
10321  unsigned Align =
10322  getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
10323  return EmitX86MaskedStore(*this, Ops, Align);
10324  }
10325  case X86::BI__builtin_ia32_loadups128_mask:
10326  case X86::BI__builtin_ia32_loadups256_mask:
10327  case X86::BI__builtin_ia32_loadups512_mask:
10328  case X86::BI__builtin_ia32_loadupd128_mask:
10329  case X86::BI__builtin_ia32_loadupd256_mask:
10330  case X86::BI__builtin_ia32_loadupd512_mask:
10331  case X86::BI__builtin_ia32_loaddquqi128_mask:
10332  case X86::BI__builtin_ia32_loaddquqi256_mask:
10333  case X86::BI__builtin_ia32_loaddquqi512_mask:
10334  case X86::BI__builtin_ia32_loaddquhi128_mask:
10335  case X86::BI__builtin_ia32_loaddquhi256_mask:
10336  case X86::BI__builtin_ia32_loaddquhi512_mask:
10337  case X86::BI__builtin_ia32_loaddqusi128_mask:
10338  case X86::BI__builtin_ia32_loaddqusi256_mask:
10339  case X86::BI__builtin_ia32_loaddqusi512_mask:
10340  case X86::BI__builtin_ia32_loaddqudi128_mask:
10341  case X86::BI__builtin_ia32_loaddqudi256_mask:
10342  case X86::BI__builtin_ia32_loaddqudi512_mask:
10343  return EmitX86MaskedLoad(*this, Ops, 1);
10344 
10345  case X86::BI__builtin_ia32_loadss128_mask:
10346  case X86::BI__builtin_ia32_loadsd128_mask:
10347  return EmitX86MaskedLoad(*this, Ops, 1);
10348 
10349  case X86::BI__builtin_ia32_loadaps128_mask:
10350  case X86::BI__builtin_ia32_loadaps256_mask:
10351  case X86::BI__builtin_ia32_loadaps512_mask:
10352  case X86::BI__builtin_ia32_loadapd128_mask:
10353  case X86::BI__builtin_ia32_loadapd256_mask:
10354  case X86::BI__builtin_ia32_loadapd512_mask:
10355  case X86::BI__builtin_ia32_movdqa32load128_mask:
10356  case X86::BI__builtin_ia32_movdqa32load256_mask:
10357  case X86::BI__builtin_ia32_movdqa32load512_mask:
10358  case X86::BI__builtin_ia32_movdqa64load128_mask:
10359  case X86::BI__builtin_ia32_movdqa64load256_mask:
10360  case X86::BI__builtin_ia32_movdqa64load512_mask: {
10361  unsigned Align =
10362  getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
10363  return EmitX86MaskedLoad(*this, Ops, Align);
10364  }
10365 
10366  case X86::BI__builtin_ia32_expandloaddf128_mask:
10367  case X86::BI__builtin_ia32_expandloaddf256_mask:
10368  case X86::BI__builtin_ia32_expandloaddf512_mask:
10369  case X86::BI__builtin_ia32_expandloadsf128_mask:
10370  case X86::BI__builtin_ia32_expandloadsf256_mask:
10371  case X86::BI__builtin_ia32_expandloadsf512_mask:
10372  case X86::BI__builtin_ia32_expandloaddi128_mask:
10373  case X86::BI__builtin_ia32_expandloaddi256_mask:
10374  case X86::BI__builtin_ia32_expandloaddi512_mask:
10375  case X86::BI__builtin_ia32_expandloadsi128_mask:
10376  case X86::BI__builtin_ia32_expandloadsi256_mask:
10377  case X86::BI__builtin_ia32_expandloadsi512_mask:
10378  case X86::BI__builtin_ia32_expandloadhi128_mask:
10379  case X86::BI__builtin_ia32_expandloadhi256_mask:
10380  case X86::BI__builtin_ia32_expandloadhi512_mask:
10381  case X86::BI__builtin_ia32_expandloadqi128_mask:
10382  case X86::BI__builtin_ia32_expandloadqi256_mask:
10383  case X86::BI__builtin_ia32_expandloadqi512_mask:
10384  return EmitX86ExpandLoad(*this, Ops);
10385 
10386  case X86::BI__builtin_ia32_compressstoredf128_mask:
10387  case X86::BI__builtin_ia32_compressstoredf256_mask:
10388  case X86::BI__builtin_ia32_compressstoredf512_mask:
10389  case X86::BI__builtin_ia32_compressstoresf128_mask:
10390  case X86::BI__builtin_ia32_compressstoresf256_mask:
10391  case X86::BI__builtin_ia32_compressstoresf512_mask:
10392  case X86::BI__builtin_ia32_compressstoredi128_mask:
10393  case X86::BI__builtin_ia32_compressstoredi256_mask:
10394  case X86::BI__builtin_ia32_compressstoredi512_mask:
10395  case X86::BI__builtin_ia32_compressstoresi128_mask:
10396  case X86::BI__builtin_ia32_compressstoresi256_mask:
10397  case X86::BI__builtin_ia32_compressstoresi512_mask:
10398  case X86::BI__builtin_ia32_compressstorehi128_mask:
10399  case X86::BI__builtin_ia32_compressstorehi256_mask:
10400  case X86::BI__builtin_ia32_compressstorehi512_mask:
10401  case X86::BI__builtin_ia32_compressstoreqi128_mask:
10402  case X86::BI__builtin_ia32_compressstoreqi256_mask:
10403  case X86::BI__builtin_ia32_compressstoreqi512_mask:
10404  return EmitX86CompressStore(*this, Ops);
10405 
10406  case X86::BI__builtin_ia32_expanddf128_mask:
10407  case X86::BI__builtin_ia32_expanddf256_mask:
10408  case X86::BI__builtin_ia32_expanddf512_mask:
10409  case X86::BI__builtin_ia32_expandsf128_mask:
10410  case X86::BI__builtin_ia32_expandsf256_mask:
10411  case X86::BI__builtin_ia32_expandsf512_mask:
10412  case X86::BI__builtin_ia32_expanddi128_mask:
10413  case X86::BI__builtin_ia32_expanddi256_mask:
10414  case X86::BI__builtin_ia32_expanddi512_mask:
10415  case X86::BI__builtin_ia32_expandsi128_mask:
10416  case X86::BI__builtin_ia32_expandsi256_mask:
10417  case X86::BI__builtin_ia32_expandsi512_mask:
10418  case X86::BI__builtin_ia32_expandhi128_mask:
10419  case X86::BI__builtin_ia32_expandhi256_mask:
10420  case X86::BI__builtin_ia32_expandhi512_mask:
10421  case X86::BI__builtin_ia32_expandqi128_mask:
10422  case X86::BI__builtin_ia32_expandqi256_mask:
10423  case X86::BI__builtin_ia32_expandqi512_mask:
10424  return EmitX86CompressExpand(*this, Ops, /*IsCompress*/false);
10425 
10426  case X86::BI__builtin_ia32_compressdf128_mask:
10427  case X86::BI__builtin_ia32_compressdf256_mask:
10428  case X86::BI__builtin_ia32_compressdf512_mask:
10429  case X86::BI__builtin_ia32_compresssf128_mask:
10430  case X86::BI__builtin_ia32_compresssf256_mask:
10431  case X86::BI__builtin_ia32_compresssf512_mask:
10432  case X86::BI__builtin_ia32_compressdi128_mask:
10433  case X86::BI__builtin_ia32_compressdi256_mask:
10434  case X86::BI__builtin_ia32_compressdi512_mask:
10435  case X86::BI__builtin_ia32_compresssi128_mask:
10436  case X86::BI__builtin_ia32_compresssi256_mask:
10437  case X86::BI__builtin_ia32_compresssi512_mask:
10438  case X86::BI__builtin_ia32_compresshi128_mask:
10439  case X86::BI__builtin_ia32_compresshi256_mask:
10440  case X86::BI__builtin_ia32_compresshi512_mask:
10441  case X86::BI__builtin_ia32_compressqi128_mask:
10442  case X86::BI__builtin_ia32_compressqi256_mask:
10443  case X86::BI__builtin_ia32_compressqi512_mask:
10444  return EmitX86CompressExpand(*this, Ops, /*IsCompress*/true);
10445 
10446  case X86::BI__builtin_ia32_gather3div2df:
10447  case X86::BI__builtin_ia32_gather3div2di:
10448  case X86::BI__builtin_ia32_gather3div4df:
10449  case X86::BI__builtin_ia32_gather3div4di:
10450  case X86::BI__builtin_ia32_gather3div4sf:
10451  case X86::BI__builtin_ia32_gather3div4si:
10452  case X86::BI__builtin_ia32_gather3div8sf:
10453  case X86::BI__builtin_ia32_gather3div8si:
10454  case X86::BI__builtin_ia32_gather3siv2df:
10455  case X86::BI__builtin_ia32_gather3siv2di:
10456  case X86::BI__builtin_ia32_gather3siv4df:
10457  case X86::BI__builtin_ia32_gather3siv4di:
10458  case X86::BI__builtin_ia32_gather3siv4sf:
10459  case X86::BI__builtin_ia32_gather3siv4si:
10460  case X86::BI__builtin_ia32_gather3siv8sf:
10461  case X86::BI__builtin_ia32_gather3siv8si:
10462  case X86::BI__builtin_ia32_gathersiv8df:
10463  case X86::BI__builtin_ia32_gathersiv16sf:
10464  case X86::BI__builtin_ia32_gatherdiv8df:
10465  case X86::BI__builtin_ia32_gatherdiv16sf:
10466  case X86::BI__builtin_ia32_gathersiv8di:
10467  case X86::BI__builtin_ia32_gathersiv16si:
10468  case X86::BI__builtin_ia32_gatherdiv8di:
10469  case X86::BI__builtin_ia32_gatherdiv16si: {
10470  Intrinsic::ID IID;
10471  switch (BuiltinID) {
10472  default: llvm_unreachable("Unexpected builtin");
10473  case X86::BI__builtin_ia32_gather3div2df:
10474  IID = Intrinsic::x86_avx512_mask_gather3div2_df;
10475  break;
10476  case X86::BI__builtin_ia32_gather3div2di:
10477  IID = Intrinsic::x86_avx512_mask_gather3div2_di;
10478  break;
10479  case X86::BI__builtin_ia32_gather3div4df:
10480  IID = Intrinsic::x86_avx512_mask_gather3div4_df;
10481  break;
10482  case X86::BI__builtin_ia32_gather3div4di:
10483  IID = Intrinsic::x86_avx512_mask_gather3div4_di;
10484  break;
10485  case X86::BI__builtin_ia32_gather3div4sf:
10486  IID = Intrinsic::x86_avx512_mask_gather3div4_sf;
10487  break;
10488  case X86::BI__builtin_ia32_gather3div4si:
10489  IID = Intrinsic::x86_avx512_mask_gather3div4_si;
10490  break;
10491  case X86::BI__builtin_ia32_gather3div8sf:
10492  IID = Intrinsic::x86_avx512_mask_gather3div8_sf;
10493  break;
10494  case X86::BI__builtin_ia32_gather3div8si:
10495  IID = Intrinsic::x86_avx512_mask_gather3div8_si;
10496  break;
10497  case X86::BI__builtin_ia32_gather3siv2df:
10498  IID = Intrinsic::x86_avx512_mask_gather3siv2_df;
10499  break;
10500  case X86::BI__builtin_ia32_gather3siv2di:
10501  IID = Intrinsic::x86_avx512_mask_gather3siv2_di;
10502  break;
10503  case X86::BI__builtin_ia32_gather3siv4df:
10504  IID = Intrinsic::x86_avx512_mask_gather3siv4_df;
10505  break;
10506  case X86::BI__builtin_ia32_gather3siv4di:
10507  IID = Intrinsic::x86_avx512_mask_gather3siv4_di;
10508  break;
10509  case X86::BI__builtin_ia32_gather3siv4sf:
10510  IID = Intrinsic::x86_avx512_mask_gather3siv4_sf;
10511  break;
10512  case X86::BI__builtin_ia32_gather3siv4si:
10513  IID = Intrinsic::x86_avx512_mask_gather3siv4_si;
10514  break;
10515  case X86::BI__builtin_ia32_gather3siv8sf:
10516  IID = Intrinsic::x86_avx512_mask_gather3siv8_sf;
10517  break;
10518  case X86::BI__builtin_ia32_gather3siv8si:
10519  IID = Intrinsic::x86_avx512_mask_gather3siv8_si;
10520  break;
10521  case X86::BI__builtin_ia32_gathersiv8df:
10522  IID = Intrinsic::x86_avx512_mask_gather_dpd_512;
10523  break;
10524  case X86::BI__builtin_ia32_gathersiv16sf:
10525  IID = Intrinsic::x86_avx512_mask_gather_dps_512;
10526  break;
10527  case X86::BI__builtin_ia32_gatherdiv8df:
10528  IID = Intrinsic::x86_avx512_mask_gather_qpd_512;
10529  break;
10530  case X86::BI__builtin_ia32_gatherdiv16sf:
10531  IID = Intrinsic::x86_avx512_mask_gather_qps_512;
10532  break;
10533  case X86::BI__builtin_ia32_gathersiv8di:
10534  IID = Intrinsic::x86_avx512_mask_gather_dpq_512;
10535  break;
10536  case X86::BI__builtin_ia32_gathersiv16si:
10537  IID = Intrinsic::x86_avx512_mask_gather_dpi_512;
10538  break;
10539  case X86::BI__builtin_ia32_gatherdiv8di:
10540  IID = Intrinsic::x86_avx512_mask_gather_qpq_512;
10541  break;
10542  case X86::BI__builtin_ia32_gatherdiv16si:
10543  IID = Intrinsic::x86_avx512_mask_gather_qpi_512;
10544  break;
10545  }
10546 
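  // Ops[0] (the passthrough) and Ops[2] (the indices) may have different
  // element counts; the mask in Ops[3] covers only the narrower of the two.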
10547  unsigned MinElts = std::min(Ops[0]->getType()->getVectorNumElements(),
10548  Ops[2]->getType()->getVectorNumElements());
10549  Ops[3] = getMaskVecValue(*this, Ops[3], MinElts);
10550  Function *Intr = CGM.getIntrinsic(IID);
10551  return Builder.CreateCall(Intr, Ops);
10552  }
10553 
10554  case X86::BI__builtin_ia32_scattersiv8df:
10555  case X86::BI__builtin_ia32_scattersiv16sf:
10556  case X86::BI__builtin_ia32_scatterdiv8df:
10557  case X86::BI__builtin_ia32_scatterdiv16sf:
10558  case X86::BI__builtin_ia32_scattersiv8di:
10559  case X86::BI__builtin_ia32_scattersiv16si:
10560  case X86::BI__builtin_ia32_scatterdiv8di:
10561  case X86::BI__builtin_ia32_scatterdiv16si:
10562  case X86::BI__builtin_ia32_scatterdiv2df:
10563  case X86::BI__builtin_ia32_scatterdiv2di:
10564  case X86::BI__builtin_ia32_scatterdiv4df:
10565  case X86::BI__builtin_ia32_scatterdiv4di:
10566  case X86::BI__builtin_ia32_scatterdiv4sf:
10567  case X86::BI__builtin_ia32_scatterdiv4si:
10568  case X86::BI__builtin_ia32_scatterdiv8sf:
10569  case X86::BI__builtin_ia32_scatterdiv8si:
10570  case X86::BI__builtin_ia32_scattersiv2df:
10571  case X86::BI__builtin_ia32_scattersiv2di:
10572  case X86::BI__builtin_ia32_scattersiv4df:
10573  case X86::BI__builtin_ia32_scattersiv4di:
10574  case X86::BI__builtin_ia32_scattersiv4sf:
10575  case X86::BI__builtin_ia32_scattersiv4si:
10576  case X86::BI__builtin_ia32_scattersiv8sf:
10577  case X86::BI__builtin_ia32_scattersiv8si: {
10578  Intrinsic::ID IID;
10579  switch (BuiltinID) {
10580  default: llvm_unreachable("Unexpected builtin");
10581  case X86::BI__builtin_ia32_scattersiv8df:
10582  IID = Intrinsic::x86_avx512_mask_scatter_dpd_512;
10583  break;
10584  case X86::BI__builtin_ia32_scattersiv16sf:
10585  IID = Intrinsic::x86_avx512_mask_scatter_dps_512;
10586  break;
10587  case X86::BI__builtin_ia32_scatterdiv8df:
10588  IID = Intrinsic::x86_avx512_mask_scatter_qpd_512;
10589  break;
10590  case X86::BI__builtin_ia32_scatterdiv16sf:
10591  IID = Intrinsic::x86_avx512_mask_scatter_qps_512;
10592  break;
10593  case X86::BI__builtin_ia32_scattersiv8di:
10594  IID = Intrinsic::x86_avx512_mask_scatter_dpq_512;
10595  break;
10596  case X86::BI__builtin_ia32_scattersiv16si:
10597  IID = Intrinsic::x86_avx512_mask_scatter_dpi_512;
10598  break;
10599  case X86::BI__builtin_ia32_scatterdiv8di:
10600  IID = Intrinsic::x86_avx512_mask_scatter_qpq_512;
10601  break;
10602  case X86::BI__builtin_ia32_scatterdiv16si:
10603  IID = Intrinsic::x86_avx512_mask_scatter_qpi_512;
10604  break;
10605  case X86::BI__builtin_ia32_scatterdiv2df:
10606  IID = Intrinsic::x86_avx512_mask_scatterdiv2_df;
10607  break;
10608  case X86::BI__builtin_ia32_scatterdiv2di:
10609  IID = Intrinsic::x86_avx512_mask_scatterdiv2_di;
10610  break;
10611  case X86::BI__builtin_ia32_scatterdiv4df:
10612  IID = Intrinsic::x86_avx512_mask_scatterdiv4_df;
10613  break;
10614  case X86::BI__builtin_ia32_scatterdiv4di:
10615  IID = Intrinsic::x86_avx512_mask_scatterdiv4_di;
10616  break;
10617  case X86::BI__builtin_ia32_scatterdiv4sf:
10618  IID = Intrinsic::x86_avx512_mask_scatterdiv4_sf;
10619  break;
10620  case X86::BI__builtin_ia32_scatterdiv4si:
10621  IID = Intrinsic::x86_avx512_mask_scatterdiv4_si;
10622  break;
10623  case X86::BI__builtin_ia32_scatterdiv8sf:
10624  IID = Intrinsic::x86_avx512_mask_scatterdiv8_sf;
10625  break;
10626  case X86::BI__builtin_ia32_scatterdiv8si:
10627  IID = Intrinsic::x86_avx512_mask_scatterdiv8_si;
10628  break;
10629  case X86::BI__builtin_ia32_scattersiv2df:
10630  IID = Intrinsic::x86_avx512_mask_scattersiv2_df;
10631  break;
10632  case X86::BI__builtin_ia32_scattersiv2di:
10633  IID = Intrinsic::x86_avx512_mask_scattersiv2_di;
10634  break;
10635  case X86::BI__builtin_ia32_scattersiv4df:
10636  IID = Intrinsic::x86_avx512_mask_scattersiv4_df;
10637  break;
10638  case X86::BI__builtin_ia32_scattersiv4di:
10639  IID = Intrinsic::x86_avx512_mask_scattersiv4_di;
10640  break;
10641  case X86::BI__builtin_ia32_scattersiv4sf:
10642  IID = Intrinsic::x86_avx512_mask_scattersiv4_sf;
10643  break;
10644  case X86::BI__builtin_ia32_scattersiv4si:
10645  IID = Intrinsic::x86_avx512_mask_scattersiv4_si;
10646  break;
10647  case X86::BI__builtin_ia32_scattersiv8sf:
10648  IID = Intrinsic::x86_avx512_mask_scattersiv8_sf;
10649  break;
10650  case X86::BI__builtin_ia32_scattersiv8si:
10651  IID = Intrinsic::x86_avx512_mask_scattersiv8_si;
10652  break;
10653  }
10654 
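  // Ops[2] (the indices) and Ops[3] (the data) may have different element
  // counts; the mask in Ops[1] covers only the narrower of the two.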
10655  unsigned MinElts = std::min(Ops[2]->getType()->getVectorNumElements(),
10656  Ops[3]->getType()->getVectorNumElements());
10657  Ops[1] = getMaskVecValue(*this, Ops[1], MinElts);
10658  Function *Intr = CGM.getIntrinsic(IID);
10659  return Builder.CreateCall(Intr, Ops);
10660  }
10661 
10662  case X86::BI__builtin_ia32_vextractf128_pd256:
10663  case X86::BI__builtin_ia32_vextractf128_ps256:
10664  case X86::BI__builtin_ia32_vextractf128_si256:
10665  case X86::BI__builtin_ia32_extract128i256:
10666  case X86::BI__builtin_ia32_extractf64x4_mask:
10667  case X86::BI__builtin_ia32_extractf32x4_mask:
10668  case X86::BI__builtin_ia32_extracti64x4_mask:
10669  case X86::BI__builtin_ia32_extracti32x4_mask:
10670  case X86::BI__builtin_ia32_extractf32x8_mask:
10671  case X86::BI__builtin_ia32_extracti32x8_mask:
10672  case X86::BI__builtin_ia32_extractf32x4_256_mask:
10673  case X86::BI__builtin_ia32_extracti32x4_256_mask:
10674  case X86::BI__builtin_ia32_extractf64x2_256_mask:
10675  case X86::BI__builtin_ia32_extracti64x2_256_mask:
10676  case X86::BI__builtin_ia32_extractf64x2_512_mask:
10677  case X86::BI__builtin_ia32_extracti64x2_512_mask: {
10678  llvm::Type *DstTy = ConvertType(E->getType());
10679  unsigned NumElts = DstTy->getVectorNumElements();
10680  unsigned SrcNumElts = Ops[0]->getType()->getVectorNumElements();
10681  unsigned SubVectors = SrcNumElts / NumElts;
10682  unsigned Index = cast<ConstantInt>(Ops[1])->getZExtValue();
10683  assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
10684  Index &= SubVectors - 1; // Remove any extra bits.
10685  Index *= NumElts;
10686 
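  // e.g. extracting subvector 1 of a v8f64 source into a v4f64 result gives
  // Index = 4, so Indices below becomes {4, 5, 6, 7}.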
10687  uint32_t Indices[16];
10688  for (unsigned i = 0; i != NumElts; ++i)
10689  Indices[i] = i + Index;
10690 
10691  Value *Res = Builder.CreateShuffleVector(Ops[0],
10692  UndefValue::get(Ops[0]->getType()),
10693  makeArrayRef(Indices, NumElts),
10694  "extract");
10695 
10696  if (Ops.size() == 4)
10697  Res = EmitX86Select(*this, Ops[3], Res, Ops[2]);
10698 
10699  return Res;
10700  }
10701  case X86::BI__builtin_ia32_vinsertf128_pd256:
10702  case X86::BI__builtin_ia32_vinsertf128_ps256:
10703  case X86::BI__builtin_ia32_vinsertf128_si256:
10704  case X86::BI__builtin_ia32_insert128i256:
10705  case X86::BI__builtin_ia32_insertf64x4:
10706  case X86::BI__builtin_ia32_insertf32x4:
10707  case X86::BI__builtin_ia32_inserti64x4:
10708  case X86::BI__builtin_ia32_inserti32x4:
10709  case X86::BI__builtin_ia32_insertf32x8:
10710  case X86::BI__builtin_ia32_inserti32x8:
10711  case X86::BI__builtin_ia32_insertf32x4_256:
10712  case X86::BI__builtin_ia32_inserti32x4_256:
10713  case X86::BI__builtin_ia32_insertf64x2_256:
10714  case X86::BI__builtin_ia32_inserti64x2_256:
10715  case X86::BI__builtin_ia32_insertf64x2_512:
10716  case X86::BI__builtin_ia32_inserti64x2_512: {
10717  unsigned DstNumElts = Ops[0]->getType()->getVectorNumElements();
10718  unsigned SrcNumElts = Ops[1]->getType()->getVectorNumElements();
10719  unsigned SubVectors = DstNumElts / SrcNumElts;
10720  unsigned Index = cast<ConstantInt>(Ops[2])->getZExtValue();
10721  assert(llvm::isPowerOf2_32(SubVectors) && "Expected power of 2 subvectors");
10722  Index &= SubVectors - 1; // Remove any extra bits.
10723  Index *= SrcNumElts;
10724 
10725  uint32_t Indices[16];
10726  for (unsigned i = 0; i != DstNumElts; ++i)
10727  Indices[i] = (i >= SrcNumElts) ? SrcNumElts + (i % SrcNumElts) : i;
10728 
10729  Value *Op1 = Builder.CreateShuffleVector(Ops[1],
10730  UndefValue::get(Ops[1]->getType()),
10731  makeArrayRef(Indices, DstNumElts),
10732  "widen");
10733 
10734  for (unsigned i = 0; i != DstNumElts; ++i) {
10735  if (i >= Index && i < (Index + SrcNumElts))
10736  Indices[i] = (i - Index) + DstNumElts;
10737  else
10738  Indices[i] = i;
10739  }
10740 
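  // e.g. inserting a v4f32 into a v8f32 at position 1 gives Index = 4 and
  // Indices = {0, 1, 2, 3, 8, 9, 10, 11}: the low half from Ops[0] and the
  // high half from the widened Ops[1].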
10741  return Builder.CreateShuffleVector(Ops[0], Op1,
10742  makeArrayRef(Indices, DstNumElts),
10743  "insert");
10744  }
10745  case X86::BI__builtin_ia32_pmovqd512_mask:
10746  case X86::BI__builtin_ia32_pmovwb512_mask: {
10747  Value *Res = Builder.CreateTrunc(Ops[0], Ops[1]->getType());
10748  return EmitX86Select(*this, Ops[2], Res, Ops[1]);
10749  }
10750  case X86::BI__builtin_ia32_pmovdb512_mask:
10751  case X86::BI__builtin_ia32_pmovdw512_mask:
10752  case X86::BI__builtin_ia32_pmovqw512_mask: {
10753  if (const auto *C = dyn_cast<Constant>(Ops[2]))
10754  if (C->isAllOnesValue())
10755  return Builder.CreateTrunc(Ops[0], Ops[1]->getType());
10756 
10757  Intrinsic::ID IID;
10758  switch (BuiltinID) {
10759  default: llvm_unreachable("Unsupported intrinsic!");
10760  case X86::BI__builtin_ia32_pmovdb512_mask:
10761  IID = Intrinsic::x86_avx512_mask_pmov_db_512;
10762  break;
10763  case X86::BI__builtin_ia32_pmovdw512_mask:
10764  IID = Intrinsic::x86_avx512_mask_pmov_dw_512;
10765  break;
10766  case X86::BI__builtin_ia32_pmovqw512_mask:
10767  IID = Intrinsic::x86_avx512_mask_pmov_qw_512;
10768  break;
10769  }
10770 
10771  Function *Intr = CGM.getIntrinsic(IID);
10772  return Builder.CreateCall(Intr, Ops);
10773  }
10774  case X86::BI__builtin_ia32_pblendw128:
10775  case X86::BI__builtin_ia32_blendpd:
10776  case X86::BI__builtin_ia32_blendps:
10777  case X86::BI__builtin_ia32_blendpd256:
10778  case X86::BI__builtin_ia32_blendps256:
10779  case X86::BI__builtin_ia32_pblendw256:
10780  case X86::BI__builtin_ia32_pblendd128:
10781  case X86::BI__builtin_ia32_pblendd256: {
10782  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10783  unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10784 
10785  uint32_t Indices[16];
10786  // If there are more than 8 elements, the immediate is used twice so make
10787  // sure we handle that.
10788  for (unsigned i = 0; i != NumElts; ++i)
10789  Indices[i] = ((Imm >> (i % 8)) & 0x1) ? NumElts + i : i;
10790 
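  // e.g. __builtin_ia32_blendps with Imm = 0b0101 gives Indices = {4, 1, 6, 3}:
  // elements whose immediate bit is set come from Ops[1].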
10791  return Builder.CreateShuffleVector(Ops[0], Ops[1],
10792  makeArrayRef(Indices, NumElts),
10793  "blend");
10794  }
10795  case X86::BI__builtin_ia32_pshuflw:
10796  case X86::BI__builtin_ia32_pshuflw256:
10797  case X86::BI__builtin_ia32_pshuflw512: {
10798  uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10799  llvm::Type *Ty = Ops[0]->getType();
10800  unsigned NumElts = Ty->getVectorNumElements();
10801 
10802  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
10803  Imm = (Imm & 0xff) * 0x01010101;
10804 
10805  uint32_t Indices[32];
10806  for (unsigned l = 0; l != NumElts; l += 8) {
10807  for (unsigned i = 0; i != 4; ++i) {
10808  Indices[l + i] = l + (Imm & 3);
10809  Imm >>= 2;
10810  }
10811  for (unsigned i = 4; i != 8; ++i)
10812  Indices[l + i] = l + i;
10813  }
10814 
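  // e.g. Imm = 0x1B reverses the low four words of each lane, so the 128-bit
  // form gets Indices = {3, 2, 1, 0, 4, 5, 6, 7}.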
10815  return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10816  makeArrayRef(Indices, NumElts),
10817  "pshuflw");
10818  }
10819  case X86::BI__builtin_ia32_pshufhw:
10820  case X86::BI__builtin_ia32_pshufhw256:
10821  case X86::BI__builtin_ia32_pshufhw512: {
10822  uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10823  llvm::Type *Ty = Ops[0]->getType();
10824  unsigned NumElts = Ty->getVectorNumElements();
10825 
10826  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
10827  Imm = (Imm & 0xff) * 0x01010101;
10828 
10829  uint32_t Indices[32];
10830  for (unsigned l = 0; l != NumElts; l += 8) {
10831  for (unsigned i = 0; i != 4; ++i)
10832  Indices[l + i] = l + i;
10833  for (unsigned i = 4; i != 8; ++i) {
10834  Indices[l + i] = l + 4 + (Imm & 3);
10835  Imm >>= 2;
10836  }
10837  }
10838 
10839  return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10840  makeArrayRef(Indices, NumElts),
10841  "pshufhw");
10842  }
10843  case X86::BI__builtin_ia32_pshufd:
10844  case X86::BI__builtin_ia32_pshufd256:
10845  case X86::BI__builtin_ia32_pshufd512:
10846  case X86::BI__builtin_ia32_vpermilpd:
10847  case X86::BI__builtin_ia32_vpermilps:
10848  case X86::BI__builtin_ia32_vpermilpd256:
10849  case X86::BI__builtin_ia32_vpermilps256:
10850  case X86::BI__builtin_ia32_vpermilpd512:
10851  case X86::BI__builtin_ia32_vpermilps512: {
10852  uint32_t Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10853  llvm::Type *Ty = Ops[0]->getType();
10854  unsigned NumElts = Ty->getVectorNumElements();
10855  unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
10856  unsigned NumLaneElts = NumElts / NumLanes;
10857 
10858  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
10859  Imm = (Imm & 0xff) * 0x01010101;
10860 
10861  uint32_t Indices[16];
10862  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
10863  for (unsigned i = 0; i != NumLaneElts; ++i) {
10864  Indices[i + l] = (Imm % NumLaneElts) + l;
10865  Imm /= NumLaneElts;
10866  }
10867  }
10868 
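  // e.g. __builtin_ia32_pshufd with Imm = 0xB1 swaps adjacent elements:
  // Indices = {1, 0, 3, 2} within each 128-bit lane.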
10869  return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10870  makeArrayRef(Indices, NumElts),
10871  "permil");
10872  }
10873  case X86::BI__builtin_ia32_shufpd:
10874  case X86::BI__builtin_ia32_shufpd256:
10875  case X86::BI__builtin_ia32_shufpd512:
10876  case X86::BI__builtin_ia32_shufps:
10877  case X86::BI__builtin_ia32_shufps256:
10878  case X86::BI__builtin_ia32_shufps512: {
10879  uint32_t Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10880  llvm::Type *Ty = Ops[0]->getType();
10881  unsigned NumElts = Ty->getVectorNumElements();
10882  unsigned NumLanes = Ty->getPrimitiveSizeInBits() / 128;
10883  unsigned NumLaneElts = NumElts / NumLanes;
10884 
10885  // Splat the 8 bits of the immediate 4 times to help the loop wrap around.
10886  Imm = (Imm & 0xff) * 0x01010101;
10887 
10888  uint32_t Indices[16];
10889  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
10890  for (unsigned i = 0; i != NumLaneElts; ++i) {
10891  unsigned Index = Imm % NumLaneElts;
10892  Imm /= NumLaneElts;
10893  if (i >= (NumLaneElts / 2))
10894  Index += NumElts;
10895  Indices[l + i] = l + Index;
10896  }
10897  }
10898 
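  // e.g. __builtin_ia32_shufps with Imm = 0x4E gives Indices = {2, 3, 4, 5}:
  // the high half of Ops[0] followed by the low half of Ops[1].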
10899  return Builder.CreateShuffleVector(Ops[0], Ops[1],
10900  makeArrayRef(Indices, NumElts),
10901  "shufp");
10902  }
10903  case X86::BI__builtin_ia32_permdi256:
10904  case X86::BI__builtin_ia32_permdf256:
10905  case X86::BI__builtin_ia32_permdi512:
10906  case X86::BI__builtin_ia32_permdf512: {
10907  unsigned Imm = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
10908  llvm::Type *Ty = Ops[0]->getType();
10909  unsigned NumElts = Ty->getVectorNumElements();
10910 
10911  // These intrinsics operate on 256-bit lanes of four 64-bit elements.
10912  uint32_t Indices[8];
10913  for (unsigned l = 0; l != NumElts; l += 4)
10914  for (unsigned i = 0; i != 4; ++i)
10915  Indices[l + i] = l + ((Imm >> (2 * i)) & 0x3);
10916 
10917  return Builder.CreateShuffleVector(Ops[0], UndefValue::get(Ty),
10918  makeArrayRef(Indices, NumElts),
10919  "perm");
10920  }
10921  case X86::BI__builtin_ia32_palignr128:
10922  case X86::BI__builtin_ia32_palignr256:
10923  case X86::BI__builtin_ia32_palignr512: {
10924  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
10925 
10926  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10927  assert(NumElts % 16 == 0);
10928 
10929  // If palignr is shifting the pair of vectors more than the size of two
10930  // lanes, emit zero.
10931  if (ShiftVal >= 32)
10932  return llvm::Constant::getNullValue(ConvertType(E->getType()));
10933 
10934  // If palignr is shifting the pair of input vectors more than one lane,
10935  // but less than two lanes, convert to shifting in zeroes.
10936  if (ShiftVal > 16) {
10937  ShiftVal -= 16;
10938  Ops[1] = Ops[0];
10939  Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
10940  }
10941 
10942  uint32_t Indices[64];
10943  // 256-bit palignr operates on 128-bit lanes so we need to handle that
10944  for (unsigned l = 0; l != NumElts; l += 16) {
10945  for (unsigned i = 0; i != 16; ++i) {
10946  unsigned Idx = ShiftVal + i;
10947  if (Idx >= 16)
10948  Idx += NumElts - 16; // End of lane, switch operand.
10949  Indices[l + i] = Idx + l;
10950  }
10951  }
10952 
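  // e.g. with ShiftVal = 4 in the 128-bit form, Indices = {4..15, 16..19}:
  // bytes 4-15 of Ops[1] followed by bytes 0-3 of Ops[0].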
10953  return Builder.CreateShuffleVector(Ops[1], Ops[0],
10954  makeArrayRef(Indices, NumElts),
10955  "palignr");
10956  }
10957  case X86::BI__builtin_ia32_alignd128:
10958  case X86::BI__builtin_ia32_alignd256:
10959  case X86::BI__builtin_ia32_alignd512:
10960  case X86::BI__builtin_ia32_alignq128:
10961  case X86::BI__builtin_ia32_alignq256:
10962  case X86::BI__builtin_ia32_alignq512: {
10963  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
10964  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0xff;
10965 
10966  // Mask the shift amount to the width of two vectors.
10967  ShiftVal &= (2 * NumElts) - 1;
10968 
10969  uint32_t Indices[16];
10970  for (unsigned i = 0; i != NumElts; ++i)
10971  Indices[i] = i + ShiftVal;
10972 
10973  return Builder.CreateShuffleVector(Ops[1], Ops[0],
10974  makeArrayRef(Indices, NumElts),
10975  "valign");
10976  }
10977  case X86::BI__builtin_ia32_shuf_f32x4_256:
10978  case X86::BI__builtin_ia32_shuf_f64x2_256:
10979  case X86::BI__builtin_ia32_shuf_i32x4_256:
10980  case X86::BI__builtin_ia32_shuf_i64x2_256:
10981  case X86::BI__builtin_ia32_shuf_f32x4:
10982  case X86::BI__builtin_ia32_shuf_f64x2:
10983  case X86::BI__builtin_ia32_shuf_i32x4:
10984  case X86::BI__builtin_ia32_shuf_i64x2: {
10985  unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
10986  llvm::Type *Ty = Ops[0]->getType();
10987  unsigned NumElts = Ty->getVectorNumElements();
10988  unsigned NumLanes = Ty->getPrimitiveSizeInBits() == 512 ? 4 : 2;
10989  unsigned NumLaneElts = NumElts / NumLanes;
10990 
10991  uint32_t Indices[16];
10992  for (unsigned l = 0; l != NumElts; l += NumLaneElts) {
10993  unsigned Index = (Imm % NumLanes) * NumLaneElts;
10994  Imm /= NumLanes; // Discard the bits we just used.
10995  if (l >= (NumElts / 2))
10996  Index += NumElts; // Switch to other source.
10997  for (unsigned i = 0; i != NumLaneElts; ++i) {
10998  Indices[l + i] = Index + i;
10999  }
11000  }
11001 
11002  return Builder.CreateShuffleVector(Ops[0], Ops[1],
11003  makeArrayRef(Indices, NumElts),
11004  "shuf");
11005  }
11006 
11007  case X86::BI__builtin_ia32_vperm2f128_pd256:
11008  case X86::BI__builtin_ia32_vperm2f128_ps256:
11009  case X86::BI__builtin_ia32_vperm2f128_si256:
11010  case X86::BI__builtin_ia32_permti256: {
11011  unsigned Imm = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
11012  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
11013 
11014  // This takes a very simple approach since there are two lanes and a
11015  // shuffle can have 2 inputs. So we reserve the first input for the first
11016  // lane and the second input for the second lane. This may result in
11017  // duplicate sources, but this can be dealt with in the backend.
11018 
11019  Value *OutOps[2];
11020  uint32_t Indices[8];
11021  for (unsigned l = 0; l != 2; ++l) {
11022  // Determine the source for this lane.
11023  if (Imm & (1 << ((l * 4) + 3)))
11024  OutOps[l] = llvm::ConstantAggregateZero::get(Ops[0]->getType());
11025  else if (Imm & (1 << ((l * 4) + 1)))
11026  OutOps[l] = Ops[1];
11027  else
11028  OutOps[l] = Ops[0];
11029 
11030  for (unsigned i = 0; i != NumElts/2; ++i) {
11031  // Start with ith element of the source for this lane.
11032  unsigned Idx = (l * NumElts) + i;
11033  // If bit 0 of the immediate half is set, switch to the high half of
11034  // the source.
11035  if (Imm & (1 << (l * 4)))
11036  Idx += NumElts/2;
11037  Indices[(l * (NumElts/2)) + i] = Idx;
11038  }
11039  }
11040 
11041  return Builder.CreateShuffleVector(OutOps[0], OutOps[1],
11042  makeArrayRef(Indices, NumElts),
11043  "vperm");
11044  }
11045 
11046  case X86::BI__builtin_ia32_pslldqi128_byteshift:
11047  case X86::BI__builtin_ia32_pslldqi256_byteshift:
11048  case X86::BI__builtin_ia32_pslldqi512_byteshift: {
11049  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
11050  llvm::Type *ResultType = Ops[0]->getType();
11051  // Builtin type is vXi64 so multiply by 8 to get bytes.
11052  unsigned NumElts = ResultType->getVectorNumElements() * 8;
11053 
11054  // If pslldq is shifting the vector more than 15 bytes, emit zero.
11055  if (ShiftVal >= 16)
11056  return llvm::Constant::getNullValue(ResultType);
11057 
11058  uint32_t Indices[64];
11059  // 256/512-bit pslldq operates on 128-bit lanes so we need to handle that
11060  for (unsigned l = 0; l != NumElts; l += 16) {
11061  for (unsigned i = 0; i != 16; ++i) {
11062  unsigned Idx = NumElts + i - ShiftVal;
11063  if (Idx < NumElts) Idx -= NumElts - 16; // end of lane, switch operand.
11064  Indices[l + i] = Idx + l;
11065  }
11066  }
11067 
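  // e.g. with ShiftVal = 3 each 128-bit lane of the result becomes
  // { 0, 0, 0, src[0], ..., src[12] }.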
11068  llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
11069  Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
11070  Value *Zero = llvm::Constant::getNullValue(VecTy);
11071  Value *SV = Builder.CreateShuffleVector(Zero, Cast,
11072  makeArrayRef(Indices, NumElts),
11073  "pslldq");
11074  return Builder.CreateBitCast(SV, Ops[0]->getType(), "cast");
11075  }
11076  case X86::BI__builtin_ia32_psrldqi128_byteshift:
11077  case X86::BI__builtin_ia32_psrldqi256_byteshift:
11078  case X86::BI__builtin_ia32_psrldqi512_byteshift: {
11079  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
11080  llvm::Type *ResultType = Ops[0]->getType();
11081  // Builtin type is vXi64 so multiply by 8 to get bytes.
11082  unsigned NumElts = ResultType->getVectorNumElements() * 8;
11083 
11084  // If psrldq is shifting the vector more than 15 bytes, emit zero.
11085  if (ShiftVal >= 16)
11086  return llvm::Constant::getNullValue(ResultType);
11087 
11088  uint32_t Indices[64];
11089  // 256/512-bit psrldq operates on 128-bit lanes so we need to handle that
11090  for (unsigned l = 0; l != NumElts; l += 16) {
11091  for (unsigned i = 0; i != 16; ++i) {
11092  unsigned Idx = i + ShiftVal;
11093  if (Idx >= 16) Idx += NumElts - 16; // end of lane, switch operand.
11094  Indices[l + i] = Idx + l;
11095  }
11096  }
11097 
11098  llvm::Type *VecTy = llvm::VectorType::get(Int8Ty, NumElts);
11099  Value *Cast = Builder.CreateBitCast(Ops[0], VecTy, "cast");
11100  Value *Zero = llvm::Constant::getNullValue(VecTy);
11101  Value *SV = Builder.CreateShuffleVector(Cast, Zero,
11102  makeArrayRef(Indices, NumElts),
11103  "psrldq");
11104  return Builder.CreateBitCast(SV, ResultType, "cast");
11105  }
11106  case X86::BI__builtin_ia32_kshiftliqi:
11107  case X86::BI__builtin_ia32_kshiftlihi:
11108  case X86::BI__builtin_ia32_kshiftlisi:
11109  case X86::BI__builtin_ia32_kshiftlidi: {
11110  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
11111  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
11112 
11113  if (ShiftVal >= NumElts)
11114  return llvm::Constant::getNullValue(Ops[0]->getType());
11115 
11116  Value *In = getMaskVecValue(*this, Ops[0], NumElts);
11117 
11118  uint32_t Indices[64];
11119  for (unsigned i = 0; i != NumElts; ++i)
11120  Indices[i] = NumElts + i - ShiftVal;
11121 
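  // Element i of the shuffle result is 0 for i < ShiftVal and In[i - ShiftVal]
  // otherwise, i.e. a left shift of the mask bits.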
11122  Value *Zero = llvm::Constant::getNullValue(In->getType());
11123  Value *SV = Builder.CreateShuffleVector(Zero, In,
11124  makeArrayRef(Indices, NumElts),
11125  "kshiftl");
11126  return Builder.CreateBitCast(SV, Ops[0]->getType());
11127  }
11128  case X86::BI__builtin_ia32_kshiftriqi:
11129  case X86::BI__builtin_ia32_kshiftrihi:
11130  case X86::BI__builtin_ia32_kshiftrisi:
11131  case X86::BI__builtin_ia32_kshiftridi: {
11132  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[1])->getZExtValue() & 0xff;
11133  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
11134 
11135  if (ShiftVal >= NumElts)
11136  return llvm::Constant::getNullValue(Ops[0]->getType());
11137 
11138  Value *In = getMaskVecValue(*this, Ops[0], NumElts);
11139 
11140  uint32_t Indices[64];
11141  for (unsigned i = 0; i != NumElts; ++i)
11142  Indices[i] = i + ShiftVal;
11143 
11144  Value *Zero = llvm::Constant::getNullValue(In->getType());
11145  Value *SV = Builder.CreateShuffleVector(In, Zero,
11146  makeArrayRef(Indices, NumElts),
11147  "kshiftr");
11148  return Builder.CreateBitCast(SV, Ops[0]->getType());
11149  }
11150  case X86::BI__builtin_ia32_movnti:
11151  case X86::BI__builtin_ia32_movnti64:
11152  case X86::BI__builtin_ia32_movntsd:
11153  case X86::BI__builtin_ia32_movntss: {
11154  llvm::MDNode *Node = llvm::MDNode::get(
11155  getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
11156 
11157  Value *Ptr = Ops[0];
11158  Value *Src = Ops[1];
11159 
11160  // Extract the 0th element of the source vector.
11161  if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
11162  BuiltinID == X86::BI__builtin_ia32_movntss)
11163  Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
11164 
11165  // Convert the type of the pointer to a pointer to the stored type.
11166  Value *BC = Builder.CreateBitCast(
11167  Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
11168 
11169  // Unaligned nontemporal store of the scalar value.
11170  StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
11171  SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
11172  SI->setAlignment(1);
11173  return SI;
11174  }
11175  // Rotate is a special case of funnel shift - the first two args are the same.
11176  case X86::BI__builtin_ia32_vprotb:
11177  case X86::BI__builtin_ia32_vprotw:
11178  case X86::BI__builtin_ia32_vprotd:
11179  case X86::BI__builtin_ia32_vprotq:
11180  case X86::BI__builtin_ia32_vprotbi:
11181  case X86::BI__builtin_ia32_vprotwi:
11182  case X86::BI__builtin_ia32_vprotdi:
11183  case X86::BI__builtin_ia32_vprotqi:
11184  case X86::BI__builtin_ia32_prold128:
11185  case X86::BI__builtin_ia32_prold256:
11186  case X86::BI__builtin_ia32_prold512:
11187  case X86::BI__builtin_ia32_prolq128:
11188  case X86::BI__builtin_ia32_prolq256:
11189  case X86::BI__builtin_ia32_prolq512:
11190  case X86::BI__builtin_ia32_prolvd128:
11191  case X86::BI__builtin_ia32_prolvd256:
11192  case X86::BI__builtin_ia32_prolvd512:
11193  case X86::BI__builtin_ia32_prolvq128:
11194  case X86::BI__builtin_ia32_prolvq256:
11195  case X86::BI__builtin_ia32_prolvq512:
11196  return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], false);
11197  case X86::BI__builtin_ia32_prord128:
11198  case X86::BI__builtin_ia32_prord256:
11199  case X86::BI__builtin_ia32_prord512:
11200  case X86::BI__builtin_ia32_prorq128:
11201  case X86::BI__builtin_ia32_prorq256:
11202  case X86::BI__builtin_ia32_prorq512:
11203  case X86::BI__builtin_ia32_prorvd128:
11204  case X86::BI__builtin_ia32_prorvd256:
11205  case X86::BI__builtin_ia32_prorvd512:
11206  case X86::BI__builtin_ia32_prorvq128:
11207  case X86::BI__builtin_ia32_prorvq256:
11208  case X86::BI__builtin_ia32_prorvq512:
11209  return EmitX86FunnelShift(*this, Ops[0], Ops[0], Ops[1], true);
11210  case X86::BI__builtin_ia32_selectb_128:
11211  case X86::BI__builtin_ia32_selectb_256:
11212  case X86::BI__builtin_ia32_selectb_512:
11213  case X86::BI__builtin_ia32_selectw_128:
11214  case X86::BI__builtin_ia32_selectw_256:
11215  case X86::BI__builtin_ia32_selectw_512:
11216  case X86::BI__builtin_ia32_selectd_128:
11217  case X86::BI__builtin_ia32_selectd_256:
11218  case X86::BI__builtin_ia32_selectd_512:
11219  case X86::BI__builtin_ia32_selectq_128:
11220  case X86::BI__builtin_ia32_selectq_256:
11221  case X86::BI__builtin_ia32_selectq_512:
11222  case X86::BI__builtin_ia32_selectps_128:
11223  case X86::BI__builtin_ia32_selectps_256:
11224  case X86::BI__builtin_ia32_selectps_512:
11225  case X86::BI__builtin_ia32_selectpd_128:
11226  case X86::BI__builtin_ia32_selectpd_256:
11227  case X86::BI__builtin_ia32_selectpd_512:
11228  return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
11229  case X86::BI__builtin_ia32_selectss_128:
11230  case X86::BI__builtin_ia32_selectsd_128: {
11231  Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
11232  Value *B = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
11233  A = EmitX86ScalarSelect(*this, Ops[0], A, B);
11234  return Builder.CreateInsertElement(Ops[1], A, (uint64_t)0);
11235  }
11236  case X86::BI__builtin_ia32_cmpb128_mask:
11237  case X86::BI__builtin_ia32_cmpb256_mask:
11238  case X86::BI__builtin_ia32_cmpb512_mask:
11239  case X86::BI__builtin_ia32_cmpw128_mask:
11240  case X86::BI__builtin_ia32_cmpw256_mask:
11241  case X86::BI__builtin_ia32_cmpw512_mask:
11242  case X86::BI__builtin_ia32_cmpd128_mask:
11243  case X86::BI__builtin_ia32_cmpd256_mask:
11244  case X86::BI__builtin_ia32_cmpd512_mask:
11245  case X86::BI__builtin_ia32_cmpq128_mask:
11246  case X86::BI__builtin_ia32_cmpq256_mask:
11247  case X86::BI__builtin_ia32_cmpq512_mask: {
11248  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
11249  return EmitX86MaskedCompare(*this, CC, true, Ops);
11250  }
11251  case X86::BI__builtin_ia32_ucmpb128_mask:
11252  case X86::BI__builtin_ia32_ucmpb256_mask:
11253  case X86::BI__builtin_ia32_ucmpb512_mask:
11254  case X86::BI__builtin_ia32_ucmpw128_mask:
11255  case X86::BI__builtin_ia32_ucmpw256_mask:
11256  case X86::BI__builtin_ia32_ucmpw512_mask:
11257  case X86::BI__builtin_ia32_ucmpd128_mask:
11258  case X86::BI__builtin_ia32_ucmpd256_mask:
11259  case X86::BI__builtin_ia32_ucmpd512_mask:
11260  case X86::BI__builtin_ia32_ucmpq128_mask:
11261  case X86::BI__builtin_ia32_ucmpq256_mask:
11262  case X86::BI__builtin_ia32_ucmpq512_mask: {
11263  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
11264  return EmitX86MaskedCompare(*this, CC, false, Ops);
11265  }
11266  case X86::BI__builtin_ia32_vpcomb:
11267  case X86::BI__builtin_ia32_vpcomw:
11268  case X86::BI__builtin_ia32_vpcomd:
11269  case X86::BI__builtin_ia32_vpcomq:
11270  return EmitX86vpcom(*this, Ops, true);
11271  case X86::BI__builtin_ia32_vpcomub:
11272  case X86::BI__builtin_ia32_vpcomuw:
11273  case X86::BI__builtin_ia32_vpcomud:
11274  case X86::BI__builtin_ia32_vpcomuq:
11275  return EmitX86vpcom(*this, Ops, false);
11276 
11277  case X86::BI__builtin_ia32_kortestcqi:
11278  case X86::BI__builtin_ia32_kortestchi:
11279  case X86::BI__builtin_ia32_kortestcsi:
11280  case X86::BI__builtin_ia32_kortestcdi: {
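  // kortestc returns 1 iff the OR of the two masks is all ones (the CF
  // result of KORTEST).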
11281  Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
11282  Value *C = llvm::Constant::getAllOnesValue(Ops[0]->getType());
11283  Value *Cmp = Builder.CreateICmpEQ(Or, C);
11284  return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
11285  }
11286  case X86::BI__builtin_ia32_kortestzqi:
11287  case X86::BI__builtin_ia32_kortestzhi:
11288  case X86::BI__builtin_ia32_kortestzsi:
11289  case X86::BI__builtin_ia32_kortestzdi: {
11290  Value *Or = EmitX86MaskLogic(*this, Instruction::Or, Ops);
11291  Value *C = llvm::Constant::getNullValue(Ops[0]->getType());
11292  Value *Cmp = Builder.CreateICmpEQ(Or, C);
11293  return Builder.CreateZExt(Cmp, ConvertType(E->getType()));
11294  }
11295 
11296  case X86::BI__builtin_ia32_ktestcqi:
11297  case X86::BI__builtin_ia32_ktestzqi:
11298  case X86::BI__builtin_ia32_ktestchi:
11299  case X86::BI__builtin_ia32_ktestzhi:
11300  case X86::BI__builtin_ia32_ktestcsi:
11301  case X86::BI__builtin_ia32_ktestzsi:
11302  case X86::BI__builtin_ia32_ktestcdi:
11303  case X86::BI__builtin_ia32_ktestzdi: {
11304  Intrinsic::ID IID;
11305  switch (BuiltinID) {
11306  default: llvm_unreachable("Unsupported intrinsic!");
11307  case X86::BI__builtin_ia32_ktestcqi:
11308  IID = Intrinsic::x86_avx512_ktestc_b;
11309  break;
11310  case X86::BI__builtin_ia32_ktestzqi:
11311  IID = Intrinsic::x86_avx512_ktestz_b;
11312  break;
11313  case X86::BI__builtin_ia32_ktestchi:
11314  IID = Intrinsic::x86_avx512_ktestc_w;
11315  break;
11316  case X86::BI__builtin_ia32_ktestzhi:
11317  IID = Intrinsic::x86_avx512_ktestz_w;
11318  break;
11319  case X86::BI__builtin_ia32_ktestcsi:
11320  IID = Intrinsic::x86_avx512_ktestc_d;
11321  break;
11322  case X86::BI__builtin_ia32_ktestzsi:
11323  IID = Intrinsic::x86_avx512_ktestz_d;
11324  break;
11325  case X86::BI__builtin_ia32_ktestcdi:
11326  IID = Intrinsic::x86_avx512_ktestc_q;
11327  break;
11328  case X86::BI__builtin_ia32_ktestzdi:
11329  IID = Intrinsic::x86_avx512_ktestz_q;
11330  break;
11331  }
11332 
11333  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
11334  Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
11335  Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
11336  Function *Intr = CGM.getIntrinsic(IID);
11337  return Builder.CreateCall(Intr, {LHS, RHS});
11338  }
11339 
11340  case X86::BI__builtin_ia32_kaddqi:
11341  case X86::BI__builtin_ia32_kaddhi:
11342  case X86::BI__builtin_ia32_kaddsi:
11343  case X86::BI__builtin_ia32_kadddi: {
11344  Intrinsic::ID IID;
11345  switch (BuiltinID) {
11346  default: llvm_unreachable("Unsupported intrinsic!");
11347  case X86::BI__builtin_ia32_kaddqi:
11348  IID = Intrinsic::x86_avx512_kadd_b;
11349  break;
11350  case X86::BI__builtin_ia32_kaddhi:
11351  IID = Intrinsic::x86_avx512_kadd_w;
11352  break;
11353  case X86::BI__builtin_ia32_kaddsi:
11354  IID = Intrinsic::x86_avx512_kadd_d;
11355  break;
11356  case X86::BI__builtin_ia32_kadddi:
11357  IID = Intrinsic::x86_avx512_kadd_q;
11358  break;
11359  }
11360 
11361  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
11362  Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
11363  Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
11364  Function *Intr = CGM.getIntrinsic(IID);
11365  Value *Res = Builder.CreateCall(Intr, {LHS, RHS});
11366  return Builder.CreateBitCast(Res, Ops[0]->getType());
11367  }
11368  case X86::BI__builtin_ia32_kandqi:
11369  case X86::BI__builtin_ia32_kandhi:
11370  case X86::BI__builtin_ia32_kandsi:
11371  case X86::BI__builtin_ia32_kanddi:
11372  return EmitX86MaskLogic(*this, Instruction::And, Ops);
11373  case X86::BI__builtin_ia32_kandnqi:
11374  case X86::BI__builtin_ia32_kandnhi:
11375  case X86::BI__builtin_ia32_kandnsi:
11376  case X86::BI__builtin_ia32_kandndi:
11377  return EmitX86MaskLogic(*this, Instruction::And, Ops, true);
11378  case X86::BI__builtin_ia32_korqi:
11379  case X86::BI__builtin_ia32_korhi:
11380  case X86::BI__builtin_ia32_korsi:
11381  case X86::BI__builtin_ia32_kordi:
11382  return EmitX86MaskLogic(*this, Instruction::Or, Ops);
11383  case X86::BI__builtin_ia32_kxnorqi:
11384  case X86::BI__builtin_ia32_kxnorhi:
11385  case X86::BI__builtin_ia32_kxnorsi:
11386  case X86::BI__builtin_ia32_kxnordi:
11387  return EmitX86MaskLogic(*this, Instruction::Xor, Ops, true);
11388  case X86::BI__builtin_ia32_kxorqi:
11389  case X86::BI__builtin_ia32_kxorhi:
11390  case X86::BI__builtin_ia32_kxorsi:
11391  case X86::BI__builtin_ia32_kxordi:
11392  return EmitX86MaskLogic(*this, Instruction::Xor, Ops);
11393  case X86::BI__builtin_ia32_knotqi:
11394  case X86::BI__builtin_ia32_knothi:
11395  case X86::BI__builtin_ia32_knotsi:
11396  case X86::BI__builtin_ia32_knotdi: {
11397  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
11398  Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
11399  return Builder.CreateBitCast(Builder.CreateNot(Res),
11400  Ops[0]->getType());
11401  }
11402  case X86::BI__builtin_ia32_kmovb:
11403  case X86::BI__builtin_ia32_kmovw:
11404  case X86::BI__builtin_ia32_kmovd:
11405  case X86::BI__builtin_ia32_kmovq: {
11406  // Bitcast to vXi1 type and then back to integer. This gets the mask
11407  // register type into the IR, but might be optimized out depending on
11408  // what's around it.
11409  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
11410  Value *Res = getMaskVecValue(*this, Ops[0], NumElts);
11411  return Builder.CreateBitCast(Res, Ops[0]->getType());
11412  }
11413 
11414  case X86::BI__builtin_ia32_kunpckdi:
11415  case X86::BI__builtin_ia32_kunpcksi:
11416  case X86::BI__builtin_ia32_kunpckhi: {
11417  unsigned NumElts = Ops[0]->getType()->getIntegerBitWidth();
11418  Value *LHS = getMaskVecValue(*this, Ops[0], NumElts);
11419  Value *RHS = getMaskVecValue(*this, Ops[1], NumElts);
11420  uint32_t Indices[64];
11421  for (unsigned i = 0; i != NumElts; ++i)
11422  Indices[i] = i;
11423 
11424  // First extract half of each vector. This gives better codegen than
11425  // doing it in a single shuffle.
11426  LHS = Builder.CreateShuffleVector(LHS, LHS,
11427  makeArrayRef(Indices, NumElts / 2));
11428  RHS = Builder.CreateShuffleVector(RHS, RHS,
11429  makeArrayRef(Indices, NumElts / 2));
11430  // Concat the vectors.
11431  // NOTE: Operands are swapped to match the intrinsic definition.
11432  Value *Res = Builder.CreateShuffleVector(RHS, LHS,
11433  makeArrayRef(Indices, NumElts));
11434  return Builder.CreateBitCast(Res, Ops[0]->getType());
11435  }
11436 
11437  case X86::BI__builtin_ia32_vplzcntd_128:
11438  case X86::BI__builtin_ia32_vplzcntd_256:
11439  case X86::BI__builtin_ia32_vplzcntd_512:
11440  case X86::BI__builtin_ia32_vplzcntq_128:
11441  case X86::BI__builtin_ia32_vplzcntq_256:
11442  case X86::BI__builtin_ia32_vplzcntq_512: {
11443  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
11444  return Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)});
11445  }
11446  case X86::BI__builtin_ia32_sqrtss:
11447  case X86::BI__builtin_ia32_sqrtsd: {
11448  Value *A = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
11449  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
11450  A = Builder.CreateCall(F, {A});
11451  return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
11452  }
11453  case X86::BI__builtin_ia32_sqrtsd_round_mask:
11454  case X86::BI__builtin_ia32_sqrtss_round_mask: {
11455  unsigned CC = cast<llvm::ConstantInt>(Ops[4])->getZExtValue();
11456  // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
11457  // otherwise keep the intrinsic.
11458  if (CC != 4) {
11459  Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtsd_round_mask ?
11460  Intrinsic::x86_avx512_mask_sqrt_sd :
11461  Intrinsic::x86_avx512_mask_sqrt_ss;
11462  return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
11463  }
11464  Value *A = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
11465  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, A->getType());
11466  A = Builder.CreateCall(F, A);
11467  Value *Src = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
11468  A = EmitX86ScalarSelect(*this, Ops[3], A, Src);
11469  return Builder.CreateInsertElement(Ops[0], A, (uint64_t)0);
11470  }
11471  case X86::BI__builtin_ia32_sqrtpd256:
11472  case X86::BI__builtin_ia32_sqrtpd:
11473  case X86::BI__builtin_ia32_sqrtps256:
11474  case X86::BI__builtin_ia32_sqrtps:
11475  case X86::BI__builtin_ia32_sqrtps512:
11476  case X86::BI__builtin_ia32_sqrtpd512: {
11477  if (Ops.size() == 2) {
11478  unsigned CC = cast<llvm::ConstantInt>(Ops[1])->getZExtValue();
11479  // Support only if the rounding mode is 4 (AKA CUR_DIRECTION),
11480  // otherwise keep the intrinsic.
11481  if (CC != 4) {
11482  Intrinsic::ID IID = BuiltinID == X86::BI__builtin_ia32_sqrtps512 ?
11483  Intrinsic::x86_avx512_sqrt_ps_512 :
11484  Intrinsic::x86_avx512_sqrt_pd_512;
11485  return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
11486  }
11487  }
11488  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, Ops[0]->getType());
11489  return Builder.CreateCall(F, Ops[0]);
11490  }
11491  case X86::BI__builtin_ia32_pabsb128:
11492  case X86::BI__builtin_ia32_pabsw128:
11493  case X86::BI__builtin_ia32_pabsd128:
11494  case X86::BI__builtin_ia32_pabsb256:
11495  case X86::BI__builtin_ia32_pabsw256:
11496  case X86::BI__builtin_ia32_pabsd256:
11497  case X86::BI__builtin_ia32_pabsq128:
11498  case X86::BI__builtin_ia32_pabsq256:
11499  case X86::BI__builtin_ia32_pabsb512:
11500  case X86::BI__builtin_ia32_pabsw512:
11501  case X86::BI__builtin_ia32_pabsd512:
11502  case X86::BI__builtin_ia32_pabsq512:
11503  return EmitX86Abs(*this, Ops);
11504 
11505  case X86::BI__builtin_ia32_pmaxsb128:
11506  case X86::BI__builtin_ia32_pmaxsw128:
11507  case X86::BI__builtin_ia32_pmaxsd128:
11508  case X86::BI__builtin_ia32_pmaxsq128:
11509  case X86::BI__builtin_ia32_pmaxsb256:
11510  case X86::BI__builtin_ia32_pmaxsw256:
11511  case X86::BI__builtin_ia32_pmaxsd256:
11512  case X86::BI__builtin_ia32_pmaxsq256:
11513  case X86::BI__builtin_ia32_pmaxsb512:
11514  case X86::BI__builtin_ia32_pmaxsw512:
11515  case X86::BI__builtin_ia32_pmaxsd512:
11516  case X86::BI__builtin_ia32_pmaxsq512:
11517  return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
11518  case X86::BI__builtin_ia32_pmaxub128:
11519  case X86::BI__builtin_ia32_pmaxuw128:
11520  case X86::BI__builtin_ia32_pmaxud128:
11521  case X86::BI__builtin_ia32_pmaxuq128:
11522  case X86::BI__builtin_ia32_pmaxub256:
11523  case X86::BI__builtin_ia32_pmaxuw256:
11524  case X86::BI__builtin_ia32_pmaxud256:
11525  case X86::BI__builtin_ia32_pmaxuq256:
11526  case X86::BI__builtin_ia32_pmaxub512:
11527  case X86::BI__builtin_ia32_pmaxuw512:
11528  case X86::BI__builtin_ia32_pmaxud512:
11529  case X86::BI__builtin_ia32_pmaxuq512:
11530  return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
11531  case X86::BI__builtin_ia32_pminsb128:
11532  case X86::BI__builtin_ia32_pminsw128:
11533  case X86::BI__builtin_ia32_pminsd128:
11534  case X86::BI__builtin_ia32_pminsq128:
11535  case X86::BI__builtin_ia32_pminsb256:
11536  case X86::BI__builtin_ia32_pminsw256:
11537  case X86::BI__builtin_ia32_pminsd256:
11538  case X86::BI__builtin_ia32_pminsq256:
11539  case X86::BI__builtin_ia32_pminsb512:
11540  case X86::BI__builtin_ia32_pminsw512:
11541  case X86::BI__builtin_ia32_pminsd512:
11542  case X86::BI__builtin_ia32_pminsq512:
11543  return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
11544  case X86::BI__builtin_ia32_pminub128:
11545  case X86::BI__builtin_ia32_pminuw128:
11546  case X86::BI__builtin_ia32_pminud128:
11547  case X86::BI__builtin_ia32_pminuq128:
11548  case X86::BI__builtin_ia32_pminub256:
11549  case X86::BI__builtin_ia32_pminuw256:
11550  case X86::BI__builtin_ia32_pminud256:
11551  case X86::BI__builtin_ia32_pminuq256:
11552  case X86::BI__builtin_ia32_pminub512:
11553  case X86::BI__builtin_ia32_pminuw512:
11554  case X86::BI__builtin_ia32_pminud512:
11555  case X86::BI__builtin_ia32_pminuq512:
11556  return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
11557 
11558  case X86::BI__builtin_ia32_pmuludq128:
11559  case X86::BI__builtin_ia32_pmuludq256:
11560  case X86::BI__builtin_ia32_pmuludq512:
11561  return EmitX86Muldq(*this, /*IsSigned*/false, Ops);
11562 
11563  case X86::BI__builtin_ia32_pmuldq128:
11564  case X86::BI__builtin_ia32_pmuldq256:
11565  case X86::BI__builtin_ia32_pmuldq512:
11566  return EmitX86Muldq(*this, /*IsSigned*/true, Ops);
11567 
11568  case X86::BI__builtin_ia32_pternlogd512_mask:
11569  case X86::BI__builtin_ia32_pternlogq512_mask:
11570  case X86::BI__builtin_ia32_pternlogd128_mask:
11571  case X86::BI__builtin_ia32_pternlogd256_mask:
11572  case X86::BI__builtin_ia32_pternlogq128_mask:
11573  case X86::BI__builtin_ia32_pternlogq256_mask:
11574  return EmitX86Ternlog(*this, /*ZeroMask*/false, Ops);
11575 
11576  case X86::BI__builtin_ia32_pternlogd512_maskz:
11577  case X86::BI__builtin_ia32_pternlogq512_maskz:
11578  case X86::BI__builtin_ia32_pternlogd128_maskz:
11579  case X86::BI__builtin_ia32_pternlogd256_maskz:
11580  case X86::BI__builtin_ia32_pternlogq128_maskz:
11581  case X86::BI__builtin_ia32_pternlogq256_maskz:
11582  return EmitX86Ternlog(*this, /*ZeroMask*/true, Ops);
11583 
11584  case X86::BI__builtin_ia32_vpshldd128:
11585  case X86::BI__builtin_ia32_vpshldd256:
11586  case X86::BI__builtin_ia32_vpshldd512:
11587  case X86::BI__builtin_ia32_vpshldq128:
11588  case X86::BI__builtin_ia32_vpshldq256:
11589  case X86::BI__builtin_ia32_vpshldq512:
11590  case X86::BI__builtin_ia32_vpshldw128:
11591  case X86::BI__builtin_ia32_vpshldw256:
11592  case X86::BI__builtin_ia32_vpshldw512:
11593  return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
11594 
11595  case X86::BI__builtin_ia32_vpshrdd128:
11596  case X86::BI__builtin_ia32_vpshrdd256:
11597  case X86::BI__builtin_ia32_vpshrdd512:
11598  case X86::BI__builtin_ia32_vpshrdq128:
11599  case X86::BI__builtin_ia32_vpshrdq256:
11600  case X86::BI__builtin_ia32_vpshrdq512:
11601  case X86::BI__builtin_ia32_vpshrdw128:
11602  case X86::BI__builtin_ia32_vpshrdw256:
11603  case X86::BI__builtin_ia32_vpshrdw512:
11604  // Ops 0 and 1 are swapped.
11605  return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
11606 
11607  case X86::BI__builtin_ia32_vpshldvd128:
11608  case X86::BI__builtin_ia32_vpshldvd256:
11609  case X86::BI__builtin_ia32_vpshldvd512:
11610  case X86::BI__builtin_ia32_vpshldvq128:
11611  case X86::BI__builtin_ia32_vpshldvq256:
11612  case X86::BI__builtin_ia32_vpshldvq512:
11613  case X86::BI__builtin_ia32_vpshldvw128:
11614  case X86::BI__builtin_ia32_vpshldvw256:
11615  case X86::BI__builtin_ia32_vpshldvw512:
11616  return EmitX86FunnelShift(*this, Ops[0], Ops[1], Ops[2], false);
11617 
11618  case X86::BI__builtin_ia32_vpshrdvd128:
11619  case X86::BI__builtin_ia32_vpshrdvd256:
11620  case X86::BI__builtin_ia32_vpshrdvd512:
11621  case X86::BI__builtin_ia32_vpshrdvq128:
11622  case X86::BI__builtin_ia32_vpshrdvq256:
11623  case X86::BI__builtin_ia32_vpshrdvq512:
11624  case X86::BI__builtin_ia32_vpshrdvw128:
11625  case X86::BI__builtin_ia32_vpshrdvw256:
11626  case X86::BI__builtin_ia32_vpshrdvw512:
11627  // Ops 0 and 1 are swapped.
11628  return EmitX86FunnelShift(*this, Ops[1], Ops[0], Ops[2], true);
11629 
11630  // 3DNow!
11631  case X86::BI__builtin_ia32_pswapdsf:
11632  case X86::BI__builtin_ia32_pswapdsi: {
11633  llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
11634  Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
11635  llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
11636  return Builder.CreateCall(F, Ops, "pswapd");
11637  }
11638  case X86::BI__builtin_ia32_rdrand16_step:
11639  case X86::BI__builtin_ia32_rdrand32_step:
11640  case X86::BI__builtin_ia32_rdrand64_step:
11641  case X86::BI__builtin_ia32_rdseed16_step:
11642  case X86::BI__builtin_ia32_rdseed32_step:
11643  case X86::BI__builtin_ia32_rdseed64_step: {
11644  Intrinsic::ID ID;
11645  switch (BuiltinID) {
11646  default: llvm_unreachable("Unsupported intrinsic!");
11647  case X86::BI__builtin_ia32_rdrand16_step:
11648  ID = Intrinsic::x86_rdrand_16;
11649  break;
11650  case X86::BI__builtin_ia32_rdrand32_step:
11651  ID = Intrinsic::x86_rdrand_32;
11652  break;
11653  case X86::BI__builtin_ia32_rdrand64_step:
11654  ID = Intrinsic::x86_rdrand_64;
11655  break;
11656  case X86::BI__builtin_ia32_rdseed16_step:
11657  ID = Intrinsic::x86_rdseed_16;
11658  break;
11659  case X86::BI__builtin_ia32_rdseed32_step:
11660  ID = Intrinsic::x86_rdseed_32;
11661  break;
11662  case X86::BI__builtin_ia32_rdseed64_step:
11663  ID = Intrinsic::x86_rdseed_64;
11664  break;
11665  }
11666 
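  // The intrinsic returns { random value, success flag }; store the value
  // through the pointer operand and return the flag.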
11667  Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
11668  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
11669  Ops[0]);
11670  return Builder.CreateExtractValue(Call, 1);
11671  }
11672  case X86::BI__builtin_ia32_addcarryx_u32:
11673  case X86::BI__builtin_ia32_addcarryx_u64:
11674  case X86::BI__builtin_ia32_subborrow_u32:
11675  case X86::BI__builtin_ia32_subborrow_u64: {
11676  Intrinsic::ID IID;
11677  switch (BuiltinID) {
11678  default: llvm_unreachable("Unsupported intrinsic!");
11679  case X86::BI__builtin_ia32_addcarryx_u32:
11680  IID = Intrinsic::x86_addcarry_32;
11681  break;
11682  case X86::BI__builtin_ia32_addcarryx_u64:
11683  IID = Intrinsic::x86_addcarry_64;
11684  break;
11685  case X86::BI__builtin_ia32_subborrow_u32:
11686  IID = Intrinsic::x86_subborrow_32;
11687  break;
11688  case X86::BI__builtin_ia32_subborrow_u64:
11689  IID = Intrinsic::x86_subborrow_64;
11690  break;
11691  }
11692 
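  // The intrinsic returns { carry-out, result }; store the result through the
  // out pointer (Ops[3]) and return the carry/borrow flag.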
11693  Value *Call = Builder.CreateCall(CGM.getIntrinsic(IID),
11694  { Ops[0], Ops[1], Ops[2] });
11695  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 1),
11696  Ops[3]);
11697  return Builder.CreateExtractValue(Call, 0);
11698  }
11699 
11700  case X86::BI__builtin_ia32_fpclassps128_mask:
11701  case X86::BI__builtin_ia32_fpclassps256_mask:
11702  case X86::BI__builtin_ia32_fpclassps512_mask:
11703  case X86::BI__builtin_ia32_fpclasspd128_mask:
11704  case X86::BI__builtin_ia32_fpclasspd256_mask:
11705  case X86::BI__builtin_ia32_fpclasspd512_mask: {
11706  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
11707  Value *MaskIn = Ops[2];
11708  Ops.erase(&Ops[2]);
11709 
11710  Intrinsic::ID ID;
11711  switch (BuiltinID) {
11712  default: llvm_unreachable("Unsupported intrinsic!");
11713  case X86::BI__builtin_ia32_fpclassps128_mask:
11714  ID = Intrinsic::x86_avx512_fpclass_ps_128;
11715  break;
11716  case X86::BI__builtin_ia32_fpclassps256_mask:
11717  ID = Intrinsic::x86_avx512_fpclass_ps_256;
11718  break;
11719  case X86::BI__builtin_ia32_fpclassps512_mask:
11720  ID = Intrinsic::x86_avx512_fpclass_ps_512;
11721  break;
11722  case X86::BI__builtin_ia32_fpclasspd128_mask:
11723  ID = Intrinsic::x86_avx512_fpclass_pd_128;
11724  break;
11725  case X86::BI__builtin_ia32_fpclasspd256_mask:
11726  ID = Intrinsic::x86_avx512_fpclass_pd_256;
11727  break;
11728  case X86::BI__builtin_ia32_fpclasspd512_mask:
11729  ID = Intrinsic::x86_avx512_fpclass_pd_512;
11730  break;
11731  }
11732 
11733  Value *Fpclass = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
11734  return EmitX86MaskedCompareResult(*this, Fpclass, NumElts, MaskIn);
11735  }
11736 
11737  case X86::BI__builtin_ia32_vp2intersect_q_512:
11738  case X86::BI__builtin_ia32_vp2intersect_q_256:
11739  case X86::BI__builtin_ia32_vp2intersect_q_128:
11740  case X86::BI__builtin_ia32_vp2intersect_d_512:
11741  case X86::BI__builtin_ia32_vp2intersect_d_256:
11742  case X86::BI__builtin_ia32_vp2intersect_d_128: {
11743  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
11744  Intrinsic::ID ID;
11745 
11746  switch (BuiltinID) {
11747  default: llvm_unreachable("Unsupported intrinsic!");
11748  case X86::BI__builtin_ia32_vp2intersect_q_512:
11749  ID = Intrinsic::x86_avx512_vp2intersect_q_512;
11750  break;
11751  case X86::BI__builtin_ia32_vp2intersect_q_256:
11752  ID = Intrinsic::x86_avx512_vp2intersect_q_256;
11753  break;
11754  case X86::BI__builtin_ia32_vp2intersect_q_128:
11755  ID = Intrinsic::x86_avx512_vp2intersect_q_128;
11756  break;
11757  case X86::BI__builtin_ia32_vp2intersect_d_512:
11758  ID = Intrinsic::x86_avx512_vp2intersect_d_512;
11759  break;
11760  case X86::BI__builtin_ia32_vp2intersect_d_256:
11761  ID = Intrinsic::x86_avx512_vp2intersect_d_256;
11762  break;
11763  case X86::BI__builtin_ia32_vp2intersect_d_128:
11764  ID = Intrinsic::x86_avx512_vp2intersect_d_128;
11765  break;
11766  }
11767 
11768  Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID), {Ops[0], Ops[1]});
11769  Value *Result = Builder.CreateExtractValue(Call, 0);
11770  Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
11771  Builder.CreateDefaultAlignedStore(Result, Ops[2]);
11772 
11773  Result = Builder.CreateExtractValue(Call, 1);
11774  Result = EmitX86MaskedCompareResult(*this, Result, NumElts, nullptr);
11775  return Builder.CreateDefaultAlignedStore(Result, Ops[3]);
11776  }
11777 
11778  case X86::BI__builtin_ia32_vpmultishiftqb128:
11779  case X86::BI__builtin_ia32_vpmultishiftqb256:
11780  case X86::BI__builtin_ia32_vpmultishiftqb512: {
11781  Intrinsic::ID ID;
11782  switch (BuiltinID) {
11783  default: llvm_unreachable("Unsupported intrinsic!");
11784  case X86::BI__builtin_ia32_vpmultishiftqb128:
11785  ID = Intrinsic::x86_avx512_pmultishift_qb_128;
11786  break;
11787  case X86::BI__builtin_ia32_vpmultishiftqb256:
11788  ID = Intrinsic::x86_avx512_pmultishift_qb_256;
11789  break;
11790  case X86::BI__builtin_ia32_vpmultishiftqb512:
11791  ID = Intrinsic::x86_avx512_pmultishift_qb_512;
11792  break;
11793  }
11794 
11795  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
11796  }
11797 
11798  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
11799  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
11800  case X86::BI__builtin_ia32_vpshufbitqmb512_mask: {
11801  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
11802  Value *MaskIn = Ops[2];
11803  Ops.erase(&Ops[2]);
11804 
11805  Intrinsic::ID ID;
11806  switch (BuiltinID) {
11807  default: llvm_unreachable("Unsupported intrinsic!");
11808  case X86::BI__builtin_ia32_vpshufbitqmb128_mask:
11809  ID = Intrinsic::x86_avx512_vpshufbitqmb_128;
11810  break;
11811  case X86::BI__builtin_ia32_vpshufbitqmb256_mask:
11812  ID = Intrinsic::x86_avx512_vpshufbitqmb_256;
11813  break;
11814  case X86::BI__builtin_ia32_vpshufbitqmb512_mask:
11815  ID = Intrinsic::x86_avx512_vpshufbitqmb_512;
11816  break;
11817  }
11818 
11819  Value *Shufbit = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
11820  return EmitX86MaskedCompareResult(*this, Shufbit, NumElts, MaskIn);
11821  }
11822 
11823  // packed comparison intrinsics
11824  case X86::BI__builtin_ia32_cmpeqps:
11825  case X86::BI__builtin_ia32_cmpeqpd:
11826  return getVectorFCmpIR(CmpInst::FCMP_OEQ);
11827  case X86::BI__builtin_ia32_cmpltps:
11828  case X86::BI__builtin_ia32_cmpltpd:
11829  return getVectorFCmpIR(CmpInst::FCMP_OLT);
11830  case X86::BI__builtin_ia32_cmpleps:
11831  case X86::BI__builtin_ia32_cmplepd:
11832  return getVectorFCmpIR(CmpInst::FCMP_OLE);
11833  case X86::BI__builtin_ia32_cmpunordps:
11834  case X86::BI__builtin_ia32_cmpunordpd:
11835  return getVectorFCmpIR(CmpInst::FCMP_UNO);
11836  case X86::BI__builtin_ia32_cmpneqps:
11837  case X86::BI__builtin_ia32_cmpneqpd:
11838  return getVectorFCmpIR(CmpInst::FCMP_UNE);
11839  case X86::BI__builtin_ia32_cmpnltps:
11840  case X86::BI__builtin_ia32_cmpnltpd:
11841  return getVectorFCmpIR(CmpInst::FCMP_UGE);
11842  case X86::BI__builtin_ia32_cmpnleps:
11843  case X86::BI__builtin_ia32_cmpnlepd:
11844  return getVectorFCmpIR(CmpInst::FCMP_UGT);
11845  case X86::BI__builtin_ia32_cmpordps:
11846  case X86::BI__builtin_ia32_cmpordpd:
11847  return getVectorFCmpIR(CmpInst::FCMP_ORD);
11848  case X86::BI__builtin_ia32_cmpps:
11849  case X86::BI__builtin_ia32_cmpps256:
11850  case X86::BI__builtin_ia32_cmppd:
11851  case X86::BI__builtin_ia32_cmppd256:
11852  case X86::BI__builtin_ia32_cmpps128_mask:
11853  case X86::BI__builtin_ia32_cmpps256_mask:
11854  case X86::BI__builtin_ia32_cmpps512_mask:
11855  case X86::BI__builtin_ia32_cmppd128_mask:
11856  case X86::BI__builtin_ia32_cmppd256_mask:
11857  case X86::BI__builtin_ia32_cmppd512_mask: {
11858  // Lowering vector comparisons to fcmp instructions, while
11859  // ignoring the requested signaling behaviour and
11860  // the requested rounding mode.
11861  // This is only possible as long as FENV_ACCESS is not implemented.
11862  // See also: https://reviews.llvm.org/D45616
11863 
11864  // The third argument is the comparison condition, an integer in the
11865  // range [0, 31].
11866  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x1f;
11867 
11868  // Lower to an IR fcmp instruction, ignoring the requested signaling
11869  // behaviour; e.g. both _CMP_GT_OS and _CMP_GT_OQ are translated to
11870  // FCMP_OGT.
11871  FCmpInst::Predicate Pred;
11872  switch (CC) {
11873  case 0x00: Pred = FCmpInst::FCMP_OEQ; break;
11874  case 0x01: Pred = FCmpInst::FCMP_OLT; break;
11875  case 0x02: Pred = FCmpInst::FCMP_OLE; break;
11876  case 0x03: Pred = FCmpInst::FCMP_UNO; break;
11877  case 0x04: Pred = FCmpInst::FCMP_UNE; break;
11878  case 0x05: Pred = FCmpInst::FCMP_UGE; break;
11879  case 0x06: Pred = FCmpInst::FCMP_UGT; break;
11880  case 0x07: Pred = FCmpInst::FCMP_ORD; break;
11881  case 0x08: Pred = FCmpInst::FCMP_UEQ; break;
11882  case 0x09: Pred = FCmpInst::FCMP_ULT; break;
11883  case 0x0a: Pred = FCmpInst::FCMP_ULE; break;
11884  case 0x0b: Pred = FCmpInst::FCMP_FALSE; break;
11885  case 0x0c: Pred = FCmpInst::FCMP_ONE; break;
11886  case 0x0d: Pred = FCmpInst::FCMP_OGE; break;
11887  case 0x0e: Pred = FCmpInst::FCMP_OGT; break;
11888  case 0x0f: Pred = FCmpInst::FCMP_TRUE; break;
11889  case 0x10: Pred = FCmpInst::FCMP_OEQ; break;
11890  case 0x11: Pred = FCmpInst::FCMP_OLT; break;
11891  case 0x12: Pred = FCmpInst::FCMP_OLE; break;
11892  case 0x13: Pred = FCmpInst::FCMP_UNO; break;
11893  case 0x14: Pred = FCmpInst::FCMP_UNE; break;
11894  case 0x15: Pred = FCmpInst::FCMP_UGE; break;
11895  case 0x16: Pred = FCmpInst::FCMP_UGT; break;
11896  case 0x17: Pred = FCmpInst::FCMP_ORD; break;
11897  case 0x18: Pred = FCmpInst::FCMP_UEQ; break;
11898  case 0x19: Pred = FCmpInst::FCMP_ULT; break;
11899  case 0x1a: Pred = FCmpInst::FCMP_ULE; break;
11900  case 0x1b: Pred = FCmpInst::FCMP_FALSE; break;
11901  case 0x1c: Pred = FCmpInst::FCMP_ONE; break;
11902  case 0x1d: Pred = FCmpInst::FCMP_OGE; break;
11903  case 0x1e: Pred = FCmpInst::FCMP_OGT; break;
11904  case 0x1f: Pred = FCmpInst::FCMP_TRUE; break;
11905  default: llvm_unreachable("Unhandled CC");
11906  }
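// Illustrative sketch (hypothetical caller, assuming the usual avxintrin.h
// mapping of _mm_cmp_ps onto __builtin_ia32_cmpps): both
//   _mm_cmp_ps(a, b, _CMP_GT_OS)   // CC = 0x0e
//   _mm_cmp_ps(a, b, _CMP_GT_OQ)   // CC = 0x1e
// select FCMP_OGT in the table above, so the signaling/quiet distinction is
// dropped exactly as the comment before the switch says.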
11907 
11908  // Builtins without the _mask suffix return a vector of integers
11909  // of the same width as the input vectors
11910  switch (BuiltinID) {
11911  case X86::BI__builtin_ia32_cmpps512_mask:
11912  case X86::BI__builtin_ia32_cmppd512_mask:
11913  case X86::BI__builtin_ia32_cmpps128_mask:
11914  case X86::BI__builtin_ia32_cmpps256_mask:
11915  case X86::BI__builtin_ia32_cmppd128_mask:
11916  case X86::BI__builtin_ia32_cmppd256_mask: {
11917  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
11918  Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
11919  return EmitX86MaskedCompareResult(*this, Cmp, NumElts, Ops[3]);
11920  }
11921  default:
11922  return getVectorFCmpIR(Pred);
11923  }
11924  }
11925 
11926  // SSE scalar comparison intrinsics
11927  case X86::BI__builtin_ia32_cmpeqss:
11928  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
11929  case X86::BI__builtin_ia32_cmpltss:
11930  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
11931  case X86::BI__builtin_ia32_cmpless:
11932  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
11933  case X86::BI__builtin_ia32_cmpunordss:
11934  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
11935  case X86::BI__builtin_ia32_cmpneqss:
11936  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
11937  case X86::BI__builtin_ia32_cmpnltss:
11938  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
11939  case X86::BI__builtin_ia32_cmpnless:
11940  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
11941  case X86::BI__builtin_ia32_cmpordss:
11942  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
11943  case X86::BI__builtin_ia32_cmpeqsd:
11944  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
11945  case X86::BI__builtin_ia32_cmpltsd:
11946  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
11947  case X86::BI__builtin_ia32_cmplesd:
11948  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
11949  case X86::BI__builtin_ia32_cmpunordsd:
11950  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
11951  case X86::BI__builtin_ia32_cmpneqsd:
11952  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
11953  case X86::BI__builtin_ia32_cmpnltsd:
11954  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
11955  case X86::BI__builtin_ia32_cmpnlesd:
11956  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
11957  case X86::BI__builtin_ia32_cmpordsd:
11958  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
11959 
11960 // AVX512 bf16 intrinsics
11961  case X86::BI__builtin_ia32_cvtneps2bf16_128_mask: {
11962  Ops[2] = getMaskVecValue(*this, Ops[2],
11963  Ops[0]->getType()->getVectorNumElements());
11964  Intrinsic::ID IID = Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128;
11965  return Builder.CreateCall(CGM.getIntrinsic(IID), Ops);
11966  }
11967  case X86::BI__builtin_ia32_cvtsbf162ss_32:
11968  return EmitX86CvtBF16ToFloatExpr(*this, E, Ops);
11969 
11970  case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
11971  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask: {
11972  Intrinsic::ID IID;
11973  switch (BuiltinID) {
11974  default: llvm_unreachable("Unsupported intrinsic!");
11975  case X86::BI__builtin_ia32_cvtneps2bf16_256_mask:
11976  IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_256;
11977  break;
11978  case X86::BI__builtin_ia32_cvtneps2bf16_512_mask:
11979  IID = Intrinsic::x86_avx512bf16_cvtneps2bf16_512;
11980  break;
11981  }
11982  Value *Res = Builder.CreateCall(CGM.getIntrinsic(IID), Ops[0]);
11983  return EmitX86Select(*this, Ops[2], Res, Ops[1]);
11984  }
11985 
11986  case X86::BI__emul:
11987  case X86::BI__emulu: {
11988  llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
11989  bool isSigned = (BuiltinID == X86::BI__emul);
11990  Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
11991  Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
11992  return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
11993  }
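// Illustrative sketch (hypothetical values, not from the listed source):
// __emul(-2, 3) sign-extends both 32-bit operands to i64 and yields the full
// 64-bit product -6, while __emulu(0xFFFFFFFFu, 2) zero-extends and yields
// 0x1FFFFFFFE; the result is never truncated back to 32 bits.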
11994  case X86::BI__mulh:
11995  case X86::BI__umulh:
11996  case X86::BI_mul128:
11997  case X86::BI_umul128: {
11998  llvm::Type *ResType = ConvertType(E->getType());
11999  llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
12000 
12001  bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
12002  Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
12003  Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
12004 
12005  Value *MulResult, *HigherBits;
12006  if (IsSigned) {
12007  MulResult = Builder.CreateNSWMul(LHS, RHS);
12008  HigherBits = Builder.CreateAShr(MulResult, 64);
12009  } else {
12010  MulResult = Builder.CreateNUWMul(LHS, RHS);
12011  HigherBits = Builder.CreateLShr(MulResult, 64);
12012  }
12013  HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
12014 
12015  if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
12016  return HigherBits;
12017 
12018  Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
12019  Builder.CreateStore(HigherBits, HighBitsAddress);
12020  return Builder.CreateIntCast(MulResult, ResType, IsSigned);
12021  }
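// Illustrative sketch (hypothetical caller, not from the listed source):
//   unsigned __int64 Hi, Lo;
//   Lo = _umul128(A, B, &Hi);
// The i128 sequence above returns the low 64 bits of the full product and
// stores the high 64 bits through the third argument; __mulh/__umulh return
// only the high half and take no out-pointer.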
12022 
12023  case X86::BI__faststorefence: {
12024  return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
12025  llvm::SyncScope::System);
12026  }
12027  case X86::BI__shiftleft128:
12028  case X86::BI__shiftright128: {
12029  // FIXME: Once fshl/fshr no longer add an unneeded 'and' and 'cmov', do this:
12030  // llvm::Function *F = CGM.getIntrinsic(
12031  // BuiltinID == X86::BI__shiftleft128 ? Intrinsic::fshl : Intrinsic::fshr,
12032  // Int64Ty);
12033  // Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
12034  // return Builder.CreateCall(F, Ops);
12035  llvm::Type *Int128Ty = Builder.getInt128Ty();
12036  Value *HighPart128 =
12037  Builder.CreateShl(Builder.CreateZExt(Ops[1], Int128Ty), 64);
12038  Value *LowPart128 = Builder.CreateZExt(Ops[0], Int128Ty);
12039  Value *Val = Builder.CreateOr(HighPart128, LowPart128);
12040  Value *Amt = Builder.CreateAnd(Builder.CreateZExt(Ops[2], Int128Ty),
12041  llvm::ConstantInt::get(Int128Ty, 0x3f));
12042  Value *Res;
12043  if (BuiltinID == X86::BI__shiftleft128)
12044  Res = Builder.CreateLShr(Builder.CreateShl(Val, Amt), 64);
12045  else
12046  Res = Builder.CreateLShr(Val, Amt);
12047  return Builder.CreateTrunc(Res, Int64Ty);
12048  }
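// Illustrative sketch (not from the listed source): __shiftleft128(Lo, Hi, N)
// returns the high 64 bits of the 128-bit value (Hi:Lo) << (N & 63), and
// __shiftright128(Lo, Hi, N) returns the low 64 bits of (Hi:Lo) >> (N & 63),
// which is exactly what the i128 shift-and-truncate sequence above computes.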
12049  case X86::BI_ReadWriteBarrier:
12050  case X86::BI_ReadBarrier:
12051  case X86::BI_WriteBarrier: {
12052  return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
12053  llvm::SyncScope::SingleThread);
12054  }
12055  case X86::BI_BitScanForward:
12056  case X86::BI_BitScanForward64:
12057  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
12058  case X86::BI_BitScanReverse:
12059  case X86::BI_BitScanReverse64:
12060  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
12061 
12062  case X86::BI_InterlockedAnd64:
12063  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
12064  case X86::BI_InterlockedExchange64:
12065  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
12066  case X86::BI_InterlockedExchangeAdd64:
12067  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
12068  case X86::BI_InterlockedExchangeSub64:
12069  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
12070  case X86::BI_InterlockedOr64:
12071  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
12072  case X86::BI_InterlockedXor64:
12073  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
12074  case X86::BI_InterlockedDecrement64:
12075  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
12076  case X86::BI_InterlockedIncrement64:
12077  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
12078  case X86::BI_InterlockedCompareExchange128: {
12079  // InterlockedCompareExchange128 doesn't directly refer to 128bit ints,
12080  // instead it takes pointers to 64bit ints for Destination and
12081  // ComparandResult, and exchange is taken as two 64bit ints (high & low).
12082  // The previous value is written to ComparandResult, and success is
12083  // returned.
12084 
12085  llvm::Type *Int128Ty = Builder.getInt128Ty();
12086  llvm::Type *Int128PtrTy = Int128Ty->getPointerTo();
12087 
12088  Value *Destination =
12089  Builder.CreateBitCast(Ops[0], Int128PtrTy);
12090  Value *ExchangeHigh128 = Builder.CreateZExt(Ops[1], Int128Ty);
12091  Value *ExchangeLow128 = Builder.CreateZExt(Ops[2], Int128Ty);
12092  Address ComparandResult(Builder.CreateBitCast(Ops[3], Int128PtrTy),
12093  getContext().toCharUnitsFromBits(128));
12094 
12095  Value *Exchange = Builder.CreateOr(
12096  Builder.CreateShl(ExchangeHigh128, 64, "", false, false),
12097  ExchangeLow128);
12098 
12099  Value *Comparand = Builder.CreateLoad(ComparandResult);
12100 
12101  AtomicCmpXchgInst *CXI =
12102  Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
12103  AtomicOrdering::SequentiallyConsistent,
12104  AtomicOrdering::SequentiallyConsistent);
12105  CXI->setVolatile(true);
12106 
12107  // Write the result back to the inout pointer.
12108  Builder.CreateStore(Builder.CreateExtractValue(CXI, 0), ComparandResult);
12109 
12110  // Get the success boolean and zero extend it to i8.
12111  Value *Success = Builder.CreateExtractValue(CXI, 1);
12112  return Builder.CreateZExt(Success, ConvertType(E->getType()));
12113  }
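// Illustrative sketch of the MSVC prototype this lowers (not from the listed
// source):
//   unsigned char _InterlockedCompareExchange128(__int64 volatile *Dest,
//       __int64 ExchangeHigh, __int64 ExchangeLow, __int64 *ComparandResult);
// The whole operation becomes one volatile seq_cst cmpxchg on i128; the old
// value is written back through ComparandResult and the success bit returned.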
12114 
12115  case X86::BI_AddressOfReturnAddress: {
12116  Function *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
12117  return Builder.CreateCall(F);
12118  }
12119  case X86::BI__stosb: {
12120  // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
12121  // instruction, but it will create a memset that won't be optimized away.
12122  return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
12123  }
12124  case X86::BI__ud2:
12125  // llvm.trap makes a ud2a instruction on x86.
12126  return EmitTrapCall(Intrinsic::trap);
12127  case X86::BI__int2c: {
12128  // This syscall signals a driver assertion failure in x86 NT kernels.
12129  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
12130  llvm::InlineAsm *IA =
12131  llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*hasSideEffects=*/true);
12132  llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
12133  getLLVMContext(), llvm::AttributeList::FunctionIndex,
12134  llvm::Attribute::NoReturn);
12135  llvm::CallInst *CI = Builder.CreateCall(IA);
12136  CI->setAttributes(NoReturnAttr);
12137  return CI;
12138  }
12139  case X86::BI__readfsbyte:
12140  case X86::BI__readfsword:
12141  case X86::BI__readfsdword:
12142  case X86::BI__readfsqword: {
12143  llvm::Type *IntTy = ConvertType(E->getType());
12144  Value *Ptr =
12145  Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 257));
12146  LoadInst *Load = Builder.CreateAlignedLoad(
12147  IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
12148  Load->setVolatile(true);
12149  return Load;
12150  }
12151  case X86::BI__readgsbyte:
12152  case X86::BI__readgsword:
12153  case X86::BI__readgsdword:
12154  case X86::BI__readgsqword: {
12155  llvm::Type *IntTy = ConvertType(E->getType());
12156  Value *Ptr =
12157  Builder.CreateIntToPtr(Ops[0], llvm::PointerType::get(IntTy, 256));
12158  LoadInst *Load = Builder.CreateAlignedLoad(
12159  IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
12160  Load->setVolatile(true);
12161  return Load;
12162  }
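// The saturating add/sub builtins below are all forwarded to a shared helper.
// Illustrative note (assumption; the helper is defined elsewhere in this
// file): EmitX86AddSubSatExpr is expected to emit the generic saturating
// intrinsics, e.g. llvm.sadd.sat.v16i8 for __builtin_ia32_paddsb128 and
// llvm.usub.sat.v8i16 for __builtin_ia32_psubusw128, chosen by the
// signedness and add/sub flags passed at each call site.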
12163  case X86::BI__builtin_ia32_paddsb512:
12164  case X86::BI__builtin_ia32_paddsw512:
12165  case X86::BI__builtin_ia32_paddsb256:
12166  case X86::BI__builtin_ia32_paddsw256:
12167  case X86::BI__builtin_ia32_paddsb128:
12168  case X86::BI__builtin_ia32_paddsw128:
12169  return EmitX86AddSubSatExpr(*this, Ops, true, true);
12170  case X86::BI__builtin_ia32_paddusb512:
12171  case X86::BI__builtin_ia32_paddusw512:
12172  case X86::BI__builtin_ia32_paddusb256:
12173  case X86::BI__builtin_ia32_paddusw256:
12174  case X86::BI__builtin_ia32_paddusb128:
12175  case X86::BI__builtin_ia32_paddusw128:
12176  return EmitX86AddSubSatExpr(*this, Ops, false, true);
12177  case X86::BI__builtin_ia32_psubsb512:
12178  case X86::BI__builtin_ia32_psubsw512:
12179  case X86::BI__builtin_ia32_psubsb256:
12180  case X86::BI__builtin_ia32_psubsw256:
12181  case X86::BI__builtin_ia32_psubsb128:
12182  case X86::BI__builtin_ia32_psubsw128:
12183  return EmitX86AddSubSatExpr(*this, Ops, true, false);
12184  case X86::BI__builtin_ia32_psubusb512:
12185  case X86::BI__builtin_ia32_psubusw512:
12186  case X86::BI__builtin_ia32_psubusb256:
12187  case X86::BI__builtin_ia32_psubusw256:
12188  case X86::BI__builtin_ia32_psubusb128:
12189  case X86::BI__builtin_ia32_psubusw128:
12190  return EmitX86AddSubSatExpr(*this, Ops, false, false);
12191  }
12192 }
12193 
12194 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
12195  const CallExpr *E) {
12196  SmallVector<Value*, 4> Ops;
12197 
12198  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
12199  Ops.push_back(EmitScalarExpr(E->getArg(i)));
12200 
12201  Intrinsic::ID ID = Intrinsic::not_intrinsic;
12202 
12203  switch (BuiltinID) {
12204  default: return nullptr;
12205 
12206  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
12207  // call __builtin_readcyclecounter.
12208  case PPC::BI__builtin_ppc_get_timebase:
12209  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
12210 
12211  // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
12212  case PPC::BI__builtin_altivec_lvx:
12213  case PPC::BI__builtin_altivec_lvxl:
12214  case PPC::BI__builtin_altivec_lvebx:
12215  case PPC::BI__builtin_altivec_lvehx:
12216  case PPC::BI__builtin_altivec_lvewx:
12217  case PPC::BI__builtin_altivec_lvsl:
12218  case PPC::BI__builtin_altivec_lvsr:
12219  case PPC::BI__builtin_vsx_lxvd2x:
12220  case PPC::BI__builtin_vsx_lxvw4x:
12221  case PPC::BI__builtin_vsx_lxvd2x_be:
12222  case PPC::BI__builtin_vsx_lxvw4x_be:
12223  case PPC::BI__builtin_vsx_lxvl:
12224  case PPC::BI__builtin_vsx_lxvll:
12225  {
12226  if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
12227  BuiltinID == PPC::BI__builtin_vsx_lxvll) {
12228  Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
12229  } else {
12230  Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
12231  Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
12232  Ops.pop_back();
12233  }
12234 
12235  switch (BuiltinID) {
12236  default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
12237  case PPC::BI__builtin_altivec_lvx:
12238  ID = Intrinsic::ppc_altivec_lvx;
12239  break;
12240  case PPC::BI__builtin_altivec_lvxl:
12241  ID = Intrinsic::ppc_altivec_lvxl;
12242  break;
12243  case PPC::BI__builtin_altivec_lvebx:
12244  ID = Intrinsic::ppc_altivec_lvebx;
12245  break;
12246  case PPC::BI__builtin_altivec_lvehx:
12247  ID = Intrinsic::ppc_altivec_lvehx;
12248  break;
12249  case PPC::BI__builtin_altivec_lvewx:
12250  ID = Intrinsic::ppc_altivec_lvewx;
12251  break;
12252  case PPC::BI__builtin_altivec_lvsl:
12253  ID = Intrinsic::ppc_altivec_lvsl;
12254  break;
12255  case PPC::BI__builtin_altivec_lvsr:
12256  ID = Intrinsic::ppc_altivec_lvsr;
12257  break;
12258  case PPC::BI__builtin_vsx_lxvd2x:
12259  ID = Intrinsic::ppc_vsx_lxvd2x;
12260  break;
12261  case PPC::BI__builtin_vsx_lxvw4x:
12262  ID = Intrinsic::ppc_vsx_lxvw4x;
12263  break;
12264  case PPC::BI__builtin_vsx_lxvd2x_be:
12265  ID = Intrinsic::ppc_vsx_lxvd2x_be;
12266  break;
12267  case PPC::BI__builtin_vsx_lxvw4x_be:
12268  ID = Intrinsic::ppc_vsx_lxvw4x_be;
12269  break;
12270  case PPC::BI__builtin_vsx_lxvl:
12271  ID = Intrinsic::ppc_vsx_lxvl;
12272  break;
12273  case PPC::BI__builtin_vsx_lxvll:
12274  ID = Intrinsic::ppc_vsx_lxvll;
12275  break;
12276  }
12277  llvm::Function *F = CGM.getIntrinsic(ID);
12278  return Builder.CreateCall(F, Ops, "");
12279  }
12280 
12281  // vec_st, vec_xst_be
12282  case PPC::BI__builtin_altivec_stvx:
12283  case PPC::BI__builtin_altivec_stvxl:
12284  case PPC::BI__builtin_altivec_stvebx:
12285  case PPC::BI__builtin_altivec_stvehx:
12286  case PPC::BI__builtin_altivec_stvewx:
12287  case PPC::BI__builtin_vsx_stxvd2x:
12288  case PPC::BI__builtin_vsx_stxvw4x:
12289  case PPC::BI__builtin_vsx_stxvd2x_be:
12290  case PPC::BI__builtin_vsx_stxvw4x_be:
12291  case PPC::BI__builtin_vsx_stxvl:
12292  case PPC::BI__builtin_vsx_stxvll:
12293  {
12294  if (BuiltinID == PPC::BI__builtin_vsx_stxvl ||
12295  BuiltinID == PPC::BI__builtin_vsx_stxvll) {
12296  Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
12297  } else {
12298  Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
12299  Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
12300  Ops.pop_back();
12301  }
12302 
12303  switch (BuiltinID) {
12304  default: llvm_unreachable("Unsupported st intrinsic!");
12305  case PPC::BI__builtin_altivec_stvx:
12306  ID = Intrinsic::ppc_altivec_stvx;
12307  break;
12308  case PPC::BI__builtin_altivec_stvxl:
12309  ID = Intrinsic::ppc_altivec_stvxl;
12310  break;
12311  case PPC::BI__builtin_altivec_stvebx:
12312  ID = Intrinsic::ppc_altivec_stvebx;
12313  break;
12314  case PPC::BI__builtin_altivec_stvehx:
12315  ID = Intrinsic::ppc_altivec_stvehx;
12316  break;
12317  case PPC::BI__builtin_altivec_stvewx:
12318  ID = Intrinsic::ppc_altivec_stvewx;
12319  break;
12320  case PPC::BI__builtin_vsx_stxvd2x:
12321  ID = Intrinsic::ppc_vsx_stxvd2x;
12322  break;
12323  case PPC::BI__builtin_vsx_stxvw4x:
12324  ID = Intrinsic::ppc_vsx_stxvw4x;
12325  break;
12326  case PPC::BI__builtin_vsx_stxvd2x_be:
12327  ID = Intrinsic::ppc_vsx_stxvd2x_be;
12328  break;
12329  case PPC::BI__builtin_vsx_stxvw4x_be:
12330  ID = Intrinsic::ppc_vsx_stxvw4x_be;
12331  break;
12332  case PPC::BI__builtin_vsx_stxvl:
12333  ID = Intrinsic::ppc_vsx_stxvl;
12334  break;
12335  case PPC::BI__builtin_vsx_stxvll:
12336  ID = Intrinsic::ppc_vsx_stxvll;
12337  break;
12338  }
12339  llvm::Function *F = CGM.getIntrinsic(ID);
12340  return Builder.CreateCall(F, Ops, "");
12341  }
12342  // Square root
12343  case PPC::BI__builtin_vsx_xvsqrtsp:
12344  case PPC::BI__builtin_vsx_xvsqrtdp: {
12345  llvm::Type *ResultType = ConvertType(E->getType());
12346  Value *X = EmitScalarExpr(E->getArg(0));
12347  ID = Intrinsic::sqrt;
12348  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
12349  return Builder.CreateCall(F, X);
12350  }
12351  // Count leading zeros
12352  case PPC::BI__builtin_altivec_vclzb:
12353  case PPC::BI__builtin_altivec_vclzh:
12354  case PPC::BI__builtin_altivec_vclzw:
12355  case PPC::BI__builtin_altivec_vclzd: {
12356  llvm::Type *ResultType = ConvertType(E->getType());
12357  Value *X = EmitScalarExpr(E->getArg(0));
12358  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
12359  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
12360  return Builder.CreateCall(F, {X, Undef});
12361  }
12362  case PPC::BI__builtin_altivec_vctzb:
12363  case PPC::BI__builtin_altivec_vctzh:
12364  case PPC::BI__builtin_altivec_vctzw:
12365  case PPC::BI__builtin_altivec_vctzd: {
12366  llvm::Type *ResultType = ConvertType(E->getType());
12367  Value *X = EmitScalarExpr(E->getArg(0));
12368  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
12369  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
12370  return Builder.CreateCall(F, {X, Undef});
12371  }
12372  case PPC::BI__builtin_altivec_vpopcntb:
12373  case PPC::BI__builtin_altivec_vpopcnth:
12374  case PPC::BI__builtin_altivec_vpopcntw:
12375  case PPC::BI__builtin_altivec_vpopcntd: {
12376  llvm::Type *ResultType = ConvertType(E->getType());
12377  Value *X = EmitScalarExpr(E->getArg(0));
12378  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
12379  return Builder.CreateCall(F, X);
12380  }
12381  // Copy sign
12382  case PPC::BI__builtin_vsx_xvcpsgnsp:
12383  case PPC::BI__builtin_vsx_xvcpsgndp: {
12384  llvm::Type *ResultType = ConvertType(E->getType());
12385  Value *X = EmitScalarExpr(E->getArg(0));
12386  Value *Y = EmitScalarExpr(E->getArg(1));
12387  ID = Intrinsic::copysign;
12388  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
12389  return Builder.CreateCall(F, {X, Y});
12390  }
12391  // Rounding/truncation
12392  case PPC::BI__builtin_vsx_xvrspip:
12393  case PPC::BI__builtin_vsx_xvrdpip:
12394  case PPC::BI__builtin_vsx_xvrdpim:
12395  case PPC::BI__builtin_vsx_xvrspim:
12396  case PPC::BI__builtin_vsx_xvrdpi:
12397  case PPC::BI__builtin_vsx_xvrspi:
12398  case PPC::BI__builtin_vsx_xvrdpic:
12399  case PPC::BI__builtin_vsx_xvrspic:
12400  case PPC::BI__builtin_vsx_xvrdpiz:
12401  case PPC::BI__builtin_vsx_xvrspiz: {
12402  llvm::Type *ResultType = ConvertType(E->getType());
12403  Value *X = EmitScalarExpr(E->getArg(0));
12404  if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
12405  BuiltinID == PPC::BI__builtin_vsx_xvrspim)
12406  ID = Intrinsic::floor;
12407  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
12408  BuiltinID == PPC::BI__builtin_vsx_xvrspi)
12409  ID = Intrinsic::round;
12410  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
12411  BuiltinID == PPC::BI__builtin_vsx_xvrspic)
12412  ID = Intrinsic::nearbyint;
12413  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
12414  BuiltinID == PPC::BI__builtin_vsx_xvrspip)
12415  ID = Intrinsic::ceil;
12416  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
12417  BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
12418  ID = Intrinsic::trunc;
12419  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
12420  return Builder.CreateCall(F, X);
12421  }
12422 
12423  // Absolute value
12424  case PPC::BI__builtin_vsx_xvabsdp:
12425  case PPC::BI__builtin_vsx_xvabssp: {
12426  llvm::Type *ResultType = ConvertType(E->getType());
12427  Value *X = EmitScalarExpr(E->getArg(0));
12428  llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
12429  return Builder.CreateCall(F, X);
12430  }
12431 
12432  // FMA variations
12433  case PPC::BI__builtin_vsx_xvmaddadp:
12434  case PPC::BI__builtin_vsx_xvmaddasp:
12435  case PPC::BI__builtin_vsx_xvnmaddadp:
12436  case PPC::BI__builtin_vsx_xvnmaddasp:
12437  case PPC::BI__builtin_vsx_xvmsubadp:
12438  case PPC::BI__builtin_vsx_xvmsubasp:
12439  case PPC::BI__builtin_vsx_xvnmsubadp:
12440  case PPC::BI__builtin_vsx_xvnmsubasp: {
12441  llvm::Type *ResultType = ConvertType(E->getType());
12442  Value *X = EmitScalarExpr(E->getArg(0));
12443  Value *Y = EmitScalarExpr(E->getArg(1));
12444  Value *Z = EmitScalarExpr(E->getArg(2));
12445  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12446  llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12447  switch (BuiltinID) {
12448  case PPC::BI__builtin_vsx_xvmaddadp:
12449  case PPC::BI__builtin_vsx_xvmaddasp:
12450  return Builder.CreateCall(F, {X, Y, Z});
12451  case PPC::BI__builtin_vsx_xvnmaddadp:
12452  case PPC::BI__builtin_vsx_xvnmaddasp:
12453  return Builder.CreateFSub(Zero,
12454  Builder.CreateCall(F, {X, Y, Z}), "sub");
12455  case PPC::BI__builtin_vsx_xvmsubadp:
12456  case PPC::BI__builtin_vsx_xvmsubasp:
12457  return Builder.CreateCall(F,
12458  {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
12459  case PPC::BI__builtin_vsx_xvnmsubadp:
12460  case PPC::BI__builtin_vsx_xvnmsubasp:
12461  Value *FsubRes =
12462  Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
12463  return Builder.CreateFSub(Zero, FsubRes, "sub");
12464  }
12465  llvm_unreachable("Unknown FMA operation");
12466  return nullptr; // Suppress no-return warning
12467  }
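// Illustrative summary (not from the listed source) of the four variants:
//   xvmadd*  ->  fma(X, Y,  Z)   ==   X*Y + Z
//   xvnmadd* -> -fma(X, Y,  Z)   == -(X*Y + Z)
//   xvmsub*  ->  fma(X, Y, -Z)   ==   X*Y - Z
//   xvnmsub* -> -fma(X, Y, -Z)   == -(X*Y - Z)
// with each negation emitted via the fsub-from-zero pattern used above.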
12468 
12469  case PPC::BI__builtin_vsx_insertword: {
12470  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
12471 
12472  // Third argument is a compile time constant int. It must be clamped
12473  // to the range [0, 12].
12474  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
12475  assert(ArgCI &&
12476  "Third arg to xxinsertw intrinsic must be constant integer");
12477  const int64_t MaxIndex = 12;
12478  int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
12479 
12480  // The builtin semantics don't exactly match the xxinsertw instruction's
12481  // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
12482  // word from the first argument and inserts it into the second argument. The
12483  // instruction extracts the word from its second input register and inserts
12484  // it into its first input register, so swap the first and second arguments.
12485  std::swap(Ops[0], Ops[1]);
12486 
12487  // Need to cast the second argument from a vector of unsigned int to a
12488  // vector of long long.
12489  Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
12490 
12491  if (getTarget().isLittleEndian()) {
12492  // Create a shuffle mask of (1, 0)
12493  Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
12494  ConstantInt::get(Int32Ty, 0)
12495  };
12496  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
12497 
12498  // Reverse the double words in the vector we will extract from.
12499  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
12500  Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
12501 
12502  // Reverse the index.
12503  Index = MaxIndex - Index;
12504  }
12505 
12506  // Intrinsic expects the first arg to be a vector of int.
12507  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
12508  Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
12509  return Builder.CreateCall(F, Ops);
12510  }
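// Illustrative example (hypothetical index, not from the listed source): on a
// little-endian target an insert offset of 4 becomes 12 - 4 = 8 after the
// reversal above, and the (1, 0) shuffle swaps the two doublewords of the
// operand so that the big-endian-numbered xxinsertw instruction writes the
// intended bytes.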
12511 
12512  case PPC::BI__builtin_vsx_extractuword: {
12513  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
12514 
12515  // Intrinsic expects the first argument to be a vector of doublewords.
12516  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
12517 
12518  // The second argument is a compile time constant int that needs to
12519  // be clamped to the range [0, 12].
12520  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
12521  assert(ArgCI &&
12522  "Second Arg to xxextractuw intrinsic must be a constant integer!");
12523  const int64_t MaxIndex = 12;
12524  int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
12525 
12526  if (getTarget().isLittleEndian()) {
12527  // Reverse the index.
12528  Index = MaxIndex - Index;
12529  Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
12530 
12531  // Emit the call, then reverse the double words of the results vector.
12532  Value *Call = Builder.CreateCall(F, Ops);
12533 
12534  // Create a shuffle mask of (1, 0)
12535  Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
12536  ConstantInt::get(Int32Ty, 0)
12537  };
12538  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
12539 
12540  Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
12541  return ShuffleCall;
12542  } else {
12543  Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
12544  return Builder.CreateCall(F, Ops);
12545  }
12546  }
12547 
12548  case PPC::BI__builtin_vsx_xxpermdi: {
12549  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
12550  assert(ArgCI && "Third arg must be constant integer!");
12551 
12552  unsigned Index = ArgCI->getZExtValue();
12553  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
12554  Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
12555 
12556  // Account for endianness by treating this as just a shuffle. So we use the
12557  // same indices for both LE and BE in order to produce expected results in
12558  // both cases.
12559  unsigned ElemIdx0 = (Index & 2) >> 1;
12560  unsigned ElemIdx1 = 2 + (Index & 1);
12561 
12562  Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
12563  ConstantInt::get(Int32Ty, ElemIdx1)};
12564  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
12565 
12566  Value *ShuffleCall =
12567  Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
12568  QualType BIRetType = E->getType();
12569  auto RetTy = ConvertType(BIRetType);
12570  return Builder.CreateBitCast(ShuffleCall, RetTy);
12571  }
12572 
12573  case PPC::BI__builtin_vsx_xxsldwi: {
12574  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
12575  assert(ArgCI && "Third argument must be a compile time constant");
12576  unsigned Index = ArgCI->getZExtValue() & 0x3;
12577  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
12578  Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
12579 
12580  // Create a shuffle mask
12581  unsigned ElemIdx0;
12582  unsigned ElemIdx1;
12583  unsigned ElemIdx2;
12584  unsigned ElemIdx3;
12585  if (getTarget().isLittleEndian()) {
12586  // Little endian element N comes from element 8+N-Index of the
12587  // concatenated wide vector (of course, using modulo arithmetic on
12588  // the total number of elements).
12589  ElemIdx0 = (8 - Index) % 8;
12590  ElemIdx1 = (9 - Index) % 8;
12591  ElemIdx2 = (10 - Index) % 8;
12592  ElemIdx3 = (11 - Index) % 8;
12593  } else {
12594  // Big endian ElemIdx<N> = Index + N
12595  ElemIdx0 = Index;
12596  ElemIdx1 = Index + 1;
12597  ElemIdx2 = Index + 2;
12598  ElemIdx3 = Index + 3;
12599  }
12600 
12601  Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
12602  ConstantInt::get(Int32Ty, ElemIdx1),
12603  ConstantInt::get(Int32Ty, ElemIdx2),
12604  ConstantInt::get(Int32Ty, ElemIdx3)};
12605 
12606  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
12607  Value *ShuffleCall =
12608  Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
12609  QualType BIRetType = E->getType();
12610  auto RetTy = ConvertType(BIRetType);
12611  return Builder.CreateBitCast(ShuffleCall, RetTy);
12612  }
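// Illustrative example (hypothetical shift count): with Index = 1 the mask
// over the 8-word concatenation of Ops[0] and Ops[1] is {1, 2, 3, 4} on
// big-endian and {7, 0, 1, 2} on little-endian, per the formulas above.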
12613 
12614  case PPC::BI__builtin_pack_vector_int128: {
12615  bool isLittleEndian = getTarget().isLittleEndian();
12616  Value *UndefValue =
12617  llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), 2));
12618  Value *Res = Builder.CreateInsertElement(
12619  UndefValue, Ops[0], (uint64_t)(isLittleEndian ? 1 : 0));
12620  Res = Builder.CreateInsertElement(Res, Ops[1],
12621  (uint64_t)(isLittleEndian ? 0 : 1));
12622  return Builder.CreateBitCast(Res, ConvertType(E->getType()));
12623  }
12624 
12625  case PPC::BI__builtin_unpack_vector_int128: {
12626  ConstantInt *Index = cast<ConstantInt>(Ops[1]);
12627  Value *Unpacked = Builder.CreateBitCast(
12628  Ops[0], llvm::VectorType::get(ConvertType(E->getType()), 2));
12629 
12630  if (getTarget().isLittleEndian())
12631  Index = ConstantInt::get(Index->getType(), 1 - Index->getZExtValue());
12632 
12633  return Builder.CreateExtractElement(Unpacked, Index);
12634  }
12635  }
12636 }
12637 
12638 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
12639  const CallExpr *E) {
12640  switch (BuiltinID) {
12641  case AMDGPU::BI__builtin_amdgcn_div_scale:
12642  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
12643  // Translate from the intrinsic's struct return to the builtin's out
12644  // argument.
12645 
12646  Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
12647 
12648  llvm::Value *X = EmitScalarExpr(E->getArg(0));
12649  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
12650  llvm::Value *Z = EmitScalarExpr(E->getArg(2));
12651 
12652  llvm::Function *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
12653  X->getType());
12654 
12655  llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
12656 
12657  llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
12658  llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
12659 
12660  llvm::Type *RealFlagType
12661  = FlagOutPtr.getPointer()->getType()->getPointerElementType();
12662 
12663  llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
12664  Builder.CreateStore(FlagExt, FlagOutPtr);
12665  return Result;
12666  }
12667  case AMDGPU::BI__builtin_amdgcn_div_fmas:
12668  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
12669  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
12670  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
12671  llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
12672  llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
12673 
12674  llvm::Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
12675  Src0->getType());
12676  llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
12677  return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
12678  }
12679 
12680  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
12681  return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
12682  case AMDGPU::BI__builtin_amdgcn_mov_dpp8:
12683  return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_mov_dpp8);
12684  case AMDGPU::BI__builtin_amdgcn_mov_dpp:
12685  case AMDGPU::BI__builtin_amdgcn_update_dpp: {
12686  llvm::SmallVector<llvm::Value *, 6> Args;
12687  for (unsigned I = 0; I != E->getNumArgs(); ++I)
12688  Args.push_back(EmitScalarExpr(E->getArg(I)));
12689  assert(Args.size() == 5 || Args.size() == 6);
12690  if (Args.size() == 5)
12691  Args.insert(Args.begin(), llvm::UndefValue::get(Args[0]->getType()));
12692  Function *F =
12693  CGM.getIntrinsic(Intrinsic::amdgcn_update_dpp, Args[0]->getType());
12694  return Builder.CreateCall(F, Args);
12695  }
12696  case AMDGPU::BI__builtin_amdgcn_div_fixup:
12697  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
12698  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
12699  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
12700  case AMDGPU::BI__builtin_amdgcn_trig_preop:
12701  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
12702  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
12703  case AMDGPU::BI__builtin_amdgcn_rcp:
12704  case AMDGPU::BI__builtin_amdgcn_rcpf:
12705  case AMDGPU::BI__builtin_amdgcn_rcph:
12706  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
12707  case AMDGPU::BI__builtin_amdgcn_rsq:
12708  case AMDGPU::BI__builtin_amdgcn_rsqf:
12709  case AMDGPU::BI__builtin_amdgcn_rsqh:
12710  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
12711  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
12712  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
12713  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
12714  case AMDGPU::BI__builtin_amdgcn_sinf:
12715  case AMDGPU::BI__builtin_amdgcn_sinh:
12716  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
12717  case AMDGPU::BI__builtin_amdgcn_cosf:
12718  case AMDGPU::BI__builtin_amdgcn_cosh:
12719  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
12720  case AMDGPU::BI__builtin_amdgcn_log_clampf:
12721  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
12722  case AMDGPU::BI__builtin_amdgcn_ldexp:
12723  case AMDGPU::BI__builtin_amdgcn_ldexpf:
12724  case AMDGPU::BI__builtin_amdgcn_ldexph:
12725  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
12726  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
12727  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
12728  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
12729  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
12730  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
12731  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
12732  Value *Src0 = EmitScalarExpr(E->getArg(0));
12733  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
12734  { Builder.getInt32Ty(), Src0->getType() });
12735  return Builder.CreateCall(F, Src0);
12736  }
12737  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
12738  Value *Src0 = EmitScalarExpr(E->getArg(0));
12739  Function *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
12740  { Builder.getInt16Ty(), Src0->getType() });
12741  return Builder.CreateCall(F, Src0);
12742  }
12743  case AMDGPU::BI__builtin_amdgcn_fract:
12744  case AMDGPU::BI__builtin_amdgcn_fractf:
12745  case AMDGPU::BI__builtin_amdgcn_fracth:
12746  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
12747  case AMDGPU::BI__builtin_amdgcn_lerp:
12748  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
12749  case AMDGPU::BI__builtin_amdgcn_ubfe:
12750  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_ubfe);
12751  case AMDGPU::BI__builtin_amdgcn_sbfe:
12752  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_sbfe);
12753  case AMDGPU::BI__builtin_amdgcn_uicmp:
12754  case AMDGPU::BI__builtin_amdgcn_uicmpl:
12755  case AMDGPU::BI__builtin_amdgcn_sicmp:
12756  case AMDGPU::BI__builtin_amdgcn_sicmpl: {
12757  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
12758  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
12759  llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
12760 
12761  // FIXME-GFX10: How should 32 bit mask be handled?
12762  Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_icmp,
12763  { Builder.getInt64Ty(), Src0->getType() });
12764  return Builder.CreateCall(F, { Src0, Src1, Src2 });
12765  }
12766  case AMDGPU::BI__builtin_amdgcn_fcmp:
12767  case AMDGPU::BI__builtin_amdgcn_fcmpf: {
12768  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
12769  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
12770  llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
12771 
12772  // FIXME-GFX10: How should 32 bit mask be handled?
12773  Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_fcmp,
12774  { Builder.getInt64Ty(), Src0->getType() });
12775  return Builder.CreateCall(F, { Src0, Src1, Src2 });
12776  }
12777  case AMDGPU::BI__builtin_amdgcn_class:
12778  case AMDGPU::BI__builtin_amdgcn_classf:
12779  case AMDGPU::BI__builtin_amdgcn_classh:
12780  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
12781  case AMDGPU::BI__builtin_amdgcn_fmed3f:
12782  case AMDGPU::BI__builtin_amdgcn_fmed3h:
12783  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
12784  case AMDGPU::BI__builtin_amdgcn_ds_append:
12785  case AMDGPU::BI__builtin_amdgcn_ds_consume: {
12786  Intrinsic::ID Intrin = BuiltinID == AMDGPU::BI__builtin_amdgcn_ds_append ?
12787  Intrinsic::amdgcn_ds_append : Intrinsic::amdgcn_ds_consume;
12788  Value *Src0 = EmitScalarExpr(E->getArg(0));
12789  Function *F = CGM.getIntrinsic(Intrin, { Src0->getType() });
12790  return Builder.CreateCall(F, { Src0, Builder.getFalse() });
12791  }
12792  case AMDGPU::BI__builtin_amdgcn_read_exec: {
12793  CallInst *CI = cast<CallInst>(
12794  EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
12795  CI->setConvergent();
12796  return CI;
12797  }
12798  case AMDGPU::BI__builtin_amdgcn_read_exec_lo:
12799  case AMDGPU::BI__builtin_amdgcn_read_exec_hi: {
12800  StringRef RegName = BuiltinID == AMDGPU::BI__builtin_amdgcn_read_exec_lo ?
12801  "exec_lo" : "exec_hi";
12802  CallInst *CI = cast<CallInst>(
12803  EmitSpecialRegisterBuiltin(*this, E, Int32Ty, Int32Ty, true, RegName));
12804  CI->setConvergent();
12805  return CI;
12806  }
12807  // amdgcn workitem
12808  case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
12809  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
12810  case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
12811  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
12812  case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
12813  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
12814 
12815  // r600 intrinsics
12816  case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
12817  case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
12818  return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
12819  case AMDGPU::BI__builtin_r600_read_tidig_x:
12820  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
12821  case AMDGPU::BI__builtin_r600_read_tidig_y:
12822  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
12823  case AMDGPU::BI__builtin_r600_read_tidig_z:
12824  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
12825  default:
12826  return nullptr;
12827  }
12828 }
12829 
12830 /// Handle a SystemZ function in which the final argument is a pointer
12831 /// to an int that receives the post-instruction CC value. At the LLVM level
12832 /// this is represented as a function that returns a {result, cc} pair.
12833 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
12834  unsigned IntrinsicID,
12835  const CallExpr *E) {
12836  unsigned NumArgs = E->getNumArgs() - 1;
12837  SmallVector<Value *, 8> Args(NumArgs);
12838  for (unsigned I = 0; I < NumArgs; ++I)
12839  Args[I] = CGF.EmitScalarExpr(E->getArg(I));
12840  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
12841  Function *F = CGF.CGM.getIntrinsic(IntrinsicID);
12842  Value *Call = CGF.Builder.CreateCall(F, Args);
12843  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
12844  CGF.Builder.CreateStore(CC, CCPtr);
12845  return CGF.Builder.CreateExtractValue(Call, 0);
12846 }
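// Illustrative example (hypothetical caller): for a source-level call such as
//   R = __builtin_s390_vceqbs(A, B, &CC);
// the helper above emits the {result, cc} intrinsic llvm.s390.vceqbs(A, B),
// stores element 1 of the returned pair through &CC, and returns element 0
// as R.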
12847 
12848 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
12849  const CallExpr *E) {
12850  switch (BuiltinID) {
12851  case SystemZ::BI__builtin_tbegin: {
12852  Value *TDB = EmitScalarExpr(E->getArg(0));
12853  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
12854  Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
12855  return Builder.CreateCall(F, {TDB, Control});
12856  }
12857  case SystemZ::BI__builtin_tbegin_nofloat: {
12858  Value *TDB = EmitScalarExpr(E->getArg(0));
12859  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
12860  Function *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
12861  return Builder.CreateCall(F, {TDB, Control});
12862  }
12863  case SystemZ::BI__builtin_tbeginc: {
12864  Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
12865  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
12866  Function *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
12867  return Builder.CreateCall(F, {TDB, Control});
12868  }
12869  case SystemZ::BI__builtin_tabort: {
12870  Value *Data = EmitScalarExpr(E->getArg(0));
12871  Function *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
12872  return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
12873  }
12874  case SystemZ::BI__builtin_non_tx_store: {
12875  Value *Address = EmitScalarExpr(E->getArg(0));
12876  Value *Data = EmitScalarExpr(E->getArg(1));
12877  Function *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
12878  return Builder.CreateCall(F, {Data, Address});
12879  }
12880 
12881  // Vector builtins. Note that most vector builtins are mapped automatically
12882  // to target-specific LLVM intrinsics. The ones handled specially here can
12883  // be represented via standard LLVM IR, which is preferable because it
12884  // enables common LLVM optimizations.
12885 
12886  case SystemZ::BI__builtin_s390_vpopctb:
12887  case SystemZ::BI__builtin_s390_vpopcth:
12888  case SystemZ::BI__builtin_s390_vpopctf:
12889  case SystemZ::BI__builtin_s390_vpopctg: {
12890  llvm::Type *ResultType = ConvertType(E->getType());
12891  Value *X = EmitScalarExpr(E->getArg(0));
12892  Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
12893  return Builder.CreateCall(F, X);
12894  }
12895 
12896  case SystemZ::BI__builtin_s390_vclzb:
12897  case SystemZ::BI__builtin_s390_vclzh:
12898  case SystemZ::BI__builtin_s390_vclzf:
12899  case SystemZ::BI__builtin_s390_vclzg: {
12900  llvm::Type *ResultType = ConvertType(E->getType());
12901  Value *X = EmitScalarExpr(E->getArg(0));
12902  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
12903  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
12904  return Builder.CreateCall(F, {X, Undef});
12905  }
12906 
12907  case SystemZ::BI__builtin_s390_vctzb:
12908  case SystemZ::BI__builtin_s390_vctzh:
12909  case SystemZ::BI__builtin_s390_vctzf:
12910  case SystemZ::BI__builtin_s390_vctzg: {
12911  llvm::Type *ResultType = ConvertType(E->getType());
12912  Value *X = EmitScalarExpr(E->getArg(0));
12913  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
12914  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
12915  return Builder.CreateCall(F, {X, Undef});
12916  }
12917 
12918  case SystemZ::BI__builtin_s390_vfsqsb:
12919  case SystemZ::BI__builtin_s390_vfsqdb: {
12920  llvm::Type *ResultType = ConvertType(E->getType());
12921  Value *X = EmitScalarExpr(E->getArg(0));
12922  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
12923  return Builder.CreateCall(F, X);
12924  }
12925  case SystemZ::BI__builtin_s390_vfmasb:
12926  case SystemZ::BI__builtin_s390_vfmadb: {
12927  llvm::Type *ResultType = ConvertType(E->getType());
12928  Value *X = EmitScalarExpr(E->getArg(0));
12929  Value *Y = EmitScalarExpr(E->getArg(1));
12930  Value *Z = EmitScalarExpr(E->getArg(2));
12931  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12932  return Builder.CreateCall(F, {X, Y, Z});
12933  }
12934  case SystemZ::BI__builtin_s390_vfmssb:
12935  case SystemZ::BI__builtin_s390_vfmsdb: {
12936  llvm::Type *ResultType = ConvertType(E->getType());
12937  Value *X = EmitScalarExpr(E->getArg(0));
12938  Value *Y = EmitScalarExpr(E->getArg(1));
12939  Value *Z = EmitScalarExpr(E->getArg(2));
12940  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12941  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12942  return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
12943  }
12944  case SystemZ::BI__builtin_s390_vfnmasb:
12945  case SystemZ::BI__builtin_s390_vfnmadb: {
12946  llvm::Type *ResultType = ConvertType(E->getType());
12947  Value *X = EmitScalarExpr(E->getArg(0));
12948  Value *Y = EmitScalarExpr(E->getArg(1));
12949  Value *Z = EmitScalarExpr(E->getArg(2));
12950  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12951  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12952  return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
12953  }
12954  case SystemZ::BI__builtin_s390_vfnmssb:
12955  case SystemZ::BI__builtin_s390_vfnmsdb: {
12956  llvm::Type *ResultType = ConvertType(E->getType());
12957  Value *X = EmitScalarExpr(E->getArg(0));
12958  Value *Y = EmitScalarExpr(E->getArg(1));
12959  Value *Z = EmitScalarExpr(E->getArg(2));
12960  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12961  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
12962  Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
12963  return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
12964  }
12965  case SystemZ::BI__builtin_s390_vflpsb:
12966  case SystemZ::BI__builtin_s390_vflpdb: {
12967  llvm::Type *ResultType = ConvertType(E->getType());
12968  Value *X = EmitScalarExpr(E->getArg(0));
12969  Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
12970  return Builder.CreateCall(F, X);
12971  }
12972  case SystemZ::BI__builtin_s390_vflnsb:
12973  case SystemZ::BI__builtin_s390_vflndb: {
12974  llvm::Type *ResultType = ConvertType(E->getType());
12975  Value *X = EmitScalarExpr(E->getArg(0));
12976  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
12977  Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
12978  return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
12979  }
12980  case SystemZ::BI__builtin_s390_vfisb:
12981  case SystemZ::BI__builtin_s390_vfidb: {
12982  llvm::Type *ResultType = ConvertType(E->getType());
12983  Value *X = EmitScalarExpr(E->getArg(0));
12984  // Constant-fold the M4 and M5 mask arguments.
12985  llvm::APSInt M4, M5;
12986  bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
12987  bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
12988  assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
12989  (void)IsConstM4; (void)IsConstM5;
12990  // Check whether this instance can be represented via an LLVM standard
12991  // intrinsic. We only support some combinations of M4 and M5.
12992  Intrinsic::ID ID = Intrinsic::not_intrinsic;
12993  switch (M4.getZExtValue()) {
12994  default: break;
12995  case 0: // IEEE-inexact exception allowed
12996  switch (M5.getZExtValue()) {
12997  default: break;
12998  case 0: ID = Intrinsic::rint; break;
12999  }
13000  break;
13001  case 4: // IEEE-inexact exception suppressed
13002  switch (M5.getZExtValue()) {
13003  default: break;
13004  case 0: ID = Intrinsic::nearbyint; break;
13005  case 1: ID = Intrinsic::round; break;
13006  case 5: ID = Intrinsic::trunc; break;
13007  case 6: ID = Intrinsic::ceil; break;
13008  case 7: ID = Intrinsic::floor; break;
13009  }
13010  break;
13011  }
13012  if (ID != Intrinsic::not_intrinsic) {
13013  Function *F = CGM.getIntrinsic(ID, ResultType);
13014  return Builder.CreateCall(F, X);
13015  }
13016  switch (BuiltinID) {
13017  case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
13018  case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
13019  default: llvm_unreachable("Unknown BuiltinID");
13020  }
13021  Function *F = CGM.getIntrinsic(ID);
13022  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
13023  Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
13024  return Builder.CreateCall(F, {X, M4Value, M5Value});
13025  }
13026  case SystemZ::BI__builtin_s390_vfmaxsb:
13027  case SystemZ::BI__builtin_s390_vfmaxdb: {
13028  llvm::Type *ResultType = ConvertType(E->getType());
13029  Value *X = EmitScalarExpr(E->getArg(0));
13030  Value *Y = EmitScalarExpr(E->getArg(1));
13031  // Constant-fold the M4 mask argument.
13032  llvm::APSInt M4;
13033  bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
13034  assert(IsConstM4 && "Constant arg isn't actually constant?");
13035  (void)IsConstM4;
13036  // Check whether this instance can be represented via an LLVM standard
13037  // intrinsic. We only support some values of M4.
13038  Intrinsic::ID ID = Intrinsic::not_intrinsic;
13039  switch (M4.getZExtValue()) {
13040  default: break;
13041  case 4: ID = Intrinsic::maxnum; break;
13042  }
13043  if (ID != Intrinsic::not_intrinsic) {
13044  Function *F = CGM.getIntrinsic(ID, ResultType);
13045  return Builder.CreateCall(F, {X, Y});
13046  }
13047  switch (BuiltinID) {
13048  case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
13049  case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
13050  default: llvm_unreachable("Unknown BuiltinID");
13051  }
13052  Function *F = CGM.getIntrinsic(ID);
13053  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
13054  return Builder.CreateCall(F, {X, Y, M4Value});
13055  }
13056  case SystemZ::BI__builtin_s390_vfminsb:
13057  case SystemZ::BI__builtin_s390_vfmindb: {
13058  llvm::Type *ResultType = ConvertType(E->getType());
13059  Value *X = EmitScalarExpr(E->getArg(0));
13060  Value *Y = EmitScalarExpr(E->getArg(1));
13061  // Constant-fold the M4 mask argument.
13062  llvm::APSInt M4;
13063  bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
13064  assert(IsConstM4 && "Constant arg isn't actually constant?");
13065  (void)IsConstM4;
13066  // Check whether this instance can be represented via an LLVM standard
13067  // intrinsic. We only support some values of M4.
13068  Intrinsic::ID ID = Intrinsic::not_intrinsic;
13069  switch (M4.getZExtValue()) {
13070  default: break;
13071  case 4: ID = Intrinsic::minnum; break;
13072  }
13073  if (ID != Intrinsic::not_intrinsic) {
13074  Function *F = CGM.getIntrinsic(ID, ResultType);
13075  return Builder.CreateCall(F, {X, Y});
13076  }
13077  switch (BuiltinID) {
13078  case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
13079  case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
13080  default: llvm_unreachable("Unknown BuiltinID");
13081  }
13082  Function *F = CGM.getIntrinsic(ID);
13083  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
13084  return Builder.CreateCall(F, {X, Y, M4Value});
13085  }
13086 
13087  case SystemZ::BI__builtin_s390_vlbrh:
13088  case SystemZ::BI__builtin_s390_vlbrf:
13089  case SystemZ::BI__builtin_s390_vlbrg: {
13090  llvm::Type *ResultType = ConvertType(E->getType());
13091  Value *X = EmitScalarExpr(E->getArg(0));
13092  Function *F = CGM.getIntrinsic(Intrinsic::bswap, ResultType);
13093  return Builder.CreateCall(F, X);
13094  }
13095 
13096  // Vector intrinsics that output the post-instruction CC value.
13097 
13098 #define INTRINSIC_WITH_CC(NAME) \
13099  case SystemZ::BI__builtin_##NAME: \
13100  return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
13101 
13102  INTRINSIC_WITH_CC(s390_vpkshs);
13103  INTRINSIC_WITH_CC(s390_vpksfs);
13104  INTRINSIC_WITH_CC(s390_vpksgs);
13105 
13106  INTRINSIC_WITH_CC(s390_vpklshs);
13107  INTRINSIC_WITH_CC(s390_vpklsfs);
13108  INTRINSIC_WITH_CC(s390_vpklsgs);
13109 
13110  INTRINSIC_WITH_CC(s390_vceqbs);
13111  INTRINSIC_WITH_CC(s390_vceqhs);
13112  INTRINSIC_WITH_CC(s390_vceqfs);
13113  INTRINSIC_WITH_CC(s390_vceqgs);
13114 
13115  INTRINSIC_WITH_CC(s390_vchbs);
13116  INTRINSIC_WITH_CC(s390_vchhs);
13117  INTRINSIC_WITH_CC(s390_vchfs);
13118  INTRINSIC_WITH_CC(s390_vchgs);
13119 
13120  INTRINSIC_WITH_CC(s390_vchlbs);
13121  INTRINSIC_WITH_CC(s390_vchlhs);
13122  INTRINSIC_WITH_CC(s390_vchlfs);
13123  INTRINSIC_WITH_CC(s390_vchlgs);
13124 
13125  INTRINSIC_WITH_CC(s390_vfaebs);
13126  INTRINSIC_WITH_CC(s390_vfaehs);
13127  INTRINSIC_WITH_CC(s390_vfaefs);
13128 
13129  INTRINSIC_WITH_CC(s390_vfaezbs);
13130  INTRINSIC_WITH_CC(s390_vfaezhs);
13131  INTRINSIC_WITH_CC(s390_vfaezfs);
13132 
13133  INTRINSIC_WITH_CC(s390_vfeebs);
13134  INTRINSIC_WITH_CC(s390_vfeehs);
13135  INTRINSIC_WITH_CC(s390_vfeefs);
13136 
13137  INTRINSIC_WITH_CC(s390_vfeezbs);
13138  INTRINSIC_WITH_CC(s390_vfeezhs);
13139  INTRINSIC_WITH_CC(s390_vfeezfs);
13140 
13141  INTRINSIC_WITH_CC(s390_vfenebs);
13142  INTRINSIC_WITH_CC(s390_vfenehs);
13143  INTRINSIC_WITH_CC(s390_vfenefs);
13144 
13145  INTRINSIC_WITH_CC(s390_vfenezbs);
13146  INTRINSIC_WITH_CC(s390_vfenezhs);
13147  INTRINSIC_WITH_CC(s390_vfenezfs);
13148 
13149  INTRINSIC_WITH_CC(s390_vistrbs);
13150  INTRINSIC_WITH_CC(s390_vistrhs);
13151  INTRINSIC_WITH_CC(s390_vistrfs);
13152 
13153  INTRINSIC_WITH_CC(s390_vstrcbs);
13154  INTRINSIC_WITH_CC(s390_vstrchs);
13155  INTRINSIC_WITH_CC(s390_vstrcfs);
13156 
13157  INTRINSIC_WITH_CC(s390_vstrczbs);
13158  INTRINSIC_WITH_CC(s390_vstrczhs);
13159  INTRINSIC_WITH_CC(s390_vstrczfs);
13160 
13161  INTRINSIC_WITH_CC(s390_vfcesbs);
13162  INTRINSIC_WITH_CC(s390_vfcedbs);
13163  INTRINSIC_WITH_CC(s390_vfchsbs);
13164  INTRINSIC_WITH_CC(s390_vfchdbs);
13165  INTRINSIC_WITH_CC(s390_vfchesbs);
13166  INTRINSIC_WITH_CC(s390_vfchedbs);
13167 
13168  INTRINSIC_WITH_CC(s390_vftcisb);
13169  INTRINSIC_WITH_CC(s390_vftcidb);
13170 
13171  INTRINSIC_WITH_CC(s390_vstrsb);
13172  INTRINSIC_WITH_CC(s390_vstrsh);
13173  INTRINSIC_WITH_CC(s390_vstrsf);
13174 
13175  INTRINSIC_WITH_CC(s390_vstrszb);
13176  INTRINSIC_WITH_CC(s390_vstrszh);
13177  INTRINSIC_WITH_CC(s390_vstrszf);
13178 
13179 #undef INTRINSIC_WITH_CC
13180 
13181  default:
13182  return nullptr;
13183  }
13184 }
13185 
13186 namespace {
13187 // Helper classes for mapping MMA builtins to the corresponding LLVM intrinsic variants.
13188 struct NVPTXMmaLdstInfo {
13189  unsigned NumResults; // Number of elements to load/store
13190  // Intrinsic IDs for row/col variants. 0 if particular layout is unsupported.
13191  unsigned IID_col;
13192  unsigned IID_row;
13193 };
13194 
13195 #define MMA_INTR(geom_op_type, layout) \
13196  Intrinsic::nvvm_wmma_##geom_op_type##_##layout##_stride
13197 #define MMA_LDST(n, geom_op_type) \
13198  { n, MMA_INTR(geom_op_type, col), MMA_INTR(geom_op_type, row) }
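// For illustration: MMA_LDST(8, m16n16k16_load_a_f16) expands to
// { 8, Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_col_stride,
//      Intrinsic::nvvm_wmma_m16n16k16_load_a_f16_row_stride },
// i.e. NumResults followed by the column-major and row-major intrinsic IDs.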
13199 
13200 static NVPTXMmaLdstInfo getNVPTXMmaLdstInfo(unsigned BuiltinID) {
13201  switch (BuiltinID) {
13202  // FP MMA loads
13203  case NVPTX::BI__hmma_m16n16k16_ld_a:
13204  return MMA_LDST(8, m16n16k16_load_a_f16);
13205  case NVPTX::BI__hmma_m16n16k16_ld_b:
13206  return MMA_LDST(8, m16n16k16_load_b_f16);
13207  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
13208  return MMA_LDST(4, m16n16k16_load_c_f16);
13209  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
13210  return MMA_LDST(8, m16n16k16_load_c_f32);
13211  case NVPTX::BI__hmma_m32n8k16_ld_a:
13212  return MMA_LDST(8, m32n8k16_load_a_f16);
13213  case NVPTX::BI__hmma_m32n8k16_ld_b:
13214  return MMA_LDST(8, m32n8k16_load_b_f16);
13215  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
13216  return MMA_LDST(4, m32n8k16_load_c_f16);
13217  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
13218  return MMA_LDST(8, m32n8k16_load_c_f32);
13219  case NVPTX::BI__hmma_m8n32k16_ld_a:
13220  return MMA_LDST(8, m8n32k16_load_a_f16);
13221  case NVPTX::BI__hmma_m8n32k16_ld_b:
13222  return MMA_LDST(8, m8n32k16_load_b_f16);
13223  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
13224  return MMA_LDST(4, m8n32k16_load_c_f16);
13225  case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
13226  return MMA_LDST(8, m8n32k16_load_c_f32);
13227 
13228  // Integer MMA loads
13229  case NVPTX::BI__imma_m16n16k16_ld_a_s8:
13230  return MMA_LDST(2, m16n16k16_load_a_s8);
13231  case NVPTX::BI__imma_m16n16k16_ld_a_u8:
13232  return MMA_LDST(2, m16n16k16_load_a_u8);
13233  case NVPTX::BI__imma_m16n16k16_ld_b_s8:
13234  return MMA_LDST(2, m16n16k16_load_b_s8);
13235  case NVPTX::BI__imma_m16n16k16_ld_b_u8:
13236  return MMA_LDST(2, m16n16k16_load_b_u8);
13237  case NVPTX::BI__imma_m16n16k16_ld_c:
13238  return MMA_LDST(8, m16n16k16_load_c_s32);
13239  case NVPTX::BI__imma_m32n8k16_ld_a_s8:
13240  return MMA_LDST(4, m32n8k16_load_a_s8);
13241  case NVPTX::BI__imma_m32n8k16_ld_a_u8:
13242  return MMA_LDST(4, m32n8k16_load_a_u8);
13243  case NVPTX::BI__imma_m32n8k16_ld_b_s8:
13244  return MMA_LDST(1, m32n8k16_load_b_s8);
13245  case NVPTX::BI__imma_m32n8k16_ld_b_u8:
13246  return MMA_LDST(1, m32n8k16_load_b_u8);
13247  case NVPTX::BI__imma_m32n8k16_ld_c:
13248  return MMA_LDST(8, m32n8k16_load_c_s32);
13249  case NVPTX::BI__imma_m8n32k16_ld_a_s8:
13250  return MMA_LDST(1, m8n32k16_load_a_s8);
13251  case NVPTX::BI__imma_m8n32k16_ld_a_u8:
13252  return MMA_LDST(1, m8n32k16_load_a_u8);
13253  case NVPTX::BI__imma_m8n32k16_ld_b_s8:
13254  return MMA_LDST(4, m8n32k16_load_b_s8);
13255  case NVPTX::BI__imma_m8n32k16_ld_b_u8:
13256  return MMA_LDST(4, m8n32k16_load_b_u8);
13257  case NVPTX::BI__imma_m8n32k16_ld_c:
13258  return MMA_LDST(8, m8n32k16_load_c_s32);
13259 
13260  // Sub-integer MMA loads.
13261  // Only row/col layout is supported by A/B fragments.
13262  case NVPTX::BI__imma_m8n8k32_ld_a_s4:
13263  return {1, 0, MMA_INTR(m8n8k32_load_a_s4, row)};
13264  case NVPTX::BI__imma_m8n8k32_ld_a_u4:
13265  return {1, 0, MMA_INTR(m8n8k32_load_a_u4, row)};
13266  case NVPTX::BI__imma_m8n8k32_ld_b_s4:
13267  return {1, MMA_INTR(m8n8k32_load_b_s4, col), 0};
13268  case NVPTX::BI__imma_m8n8k32_ld_b_u4:
13269  return {1, MMA_INTR(m8n8k32_load_b_u4, col), 0};
13270  case NVPTX::BI__imma_m8n8k32_ld_c:
13271  return MMA_LDST(2, m8n8k32_load_c_s32);
13272  case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
13273  return {1, 0, MMA_INTR(m8n8k128_load_a_b1, row)};
13274  case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
13275  return {1, MMA_INTR(m8n8k128_load_b_b1, col), 0};
13276  case NVPTX::BI__bmma_m8n8k128_ld_c:
13277  return MMA_LDST(2, m8n8k128_load_c_s32);
13278 
13279  // NOTE: We need to follow the inconsistent naming scheme used by NVCC. Unlike
13280  // PTX and LLVM IR where stores always use fragment D, NVCC builtins always
13281  // use fragment C for both loads and stores.
13282  // FP MMA stores.
13283  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
13284  return MMA_LDST(4, m16n16k16_store_d_f16);
13285  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
13286  return MMA_LDST(8, m16n16k16_store_d_f32);
13287  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
13288  return MMA_LDST(4, m32n8k16_store_d_f16);
13289  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
13290  return MMA_LDST(8, m32n8k16_store_d_f32);
13291  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
13292  return MMA_LDST(4, m8n32k16_store_d_f16);
13293  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
13294  return MMA_LDST(8, m8n32k16_store_d_f32);
13295 
13296  // Integer and sub-integer MMA stores.
13297  // Another naming quirk. Unlike other MMA builtins that use PTX types in the
13298  // name, integer loads/stores use LLVM's i32.
13299  case NVPTX::BI__imma_m16n16k16_st_c_i32:
13300  return MMA_LDST(8, m16n16k16_store_d_s32);
13301  case NVPTX::BI__imma_m32n8k16_st_c_i32:
13302  return MMA_LDST(8, m32n8k16_store_d_s32);
13303  case NVPTX::BI__imma_m8n32k16_st_c_i32:
13304  return MMA_LDST(8, m8n32k16_store_d_s32);
13305  case NVPTX::BI__imma_m8n8k32_st_c_i32:
13306  return MMA_LDST(2, m8n8k32_store_d_s32);
13307  case NVPTX::BI__bmma_m8n8k128_st_c_i32:
13308  return MMA_LDST(2, m8n8k128_store_d_s32);
13309 
13310  default:
13311  llvm_unreachable("Unknown MMA builtin");
13312  }
13313 }
13314 #undef MMA_LDST
13315 #undef MMA_INTR
13316 
13317 
13318 struct NVPTXMmaInfo {
13319  unsigned NumEltsA;
13320  unsigned NumEltsB;
13321  unsigned NumEltsC;
13322  unsigned NumEltsD;
13323  std::array<unsigned, 8> Variants;
13324 
13325  unsigned getMMAIntrinsic(int Layout, bool Satf) {
13326  unsigned Index = Layout * 2 + Satf;
13327  if (Index >= Variants.size())
13328  return 0;
13329  return Variants[Index];
13330  }
13331 };
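// For illustration, with the MMA_VARIANTS ordering defined below, Layout
// encodes the A/B fragment layouts as 0=row.row, 1=row.col, 2=col.row,
// 3=col.col; e.g. Layout=1 with Satf=true gives Index 3 and selects the
// ..._mma_row_col_..._satfinite variant. A zero entry means the combination
// is unsupported.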
13332 
13333  // Returns the intrinsic ID that matches the given Layout and Satf for valid
13334  // combinations, or 0 otherwise.
13335 static NVPTXMmaInfo getNVPTXMmaInfo(unsigned BuiltinID) {
13336  // clang-format off
13337 #define MMA_VARIANTS(geom, type) {{ \
13338  Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type, \
13339  Intrinsic::nvvm_wmma_##geom##_mma_row_row_##type##_satfinite, \
13340  Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
13341  Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
13342  Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type, \
13343  Intrinsic::nvvm_wmma_##geom##_mma_col_row_##type##_satfinite, \
13344  Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type, \
13345  Intrinsic::nvvm_wmma_##geom##_mma_col_col_##type##_satfinite \
13346  }}
13347 // Sub-integer MMA only supports row.col layout.
13348 #define MMA_VARIANTS_I4(geom, type) {{ \
13349  0, \
13350  0, \
13351  Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
13352  Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type##_satfinite, \
13353  0, \
13354  0, \
13355  0, \
13356  0 \
13357  }}
13358 // b1 MMA does not support .satfinite.
13359 #define MMA_VARIANTS_B1(geom, type) {{ \
13360  0, \
13361  0, \
13362  Intrinsic::nvvm_wmma_##geom##_mma_row_col_##type, \
13363  0, \
13364  0, \
13365  0, \
13366  0, \
13367  0 \
13368  }}
13369  // clang-format on
13370  switch (BuiltinID) {
13371  // FP MMA
13372  // Note that the 'type' argument of MMA_VARIANTS uses D_C notation, while
13373  // the NumEltsN fields of the return value are ordered as A,B,C,D.
13374  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
13375  return {8, 8, 4, 4, MMA_VARIANTS(m16n16k16, f16_f16)};
13376  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
13377  return {8, 8, 4, 8, MMA_VARIANTS(m16n16k16, f32_f16)};
13378  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
13379  return {8, 8, 8, 4, MMA_VARIANTS(m16n16k16, f16_f32)};
13380  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
13381  return {8, 8, 8, 8, MMA_VARIANTS(m16n16k16, f32_f32)};
13382  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
13383  return {8, 8, 4, 4, MMA_VARIANTS(m32n8k16, f16_f16)};
13384  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
13385  return {8, 8, 4, 8, MMA_VARIANTS(m32n8k16, f32_f16)};
13386  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
13387  return {8, 8, 8, 4, MMA_VARIANTS(m32n8k16, f16_f32)};
13388  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
13389  return {8, 8, 8, 8, MMA_VARIANTS(m32n8k16, f32_f32)};
13390  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
13391  return {8, 8, 4, 4, MMA_VARIANTS(m8n32k16, f16_f16)};
13392  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
13393  return {8, 8, 4, 8, MMA_VARIANTS(m8n32k16, f32_f16)};
13394  case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
13395  return {8, 8, 8, 4, MMA_VARIANTS(m8n32k16, f16_f32)};
13396  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
13397  return {8, 8, 8, 8, MMA_VARIANTS(m8n32k16, f32_f32)};
13398 
13399  // Integer MMA
13400  case NVPTX::BI__imma_m16n16k16_mma_s8:
13401  return {2, 2, 8, 8, MMA_VARIANTS(m16n16k16, s8)};
13402  case NVPTX::BI__imma_m16n16k16_mma_u8:
13403  return {2, 2, 8, 8, MMA_VARIANTS(m16n16k16, u8)};
13404  case NVPTX::BI__imma_m32n8k16_mma_s8:
13405  return {4, 1, 8, 8, MMA_VARIANTS(m32n8k16, s8)};
13406  case NVPTX::BI__imma_m32n8k16_mma_u8:
13407  return {4, 1, 8, 8, MMA_VARIANTS(m32n8k16, u8)};
13408  case NVPTX::BI__imma_m8n32k16_mma_s8:
13409  return {1, 4, 8, 8, MMA_VARIANTS(m8n32k16, s8)};
13410  case NVPTX::BI__imma_m8n32k16_mma_u8:
13411  return {1, 4, 8, 8, MMA_VARIANTS(m8n32k16, u8)};
13412 
13413  // Sub-integer MMA
13414  case NVPTX::BI__imma_m8n8k32_mma_s4:
13415  return {1, 1, 2, 2, MMA_VARIANTS_I4(m8n8k32, s4)};
13416  case NVPTX::BI__imma_m8n8k32_mma_u4:
13417  return {1, 1, 2, 2, MMA_VARIANTS_I4(m8n8k32, u4)};
13418  case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1:
13419  return {1, 1, 2, 2, MMA_VARIANTS_B1(m8n8k128, b1)};
13420  default:
13421  llvm_unreachable("Unexpected builtin ID.");
13422  }
13423 #undef MMA_VARIANTS
13424 #undef MMA_VARIANTS_I4
13425 #undef MMA_VARIANTS_B1
13426 }
13427 
13428 } // namespace
13429 
13430 Value *
13431 CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID, const CallExpr *E) {
13432  auto MakeLdg = [&](unsigned IntrinsicID) {
13433  Value *Ptr = EmitScalarExpr(E->getArg(0));
13434  clang::CharUnits Align =
13435  getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
13436  return Builder.CreateCall(
13437  CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
13438  Ptr->getType()}),
13439  {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
13440  };
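  // Rough sketch of MakeLdg's output (illustrative IR, not taken from the
  // source): for __nvvm_ldg_i with an 'int *' argument it emits roughly
  //   %v = call i32 @llvm.nvvm.ldg.global.i.i32.p0i32(i32* %ptr, i32 4)
  // where 4 is the natural alignment of the pointee type.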
13441  auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
13442  Value *Ptr = EmitScalarExpr(E->getArg(0));
13443  return Builder.CreateCall(
13444  CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
13445  Ptr->getType()}),
13446  {Ptr, EmitScalarExpr(E->getArg(1))});
13447  };
13448  switch (BuiltinID) {
13449  case NVPTX::BI__nvvm_atom_add_gen_i:
13450  case NVPTX::BI__nvvm_atom_add_gen_l:
13451  case NVPTX::BI__nvvm_atom_add_gen_ll:
13452  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
13453 
13454  case NVPTX::BI__nvvm_atom_sub_gen_i:
13455  case NVPTX::BI__nvvm_atom_sub_gen_l:
13456  case NVPTX::BI__nvvm_atom_sub_gen_ll:
13457  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
13458 
13459  case NVPTX::BI__nvvm_atom_and_gen_i:
13460  case NVPTX::BI__nvvm_atom_and_gen_l:
13461  case NVPTX::BI__nvvm_atom_and_gen_ll:
13462  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
13463 
13464  case NVPTX::BI__nvvm_atom_or_gen_i:
13465  case NVPTX::BI__nvvm_atom_or_gen_l:
13466  case NVPTX::BI__nvvm_atom_or_gen_ll:
13467  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
13468 
13469  case NVPTX::BI__nvvm_atom_xor_gen_i:
13470  case NVPTX::BI__nvvm_atom_xor_gen_l:
13471  case NVPTX::BI__nvvm_atom_xor_gen_ll:
13472  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
13473 
13474  case NVPTX::BI__nvvm_atom_xchg_gen_i:
13475  case NVPTX::BI__nvvm_atom_xchg_gen_l:
13476  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
13477  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
13478 
13479  case NVPTX::BI__nvvm_atom_max_gen_i:
13480  case NVPTX::BI__nvvm_atom_max_gen_l:
13481  case NVPTX::BI__nvvm_atom_max_gen_ll:
13482  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
13483 
13484  case NVPTX::BI__nvvm_atom_max_gen_ui:
13485  case NVPTX::BI__nvvm_atom_max_gen_ul:
13486  case NVPTX::BI__nvvm_atom_max_gen_ull:
13487  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
13488 
13489  case NVPTX::BI__nvvm_atom_min_gen_i:
13490  case NVPTX::BI__nvvm_atom_min_gen_l:
13491  case NVPTX::BI__nvvm_atom_min_gen_ll:
13492  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
13493 
13494  case NVPTX::BI__nvvm_atom_min_gen_ui:
13495  case NVPTX::BI__nvvm_atom_min_gen_ul:
13496  case NVPTX::BI__nvvm_atom_min_gen_ull:
13497  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
13498 
13499  case NVPTX::BI__nvvm_atom_cas_gen_i:
13500  case NVPTX::BI__nvvm_atom_cas_gen_l:
13501  case NVPTX::BI__nvvm_atom_cas_gen_ll:
13502  // __nvvm_atom_cas_gen_* should return the old value rather than the
13503  // success flag.
13504  return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
13505 
13506  case NVPTX::BI__nvvm_atom_add_gen_f:
13507  case NVPTX::BI__nvvm_atom_add_gen_d: {
13508  Value *Ptr = EmitScalarExpr(E->getArg(0));
13509  Value *Val = EmitScalarExpr(E->getArg(1));
13510  return Builder.CreateAtomicRMW(llvm::AtomicRMWInst::FAdd, Ptr, Val,
13511  AtomicOrdering::SequentiallyConsistent);
13512  }
13513 
13514  case NVPTX::BI__nvvm_atom_inc_gen_ui: {
13515  Value *Ptr = EmitScalarExpr(E->getArg(0));
13516  Value *Val = EmitScalarExpr(E->getArg(1));
13517  Function *FnALI32 =
13518  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
13519  return Builder.CreateCall(FnALI32, {Ptr, Val});
13520  }
13521 
13522  case NVPTX::BI__nvvm_atom_dec_gen_ui: {
13523  Value *Ptr = EmitScalarExpr(E->getArg(0));
13524  Value *Val = EmitScalarExpr(E->getArg(1));
13525  Function *FnALD32 =
13526  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
13527  return Builder.CreateCall(FnALD32, {Ptr, Val});
13528  }
13529 
13530  case NVPTX::BI__nvvm_ldg_c:
13531  case NVPTX::BI__nvvm_ldg_c2:
13532  case NVPTX::BI__nvvm_ldg_c4:
13533  case NVPTX::BI__nvvm_ldg_s:
13534  case NVPTX::BI__nvvm_ldg_s2:
13535  case NVPTX::BI__nvvm_ldg_s4:
13536  case NVPTX::BI__nvvm_ldg_i:
13537  case NVPTX::BI__nvvm_ldg_i2:
13538  case NVPTX::BI__nvvm_ldg_i4:
13539  case NVPTX::BI__nvvm_ldg_l:
13540  case NVPTX::BI__nvvm_ldg_ll:
13541  case NVPTX::BI__nvvm_ldg_ll2:
13542  case NVPTX::BI__nvvm_ldg_uc:
13543  case NVPTX::BI__nvvm_ldg_uc2:
13544  case NVPTX::BI__nvvm_ldg_uc4:
13545  case NVPTX::BI__nvvm_ldg_us:
13546  case NVPTX::BI__nvvm_ldg_us2:
13547  case NVPTX::BI__nvvm_ldg_us4:
13548  case NVPTX::BI__nvvm_ldg_ui:
13549  case NVPTX::BI__nvvm_ldg_ui2:
13550  case NVPTX::BI__nvvm_ldg_ui4:
13551  case NVPTX::BI__nvvm_ldg_ul:
13552  case NVPTX::BI__nvvm_ldg_ull:
13553  case NVPTX::BI__nvvm_ldg_ull2:
13554  // PTX Interoperability section 2.2: "For a vector with an even number of
13555  // elements, its alignment is set to number of elements times the alignment
13556  // of its member: n*alignof(t)."
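  // For example, assuming the usual CUDA vector types, an int2 loaded via
  // __nvvm_ldg_i2 gets alignment 2 * alignof(int) == 8 from
  // getNaturalPointeeTypeAlignment above.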
13557  return MakeLdg(Intrinsic::nvvm_ldg_global_i);
13558  case NVPTX::BI__nvvm_ldg_f:
13559  case NVPTX::BI__nvvm_ldg_f2:
13560  case NVPTX::BI__nvvm_ldg_f4:
13561  case NVPTX::BI__nvvm_ldg_d:
13562  case NVPTX::BI__nvvm_ldg_d2:
13563  return MakeLdg(Intrinsic::nvvm_ldg_global_f);
13564 
13565  case NVPTX::BI__nvvm_atom_cta_add_gen_i:
13566  case NVPTX::BI__nvvm_atom_cta_add_gen_l:
13567  case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
13568  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
13569  case NVPTX::BI__nvvm_atom_sys_add_gen_i:
13570  case NVPTX::BI__nvvm_atom_sys_add_gen_l:
13571  case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
13572  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
13573  case NVPTX::BI__nvvm_atom_cta_add_gen_f:
13574  case NVPTX::BI__nvvm_atom_cta_add_gen_d:
13575  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
13576  case NVPTX::BI__nvvm_atom_sys_add_gen_f:
13577  case NVPTX::BI__nvvm_atom_sys_add_gen_d:
13578  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
13579  case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
13580  case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
13581  case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
13582  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
13583  case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
13584  case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
13585  case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
13586  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
13587  case NVPTX::BI__nvvm_atom_cta_max_gen_i:
13588  case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
13589  case NVPTX::BI__nvvm_atom_cta_max_gen_l:
13590  case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
13591  case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
13592  case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
13593  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
13594  case NVPTX::BI__nvvm_atom_sys_max_gen_i:
13595  case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
13596  case NVPTX::BI__nvvm_atom_sys_max_gen_l:
13597  case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
13598  case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
13599  case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
13600  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
13601  case NVPTX::BI__nvvm_atom_cta_min_gen_i:
13602  case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
13603  case NVPTX::BI__nvvm_atom_cta_min_gen_l:
13604  case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
13605  case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
13606  case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
13607  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
13608  case NVPTX::BI__nvvm_atom_sys_min_gen_i:
13609  case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
13610  case NVPTX::BI__nvvm_atom_sys_min_gen_l:
13611  case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
13612  case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
13613  case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
13614  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
13615  case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
13616  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
13617  case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
13618  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
13619  case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
13620  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
13621  case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
13622  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
13623  case NVPTX::BI__nvvm_atom_cta_and_gen_i:
13624  case NVPTX::BI__nvvm_atom_cta_and_gen_l:
13625  case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
13626  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
13627  case NVPTX::BI__nvvm_atom_sys_and_gen_i:
13628  case NVPTX::BI__nvvm_atom_sys_and_gen_l:
13629  case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
13630  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
13631  case NVPTX::BI__nvvm_atom_cta_or_gen_i:
13632  case NVPTX::BI__nvvm_atom_cta_or_gen_l:
13633  case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
13634  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
13635  case NVPTX::BI__nvvm_atom_sys_or_gen_i:
13636  case NVPTX::BI__nvvm_atom_sys_or_gen_l:
13637  case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
13638  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
13639  case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
13640  case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
13641  case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
13642  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
13643  case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
13644  case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
13645  case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
13646  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
13647  case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
13648  case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
13649  case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
13650  Value *Ptr = EmitScalarExpr(E->getArg(0));
13651  return Builder.CreateCall(
13652  CGM.getIntrinsic(
13653  Intrinsic::nvvm_atomic_cas_gen_i_cta,
13654  {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
13655  {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
13656  }
13657  case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
13658  case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
13659  case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
13660  Value *Ptr = EmitScalarExpr(E->getArg(0));
13661  return Builder.CreateCall(
13662  CGM.getIntrinsic(
13663  Intrinsic::nvvm_atomic_cas_gen_i_sys,
13664  {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
13665  {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
13666  }
13667  case NVPTX::BI__nvvm_match_all_sync_i32p:
13668  case NVPTX::BI__nvvm_match_all_sync_i64p: {
13669  Value *Mask = EmitScalarExpr(E->getArg(0));
13670  Value *Val = EmitScalarExpr(E->getArg(1));
13671  Address PredOutPtr = EmitPointerWithAlignment(E->getArg(2));
13672  Value *ResultPair = Builder.CreateCall(
13673  CGM.getIntrinsic(BuiltinID == NVPTX::BI__nvvm_match_all_sync_i32p
13674  ? Intrinsic::nvvm_match_all_sync_i32p
13675  : Intrinsic::nvvm_match_all_sync_i64p),
13676  {Mask, Val});
13677  Value *Pred = Builder.CreateZExt(Builder.CreateExtractValue(ResultPair, 1),
13678  PredOutPtr.getElementType());
13679  Builder.CreateStore(Pred, PredOutPtr);
13680  return Builder.CreateExtractValue(ResultPair, 0);
13681  }
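  // In short: nvvm.match.all.sync returns a {value, predicate} pair; the
  // predicate is widened and stored through the third argument, and the
  // matched value is returned to the caller.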
13682 
13683  // FP MMA loads
13684  case NVPTX::BI__hmma_m16n16k16_ld_a:
13685  case NVPTX::BI__hmma_m16n16k16_ld_b:
13686  case NVPTX::BI__hmma_m16n16k16_ld_c_f16:
13687  case NVPTX::BI__hmma_m16n16k16_ld_c_f32:
13688  case NVPTX::BI__hmma_m32n8k16_ld_a:
13689  case NVPTX::BI__hmma_m32n8k16_ld_b:
13690  case NVPTX::BI__hmma_m32n8k16_ld_c_f16:
13691  case NVPTX::BI__hmma_m32n8k16_ld_c_f32:
13692  case NVPTX::BI__hmma_m8n32k16_ld_a:
13693  case NVPTX::BI__hmma_m8n32k16_ld_b:
13694  case NVPTX::BI__hmma_m8n32k16_ld_c_f16:
13695  case NVPTX::BI__hmma_m8n32k16_ld_c_f32:
13696  // Integer MMA loads.
13697  case NVPTX::BI__imma_m16n16k16_ld_a_s8:
13698  case NVPTX::BI__imma_m16n16k16_ld_a_u8:
13699  case NVPTX::BI__imma_m16n16k16_ld_b_s8:
13700  case NVPTX::BI__imma_m16n16k16_ld_b_u8:
13701  case NVPTX::BI__imma_m16n16k16_ld_c:
13702  case NVPTX::BI__imma_m32n8k16_ld_a_s8:
13703  case NVPTX::BI__imma_m32n8k16_ld_a_u8:
13704  case NVPTX::BI__imma_m32n8k16_ld_b_s8:
13705  case NVPTX::BI__imma_m32n8k16_ld_b_u8:
13706  case NVPTX::BI__imma_m32n8k16_ld_c:
13707  case NVPTX::BI__imma_m8n32k16_ld_a_s8:
13708  case NVPTX::BI__imma_m8n32k16_ld_a_u8:
13709  case NVPTX::BI__imma_m8n32k16_ld_b_s8:
13710  case NVPTX::BI__imma_m8n32k16_ld_b_u8:
13711  case NVPTX::BI__imma_m8n32k16_ld_c:
13712  // Sub-integer MMA loads.
13713  case NVPTX::BI__imma_m8n8k32_ld_a_s4:
13714  case NVPTX::BI__imma_m8n8k32_ld_a_u4:
13715  case NVPTX::BI__imma_m8n8k32_ld_b_s4:
13716  case NVPTX::BI__imma_m8n8k32_ld_b_u4:
13717  case NVPTX::BI__imma_m8n8k32_ld_c:
13718  case NVPTX::BI__bmma_m8n8k128_ld_a_b1:
13719  case NVPTX::BI__bmma_m8n8k128_ld_b_b1:
13720  case NVPTX::BI__bmma_m8n8k128_ld_c:
13721  {
13722  Address Dst = EmitPointerWithAlignment(E->getArg(0));
13723  Value *Src = EmitScalarExpr(E->getArg(1));
13724  Value *Ldm = EmitScalarExpr(E->getArg(2));
13725  llvm::APSInt isColMajorArg;
13726  if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
13727  return nullptr;
13728  bool isColMajor = isColMajorArg.getSExtValue();
13729  NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
13730  unsigned IID = isColMajor ? II.IID_col : II.IID_row;
13731  if (IID == 0)
13732  return nullptr;
13733 
13734  Value *Result =
13735  Builder.CreateCall(CGM.getIntrinsic(IID, Src->getType()), {Src, Ldm});
13736 
13737  // Save returned values.
13738  assert(II.NumResults);
13739  if (II.NumResults == 1) {
13740  Builder.CreateAlignedStore(Result, Dst.getPointer(),
13741  CharUnits::fromQuantity(4));
13742  } else {
13743  for (unsigned i = 0; i < II.NumResults; ++i) {
13744  Builder.CreateAlignedStore(
13745  Builder.CreateBitCast(Builder.CreateExtractValue(Result, i),
13746  Dst.getElementType()),
13747  Builder.CreateGEP(Dst.getPointer(),
13748  llvm::ConstantInt::get(IntTy, i)),
13749  CharUnits::fromQuantity(4));
13750  }
13751  }
13752  return Result;
13753  }
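  // In short, each MMA load builtin lowers to a single ..._stride intrinsic
  // call on the source pointer, selected by the constant layout argument, and
  // the returned fragments are stored into the destination (element by
  // element when there is more than one result).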
13754 
13755  case NVPTX::BI__hmma_m16n16k16_st_c_f16:
13756  case NVPTX::BI__hmma_m16n16k16_st_c_f32:
13757  case NVPTX::BI__hmma_m32n8k16_st_c_f16:
13758  case NVPTX::BI__hmma_m32n8k16_st_c_f32:
13759  case NVPTX::BI__hmma_m8n32k16_st_c_f16:
13760  case NVPTX::BI__hmma_m8n32k16_st_c_f32:
13761  case NVPTX::BI__imma_m16n16k16_st_c_i32:
13762  case NVPTX::BI__imma_m32n8k16_st_c_i32:
13763  case NVPTX::BI__imma_m8n32k16_st_c_i32:
13764  case NVPTX::BI__imma_m8n8k32_st_c_i32:
13765  case NVPTX::BI__bmma_m8n8k128_st_c_i32: {
13766  Value *Dst = EmitScalarExpr(E->getArg(0));
13767  Address Src = EmitPointerWithAlignment(E->getArg(1));
13768  Value *Ldm = EmitScalarExpr(E->getArg(2));
13769  llvm::APSInt isColMajorArg;
13770  if (!E->getArg(3)->isIntegerConstantExpr(isColMajorArg, getContext()))
13771  return nullptr;
13772  bool isColMajor = isColMajorArg.getSExtValue();
13773  NVPTXMmaLdstInfo II = getNVPTXMmaLdstInfo(BuiltinID);
13774  unsigned IID = isColMajor ? II.IID_col : II.IID_row;
13775  if (IID == 0)
13776  return nullptr;
13777  Function *Intrinsic =
13778  CGM.getIntrinsic(IID, Dst->getType());
13779  llvm::Type *ParamType = Intrinsic->getFunctionType()->getParamType(1);
13780  SmallVector<Value *, 10> Values = {Dst};
13781  for (unsigned i = 0; i < II.NumResults; ++i) {
13782  Value *V = Builder.CreateAlignedLoad(
13783  Builder.CreateGEP(Src.getPointer(), llvm::ConstantInt::get(IntTy, i)),
13784  CharUnits::fromQuantity(4));
13785  Values.push_back(Builder.CreateBitCast(V, ParamType));
13786  }
13787  Values.push_back(Ldm);
13788  Value *Result = Builder.CreateCall(Intrinsic, Values);
13789  return Result;
13790  }
13791 
13792  // BI__hmma_m16n16k16_mma_<Dtype><CType>(d, a, b, c, layout, satf) -->
13793  // Intrinsic::nvvm_wmma_m16n16k16_mma_sync<layout A,B><DType><CType><Satf>
13794  case NVPTX::BI__hmma_m16n16k16_mma_f16f16:
13795  case NVPTX::BI__hmma_m16n16k16_mma_f32f16:
13796  case NVPTX::BI__hmma_m16n16k16_mma_f32f32:
13797  case NVPTX::BI__hmma_m16n16k16_mma_f16f32:
13798  case NVPTX::BI__hmma_m32n8k16_mma_f16f16:
13799  case NVPTX::BI__hmma_m32n8k16_mma_f32f16:
13800  case NVPTX::BI__hmma_m32n8k16_mma_f32f32:
13801  case NVPTX::BI__hmma_m32n8k16_mma_f16f32:
13802  case NVPTX::BI__hmma_m8n32k16_mma_f16f16:
13803  case NVPTX::BI__hmma_m8n32k16_mma_f32f16:
13804  case NVPTX::BI__hmma_m8n32k16_mma_f32f32:
13805  case NVPTX::BI__hmma_m8n32k16_mma_f16f32:
13806  case NVPTX::BI__imma_m16n16k16_mma_s8:
13807  case NVPTX::BI__imma_m16n16k16_mma_u8:
13808  case NVPTX::BI__imma_m32n8k16_mma_s8:
13809  case NVPTX::BI__imma_m32n8k16_mma_u8:
13810  case NVPTX::BI__imma_m8n32k16_mma_s8:
13811  case NVPTX::BI__imma_m8n32k16_mma_u8:
13812  case NVPTX::BI__imma_m8n8k32_mma_s4:
13813  case NVPTX::BI__imma_m8n8k32_mma_u4:
13814  case NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1: {
13815  Address Dst = EmitPointerWithAlignment(E->getArg(0));
13816  Address SrcA = EmitPointerWithAlignment(E->getArg(1));
13817  Address SrcB = EmitPointerWithAlignment(E->getArg(2));
13818  Address SrcC = EmitPointerWithAlignment(E->getArg(3));
13819  llvm::APSInt LayoutArg;
13820  if (!E->getArg(4)->isIntegerConstantExpr(LayoutArg, getContext()))
13821  return nullptr;
13822  int Layout = LayoutArg.getSExtValue();
13823  if (Layout < 0 || Layout > 3)
13824  return nullptr;
13825  llvm::APSInt SatfArg;
13826  if (BuiltinID == NVPTX::BI__bmma_m8n8k128_mma_xor_popc_b1)
13827  SatfArg = 0; // .b1 does not have satf argument.
13828  else if (!E->getArg(5)->isIntegerConstantExpr(SatfArg, getContext()))
13829  return nullptr;
13830  bool Satf = SatfArg.getSExtValue();
13831  NVPTXMmaInfo MI = getNVPTXMmaInfo(BuiltinID);
13832  unsigned IID = MI.getMMAIntrinsic(Layout, Satf);
13833  if (IID == 0) // Unsupported combination of Layout/Satf.
13834  return nullptr;
13835 
13836  SmallVector<Value *, 24> Values;
13837  Function *Intrinsic = CGM.getIntrinsic(IID);
13838  llvm::Type *AType = Intrinsic->getFunctionType()->getParamType(0);
13839  // Load A
13840  for (unsigned i = 0; i < MI.NumEltsA; ++i) {
13841  Value *V = Builder.CreateAlignedLoad(
13842  Builder.CreateGEP(SrcA.getPointer(),
13843  llvm::ConstantInt::get(IntTy, i)),
13844  CharUnits::fromQuantity(4));
13845  Values.push_back(Builder.CreateBitCast(V, AType));
13846  }
13847  // Load B
13848  llvm::Type *BType = Intrinsic->getFunctionType()->getParamType(MI.NumEltsA);
13849  for (unsigned i = 0; i < MI.NumEltsB; ++i) {
13850  Value *V = Builder.CreateAlignedLoad(
13851  Builder.CreateGEP(SrcB.getPointer(),
13852  llvm::ConstantInt::get(IntTy, i)),
13853  CharUnits::fromQuantity(4));
13854  Values.push_back(Builder.CreateBitCast(V, BType));
13855  }
13856  // Load C
13857  llvm::Type *CType =
13858  Intrinsic->getFunctionType()->getParamType(MI.NumEltsA + MI.NumEltsB);
13859  for (unsigned i = 0; i < MI.NumEltsC; ++i) {
13860  Value *V = Builder.CreateAlignedLoad(
13861  Builder.CreateGEP(SrcC.getPointer(),
13862  llvm::ConstantInt::get(IntTy, i)),
13863  CharUnits::fromQuantity(4));
13864  Values.push_back(Builder.CreateBitCast(V, CType));
13865  }
13866  Value *Result = Builder.CreateCall(Intrinsic, Values);
13867  llvm::Type *DType = Dst.getElementType();
13868  for (unsigned i = 0; i < MI.NumEltsD; ++i)
13869  Builder.CreateAlignedStore(
13870  Builder.CreateBitCast(Builder.CreateExtractValue(Result, i), DType),
13871  Builder.CreateGEP(Dst.getPointer(), llvm::ConstantInt::get(IntTy, i)),
13872  CharUnits::fromQuantity(4));
13873  return Result;
13874  }
13875  default:
13876  return nullptr;
13877  }
13878 }
13879 
13880 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
13881  const CallExpr *E) {
13882  switch (BuiltinID) {
13883  case WebAssembly::BI__builtin_wasm_memory_size: {
13884  llvm::Type *ResultType = ConvertType(E->getType());
13885  Value *I = EmitScalarExpr(E->getArg(0));
13886  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_size, ResultType);
13887  return Builder.CreateCall(Callee, I);
13888  }
13889  case WebAssembly::BI__builtin_wasm_memory_grow: {
13890  llvm::Type *ResultType = ConvertType(E->getType());
13891  Value *Args[] = {
13892  EmitScalarExpr(E->getArg(0)),
13893  EmitScalarExpr(E->getArg(1))
13894  };
13895  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_grow, ResultType);
13896  return Builder.CreateCall(Callee, Args);
13897  }
13898  case WebAssembly::BI__builtin_wasm_memory_init: {
13899  llvm::APSInt SegConst;
13900  if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext()))
13901  llvm_unreachable("Constant arg isn't actually constant?");
13902  llvm::APSInt MemConst;
13903  if (!E->getArg(1)->isIntegerConstantExpr(MemConst, getContext()))
13904  llvm_unreachable("Constant arg isn't actually constant?");
13905  if (!MemConst.isNullValue())
13906  ErrorUnsupported(E, "non-zero memory index");
13907  Value *Args[] = {llvm::ConstantInt::get(getLLVMContext(), SegConst),
13908  llvm::ConstantInt::get(getLLVMContext(), MemConst),
13909  EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)),
13910  EmitScalarExpr(E->getArg(4))};
13911  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_memory_init);
13912  return Builder.CreateCall(Callee, Args);
13913  }
13914  case WebAssembly::BI__builtin_wasm_data_drop: {
13915  llvm::APSInt SegConst;
13916  if (!E->getArg(0)->isIntegerConstantExpr(SegConst, getContext()))
13917  llvm_unreachable("Constant arg isn't actually constant?");
13918  Value *Arg = llvm::ConstantInt::get(getLLVMContext(), SegConst);
13919  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_data_drop);
13920  return Builder.CreateCall(Callee, {Arg});
13921  }
13922  case WebAssembly::BI__builtin_wasm_tls_size: {
13923  llvm::Type *ResultType = ConvertType(E->getType());
13924  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_tls_size, ResultType);
13925  return Builder.CreateCall(Callee);
13926  }
13927  case WebAssembly::BI__builtin_wasm_throw: {
13928  Value *Tag = EmitScalarExpr(E->getArg(0));
13929  Value *Obj = EmitScalarExpr(E->getArg(1));
13930  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
13931  return Builder.CreateCall(Callee, {Tag, Obj});
13932  }
13933  case WebAssembly::BI__builtin_wasm_rethrow_in_catch: {
13934  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow_in_catch);
13935  return Builder.CreateCall(Callee);
13936  }
13937  case WebAssembly::BI__builtin_wasm_atomic_wait_i32: {
13938  Value *Addr = EmitScalarExpr(E->getArg(0));
13939  Value *Expected = EmitScalarExpr(E->getArg(1));
13940  Value *Timeout = EmitScalarExpr(E->getArg(2));
13941  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i32);
13942  return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
13943  }
13944  case WebAssembly::BI__builtin_wasm_atomic_wait_i64: {
13945  Value *Addr = EmitScalarExpr(E->getArg(0));
13946  Value *Expected = EmitScalarExpr(E->getArg(1));
13947  Value *Timeout = EmitScalarExpr(E->getArg(2));
13948  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_wait_i64);
13949  return Builder.CreateCall(Callee, {Addr, Expected, Timeout});
13950  }
13951  case WebAssembly::BI__builtin_wasm_atomic_notify: {
13952  Value *Addr = EmitScalarExpr(E->getArg(0));
13953  Value *Count = EmitScalarExpr(E->getArg(1));
13954  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_atomic_notify);
13955  return Builder.CreateCall(Callee, {Addr, Count});
13956  }
13957  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f32:
13958  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32_f64:
13959  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f32:
13960  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64_f64:
13961  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i32x4_f32x4:
13962  case WebAssembly::BI__builtin_wasm_trunc_saturate_s_i64x2_f64x2: {
13963  Value *Src = EmitScalarExpr(E->getArg(0));
13964  llvm::Type *ResT = ConvertType(E->getType());
13965  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_signed,
13966  {ResT, Src->getType()});
13967  return Builder.CreateCall(Callee, {Src});
13968  }
13969  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f32:
13970  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32_f64:
13971  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f32:
13972  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64_f64:
13973  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i32x4_f32x4:
13974  case WebAssembly::BI__builtin_wasm_trunc_saturate_u_i64x2_f64x2: {
13975  Value *Src = EmitScalarExpr(E->getArg(0));
13976  llvm::Type *ResT = ConvertType(E->getType());
13977  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_trunc_saturate_unsigned,
13978  {ResT, Src->getType()});
13979  return Builder.CreateCall(Callee, {Src});
13980  }
13981  case WebAssembly::BI__builtin_wasm_min_f32:
13982  case WebAssembly::BI__builtin_wasm_min_f64:
13983  case WebAssembly::BI__builtin_wasm_min_f32x4:
13984  case WebAssembly::BI__builtin_wasm_min_f64x2: {
13985  Value *LHS = EmitScalarExpr(E->getArg(0));
13986  Value *RHS = EmitScalarExpr(E->getArg(1));
13987  Function *Callee = CGM.getIntrinsic(Intrinsic::minimum,
13988  ConvertType(E->getType()));
13989  return Builder.CreateCall(Callee, {LHS, RHS});
13990  }
13991  case WebAssembly::BI__builtin_wasm_max_f32:
13992  case WebAssembly::BI__builtin_wasm_max_f64:
13993  case WebAssembly::BI__builtin_wasm_max_f32x4:
13994  case WebAssembly::BI__builtin_wasm_max_f64x2: {
13995  Value *LHS = EmitScalarExpr(E->getArg(0));
13996  Value *RHS = EmitScalarExpr(E->getArg(1));
13997  Function *Callee = CGM.getIntrinsic(Intrinsic::maximum,
13998  ConvertType(E->getType()));
13999  return Builder.CreateCall(Callee, {LHS, RHS});
14000  }
14001  case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
14002  case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
14003  case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
14004  case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
14005  case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
14006  case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
14007  case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
14008  case WebAssembly::BI__builtin_wasm_extract_lane_f64x2: {
14009  llvm::APSInt LaneConst;
14010  if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
14011  llvm_unreachable("Constant arg isn't actually constant?");
14012  Value *Vec = EmitScalarExpr(E->getArg(0));
14013  Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
14014  Value *Extract = Builder.CreateExtractElement(Vec, Lane);
14015  switch (BuiltinID) {
14016  case WebAssembly::BI__builtin_wasm_extract_lane_s_i8x16:
14017  case WebAssembly::BI__builtin_wasm_extract_lane_s_i16x8:
14018  return Builder.CreateSExt(Extract, ConvertType(E->getType()));
14019  case WebAssembly::BI__builtin_wasm_extract_lane_u_i8x16:
14020  case WebAssembly::BI__builtin_wasm_extract_lane_u_i16x8:
14021  return Builder.CreateZExt(Extract, ConvertType(E->getType()));
14022  case WebAssembly::BI__builtin_wasm_extract_lane_i32x4:
14023  case WebAssembly::BI__builtin_wasm_extract_lane_i64x2:
14024  case WebAssembly::BI__builtin_wasm_extract_lane_f32x4:
14025  case WebAssembly::BI__builtin_wasm_extract_lane_f64x2:
14026  return Extract;
14027  default:
14028  llvm_unreachable("unexpected builtin ID");
14029  }
14030  }
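  // In short: sub-word lanes (i8x16/i16x8) are extracted and then sign- or
  // zero-extended to the builtin's return type, while i32/i64/f32/f64 lanes
  // are returned as-is.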
14031  case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
14032  case WebAssembly::BI__builtin_wasm_replace_lane_i16x8:
14033  case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
14034  case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
14035  case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
14036  case WebAssembly::BI__builtin_wasm_replace_lane_f64x2: {
14037  llvm::APSInt LaneConst;
14038  if (!E->getArg(1)->isIntegerConstantExpr(LaneConst, getContext()))
14039  llvm_unreachable("Constant arg isn't actually constant?");
14040  Value *Vec = EmitScalarExpr(E->getArg(0));
14041  Value *Lane = llvm::ConstantInt::get(getLLVMContext(), LaneConst);
14042  Value *Val = EmitScalarExpr(E->getArg(2));
14043  switch (BuiltinID) {
14044  case WebAssembly::BI__builtin_wasm_replace_lane_i8x16:
14045  case WebAssembly::BI__builtin_wasm_replace_lane_i16x8: {
14046  llvm::Type *ElemType = ConvertType(E->getType())->getVectorElementType();
14047  Value *Trunc = Builder.CreateTrunc(Val, ElemType);
14048  return Builder.CreateInsertElement(Vec, Trunc, Lane);
14049  }
14050  case WebAssembly::BI__builtin_wasm_replace_lane_i32x4:
14051  case WebAssembly::BI__builtin_wasm_replace_lane_i64x2:
14052  case WebAssembly::BI__builtin_wasm_replace_lane_f32x4:
14053  case WebAssembly::BI__builtin_wasm_replace_lane_f64x2:
14054  return Builder.CreateInsertElement(Vec, Val, Lane);
14055  default:
14056  llvm_unreachable("unexpected builtin ID");
14057  }
14058  }
14059  case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
14060  case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
14061  case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
14062  case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
14063  case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
14064  case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
14065  case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
14066  case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8: {
14067  unsigned IntNo;
14068  switch (BuiltinID) {
14069  case WebAssembly::BI__builtin_wasm_add_saturate_s_i8x16:
14070  case WebAssembly::BI__builtin_wasm_add_saturate_s_i16x8:
14071  IntNo = Intrinsic::sadd_sat;
14072  break;
14073  case WebAssembly::BI__builtin_wasm_add_saturate_u_i8x16:
14074  case WebAssembly::BI__builtin_wasm_add_saturate_u_i16x8:
14075  IntNo = Intrinsic::uadd_sat;
14076  break;
14077  case WebAssembly::BI__builtin_wasm_sub_saturate_s_i8x16:
14078  case WebAssembly::BI__builtin_wasm_sub_saturate_s_i16x8:
14079  IntNo = Intrinsic::wasm_sub_saturate_signed;
14080  break;
14081  case WebAssembly::BI__builtin_wasm_sub_saturate_u_i8x16:
14082  case WebAssembly::BI__builtin_wasm_sub_saturate_u_i16x8:
14083  IntNo = Intrinsic::wasm_sub_saturate_unsigned;
14084  break;
14085  default:
14086  llvm_unreachable("unexpected builtin ID");
14087  }
14088  Value *LHS = EmitScalarExpr(E->getArg(0));
14089  Value *RHS = EmitScalarExpr(E->getArg(1));
14090  Function *Callee = CGM.getIntrinsic(IntNo, ConvertType(E->getType()));
14091  return Builder.CreateCall(Callee, {LHS, RHS});
14092  }
14093  case WebAssembly::BI__builtin_wasm_bitselect: {
14094  Value *V1 = EmitScalarExpr(E->getArg(0));
14095  Value *V2 = EmitScalarExpr(E->getArg(1));
14096  Value *C = EmitScalarExpr(E->getArg(2));
14097  Function *Callee = CGM.getIntrinsic(Intrinsic::wasm_bitselect,
14098  ConvertType(E->getType()));
14099  return Builder.CreateCall(Callee, {V1, V2, C});
14100  }
14101  case WebAssembly::BI__builtin_wasm_any_true_i8x16:
14102  case WebAssembly::BI__builtin_wasm_any_true_i16x8:
14103  case WebAssembly::BI__builtin_wasm_any_true_i32x4:
14104  case WebAssembly::BI__builtin_wasm_any_true_i64x2:
14105  case WebAssembly::BI__builtin_wasm_all_true_i8x16:
14106  case WebAssembly::BI__builtin_wasm_all_true_i16x8:
14107  case WebAssembly::BI__builtin_wasm_all_true_i32x4:
14108  case WebAssembly::BI__builtin_wasm_all_true_i64x2: {
14109  unsigned IntNo;
14110  switch (BuiltinID) {
14111  case WebAssembly::BI__builtin_wasm_any_true_i8x16:
14112  case WebAssembly::BI__builtin_wasm_any_true_i16x8:
14113  case WebAssembly::BI__builtin_wasm_any_true_i32x4:
14114  case WebAssembly::BI__builtin_wasm_any_true_i64x2:
14115  IntNo = Intrinsic::wasm_anytrue;
14116  break;
14117  case WebAssembly::BI__builtin_wasm_all_true_i8x16:
14118  case WebAssembly::BI__builtin_wasm_all_true_i16x8:
14119  case WebAssembly::BI__builtin_wasm_all_true_i32x4:
14120  case WebAssembly::BI__builtin_wasm_all_true_i64x2:
14121  IntNo = Intrinsic::wasm_alltrue;
14122  break;
14123  default:
14124  llvm_unreachable("unexpected builtin ID");
14125  }
14126  Value *Vec = EmitScalarExpr(E->getArg(0));
14127  Function *Callee = CGM.getIntrinsic(IntNo, Vec->getType());
14128  return Builder.CreateCall(Callee, {Vec});
14129  }
14130  case WebAssembly::BI__builtin_wasm_abs_f32x4:
14131  case WebAssembly::BI__builtin_wasm_abs_f64x2: {
14132  Value *Vec = EmitScalarExpr(E->getArg(0));
14133  Function *Callee = CGM.getIntrinsic(Intrinsic::fabs, Vec->getType());
14134  return Builder.CreateCall(Callee, {Vec});
14135  }
14136  case WebAssembly::BI__builtin_wasm_sqrt_f32x4:
14137  case WebAssembly::BI__builtin_wasm_sqrt_f64x2: {
14138  Value *Vec = EmitScalarExpr(E->getArg(0));
14139  Function *Callee = CGM.getIntrinsic(Intrinsic::sqrt, Vec->getType());
14140  return Builder.CreateCall(Callee, {Vec});
14141  }
14142 
14143  default:
14144  return nullptr;
14145  }
14146 }
14147 
14148 Value *CodeGenFunction::EmitHexagonBuiltinExpr(unsigned BuiltinID,
14149  const CallExpr *E) {
14150  SmallVector<llvm::Value *, 4> Ops;
14151  Intrinsic::ID ID = Intrinsic::not_intrinsic;
14152 
14153  auto MakeCircLd = [&](unsigned IntID, bool HasImm) {
14154  // The base pointer is passed by address, so it needs to be loaded.
14155  Address BP = EmitPointerWithAlignment(E->getArg(0));
14156  BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
14157  BP.getAlignment());
14158  llvm::Value *Base = Builder.CreateLoad(BP);
14159  // Operands are Base, Increment, Modifier, Start.
14160  if (HasImm)
14161  Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
14162  EmitScalarExpr(E->getArg(3)) };
14163  else
14164  Ops = { Base, EmitScalarExpr(E->getArg(1)),
14165  EmitScalarExpr(E->getArg(2)) };
14166 
14167  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
14168  llvm::Value *NewBase = Builder.CreateExtractValue(Result, 1);
14169  llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
14170  NewBase->getType()->getPointerTo());
14171  Address Dest = EmitPointerWithAlignment(E->getArg(0));
14172  // The intrinsic generates two results. The new value for the base pointer
14173  // needs to be stored.
14174  Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
14175  return Builder.CreateExtractValue(Result, 0);
14176  };
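  // In short, a circular load such as __builtin_HEXAGON_L2_loadri_pci returns
  // the loaded element (extract-value 0) and writes the post-increment base
  // pointer (extract-value 1) back through the first argument.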
14177 
14178  auto MakeCircSt = [&](unsigned IntID, bool HasImm) {
14179  // The base pointer is passed by address, so it needs to be loaded.
14180  Address BP = EmitPointerWithAlignment(E->getArg(0));
14181  BP = Address(Builder.CreateBitCast(BP.getPointer(), Int8PtrPtrTy),
14182  BP.getAlignment());
14183  llvm::Value *Base = Builder.CreateLoad(BP);
14184  // Operands are Base, Increment, Modifier, Value, Start.
14185  if (HasImm)
14186  Ops = { Base, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2)),
14187  EmitScalarExpr(E->getArg(3)), EmitScalarExpr(E->getArg(4)) };
14188  else
14189  Ops = { Base, EmitScalarExpr(E->getArg(1)),
14190  EmitScalarExpr(E->getArg(2)), EmitScalarExpr(E->getArg(3)) };
14191 
14192  llvm::Value *NewBase = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
14193  llvm::Value *LV = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)),
14194  NewBase->getType()->getPointerTo());
14195  Address Dest = EmitPointerWithAlignment(E->getArg(0));
14196  // The intrinsic generates one result, which is the new value for the base
14197  // pointer. It needs to be stored.
14198  return Builder.CreateAlignedStore(NewBase, LV, Dest.getAlignment());
14199  };
14200 
14201  // Handle the conversion of bit-reverse load intrinsics to bitcode.
14202  // The intrinsic call emitted below only reads from memory; the write to
14203  // memory is handled by the store instruction.
14204  auto MakeBrevLd = [&](unsigned IntID, llvm::Type *DestTy) {
14205  // The intrinsic returns both the loaded value and the updated base
14206  // pointer. The updated base needs to be returned, while the loaded value
14207  // is stored through the destination, which is passed by address.
14208  llvm::Value *BaseAddress =
14209  Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int8PtrTy);
14210 
14211  // Expressions like &(*pt++) increment the pointer on each evaluation, and
14212  // EmitPointerWithAlignment and EmitScalarExpr evaluate the expression once
14213  // per call.
14214  Address DestAddr = EmitPointerWithAlignment(E->getArg(1));
14215  DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), Int8PtrTy),
14216  DestAddr.getAlignment());
14217  llvm::Value *DestAddress = DestAddr.getPointer();
14218 
14219  // Operands are Base, Dest, Modifier.
14220  // The intrinsic format in LLVM IR is defined as
14221  // { ValueType, i8* } (i8*, i32).
14222  Ops = {BaseAddress, EmitScalarExpr(E->getArg(2))};
14223 
14224  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(IntID), Ops);
14225  // The value needs to be stored as the variable is passed by reference.
14226  llvm::Value *DestVal = Builder.CreateExtractValue(Result, 0);
14227 
14228  // The store needs to be truncated to fit the destination type.
14229  // While i32 and i64 are natively supported on Hexagon, i8 and i16 need
14230  // to be handled with stores of the respective destination type.
14231  DestVal = Builder.CreateTrunc(DestVal, DestTy);
14232 
14233  llvm::Value *DestForStore =
14234  Builder.CreateBitCast(DestAddress, DestVal->getType()->getPointerTo());
14235  Builder.CreateAlignedStore(DestVal, DestForStore, DestAddr.getAlignment());
14236  // The updated value of the base pointer is returned.
14237  return Builder.CreateExtractValue(Result, 1);
14238  };
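  // In short, a bit-reverse load such as __builtin_brev_ldh loads via the
  // brev addressing mode, truncates the loaded value to the destination type
  // (Int16Ty in that case), stores it through the second argument, and
  // returns the updated base pointer.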
14239 
14240  switch (BuiltinID) {
14241  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry:
14242  case Hexagon::BI__builtin_HEXAGON_V6_vaddcarry_128B: {
14243  Address Dest = EmitPointerWithAlignment(E->getArg(2));
14244  unsigned Size;
14245  if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vaddcarry) {
14246  Size = 512;
14247  ID = Intrinsic::hexagon_V6_vaddcarry;
14248  } else {
14249  Size = 1024;
14250  ID = Intrinsic::hexagon_V6_vaddcarry_128B;
14251  }
14252  Dest = Builder.CreateBitCast(Dest,
14253  llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
14254  LoadInst *QLd = Builder.CreateLoad(Dest);
14255  Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
14256  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14257  llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
14258  llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
14259  Vprd->getType()->getPointerTo(0));
14260  Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
14261  return Builder.CreateExtractValue(Result, 0);
14262  }
14263  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry:
14264  case Hexagon::BI__builtin_HEXAGON_V6_vsubcarry_128B: {
14265  Address Dest = EmitPointerWithAlignment(E->getArg(2));
14266  unsigned Size;
14267  if (BuiltinID == Hexagon::BI__builtin_HEXAGON_V6_vsubcarry) {
14268  Size = 512;
14269  ID = Intrinsic::hexagon_V6_vsubcarry;
14270  } else {
14271  Size = 1024;
14272  ID = Intrinsic::hexagon_V6_vsubcarry_128B;
14273  }
14274  Dest = Builder.CreateBitCast(Dest,
14275  llvm::VectorType::get(Builder.getInt1Ty(), Size)->getPointerTo(0));
14276  LoadInst *QLd = Builder.CreateLoad(Dest);
14277  Ops = { EmitScalarExpr(E->getArg(0)), EmitScalarExpr(E->getArg(1)), QLd };
14278  llvm::Value *Result = Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
14279  llvm::Value *Vprd = Builder.CreateExtractValue(Result, 1);
14280  llvm::Value *Base = Builder.CreateBitCast(EmitScalarExpr(E->getArg(2)),
14281  Vprd->getType()->getPointerTo(0));
14282  Builder.CreateAlignedStore(Vprd, Base, Dest.getAlignment());
14283  return Builder.CreateExtractValue(Result, 0);
14284  }
14285  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pci:
14286  return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pci, /*HasImm*/true);
14287  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pci:
14288  return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pci, /*HasImm*/true);
14289  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pci:
14290  return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pci, /*HasImm*/true);
14291  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pci:
14292  return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pci, /*HasImm*/true);
14293  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pci:
14294  return MakeCircLd(Intrinsic::hexagon_L2_loadri_pci, /*HasImm*/true);
14295  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pci:
14296  return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pci, /*HasImm*/true);
14297  case Hexagon::BI__builtin_HEXAGON_L2_loadrub_pcr:
14298  return MakeCircLd(Intrinsic::hexagon_L2_loadrub_pcr, /*HasImm*/false);
14299  case Hexagon::BI__builtin_HEXAGON_L2_loadrb_pcr:
14300  return MakeCircLd(Intrinsic::hexagon_L2_loadrb_pcr, /*HasImm*/false);
14301  case Hexagon::BI__builtin_HEXAGON_L2_loadruh_pcr:
14302  return MakeCircLd(Intrinsic::hexagon_L2_loadruh_pcr, /*HasImm*/false);
14303  case Hexagon::BI__builtin_HEXAGON_L2_loadrh_pcr:
14304  return MakeCircLd(Intrinsic::hexagon_L2_loadrh_pcr, /*HasImm*/false);
14305  case Hexagon::BI__builtin_HEXAGON_L2_loadri_pcr:
14306  return MakeCircLd(Intrinsic::hexagon_L2_loadri_pcr, /*HasImm*/false);
14307  case Hexagon::BI__builtin_HEXAGON_L2_loadrd_pcr:
14308  return MakeCircLd(Intrinsic::hexagon_L2_loadrd_pcr, /*HasImm*/false);
14309  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pci:
14310  return MakeCircSt(Intrinsic::hexagon_S2_storerb_pci, /*HasImm*/true);
14311  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pci:
14312  return MakeCircSt(Intrinsic::hexagon_S2_storerh_pci, /*HasImm*/true);
14313  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pci:
14314  return MakeCircSt(Intrinsic::hexagon_S2_storerf_pci, /*HasImm*/true);
14315  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pci:
14316  return MakeCircSt(Intrinsic::hexagon_S2_storeri_pci, /*HasImm*/true);
14317  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pci:
14318  return MakeCircSt(Intrinsic::hexagon_S2_storerd_pci, /*HasImm*/true);
14319  case Hexagon::BI__builtin_HEXAGON_S2_storerb_pcr:
14320  return MakeCircSt(Intrinsic::hexagon_S2_storerb_pcr, /*HasImm*/false);
14321  case Hexagon::BI__builtin_HEXAGON_S2_storerh_pcr:
14322  return MakeCircSt(Intrinsic::hexagon_S2_storerh_pcr, /*HasImm*/false);
14323  case Hexagon::BI__builtin_HEXAGON_S2_storerf_pcr:
14324  return MakeCircSt(Intrinsic::hexagon_S2_storerf_pcr, /*HasImm*/false);
14325  case Hexagon::BI__builtin_HEXAGON_S2_storeri_pcr:
14326  return MakeCircSt(Intrinsic::hexagon_S2_storeri_pcr, /*HasImm*/false);
14327  case Hexagon::BI__builtin_HEXAGON_S2_storerd_pcr:
14328  return MakeCircSt(Intrinsic::hexagon_S2_storerd_pcr, /*HasImm*/false);
14329  case Hexagon::BI__builtin_brev_ldub:
14330  return MakeBrevLd(Intrinsic::hexagon_L2_loadrub_pbr, Int8Ty);
14331  case Hexagon::BI__builtin_brev_ldb:
14332  return MakeBrevLd(Intrinsic::hexagon_L2_loadrb_pbr, Int8Ty);
14333  case Hexagon::BI__builtin_brev_lduh:
14334  return MakeBrevLd(Intrinsic::hexagon_L2_loadruh_pbr, Int16Ty);
14335  case Hexagon::BI__builtin_brev_ldh:
14336  return MakeBrevLd(Intrinsic::hexagon_L2_loadrh_pbr, Int16Ty);
14337  case Hexagon::BI__builtin_brev_ldw:
14338  return MakeBrevLd(Intrinsic::hexagon_L2_loadri_pbr, Int32Ty);
14339  case Hexagon::BI__builtin_brev_ldd:
14340  return MakeBrevLd(Intrinsic::hexagon_L2_loadrd_pbr, Int64Ty);
14341  default:
14342  break;
14343  } // switch
14344 
14345  return nullptr;
14346 }
Handle a SystemZ function in which the final argument is a pointer to an int that receives the post-i...
static int64_t clamp(int64_t Value, int64_t Low, int64_t High)
Definition: CGBuiltin.cpp:44
llvm::Value * getPointer() const
Definition: Address.h:37
Represents a parameter to a function.
Definition: Decl.h:1564
static bool HasExtraNeonArgument(unsigned BuiltinID)
Return true if BuiltinID is an overloaded Neon intrinsic with an extra argument that specifies the ve...
Definition: CGBuiltin.cpp:5948
long i
Definition: xmmintrin.h:1456
The collection of all-type qualifiers we support.
Definition: Type.h:137
void add(RValue rvalue, QualType type)
Definition: CGCall.h:287
PipeType - OpenCL20.
Definition: Type.h:6072
Represents a struct/union/class.
Definition: Decl.h:3626
void __ovld prefetch(const __global char *p, size_t num_elements)
Prefetch num_elements * sizeof(gentype) bytes into the global cache.
const TargetInfo & getTarget() const
static bool TypeRequiresBuiltinLaunder(CodeGenModule &CGM, QualType Ty)
Determine if the specified type requires laundering by checking if it is a dynamic class type or cont...
Definition: CGBuiltin.cpp:1508
One of these records is kept for each identifier that is lexed.
Address getAddress() const
Definition: CGValue.h:326
static Value * emitFPToIntRoundBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:395
#define pow(__x, __y)
Definition: tgmath.h:490
CodeGenFunction - This class organizes the per-function state that is used while generating LLVM code...
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:154
RecordDecl * getDefinition() const
Returns the RecordDecl that actually defines this struct/union/class.
Definition: Decl.h:3831
field_range fields() const
Definition: Decl.h:3841
llvm::Value * BuildVector(ArrayRef< llvm::Value *> Ops)
Definition: CGBuiltin.cpp:9280
SourceLocation getBeginLoc() const LLVM_READONLY
Definition: Stmt.cpp:263
Represents a member of a struct/union/class.
Definition: Decl.h:2607
static Value * EmitX86CompressStore(CodeGenFunction &CGF, ArrayRef< Value *> Ops)
Definition: CGBuiltin.cpp:9382
static Value * EmitX86FunnelShift(CodeGenFunction &CGF, Value *Op0, Value *Op1, Value *Amt, bool IsRight)
Definition: CGBuiltin.cpp:9413
llvm::Value * EmitAArch64BuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6906
static Value * EmitScalarFMAExpr(CodeGenFunction &CGF, MutableArrayRef< Value *> Ops, Value *Upper, bool ZeroMask=false, unsigned PTIdx=0, bool NegAcc=false)
Definition: CGBuiltin.cpp:9705
static bool NEONSIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:5083
llvm::Value * EmitHexagonBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
__DEVICE__ int max(int __a, int __b)
static Value * EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E, SmallVectorImpl< Value *> &Ops, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:6749
static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, Instruction::BinaryOps Op, bool Invert=false)
Utility to insert an atomic instruction based Intrinsic::ID and the expression node, where the return value is the result of the operation.
Definition: CGBuiltin.cpp:176
bool isFloat() const
Definition: APValue.h:318
bool isIntegralOrEnumerationType() const
Determine whether this type is an integral or enumeration type.
Definition: Type.h:6747
static llvm::Value * EmitX86BitTestIntrinsic(CodeGenFunction &CGF, BitTest BT, const CallExpr *E, Value *BitBase, Value *BitPos)
Definition: CGBuiltin.cpp:695
IdentifierTable & Idents
Definition: ASTContext.h:569
static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:4696
void EmitStoreOfScalar(llvm::Value *Value, Address Addr, bool Volatile, QualType Ty, AlignmentSource Source=AlignmentSource::Type, bool isInit=false, bool isNontemporal=false)
EmitStoreOfScalar - Store a scalar value to an address, taking care to appropriately convert from the...
static Value * EmitX86AddSubSatExpr(CodeGenFunction &CGF, ArrayRef< Value *> Ops, bool IsSigned, bool IsAddition)
Definition: CGBuiltin.cpp:9810
static Value * EmitX86MaskLogic(CodeGenFunction &CGF, Instruction::BinaryOps Opc, ArrayRef< Value *> Ops, bool InvertLHS=false)
Definition: CGBuiltin.cpp:9399
static ApplyDebugLocation CreateArtificial(CodeGenFunction &CGF)
Apply TemporaryLocation if it is valid.
Definition: CGDebugInfo.h:742
llvm::Value * EmitCommonNeonBuiltinExpr(unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic, const char *NameHint, unsigned Modifier, const CallExpr *E, SmallVectorImpl< llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:5210
static Value * EmitX86MaskedStore(CodeGenFunction &CGF, ArrayRef< Value *> Ops, unsigned Align)
Definition: CGBuiltin.cpp:9326
unsigned char getNumArgsByte() const
Definition: OSLog.h:148
bool isLibFunction(unsigned ID) const
Return true if this is a builtin for a libc/libm function, with a "__builtin_" prefix (e...
Definition: Builtins.h:133
CharUnits - This is an opaque type for sizes expressed in character units.
Definition: CharUnits.h:37
static Value * EmitAtomicDecrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:308
static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[]
Definition: CGBuiltin.cpp:4853
APValue Val
Val - This is the value the expression can be folded to.
Definition: Expr.h:582
#define INTRINSIC_WITH_CC(NAME)
CharUnits getAlignment() const
Return the alignment of this pointer.
Definition: Address.h:66
bool isVolatileQualified() const
Determine whether this type is volatile-qualified.
Definition: Type.h:6212
static bool AArch64SISDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:5086
__DEVICE__ double powi(double __a, int __b)
static Value * EmitX86ScalarSelect(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
Definition: CGBuiltin.cpp:9484
Expr * IgnoreParenCasts() LLVM_READONLY
Skip past any parentheses and casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:2947
bool isInt() const
Definition: APValue.h:317
#define MMA_VARIANTS_I4(geom, type)
#define sin(__x)
Definition: tgmath.h:286
bool isUnsigned() const
bool computeOSLogBufferLayout(clang::ASTContext &Ctx, const clang::CallExpr *E, OSLogBufferLayout &layout)
Definition: OSLog.cpp:180
#define lrint(__x)
Definition: tgmath.h:1004
static Value * EmitX86CvtBF16ToFloatExpr(CodeGenFunction &CGF, const CallExpr *E, ArrayRef< Value *> Ops)
Definition: CGBuiltin.cpp:9827
Expr * IgnoreImpCasts() LLVM_READONLY
Skip past any implicit casts which might surround this expression until reaching a fixed point...
Definition: Expr.cpp:2926
bool tryEvaluateObjectSize(uint64_t &Result, ASTContext &Ctx, unsigned Type) const
If the current Expr is a pointer, this will try to statically determine the number of bytes available...
llvm::Value * EmitLoadOfScalar(Address Addr, bool Volatile, QualType Ty, SourceLocation Loc, AlignmentSource Source=AlignmentSource::Type, bool isNontemporal=false)
EmitLoadOfScalar - Load a scalar value from an address, taking care to appropriately convert from the...
static ImplicitParamDecl * Create(ASTContext &C, DeclContext *DC, SourceLocation IdLoc, IdentifierInfo *Id, QualType T, ImplicitParamKind ParamKind)
Create implicit parameter.
Definition: Decl.cpp:4561
bool hasAttr() const
Definition: DeclBase.h:542
llvm::Type * HalfTy
float, double
static CharUnits One()
One - Construct a CharUnits quantity of one.
Definition: CharUnits.h:57
std::pair< llvm::Value *, llvm::Value * > ComplexPairTy
CXXRecordDecl * getAsCXXRecordDecl() const
Retrieves the CXXRecordDecl that this type refers to, either because the type is a RecordType or beca...
Definition: Type.cpp:1636
Represents a prototype with parameter type info, e.g.
Definition: Type.h:3719
#define NEONMAP0(NameBase)
Definition: CGBuiltin.cpp:4407
RValue - This trivial value class is used to represent the result of an expression that is evaluated...
Definition: CGValue.h:38
RValue EmitBuiltinExpr(const GlobalDecl GD, unsigned BuiltinID, const CallExpr *E, ReturnValueSlot ReturnValue)
Definition: CGBuiltin.cpp:1530
const char * getName(unsigned ID) const
Return the identifier name for the specified builtin, e.g.
Definition: Builtins.h:85
QuantityType getQuantity() const
getQuantity - Get the raw integer representation of this quantity.
Definition: CharUnits.h:178
llvm::APSInt EvaluateKnownConstInt(const ASTContext &Ctx, SmallVectorImpl< PartialDiagnosticAt > *Diag=nullptr) const
EvaluateKnownConstInt - Call EvaluateAsRValue and return the folded integer.
static Value * EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC, bool Signed, ArrayRef< Value *> Ops)
Definition: CGBuiltin.cpp:9522
unsigned Offset
Definition: Format.cpp:1713
bool HasSideEffects(const ASTContext &Ctx, bool IncludePossibleEffects=true) const
HasSideEffects - This routine returns true for all those expressions which have any effect other than...
Definition: Expr.cpp:3319
llvm::Value * EmitVAStartEnd(llvm::Value *ArgValue, bool IsStart)
Emits a call to an LLVM variable-argument intrinsic, either llvm.va_start or llvm.va_end.
Definition: CGBuiltin.cpp:530
Exposes information about the current target.
Definition: TargetInfo.h:161
#define copysign(__x, __y)
Definition: tgmath.h:618
This represents one expression.
Definition: Expr.h:108
SourceLocation End
static Address invalid()
Definition: Address.h:34
static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD, const CallExpr *E, llvm::Constant *calleeValue)
Definition: CGBuiltin.cpp:445
const T * castAs() const
Member-template castAs<specific type>.
Definition: Type.h:6916
static CGCallee forDirect(llvm::Constant *functionPtr, const CGCalleeInfo &abstractInfo=CGCalleeInfo())
Definition: CGCall.h:133
#define V(N, I)
Definition: ASTContext.h:2907
Expr * getCallee()
Definition: Expr.h:2634
static uint64_t GetX86CpuSupportsMask(ArrayRef< StringRef > FeatureStrs)
Definition: CGBuiltin.cpp:9890
#define INTRINSIC_X86_XSAVE_ID(NAME)
static Value * EmitX86Select(CodeGenFunction &CGF, Value *Mask, Value *Op0, Value *Op1)
Definition: CGBuiltin.cpp:9471
llvm::Value * EmitToMemory(llvm::Value *Value, QualType Ty)
EmitToMemory - Change a scalar value from its value representation to its in-memory representation...
Definition: CGExpr.cpp:1690
static Value * emitUnaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:351
SourceLocation Begin
static CharUnits fromQuantity(QuantityType Quantity)
fromQuantity - Construct a CharUnits quantity from a raw integer type.
Definition: CharUnits.h:62
FunctionDecl * getDirectCallee()
If the callee is a FunctionDecl, return it. Otherwise return null.
Definition: Expr.h:2652
static Value * EmitFromInt(CodeGenFunction &CGF, llvm::Value *V, QualType T, llvm::Type *ResultType)
Definition: CGBuiltin.cpp:104
static SVal getValue(SVal val, SValBuilder &svalBuilder)
int Depth
Definition: ASTDiff.cpp:190
llvm::LLVMContext & getLLVMContext()
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char, signed char, short, int, long..], or an enum decl which has a signed representation.
Definition: Type.cpp:1875
static Value * emitRangedBuiltin(CodeGenFunction &CGF, unsigned IntrinsicID, int low, int high)
Definition: CGBuiltin.cpp:475
QualType getType() const
Definition: Expr.h:137
static const NeonIntrinsicInfo ARMSIMDIntrinsicMap[]
Definition: CGBuiltin.cpp:4419
LValue MakeNaturalAlignAddrLValue(llvm::Value *V, QualType T)
static bool areBOSTypesCompatible(int From, int To)
Checks if using the result of __builtin_object_size(p, From) in place of __builtin_object_size(p, To) is correct.
Definition: CGBuiltin.cpp:542
llvm::CallInst * CreateMemSet(Address Dest, llvm::Value *Value, llvm::Value *Size, bool IsVolatile=false)
Definition: CGBuilder.h:296
QualType getFunctionType(QualType ResultTy, ArrayRef< QualType > Args, const FunctionProtoType::ExtProtoInfo &EPI) const
Return a normal function type with a typed argument list.
Definition: ASTContext.h:1382
RValue emitBuiltinOSLogFormat(const CallExpr &E)
Emit IR for __builtin_os_log_format.
Definition: CGBuiltin.cpp:1222
static const NeonIntrinsicInfo * findNeonIntrinsicInMap(ArrayRef< NeonIntrinsicInfo > IntrinsicMap, unsigned BuiltinID, bool &MapProvenSorted)
Definition: CGBuiltin.cpp:5090
ASTContext & getContext() const
static Value * EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:159
virtual llvm::Value * getPipeElemSize(const Expr *PipeArg)
llvm::Value * EmitX86BuiltinExpr(unsigned BuiltinID, const CallExpr *E)
Definition: CGBuiltin.cpp:9972
GlobalDecl - represents a global declaration.
Definition: GlobalDecl.h:40
static Value * EmitAtomicIncrementValue(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Definition: CGBuiltin.cpp:295
llvm::Value * EmitAMDGPUBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
The l-value was considered opaque, so the alignment was determined from a type.
llvm::Value * EmitPPCBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static llvm::Value * EmitOverflowIntrinsic(CodeGenFunction &CGF, const llvm::Intrinsic::ID IntrinsicID, llvm::Value *X, llvm::Value *Y, llvm::Value *&Carry)
Emit a call to llvm.
Definition: CGBuiltin.cpp:460
EltType getEltType() const
#define log(__x)
Definition: tgmath.h:460
bool EvaluateAsRValue(EvalResult &Result, const ASTContext &Ctx, bool InConstantContext=false) const
EvaluateAsRValue - Return true if this is a constant which we can fold to an rvalue using any crazy t...
Enumerates target-specific builtins in their own namespaces within namespace clang.
Address CreateBitCast(Address Addr, llvm::Type *Ty, const llvm::Twine &Name="")
Definition: CGBuilder.h:141
MSVCIntrin
Definition: CGBuiltin.cpp:871
Kind
FunctionDecl * getAsFunction() LLVM_READONLY
Returns the function itself, or the templated function if this is a function template.
Definition: DeclBase.cpp:217
NullPointerConstantKind isNullPointerConstant(ASTContext &Ctx, NullPointerConstantValueDependence NPC) const
isNullPointerConstant - C99 6.3.2.3p3 - Test if this reduces down to a Null pointer constant...
Definition: Expr.cpp:3658
Encodes a location in the source.
static RValue getIgnored()
Definition: CGValue.h:80
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
LangAS getAddressSpace() const
Return the address space of this type.
Definition: Type.h:6258
llvm::Value * EmitFromMemory(llvm::Value *Value, QualType Ty)
EmitFromMemory - Change a scalar value from its memory representation to its value representation...
Definition: CGExpr.cpp:1704
RValue EmitCall(const CGFunctionInfo &CallInfo, const CGCallee &Callee, ReturnValueSlot ReturnValue, const CallArgList &Args, llvm::CallBase **callOrInvoke, SourceLocation Loc)
EmitCall - Generate a call of the given function, expecting the given result type, and using the given argument list which specifies both the LLVM arguments and the types they were derived from.
Definition: CGCall.cpp:3776
static Value * EmitX86MaskedCompareResult(CodeGenFunction &CGF, Value *Cmp, unsigned NumElts, Value *MaskIn)
Definition: CGBuiltin.cpp:9499
static llvm::VectorType * GetFloatNeonType(CodeGenFunction *CGF, NeonTypeFlags IntTypeFlags)
Definition: CGBuiltin.cpp:4295
#define rint(__x)
Definition: tgmath.h:1131
static Value * EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:145
const Decl * getDecl() const
Definition: GlobalDecl.h:76
APFloat & getFloat()
Definition: APValue.h:350
QualType getBaseElementType(const ArrayType *VAT) const
Return the innermost element type of an array type.
static Value * MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E, bool ReturnBool)
Utility to insert an atomic cmpxchg instruction.
Definition: CGBuiltin.cpp:225
#define round(__x)
Definition: tgmath.h:1148
#define exp2(__x)
Definition: tgmath.h:670
static llvm::AtomicOrdering getBitTestAtomicOrdering(BitTest::InterlockingKind I)
Definition: CGBuiltin.cpp:727
OpenMPLinearClauseKind Modifier
Modifier of &#39;linear&#39; clause.
Definition: OpenMPClause.h:101
#define MMA_VARIANTS(geom, type)
const ParmVarDecl * getParamDecl(unsigned i) const
Definition: Decl.h:2312
#define cos(__x)
Definition: tgmath.h:257
constexpr XRayInstrMask Custom
Definition: XRayInstr.h:39
const ArrayType * getAsArrayType(QualType T) const
Type Query functions.
Represents a canonical, potentially-qualified type.
Definition: CanonicalType.h:65
Specifies that a value-dependent expression should be considered to never be a null pointer constant...
Definition: Expr.h:737
CanQualType VoidTy
Definition: ASTContext.h:1014
llvm::Constant * emitAbstract(const Expr *E, QualType T)
Emit the result of the given expression as an abstract constant, asserting that it succeeded...
llvm::Function * generateBuiltinOSLogHelperFunction(const analyze_os_log::OSLogBufferLayout &Layout, CharUnits BufferAlignment)
Definition: CGBuiltin.cpp:1116
arg_range arguments()
Definition: Expr.h:2710
bool isObjCObjectPointerType() const
Definition: Type.h:6488
An aligned address.
Definition: Address.h:24
static RValue EmitMSVCRTSetJmp(CodeGenFunction &CGF, MSVCSetJmpKind SJKind, const CallExpr *E)
MSVC handles setjmp a bit differently on different platforms.
Definition: CGBuiltin.cpp:831
All available information about a concrete callee.
Definition: CGCall.h:66
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
static Value * EmitX86SExtMask(CodeGenFunction &CGF, Value *Op, llvm::Type *DstTy)
Definition: CGBuiltin.cpp:9802
static Value * EmitISOVolatileStore(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:335
#define MMA_LDST(n, geom_op_type)
static Value * EmitAtomicCmpXchgForMSIntrin(CodeGenFunction &CGF, const CallExpr *E, AtomicOrdering SuccessOrdering=AtomicOrdering::SequentiallyConsistent)
This function should be invoked to emit atomic cmpxchg for Microsoft&#39;s _InterlockedCompareExchange* i...
Definition: CGBuiltin.cpp:269
char __ovld __cnfn rotate(char v, char i)
For each element in v, the bits are shifted left by the number of bits given by the corresponding ele...
#define exp(__x)
Definition: tgmath.h:431
#define lround(__x)
Definition: tgmath.h:1021
bool hasSameUnqualifiedType(QualType T1, QualType T2) const
Determine whether the given types are equivalent after cvr-qualifiers have been removed.
Definition: ASTContext.h:2320
SourceLocation getExprLoc() const LLVM_READONLY
getExprLoc - Return the preferred location for the arrow when diagnosing a problem with a generic exp...
Definition: Expr.cpp:215
__v16qu tmp
Definition: emmintrin.h:2133
static Value * EmitFAbs(CodeGenFunction &CGF, Value *V)
EmitFAbs - Emit a call to .fabs().
Definition: CGBuiltin.cpp:407
Like Angled, but marks system directories.
llvm::Value * EmitScalarExpr(const Expr *E, bool IgnoreResultAssign=false)
EmitScalarExpr - Emit the computation of the specified expression of LLVM scalar type, returning the result.
FunctionArgList - Type for representing both the decl and type of parameters to a function...
Definition: CGCall.h:358
ast_type_traits::DynTypedNode Node
#define log10(__x)
Definition: tgmath.h:936
CGFunctionInfo - Class to encapsulate the information about a function definition.
llvm::Value * EmitNeonRShiftImm(llvm::Value *Vec, llvm::Value *Amt, llvm::Type *Ty, bool usgn, const char *name)
Definition: CGBuiltin.cpp:4337
This class organizes the cross-function state that is used while generating LLVM code.
static bool TypeRequiresBuiltinLaunderImp(const ASTContext &Ctx, QualType Ty, llvm::SmallPtrSetImpl< const Decl *> &Seen)
Definition: CGBuiltin.cpp:1480
Dataflow Directional Tag Classes.
#define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier)
Definition: CGBuiltin.cpp:4414
bool hasSideEffects() const
Definition: Expr.h:574
static RValue getComplex(llvm::Value *V1, llvm::Value *V2)
Definition: CGValue.h:92
EvalResult is a struct with detailed info about an evaluated expression.
Definition: Expr.h:580
MSVCSetJmpKind
Definition: CGBuiltin.cpp:821
llvm::Value * EmitWebAssemblyBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
llvm::LoadInst * CreateAlignedLoad(llvm::Value *Addr, CharUnits Align, const llvm::Twine &Name="")
Definition: CGBuilder.h:90
llvm::Value * EmitNeonCall(llvm::Function *F, SmallVectorImpl< llvm::Value *> &O, const char *name, unsigned shift=0, bool rightshift=false)
Definition: CGBuiltin.cpp:4316
void EmitARCIntrinsicUse(ArrayRef< llvm::Value *> values)
Given a number of pointers, inform the optimizer that they&#39;re being intrinsically used up until this ...
Definition: CGObjC.cpp:1953
#define ceil(__x)
Definition: tgmath.h:601
llvm::LoadInst * CreateLoad(Address Addr, const llvm::Twine &Name="")
Definition: CGBuilder.h:69
static RValue EmitCheckedMixedSignMultiply(CodeGenFunction &CGF, const clang::Expr *Op1, WidthAndSignedness Op1Info, const clang::Expr *Op2, WidthAndSignedness Op2Info, const clang::Expr *ResultArg, QualType ResultQTy, WidthAndSignedness ResultInfo)
Emit a checked mixed-sign multiply.
Definition: CGBuiltin.cpp:1307
static std::string getAsString(SplitQualType split, const PrintingPolicy &Policy)
Definition: Type.h:976
bool isBooleanType() const
Definition: Type.h:6760
static Value * EmitTargetArchBuiltinExpr(CodeGenFunction *CGF, unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:4210
llvm::Function * getIntrinsic(unsigned IID, ArrayRef< llvm::Type *> Tys=None)
bool isIntegerConstantExpr(llvm::APSInt &Result, const ASTContext &Ctx, SourceLocation *Loc=nullptr, bool isEvaluated=true) const
isIntegerConstantExpr - Return true if this expression is a valid integer constant expression...
#define floor(__x)
Definition: tgmath.h:722
llvm::StoreInst * CreateStore(llvm::Value *Val, Address Addr, bool IsVolatile=false)
Definition: CGBuilder.h:107
Flags to identify the types for overloaded Neon builtins.
QualType getCallReturnType(const ASTContext &Ctx) const
getCallReturnType - Get the return type of the call expr.
Definition: Expr.cpp:1490
Expr * IgnoreParenImpCasts() LLVM_READONLY
Skip past any parentheses and implicit casts which might surround this expression until reaching a fi...
Definition: Expr.cpp:2942
#define MMA_INTR(geom_op_type, layout)
SmallVector< OSLogBufferItem, 4 > Items
Definition: OSLog.h:113
#define llrint(__x)
Definition: tgmath.h:902
llvm::Value * EmitNeonSplat(llvm::Value *V, llvm::Constant *Idx)
Definition: CGBuiltin.cpp:4310
#define X86_VENDOR(ENUM, STRING)
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of structs/unions/cl...
Definition: Type.h:4438
llvm::Value * vectorWrapScalar16(llvm::Value *Op)
Definition: CGBuiltin.cpp:6897
static llvm::Value * getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType)
Definition: CGBuiltin.cpp:550
bool isIntegerType() const
isIntegerType() does not include complex integers (a GCC extension).
Definition: Type.h:6677
T * getAttr() const
Definition: DeclBase.h:538
llvm::Type * getElementType() const
Return the type of the values stored in this address.
Definition: Address.h:51
llvm::Value * EmitNeonShiftVector(llvm::Value *V, llvm::Type *Ty, bool negateForRightShift)
Definition: CGBuiltin.cpp:4330
#define nearbyint(__x)
Definition: tgmath.h:1038
static Value * packTBLDVectorList(CodeGenFunction &CGF, ArrayRef< Value *> Ops, Value *ExtOp, Value *IndexOp, llvm::Type *ResTy, unsigned IntID, const char *Name)
Definition: CGBuiltin.cpp:5809
static llvm::Value * EmitBitTestIntrinsic(CodeGenFunction &CGF, unsigned BuiltinID, const CallExpr *E)
Emit a _bittest* intrinsic.
Definition: CGBuiltin.cpp:742
llvm::FunctionCallee CreateRuntimeFunction(llvm::FunctionType *Ty, StringRef Name, llvm::AttributeList ExtraAttrs=llvm::AttributeList(), bool Local=false)
Create or return a runtime function declaration with the specified type and name. ...
void setNontemporal(bool Value)
Definition: CGValue.h:291
llvm::Value * EmitARMBuiltinExpr(unsigned BuiltinID, const CallExpr *E, llvm::Triple::ArchType Arch)
Definition: CGBuiltin.cpp:5982
#define MMA_VARIANTS_B1(geom, type)
uint64_t getTypeSize(QualType T) const
Return the size of the specified (complete) type T, in bits.
Definition: ASTContext.h:2079
static llvm::Value * dumpRecord(CodeGenFunction &CGF, QualType RType, Value *&RecordPtr, CharUnits Align, llvm::FunctionCallee Func, int Lvl)
Definition: CGBuiltin.cpp:1394
CanQualType getCanonicalType(QualType T) const
Return the canonical (structural) type corresponding to the specified potentially non-canonical type ...
Definition: ASTContext.h:2280
X
Add a minimal nested name specifier fixit hint to allow lookup of a tag name from an outer enclosing ...
Definition: SemaDecl.cpp:14445
static Value * EmitX86Abs(CodeGenFunction &CGF, ArrayRef< Value *> Ops)
Definition: CGBuiltin.cpp:9580
bool EvaluateAsInt(EvalResult &Result, const ASTContext &Ctx, SideEffectsKind AllowSideEffects=SE_NoSideEffects, bool InConstantContext=false) const
EvaluateAsInt - Return true if this is a constant which we can fold and convert to an integer...
TranslationUnitDecl * getTranslationUnitDecl() const
Definition: ASTContext.h:1007
bool isVoidType() const
Definition: Type.h:6643
TypeInfo getTypeInfo(const Type *T) const
Get the size and alignment of the specified complete type in bits.
llvm::Function * LookupNeonLLVMIntrinsic(unsigned IntrinsicID, unsigned Modifier, llvm::Type *ArgTy, const CallExpr *E)
Definition: CGBuiltin.cpp:5108
llvm::Type * ConvertType(QualType T)
static Value * EmitX86vpcom(CodeGenFunction &CGF, ArrayRef< Value *> Ops, bool IsSigned)
Definition: CGBuiltin.cpp:9431
BuiltinCheckKind
Specifies which type of sanitizer check to apply when handling a particular builtin.
static Value * EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF, const NeonIntrinsicInfo &SISDInfo, SmallVectorImpl< Value *> &Ops, const CallExpr *E)
Definition: CGBuiltin.cpp:5147
#define sqrt(__x)
Definition: tgmath.h:520
Builtin::Context & BuiltinInfo
Definition: ASTContext.h:571
static Value * EmitX86CompressExpand(CodeGenFunction &CGF, ArrayRef< Value *> Ops, bool IsCompress)
Definition: CGBuiltin.cpp:9368
__DEVICE__ int min(int __a, int __b)
static char bitActionToX86BTCode(BitTest::ActionKind A)
Definition: CGBuiltin.cpp:685
llvm::Constant * getBuiltinLibFunction(const FunctionDecl *FD, unsigned BuiltinID)
Given a builtin id for a function like "__builtin_fabsf", return a Function* for "fabsf".
Definition: CGBuiltin.cpp:69
#define fabs(__x)
Definition: tgmath.h:549
Defines the clang::TargetInfo interface.
CallExpr - Represents a function call (C99 6.5.2.2, C++ [expr.call]).
Definition: Expr.h:2516
static Value * emitTernaryBuiltin(CodeGenFunction &CGF, const CallExpr *E, unsigned IntrinsicID)
Definition: CGBuiltin.cpp:372
StringRef getName() const
Get the name of identifier for this declaration as a StringRef.
Definition: Decl.h:275
uint64_t Width
Definition: ASTContext.h:143
static Value * EmitSignBit(CodeGenFunction &CGF, Value *V)
Emit the computation of the sign bit for a floating point value.
Definition: CGBuiltin.cpp:416
bool isReadOnly() const
Definition: Type.h:6105
static RValue get(llvm::Value *V)
Definition: CGValue.h:85
static Value * EmitX86ConvertIntToFp(CodeGenFunction &CGF, ArrayRef< Value *> Ops, bool IsSigned)
Definition: CGBuiltin.cpp:9561
bool isUnion() const
Definition: Decl.h:3285
bool isPointerType() const
Definition: Type.h:6384
CharUnits getTypeSizeInChars(QualType T) const
Return the size of the specified (complete) type T, in characters.
static ApplyDebugLocation CreateEmpty(CodeGenFunction &CGF)
Set the IRBuilder to not attach debug locations.
Definition: CGDebugInfo.h:759
llvm::StoreInst * CreateAlignedStore(llvm::Value *Val, llvm::Value *Addr, CharUnits Align, bool IsVolatile=false)
Definition: CGBuilder.h:114
QualType getType() const
Definition: Decl.h:647
bool isFloatingType() const
Definition: Type.cpp:1952
LValue - This represents an lvalue references.
Definition: CGValue.h:166
Information for lazily generating a cleanup.
Definition: EHScopeStack.h:146
static Value * EmitISOVolatileLoad(CodeGenFunction &CGF, const CallExpr *E)
Definition: CGBuiltin.cpp:322
APSInt & getInt()
Definition: APValue.h:336
const LangOptions & getLangOpts() const
static Value * EmitX86ConvertToMask(CodeGenFunction &CGF, Value *In)
Definition: CGBuiltin.cpp:9556
static Value * MakeBinaryAtomicValue(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E, AtomicOrdering Ordering=AtomicOrdering::SequentiallyConsistent)
Utility to insert an atomic instruction based on Intrinsic::ID and the expression node...
Definition: CGBuiltin.cpp:117
llvm::CallBase * EmitRuntimeCallOrInvoke(llvm::FunctionCallee callee, ArrayRef< llvm::Value *> args, const Twine &name="")
Emits a call or invoke instruction to the given runtime function.
Definition: CGCall.cpp:3736
CallArgList - Type for representing both the value and type of arguments in a call.
Definition: CGCall.h:262
static RValue EmitBinaryAtomic(CodeGenFunction &CGF, llvm::AtomicRMWInst::BinOp Kind, const CallExpr *E)
Definition: CGBuiltin.cpp:167
#define X86_FEATURE_COMPAT(VAL, ENUM, STR)
llvm::Value * EmitTargetBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
EmitTargetBuiltinExpr - Emit the given builtin call.
Definition: CGBuiltin.cpp:4247
static llvm::VectorType * GetNeonType(CodeGenFunction *CGF, NeonTypeFlags TypeFlags, bool HasLegalHalfType=true, bool V1Ty=false)
Definition: CGBuiltin.cpp:4260
static bool AArch64SIMDIntrinsicsProvenSorted
Definition: CGBuiltin.cpp:5085
QualType getIntTypeForBitwidth(unsigned DestWidth, unsigned Signed) const
getIntTypeForBitwidth - sets integer QualTy according to specified details: bitwidth, signed/unsigned.
llvm::Value * EmitSystemZBuiltinExpr(unsigned BuiltinID, const CallExpr *E)
static OMPLinearClause * Create(const ASTContext &C, SourceLocation StartLoc, SourceLocation LParenLoc, OpenMPLinearClauseKind Modifier, SourceLocation ModifierLoc, SourceLocation ColonLoc, SourceLocation EndLoc, ArrayRef< Expr *> VL, ArrayRef< Expr *> PL, ArrayRef< Expr *> IL, Expr *Step, Expr *CalcStep, Stmt *PreInit, Expr *PostUpdate)
Creates clause with a list of variables VL and a linear step Step.
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.