clang  5.0.0
CGBuiltin.cpp
1 //===---- CGBuiltin.cpp - Emit LLVM Code for builtins ---------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This contains code to emit Builtin calls as LLVM code.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "CGCXXABI.h"
15 #include "CGObjCRuntime.h"
16 #include "CGOpenCLRuntime.h"
17 #include "CodeGenFunction.h"
18 #include "CodeGenModule.h"
19 #include "TargetInfo.h"
20 #include "clang/AST/ASTContext.h"
21 #include "clang/AST/Decl.h"
22 #include "clang/Analysis/Analyses/OSLog.h"
23 #include "clang/Basic/TargetBuiltins.h"
24 #include "clang/Basic/TargetInfo.h"
25 #include "clang/CodeGen/CGFunctionInfo.h"
26 #include "llvm/ADT/StringExtras.h"
27 #include "llvm/IR/CallSite.h"
28 #include "llvm/IR/DataLayout.h"
29 #include "llvm/IR/InlineAsm.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/MDBuilder.h"
32 #include <sstream>
33 
34 using namespace clang;
35 using namespace CodeGen;
36 using namespace llvm;
37 
38 static
39 int64_t clamp(int64_t Value, int64_t Low, int64_t High) {
40  return std::min(High, std::max(Low, Value));
41 }
42 
43 /// getBuiltinLibFunction - Given a builtin id for a function like
44 /// "__builtin_fabsf", return a Function* for "fabsf".
45 llvm::Constant *CodeGenModule::getBuiltinLibFunction(const FunctionDecl *FD,
46  unsigned BuiltinID) {
47  assert(Context.BuiltinInfo.isLibFunction(BuiltinID));
48 
49  // Get the name, skip over the __builtin_ prefix (if necessary).
50  StringRef Name;
51  GlobalDecl D(FD);
52 
53  // If the builtin has been declared explicitly with an assembler label,
54  // use the mangled name. This differs from the plain label on platforms
55  // that prefix labels.
56  if (FD->hasAttr<AsmLabelAttr>())
57  Name = getMangledName(D);
58  else
59  Name = Context.BuiltinInfo.getName(BuiltinID) + 10;
60 
61  llvm::FunctionType *Ty =
62  cast<llvm::FunctionType>(getTypes().ConvertType(FD->getType()));
63 
64  return GetOrCreateLLVMFunction(Name, Ty, D, /*ForVTable=*/false);
65 }
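// For example, "__builtin_fabsf" resolves to the library symbol "fabsf": the
// "+ 10" above skips exactly the 10 characters of the "__builtin_" prefix.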
66 
67 /// Emit the conversions required to turn the given value into an
68 /// integer of the given size.
69 static Value *EmitToInt(CodeGenFunction &CGF, llvm::Value *V,
70  QualType T, llvm::IntegerType *IntType) {
71  V = CGF.EmitToMemory(V, T);
72 
73  if (V->getType()->isPointerTy())
74  return CGF.Builder.CreatePtrToInt(V, IntType);
75 
76  assert(V->getType() == IntType);
77  return V;
78 }
79 
80 static Value *EmitFromInt(CodeGenFunction &CGF, llvm::Value *V,
81  QualType T, llvm::Type *ResultType) {
82  V = CGF.EmitFromMemory(V, T);
83 
84  if (ResultType->isPointerTy())
85  return CGF.Builder.CreateIntToPtr(V, ResultType);
86 
87  assert(V->getType() == ResultType);
88  return V;
89 }
90 
91 /// Utility to insert an atomic instruction based on Intrinsic::ID
92 /// and the expression node.
93 static Value *MakeBinaryAtomicValue(CodeGenFunction &CGF,
94  llvm::AtomicRMWInst::BinOp Kind,
95  const CallExpr *E) {
96  QualType T = E->getType();
97  assert(E->getArg(0)->getType()->isPointerType());
98  assert(CGF.getContext().hasSameUnqualifiedType(T,
99  E->getArg(0)->getType()->getPointeeType()));
100  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
101 
102  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
103  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
104 
105  llvm::IntegerType *IntType =
106  llvm::IntegerType::get(CGF.getLLVMContext(),
107  CGF.getContext().getTypeSize(T));
108  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
109 
110  llvm::Value *Args[2];
111  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
112  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
113  llvm::Type *ValueType = Args[1]->getType();
114  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
115 
116  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
117  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
118  return EmitFromInt(CGF, Result, T, ValueType);
119 }
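// Rough sketch (not verified compiler output): for a call such as
//   __sync_fetch_and_add(&i, 1)   // with 'int i'
// the code above emits approximately
//   %old = atomicrmw add i32* %i, i32 1 seq_cst
// and EmitFromInt converts %old back to the source type (a no-op for plain
// integers, an inttoptr for pointer operands).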
120 
121 static Value *EmitNontemporalStore(CodeGenFunction &CGF, const CallExpr *E) {
122  Value *Val = CGF.EmitScalarExpr(E->getArg(0));
123  Value *Address = CGF.EmitScalarExpr(E->getArg(1));
124 
125  // Convert the type of the pointer to a pointer to the stored type.
126  Val = CGF.EmitToMemory(Val, E->getArg(0)->getType());
127  Value *BC = CGF.Builder.CreateBitCast(
128  Address, llvm::PointerType::getUnqual(Val->getType()), "cast");
129  LValue LV = CGF.MakeNaturalAlignAddrLValue(BC, E->getArg(0)->getType());
130  LV.setNontemporal(true);
131  CGF.EmitStoreOfScalar(Val, LV, false);
132  return nullptr;
133 }
134 
135 static Value *EmitNontemporalLoad(CodeGenFunction &CGF, const CallExpr *E) {
136  Value *Address = CGF.EmitScalarExpr(E->getArg(0));
137 
138  LValue LV = CGF.MakeNaturalAlignAddrLValue(Address, E->getType());
139  LV.setNontemporal(true);
140  return CGF.EmitLoadOfScalar(LV, E->getExprLoc());
141 }
142 
143 static RValue EmitBinaryAtomic(CodeGenFunction &CGF,
144  llvm::AtomicRMWInst::BinOp Kind,
145  const CallExpr *E) {
146  return RValue::get(MakeBinaryAtomicValue(CGF, Kind, E));
147 }
148 
149 /// Utility to insert an atomic instruction based on Intrinsic::ID and
150 /// the expression node, where the return value is the result of the
151 /// operation.
152 static RValue EmitBinaryAtomicPost(CodeGenFunction &CGF,
153  llvm::AtomicRMWInst::BinOp Kind,
154  const CallExpr *E,
155  Instruction::BinaryOps Op,
156  bool Invert = false) {
157  QualType T = E->getType();
158  assert(E->getArg(0)->getType()->isPointerType());
159  assert(CGF.getContext().hasSameUnqualifiedType(T,
160  E->getArg(0)->getType()->getPointeeType()));
161  assert(CGF.getContext().hasSameUnqualifiedType(T, E->getArg(1)->getType()));
162 
163  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
164  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
165 
166  llvm::IntegerType *IntType =
167  llvm::IntegerType::get(CGF.getLLVMContext(),
168  CGF.getContext().getTypeSize(T));
169  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
170 
171  llvm::Value *Args[2];
172  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
173  llvm::Type *ValueType = Args[1]->getType();
174  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
175  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
176 
177  llvm::Value *Result = CGF.Builder.CreateAtomicRMW(
178  Kind, Args[0], Args[1], llvm::AtomicOrdering::SequentiallyConsistent);
179  Result = CGF.Builder.CreateBinOp(Op, Result, Args[1]);
180  if (Invert)
181  Result = CGF.Builder.CreateBinOp(llvm::Instruction::Xor, Result,
182  llvm::ConstantInt::get(IntType, -1));
183  Result = EmitFromInt(CGF, Result, T, ValueType);
184  return RValue::get(Result);
185 }
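// Sketch of the post-op form: __sync_add_and_fetch(&i, v) emits 'atomicrmw add'
// and then re-applies the add to the returned old value so the *new* value is
// produced; with Invert set (the nand variants) that result is additionally
// xor'ed with -1, modeling GCC's ~(old & v) semantics.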
186 
187 /// @brief Utility to insert an atomic cmpxchg instruction.
188 ///
189 /// @param CGF The current codegen function.
190 /// @param E Builtin call expression to convert to cmpxchg.
191 /// arg0 - address to operate on
192 /// arg1 - value to compare with
193 /// arg2 - new value
194 /// @param ReturnBool Specifies whether to return success flag of
195 /// cmpxchg result or the old value.
196 ///
197 /// @returns result of cmpxchg, according to ReturnBool
198 static Value *MakeAtomicCmpXchgValue(CodeGenFunction &CGF, const CallExpr *E,
199  bool ReturnBool) {
200  QualType T = ReturnBool ? E->getArg(1)->getType() : E->getType();
201  llvm::Value *DestPtr = CGF.EmitScalarExpr(E->getArg(0));
202  unsigned AddrSpace = DestPtr->getType()->getPointerAddressSpace();
203 
204  llvm::IntegerType *IntType = llvm::IntegerType::get(
205  CGF.getLLVMContext(), CGF.getContext().getTypeSize(T));
206  llvm::Type *IntPtrType = IntType->getPointerTo(AddrSpace);
207 
208  Value *Args[3];
209  Args[0] = CGF.Builder.CreateBitCast(DestPtr, IntPtrType);
210  Args[1] = CGF.EmitScalarExpr(E->getArg(1));
211  llvm::Type *ValueType = Args[1]->getType();
212  Args[1] = EmitToInt(CGF, Args[1], T, IntType);
213  Args[2] = EmitToInt(CGF, CGF.EmitScalarExpr(E->getArg(2)), T, IntType);
214 
215  Value *Pair = CGF.Builder.CreateAtomicCmpXchg(
216  Args[0], Args[1], Args[2], llvm::AtomicOrdering::SequentiallyConsistent,
217  llvm::AtomicOrdering::SequentiallyConsistent);
218  if (ReturnBool)
219  // Extract boolean success flag and zext it to int.
220  return CGF.Builder.CreateZExt(CGF.Builder.CreateExtractValue(Pair, 1),
221  CGF.ConvertType(E->getType()));
222  else
223  // Extract old value and emit it using the same type as compare value.
224  return EmitFromInt(CGF, CGF.Builder.CreateExtractValue(Pair, 0), T,
225  ValueType);
226 }
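// Rough sketch (not verified compiler output): __sync_bool_compare_and_swap(&i, o, n)
// lowers to approximately
//   %pair = cmpxchg i32* %i, i32 %o, i32 %n seq_cst seq_cst
//   %ok   = extractvalue { i32, i1 } %pair, 1   ; then zext'ed to the result type
// while the _val_ form extracts element 0 (the old value) instead.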
227 
228 // Emit a simple mangled intrinsic that has 1 argument and a return type
229 // matching the argument type.
230 static Value *emitUnaryBuiltin(CodeGenFunction &CGF,
231  const CallExpr *E,
232  unsigned IntrinsicID) {
233  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
234 
235  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
236  return CGF.Builder.CreateCall(F, Src0);
237 }
238 
239 // Emit an intrinsic that has 2 operands of the same type as its result.
240 static Value *emitBinaryBuiltin(CodeGenFunction &CGF,
241  const CallExpr *E,
242  unsigned IntrinsicID) {
243  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
244  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
245 
246  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
247  return CGF.Builder.CreateCall(F, { Src0, Src1 });
248 }
249 
250 // Emit an intrinsic that has 3 operands of the same type as its result.
251 static Value *emitTernaryBuiltin(CodeGenFunction &CGF,
252  const CallExpr *E,
253  unsigned IntrinsicID) {
254  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
255  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
256  llvm::Value *Src2 = CGF.EmitScalarExpr(E->getArg(2));
257 
258  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
259  return CGF.Builder.CreateCall(F, { Src0, Src1, Src2 });
260 }
261 
262 // Emit an intrinsic that has 1 float or double operand, and 1 integer.
263 static Value *emitFPIntBuiltin(CodeGenFunction &CGF,
264  const CallExpr *E,
265  unsigned IntrinsicID) {
266  llvm::Value *Src0 = CGF.EmitScalarExpr(E->getArg(0));
267  llvm::Value *Src1 = CGF.EmitScalarExpr(E->getArg(1));
268 
269  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, Src0->getType());
270  return CGF.Builder.CreateCall(F, {Src0, Src1});
271 }
272 
273 /// EmitFAbs - Emit a call to @llvm.fabs().
274 static Value *EmitFAbs(CodeGenFunction &CGF, Value *V) {
275  Value *F = CGF.CGM.getIntrinsic(Intrinsic::fabs, V->getType());
276  llvm::CallInst *Call = CGF.Builder.CreateCall(F, V);
277  Call->setDoesNotAccessMemory();
278  return Call;
279 }
280 
281 /// Emit the computation of the sign bit for a floating point value. Returns
282 /// the i1 sign bit value.
283 static Value *EmitSignBit(CodeGenFunction &CGF, Value *V) {
284  LLVMContext &C = CGF.CGM.getLLVMContext();
285 
286  llvm::Type *Ty = V->getType();
287  int Width = Ty->getPrimitiveSizeInBits();
288  llvm::Type *IntTy = llvm::IntegerType::get(C, Width);
289  V = CGF.Builder.CreateBitCast(V, IntTy);
290  if (Ty->isPPC_FP128Ty()) {
291  // We want the sign bit of the higher-order double. The bitcast we just
292  // did works as if the double-double was stored to memory and then
293  // read as an i128. The "store" will put the higher-order double in the
294  // lower address in both little- and big-Endian modes, but the "load"
295  // will treat those bits as a different part of the i128: the low bits in
296  // little-Endian, the high bits in big-Endian. Therefore, on big-Endian
297  // we need to shift the high bits down to the low before truncating.
298  Width >>= 1;
299  if (CGF.getTarget().isBigEndian()) {
300  Value *ShiftCst = llvm::ConstantInt::get(IntTy, Width);
301  V = CGF.Builder.CreateLShr(V, ShiftCst);
302  }
303  // We are truncating value in order to extract the higher-order
304  // double, which we will be using to extract the sign from.
305  IntTy = llvm::IntegerType::get(C, Width);
306  V = CGF.Builder.CreateTrunc(V, IntTy);
307  }
308  Value *Zero = llvm::Constant::getNullValue(IntTy);
309  return CGF.Builder.CreateICmpSLT(V, Zero);
310 }
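// For the common case (e.g. 'double') this reduces to a bitcast to i64
// followed by 'icmp slt i64 %bits, 0'; the extra shifting and truncation above
// only applies to the PowerPC double-double (ppc_fp128) representation.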
311 
312 static RValue emitLibraryCall(CodeGenFunction &CGF, const FunctionDecl *FD,
313  const CallExpr *E, llvm::Constant *calleeValue) {
314  CGCallee callee = CGCallee::forDirect(calleeValue, FD);
315  return CGF.EmitCall(E->getCallee()->getType(), callee, E, ReturnValueSlot());
316 }
317 
318 /// \brief Emit a call to llvm.{sadd,uadd,ssub,usub,smul,umul}.with.overflow.*
319 /// depending on IntrinsicID.
320 ///
321 /// \arg CGF The current codegen function.
322 /// \arg IntrinsicID The ID for the Intrinsic we wish to generate.
323 /// \arg X The first argument to the llvm.*.with.overflow.*.
324 /// \arg Y The second argument to the llvm.*.with.overflow.*.
325 /// \arg Carry The carry returned by the llvm.*.with.overflow.*.
326 /// \returns The result (i.e. sum/product) returned by the intrinsic.
327 static llvm::Value *EmitOverflowIntrinsic(CodeGenFunction &CGF,
328  const llvm::Intrinsic::ID IntrinsicID,
329  llvm::Value *X, llvm::Value *Y,
330  llvm::Value *&Carry) {
331  // Make sure we have integers of the same width.
332  assert(X->getType() == Y->getType() &&
333  "Arguments must be the same type. (Did you forget to make sure both "
334  "arguments have the same integer width?)");
335 
336  llvm::Value *Callee = CGF.CGM.getIntrinsic(IntrinsicID, X->getType());
337  llvm::Value *Tmp = CGF.Builder.CreateCall(Callee, {X, Y});
338  Carry = CGF.Builder.CreateExtractValue(Tmp, 1);
339  return CGF.Builder.CreateExtractValue(Tmp, 0);
340 }
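// Rough sketch: for __builtin_sadd_overflow on 32-bit operands this emits
//   %res = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %x, i32 %y)
// returning element 0 (the sum) and passing element 1 back through Carry.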
341 
342 static Value *emitRangedBuiltin(CodeGenFunction &CGF,
343  unsigned IntrinsicID,
344  int low, int high) {
345  llvm::MDBuilder MDHelper(CGF.getLLVMContext());
346  llvm::MDNode *RNode = MDHelper.createRange(APInt(32, low), APInt(32, high));
347  Value *F = CGF.CGM.getIntrinsic(IntrinsicID, {});
348  llvm::Instruction *Call = CGF.Builder.CreateCall(F);
349  Call->setMetadata(llvm::LLVMContext::MD_range, RNode);
350  return Call;
351 }
352 
353 namespace {
354  struct WidthAndSignedness {
355  unsigned Width;
356  bool Signed;
357  };
358 }
359 
360 static WidthAndSignedness
361 getIntegerWidthAndSignedness(const clang::ASTContext &context,
362  const clang::QualType Type) {
363  assert(Type->isIntegerType() && "Given type is not an integer.");
364  unsigned Width = Type->isBooleanType() ? 1 : context.getTypeInfo(Type).Width;
365  bool Signed = Type->isSignedIntegerType();
366  return {Width, Signed};
367 }
368 
369 // Given one or more integer types, this function produces an integer type that
370 // encompasses them: any value in one of the given types could be expressed in
371 // the encompassing type.
372 static struct WidthAndSignedness
373 EncompassingIntegerType(ArrayRef<struct WidthAndSignedness> Types) {
374  assert(Types.size() > 0 && "Empty list of types.");
375 
376  // If any of the given types is signed, we must return a signed type.
377  bool Signed = false;
378  for (const auto &Type : Types) {
379  Signed |= Type.Signed;
380  }
381 
382  // The encompassing type must have a width greater than or equal to the width
383 // of the specified types. Additionally, if the encompassing type is signed,
384  // its width must be strictly greater than the width of any unsigned types
385  // given.
386  unsigned Width = 0;
387  for (const auto &Type : Types) {
388  unsigned MinWidth = Type.Width + (Signed && !Type.Signed);
389  if (Width < MinWidth) {
390  Width = MinWidth;
391  }
392  }
393 
394  return {Width, Signed};
395 }
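// Worked example: encompassing {unsigned 32-bit, signed 32-bit} requires a
// signed 33-bit type (the unsigned width plus a sign bit), while
// {unsigned 16-bit, unsigned 32-bit} is simply unsigned 32-bit.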
396 
397 Value *CodeGenFunction::EmitVAStartEnd(Value *ArgValue, bool IsStart) {
398  llvm::Type *DestType = Int8PtrTy;
399  if (ArgValue->getType() != DestType)
400  ArgValue =
401  Builder.CreateBitCast(ArgValue, DestType, ArgValue->getName().data());
402 
403  Intrinsic::ID inst = IsStart ? Intrinsic::vastart : Intrinsic::vaend;
404  return Builder.CreateCall(CGM.getIntrinsic(inst), ArgValue);
405 }
406 
407 /// Checks if using the result of __builtin_object_size(p, @p From) in place of
408 /// __builtin_object_size(p, @p To) is correct
409 static bool areBOSTypesCompatible(int From, int To) {
410  // Note: Our __builtin_object_size implementation currently treats Type=0 and
411  // Type=2 identically. Encoding this implementation detail here may make
412  // improving __builtin_object_size difficult in the future, so it's omitted.
413  return From == To || (From == 0 && To == 1) || (From == 3 && To == 2);
414 }
415 
416 static llvm::Value *
417 getDefaultBuiltinObjectSizeResult(unsigned Type, llvm::IntegerType *ResType) {
418  return ConstantInt::get(ResType, (Type & 2) ? 0 : -1, /*isSigned=*/true);
419 }
420 
421 llvm::Value *
422 CodeGenFunction::evaluateOrEmitBuiltinObjectSize(const Expr *E, unsigned Type,
423  llvm::IntegerType *ResType,
424  llvm::Value *EmittedE) {
425  uint64_t ObjectSize;
426  if (!E->tryEvaluateObjectSize(ObjectSize, getContext(), Type))
427  return emitBuiltinObjectSize(E, Type, ResType, EmittedE);
428  return ConstantInt::get(ResType, ObjectSize, /*isSigned=*/true);
429 }
430 
431 /// Returns a Value corresponding to the size of the given expression.
432 /// This Value may be either of the following:
433 /// - A llvm::Argument (if E is a param with the pass_object_size attribute on
434 /// it)
435 /// - A call to the @llvm.objectsize intrinsic
436 ///
437 /// EmittedE is the result of emitting `E` as a scalar expr. If it's non-null
438 /// and we wouldn't otherwise try to reference a pass_object_size parameter,
439 /// we'll call @llvm.objectsize on EmittedE, rather than emitting E.
440 llvm::Value *
441 CodeGenFunction::emitBuiltinObjectSize(const Expr *E, unsigned Type,
442  llvm::IntegerType *ResType,
443  llvm::Value *EmittedE) {
444  // We need to reference an argument if the pointer is a parameter with the
445  // pass_object_size attribute.
446  if (auto *D = dyn_cast<DeclRefExpr>(E->IgnoreParenImpCasts())) {
447  auto *Param = dyn_cast<ParmVarDecl>(D->getDecl());
448  auto *PS = D->getDecl()->getAttr<PassObjectSizeAttr>();
449  if (Param != nullptr && PS != nullptr &&
450  areBOSTypesCompatible(PS->getType(), Type)) {
451  auto Iter = SizeArguments.find(Param);
452  assert(Iter != SizeArguments.end());
453 
454  const ImplicitParamDecl *D = Iter->second;
455  auto DIter = LocalDeclMap.find(D);
456  assert(DIter != LocalDeclMap.end());
457 
458  return EmitLoadOfScalar(DIter->second, /*volatile=*/false,
459  getContext().getSizeType(), E->getLocStart());
460  }
461  }
462 
463  // LLVM can't handle Type=3 appropriately, and __builtin_object_size shouldn't
464  // evaluate E for side-effects. In either case, we shouldn't lower to
465  // @llvm.objectsize.
466  if (Type == 3 || (!EmittedE && E->HasSideEffects(getContext())))
467  return getDefaultBuiltinObjectSizeResult(Type, ResType);
468 
469  Value *Ptr = EmittedE ? EmittedE : EmitScalarExpr(E);
470  assert(Ptr->getType()->isPointerTy() &&
471  "Non-pointer passed to __builtin_object_size?");
472 
473  Value *F = CGM.getIntrinsic(Intrinsic::objectsize, {ResType, Ptr->getType()});
474 
475  // LLVM only supports 0 and 2, so make sure we pass that along as a boolean.
476  Value *Min = Builder.getInt1((Type & 2) != 0);
477  // For GCC compatibility, __builtin_object_size treats NULL as an unknown size.
478  Value *NullIsUnknown = Builder.getTrue();
479  return Builder.CreateCall(F, {Ptr, Min, NullIsUnknown});
480 }
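// Rough sketch: __builtin_object_size(p, 0) that cannot be folded up front
// becomes approximately
//   call i64 @llvm.objectsize.i64.p0i8(i8* %p, i1 false, i1 true)
// where the two i1 arguments are the Min and NullIsUnknown flags built above.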
481 
482 // Many MSVC builtins are available on both x64 and ARM; to avoid repeating
483 // code, we handle them here.
484 enum class CodeGenFunction::MSVCIntrin {
485  _BitScanForward,
486  _BitScanReverse,
487  _InterlockedAnd,
488  _InterlockedDecrement,
489  _InterlockedExchange,
490  _InterlockedExchangeAdd,
491  _InterlockedExchangeSub,
492  _InterlockedIncrement,
493  _InterlockedOr,
494  _InterlockedXor,
495  _interlockedbittestandset,
496  __fastfail,
497 };
498 
499 Value *CodeGenFunction::EmitMSVCBuiltinExpr(MSVCIntrin BuiltinID,
500  const CallExpr *E) {
501  switch (BuiltinID) {
502  case MSVCIntrin::_BitScanForward:
503  case MSVCIntrin::_BitScanReverse: {
504  Value *ArgValue = EmitScalarExpr(E->getArg(1));
505 
506  llvm::Type *ArgType = ArgValue->getType();
507  llvm::Type *IndexType =
508  EmitScalarExpr(E->getArg(0))->getType()->getPointerElementType();
509  llvm::Type *ResultType = ConvertType(E->getType());
510 
511  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
512  Value *ResZero = llvm::Constant::getNullValue(ResultType);
513  Value *ResOne = llvm::ConstantInt::get(ResultType, 1);
514 
515  BasicBlock *Begin = Builder.GetInsertBlock();
516  BasicBlock *End = createBasicBlock("bitscan_end", this->CurFn);
517  Builder.SetInsertPoint(End);
518  PHINode *Result = Builder.CreatePHI(ResultType, 2, "bitscan_result");
519 
520  Builder.SetInsertPoint(Begin);
521  Value *IsZero = Builder.CreateICmpEQ(ArgValue, ArgZero);
522  BasicBlock *NotZero = createBasicBlock("bitscan_not_zero", this->CurFn);
523  Builder.CreateCondBr(IsZero, End, NotZero);
524  Result->addIncoming(ResZero, Begin);
525 
526  Builder.SetInsertPoint(NotZero);
527  Address IndexAddress = EmitPointerWithAlignment(E->getArg(0));
528 
529  if (BuiltinID == MSVCIntrin::_BitScanForward) {
530  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
531  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
532  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
533  Builder.CreateStore(ZeroCount, IndexAddress, false);
534  } else {
535  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
536  Value *ArgTypeLastIndex = llvm::ConstantInt::get(IndexType, ArgWidth - 1);
537 
538  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
539  Value *ZeroCount = Builder.CreateCall(F, {ArgValue, Builder.getTrue()});
540  ZeroCount = Builder.CreateIntCast(ZeroCount, IndexType, false);
541  Value *Index = Builder.CreateNSWSub(ArgTypeLastIndex, ZeroCount);
542  Builder.CreateStore(Index, IndexAddress, false);
543  }
544  Builder.CreateBr(End);
545  Result->addIncoming(ResOne, NotZero);
546 
547  Builder.SetInsertPoint(End);
548  return Result;
549  }
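  // This matches the documented MSVC semantics: when the mask is zero the
  // intrinsic returns 0 and the index slot is left untouched; otherwise the
  // bit index is stored through the first argument and 1 is returned.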
550  case MSVCIntrin::_InterlockedAnd:
551  return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E);
552  case MSVCIntrin::_InterlockedExchange:
553  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E);
554  case MSVCIntrin::_InterlockedExchangeAdd:
555  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E);
556  case MSVCIntrin::_InterlockedExchangeSub:
557  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Sub, E);
558  case MSVCIntrin::_InterlockedOr:
559  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E);
560  case MSVCIntrin::_InterlockedXor:
561  return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xor, E);
562 
563  case MSVCIntrin::_interlockedbittestandset: {
564  llvm::Value *Addr = EmitScalarExpr(E->getArg(0));
565  llvm::Value *Bit = EmitScalarExpr(E->getArg(1));
566  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
567  AtomicRMWInst::Or, Addr,
568  Builder.CreateShl(ConstantInt::get(Bit->getType(), 1), Bit),
569  llvm::AtomicOrdering::SequentiallyConsistent);
570  // Shift the relevant bit to the least significant position, truncate to
571  // the result type, and test the low bit.
572  llvm::Value *Shifted = Builder.CreateLShr(RMWI, Bit);
573  llvm::Value *Truncated =
574  Builder.CreateTrunc(Shifted, ConvertType(E->getType()));
575  return Builder.CreateAnd(Truncated,
576  ConstantInt::get(Truncated->getType(), 1));
577  }
578 
579  case MSVCIntrin::_InterlockedDecrement: {
580  llvm::Type *IntTy = ConvertType(E->getType());
581  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
582  AtomicRMWInst::Sub,
583  EmitScalarExpr(E->getArg(0)),
584  ConstantInt::get(IntTy, 1),
585  llvm::AtomicOrdering::SequentiallyConsistent);
586  return Builder.CreateSub(RMWI, ConstantInt::get(IntTy, 1));
587  }
588  case MSVCIntrin::_InterlockedIncrement: {
589  llvm::Type *IntTy = ConvertType(E->getType());
590  AtomicRMWInst *RMWI = Builder.CreateAtomicRMW(
591  AtomicRMWInst::Add,
592  EmitScalarExpr(E->getArg(0)),
593  ConstantInt::get(IntTy, 1),
594  llvm::AtomicOrdering::SequentiallyConsistent);
595  return Builder.CreateAdd(RMWI, ConstantInt::get(IntTy, 1));
596  }
597 
598  case MSVCIntrin::__fastfail: {
599  // Request immediate process termination from the kernel. The instruction
600  // sequences to do this are documented on MSDN:
601  // https://msdn.microsoft.com/en-us/library/dn774154.aspx
602  llvm::Triple::ArchType ISA = getTarget().getTriple().getArch();
603  StringRef Asm, Constraints;
604  switch (ISA) {
605  default:
606  ErrorUnsupported(E, "__fastfail call for this architecture");
607  break;
608  case llvm::Triple::x86:
609  case llvm::Triple::x86_64:
610  Asm = "int $$0x29";
611  Constraints = "{cx}";
612  break;
613  case llvm::Triple::thumb:
614  Asm = "udf #251";
615  Constraints = "{r0}";
616  break;
617  }
618  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, {Int32Ty}, false);
619  llvm::InlineAsm *IA =
620  llvm::InlineAsm::get(FTy, Asm, Constraints, /*SideEffects=*/true);
621  llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
622  getLLVMContext(), llvm::AttributeList::FunctionIndex,
623  llvm::Attribute::NoReturn);
624  CallSite CS = Builder.CreateCall(IA, EmitScalarExpr(E->getArg(0)));
625  CS.setAttributes(NoReturnAttr);
626  return CS.getInstruction();
627  }
628  }
629  llvm_unreachable("Incorrect MSVC intrinsic!");
630 }
631 
632 namespace {
633 // ARC cleanup for __builtin_os_log_format
634 struct CallObjCArcUse final : EHScopeStack::Cleanup {
635  CallObjCArcUse(llvm::Value *object) : object(object) {}
636  llvm::Value *object;
637 
638  void Emit(CodeGenFunction &CGF, Flags flags) override {
639  CGF.EmitARCIntrinsicUse(object);
640  }
641 };
642 }
643 
644 RValue CodeGenFunction::EmitBuiltinExpr(const FunctionDecl *FD,
645  unsigned BuiltinID, const CallExpr *E,
646  ReturnValueSlot ReturnValue) {
647  // See if we can constant fold this builtin. If so, don't emit it at all.
648  Expr::EvalResult Result;
649  if (E->EvaluateAsRValue(Result, CGM.getContext()) &&
650  !Result.hasSideEffects()) {
651  if (Result.Val.isInt())
652  return RValue::get(llvm::ConstantInt::get(getLLVMContext(),
653  Result.Val.getInt()));
654  if (Result.Val.isFloat())
655  return RValue::get(llvm::ConstantFP::get(getLLVMContext(),
656  Result.Val.getFloat()));
657  }
658 
659  switch (BuiltinID) {
660  default: break; // Handle intrinsics and libm functions below.
661  case Builtin::BI__builtin___CFStringMakeConstantString:
662  case Builtin::BI__builtin___NSStringMakeConstantString:
663  return RValue::get(CGM.EmitConstantExpr(E, E->getType(), nullptr));
664  case Builtin::BI__builtin_stdarg_start:
665  case Builtin::BI__builtin_va_start:
666  case Builtin::BI__va_start:
667  case Builtin::BI__builtin_va_end:
668  return RValue::get(
669  EmitVAStartEnd(BuiltinID == Builtin::BI__va_start
670  ? EmitScalarExpr(E->getArg(0))
671  : EmitVAListRef(E->getArg(0)).getPointer(),
672  BuiltinID != Builtin::BI__builtin_va_end));
673  case Builtin::BI__builtin_va_copy: {
674  Value *DstPtr = EmitVAListRef(E->getArg(0)).getPointer();
675  Value *SrcPtr = EmitVAListRef(E->getArg(1)).getPointer();
676 
677  llvm::Type *Type = Int8PtrTy;
678 
679  DstPtr = Builder.CreateBitCast(DstPtr, Type);
680  SrcPtr = Builder.CreateBitCast(SrcPtr, Type);
681  return RValue::get(Builder.CreateCall(CGM.getIntrinsic(Intrinsic::vacopy),
682  {DstPtr, SrcPtr}));
683  }
684  case Builtin::BI__builtin_abs:
685  case Builtin::BI__builtin_labs:
686  case Builtin::BI__builtin_llabs: {
687  Value *ArgValue = EmitScalarExpr(E->getArg(0));
688 
689  Value *NegOp = Builder.CreateNeg(ArgValue, "neg");
690  Value *CmpResult =
691  Builder.CreateICmpSGE(ArgValue,
692  llvm::Constant::getNullValue(ArgValue->getType()),
693  "abscond");
694  Value *Result =
695  Builder.CreateSelect(CmpResult, ArgValue, NegOp, "abs");
696 
697  return RValue::get(Result);
698  }
699  case Builtin::BI__builtin_fabs:
700  case Builtin::BI__builtin_fabsf:
701  case Builtin::BI__builtin_fabsl: {
702  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::fabs));
703  }
704  case Builtin::BI__builtin_fmod:
705  case Builtin::BI__builtin_fmodf:
706  case Builtin::BI__builtin_fmodl: {
707  Value *Arg1 = EmitScalarExpr(E->getArg(0));
708  Value *Arg2 = EmitScalarExpr(E->getArg(1));
709  Value *Result = Builder.CreateFRem(Arg1, Arg2, "fmod");
710  return RValue::get(Result);
711  }
712  case Builtin::BI__builtin_copysign:
713  case Builtin::BI__builtin_copysignf:
714  case Builtin::BI__builtin_copysignl: {
715  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::copysign));
716  }
717  case Builtin::BI__builtin_ceil:
718  case Builtin::BI__builtin_ceilf:
719  case Builtin::BI__builtin_ceill: {
720  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::ceil));
721  }
722  case Builtin::BI__builtin_floor:
723  case Builtin::BI__builtin_floorf:
724  case Builtin::BI__builtin_floorl: {
725  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::floor));
726  }
727  case Builtin::BI__builtin_trunc:
728  case Builtin::BI__builtin_truncf:
729  case Builtin::BI__builtin_truncl: {
730  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::trunc));
731  }
732  case Builtin::BI__builtin_rint:
733  case Builtin::BI__builtin_rintf:
734  case Builtin::BI__builtin_rintl: {
735  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::rint));
736  }
737  case Builtin::BI__builtin_nearbyint:
738  case Builtin::BI__builtin_nearbyintf:
739  case Builtin::BI__builtin_nearbyintl: {
740  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::nearbyint));
741  }
742  case Builtin::BI__builtin_round:
743  case Builtin::BI__builtin_roundf:
744  case Builtin::BI__builtin_roundl: {
745  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::round));
746  }
747  case Builtin::BI__builtin_fmin:
748  case Builtin::BI__builtin_fminf:
749  case Builtin::BI__builtin_fminl: {
750  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::minnum));
751  }
752  case Builtin::BI__builtin_fmax:
753  case Builtin::BI__builtin_fmaxf:
754  case Builtin::BI__builtin_fmaxl: {
755  return RValue::get(emitBinaryBuiltin(*this, E, Intrinsic::maxnum));
756  }
757  case Builtin::BI__builtin_conj:
758  case Builtin::BI__builtin_conjf:
759  case Builtin::BI__builtin_conjl: {
760  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
761  Value *Real = ComplexVal.first;
762  Value *Imag = ComplexVal.second;
763  Value *Zero =
764  Imag->getType()->isFPOrFPVectorTy()
765  ? llvm::ConstantFP::getZeroValueForNegation(Imag->getType())
766  : llvm::Constant::getNullValue(Imag->getType());
767 
768  Imag = Builder.CreateFSub(Zero, Imag, "sub");
769  return RValue::getComplex(std::make_pair(Real, Imag));
770  }
771  case Builtin::BI__builtin_creal:
772  case Builtin::BI__builtin_crealf:
773  case Builtin::BI__builtin_creall:
774  case Builtin::BIcreal:
775  case Builtin::BIcrealf:
776  case Builtin::BIcreall: {
777  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
778  return RValue::get(ComplexVal.first);
779  }
780 
781  case Builtin::BI__builtin_cimag:
782  case Builtin::BI__builtin_cimagf:
783  case Builtin::BI__builtin_cimagl:
784  case Builtin::BIcimag:
785  case Builtin::BIcimagf:
786  case Builtin::BIcimagl: {
787  ComplexPairTy ComplexVal = EmitComplexExpr(E->getArg(0));
788  return RValue::get(ComplexVal.second);
789  }
790 
791  case Builtin::BI__builtin_ctzs:
792  case Builtin::BI__builtin_ctz:
793  case Builtin::BI__builtin_ctzl:
794  case Builtin::BI__builtin_ctzll: {
795  Value *ArgValue = EmitScalarExpr(E->getArg(0));
796 
797  llvm::Type *ArgType = ArgValue->getType();
798  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
799 
800  llvm::Type *ResultType = ConvertType(E->getType());
801  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
802  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
803  if (Result->getType() != ResultType)
804  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
805  "cast");
806  return RValue::get(Result);
807  }
808  case Builtin::BI__builtin_clzs:
809  case Builtin::BI__builtin_clz:
810  case Builtin::BI__builtin_clzl:
811  case Builtin::BI__builtin_clzll: {
812  Value *ArgValue = EmitScalarExpr(E->getArg(0));
813 
814  llvm::Type *ArgType = ArgValue->getType();
815  Value *F = CGM.getIntrinsic(Intrinsic::ctlz, ArgType);
816 
817  llvm::Type *ResultType = ConvertType(E->getType());
818  Value *ZeroUndef = Builder.getInt1(getTarget().isCLZForZeroUndef());
819  Value *Result = Builder.CreateCall(F, {ArgValue, ZeroUndef});
820  if (Result->getType() != ResultType)
821  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
822  "cast");
823  return RValue::get(Result);
824  }
825  case Builtin::BI__builtin_ffs:
826  case Builtin::BI__builtin_ffsl:
827  case Builtin::BI__builtin_ffsll: {
828  // ffs(x) -> x ? cttz(x) + 1 : 0
829  Value *ArgValue = EmitScalarExpr(E->getArg(0));
830 
831  llvm::Type *ArgType = ArgValue->getType();
832  Value *F = CGM.getIntrinsic(Intrinsic::cttz, ArgType);
833 
834  llvm::Type *ResultType = ConvertType(E->getType());
835  Value *Tmp =
836  Builder.CreateAdd(Builder.CreateCall(F, {ArgValue, Builder.getTrue()}),
837  llvm::ConstantInt::get(ArgType, 1));
838  Value *Zero = llvm::Constant::getNullValue(ArgType);
839  Value *IsZero = Builder.CreateICmpEQ(ArgValue, Zero, "iszero");
840  Value *Result = Builder.CreateSelect(IsZero, Zero, Tmp, "ffs");
841  if (Result->getType() != ResultType)
842  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
843  "cast");
844  return RValue::get(Result);
845  }
846  case Builtin::BI__builtin_parity:
847  case Builtin::BI__builtin_parityl:
848  case Builtin::BI__builtin_parityll: {
849  // parity(x) -> ctpop(x) & 1
850  Value *ArgValue = EmitScalarExpr(E->getArg(0));
851 
852  llvm::Type *ArgType = ArgValue->getType();
853  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
854 
855  llvm::Type *ResultType = ConvertType(E->getType());
856  Value *Tmp = Builder.CreateCall(F, ArgValue);
857  Value *Result = Builder.CreateAnd(Tmp, llvm::ConstantInt::get(ArgType, 1));
858  if (Result->getType() != ResultType)
859  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
860  "cast");
861  return RValue::get(Result);
862  }
863  case Builtin::BI__popcnt16:
864  case Builtin::BI__popcnt:
865  case Builtin::BI__popcnt64:
866  case Builtin::BI__builtin_popcount:
867  case Builtin::BI__builtin_popcountl:
868  case Builtin::BI__builtin_popcountll: {
869  Value *ArgValue = EmitScalarExpr(E->getArg(0));
870 
871  llvm::Type *ArgType = ArgValue->getType();
872  Value *F = CGM.getIntrinsic(Intrinsic::ctpop, ArgType);
873 
874  llvm::Type *ResultType = ConvertType(E->getType());
875  Value *Result = Builder.CreateCall(F, ArgValue);
876  if (Result->getType() != ResultType)
877  Result = Builder.CreateIntCast(Result, ResultType, /*isSigned*/true,
878  "cast");
879  return RValue::get(Result);
880  }
881  case Builtin::BI_rotr8:
882  case Builtin::BI_rotr16:
883  case Builtin::BI_rotr:
884  case Builtin::BI_lrotr:
885  case Builtin::BI_rotr64: {
886  Value *Val = EmitScalarExpr(E->getArg(0));
887  Value *Shift = EmitScalarExpr(E->getArg(1));
888 
889  llvm::Type *ArgType = Val->getType();
890  Shift = Builder.CreateIntCast(Shift, ArgType, false);
891  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
892  Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
893  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
894 
895  Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
896  Shift = Builder.CreateAnd(Shift, Mask);
897  Value *LeftShift = Builder.CreateSub(ArgTypeSize, Shift);
898 
899  Value *RightShifted = Builder.CreateLShr(Val, Shift);
900  Value *LeftShifted = Builder.CreateShl(Val, LeftShift);
901  Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
902 
903  Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
904  Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
905  return RValue::get(Result);
906  }
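  // Branch-free except for the final select: for a 32-bit _rotr(x, n) this
  // computes (x >> (n & 31)) | (x << (32 - (n & 31))), and the select returns
  // x unchanged when the masked shift is zero, avoiding the out-of-range
  // 'shl ..., 32' that the OR-based form would otherwise produce.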
907  case Builtin::BI_rotl8:
908  case Builtin::BI_rotl16:
909  case Builtin::BI_rotl:
910  case Builtin::BI_lrotl:
911  case Builtin::BI_rotl64: {
912  Value *Val = EmitScalarExpr(E->getArg(0));
913  Value *Shift = EmitScalarExpr(E->getArg(1));
914 
915  llvm::Type *ArgType = Val->getType();
916  Shift = Builder.CreateIntCast(Shift, ArgType, false);
917  unsigned ArgWidth = cast<llvm::IntegerType>(ArgType)->getBitWidth();
918  Value *ArgTypeSize = llvm::ConstantInt::get(ArgType, ArgWidth);
919  Value *ArgZero = llvm::Constant::getNullValue(ArgType);
920 
921  Value *Mask = llvm::ConstantInt::get(ArgType, ArgWidth - 1);
922  Shift = Builder.CreateAnd(Shift, Mask);
923  Value *RightShift = Builder.CreateSub(ArgTypeSize, Shift);
924 
925  Value *LeftShifted = Builder.CreateShl(Val, Shift);
926  Value *RightShifted = Builder.CreateLShr(Val, RightShift);
927  Value *Rotated = Builder.CreateOr(LeftShifted, RightShifted);
928 
929  Value *ShiftIsZero = Builder.CreateICmpEQ(Shift, ArgZero);
930  Value *Result = Builder.CreateSelect(ShiftIsZero, Val, Rotated);
931  return RValue::get(Result);
932  }
933  case Builtin::BI__builtin_unpredictable: {
934  // Always return the argument of __builtin_unpredictable. LLVM does not
935  // handle this builtin. Metadata for this builtin should be added directly
936  // to instructions such as branches or switches that use it.
937  return RValue::get(EmitScalarExpr(E->getArg(0)));
938  }
939  case Builtin::BI__builtin_expect: {
940  Value *ArgValue = EmitScalarExpr(E->getArg(0));
941  llvm::Type *ArgType = ArgValue->getType();
942 
943  Value *ExpectedValue = EmitScalarExpr(E->getArg(1));
944  // Don't generate llvm.expect on -O0 as the backend won't use it for
945  // anything.
946  // Note, we still IRGen ExpectedValue because it could have side-effects.
947  if (CGM.getCodeGenOpts().OptimizationLevel == 0)
948  return RValue::get(ArgValue);
949 
950  Value *FnExpect = CGM.getIntrinsic(Intrinsic::expect, ArgType);
951  Value *Result =
952  Builder.CreateCall(FnExpect, {ArgValue, ExpectedValue}, "expval");
953  return RValue::get(Result);
954  }
955  case Builtin::BI__builtin_assume_aligned: {
956  Value *PtrValue = EmitScalarExpr(E->getArg(0));
957  Value *OffsetValue =
958  (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) : nullptr;
959 
960  Value *AlignmentValue = EmitScalarExpr(E->getArg(1));
961  ConstantInt *AlignmentCI = cast<ConstantInt>(AlignmentValue);
962  unsigned Alignment = (unsigned) AlignmentCI->getZExtValue();
963 
964  EmitAlignmentAssumption(PtrValue, Alignment, OffsetValue);
965  return RValue::get(PtrValue);
966  }
967  case Builtin::BI__assume:
968  case Builtin::BI__builtin_assume: {
969  if (E->getArg(0)->HasSideEffects(getContext()))
970  return RValue::get(nullptr);
971 
972  Value *ArgValue = EmitScalarExpr(E->getArg(0));
973  Value *FnAssume = CGM.getIntrinsic(Intrinsic::assume);
974  return RValue::get(Builder.CreateCall(FnAssume, ArgValue));
975  }
976  case Builtin::BI__builtin_bswap16:
977  case Builtin::BI__builtin_bswap32:
978  case Builtin::BI__builtin_bswap64: {
979  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bswap));
980  }
981  case Builtin::BI__builtin_bitreverse8:
982  case Builtin::BI__builtin_bitreverse16:
983  case Builtin::BI__builtin_bitreverse32:
984  case Builtin::BI__builtin_bitreverse64: {
985  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::bitreverse));
986  }
987  case Builtin::BI__builtin_object_size: {
988  unsigned Type =
989  E->getArg(1)->EvaluateKnownConstInt(getContext()).getZExtValue();
990  auto *ResType = cast<llvm::IntegerType>(ConvertType(E->getType()));
991 
992  // We pass this builtin onto the optimizer so that it can figure out the
993  // object size in more complex cases.
994  return RValue::get(emitBuiltinObjectSize(E->getArg(0), Type, ResType,
995  /*EmittedE=*/nullptr));
996  }
997  case Builtin::BI__builtin_prefetch: {
998  Value *Locality, *RW, *Address = EmitScalarExpr(E->getArg(0));
999  // FIXME: Technically these constants should be of type 'int', yes?
1000  RW = (E->getNumArgs() > 1) ? EmitScalarExpr(E->getArg(1)) :
1001  llvm::ConstantInt::get(Int32Ty, 0);
1002  Locality = (E->getNumArgs() > 2) ? EmitScalarExpr(E->getArg(2)) :
1003  llvm::ConstantInt::get(Int32Ty, 3);
1004  Value *Data = llvm::ConstantInt::get(Int32Ty, 1);
1005  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
1006  return RValue::get(Builder.CreateCall(F, {Address, RW, Locality, Data}));
1007  }
1008  case Builtin::BI__builtin_readcyclecounter: {
1009  Value *F = CGM.getIntrinsic(Intrinsic::readcyclecounter);
1010  return RValue::get(Builder.CreateCall(F));
1011  }
1012  case Builtin::BI__builtin___clear_cache: {
1013  Value *Begin = EmitScalarExpr(E->getArg(0));
1014  Value *End = EmitScalarExpr(E->getArg(1));
1015  Value *F = CGM.getIntrinsic(Intrinsic::clear_cache);
1016  return RValue::get(Builder.CreateCall(F, {Begin, End}));
1017  }
1018  case Builtin::BI__builtin_trap:
1019  return RValue::get(EmitTrapCall(Intrinsic::trap));
1020  case Builtin::BI__debugbreak:
1021  return RValue::get(EmitTrapCall(Intrinsic::debugtrap));
1022  case Builtin::BI__builtin_unreachable: {
1023  if (SanOpts.has(SanitizerKind::Unreachable)) {
1024  SanitizerScope SanScope(this);
1025  EmitCheck(std::make_pair(static_cast<llvm::Value *>(Builder.getFalse()),
1026  SanitizerKind::Unreachable),
1027  SanitizerHandler::BuiltinUnreachable,
1028  EmitCheckSourceLocation(E->getExprLoc()), None);
1029  } else
1030  Builder.CreateUnreachable();
1031 
1032  // We do need to preserve an insertion point.
1033  EmitBlock(createBasicBlock("unreachable.cont"));
1034 
1035  return RValue::get(nullptr);
1036  }
1037 
1038  case Builtin::BI__builtin_powi:
1039  case Builtin::BI__builtin_powif:
1040  case Builtin::BI__builtin_powil: {
1041  Value *Base = EmitScalarExpr(E->getArg(0));
1042  Value *Exponent = EmitScalarExpr(E->getArg(1));
1043  llvm::Type *ArgType = Base->getType();
1044  Value *F = CGM.getIntrinsic(Intrinsic::powi, ArgType);
1045  return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1046  }
1047 
1048  case Builtin::BI__builtin_isgreater:
1049  case Builtin::BI__builtin_isgreaterequal:
1050  case Builtin::BI__builtin_isless:
1051  case Builtin::BI__builtin_islessequal:
1052  case Builtin::BI__builtin_islessgreater:
1053  case Builtin::BI__builtin_isunordered: {
1054  // Ordered comparisons: we know the arguments to these are matching scalar
1055  // floating point values.
1056  Value *LHS = EmitScalarExpr(E->getArg(0));
1057  Value *RHS = EmitScalarExpr(E->getArg(1));
1058 
1059  switch (BuiltinID) {
1060  default: llvm_unreachable("Unknown ordered comparison");
1061  case Builtin::BI__builtin_isgreater:
1062  LHS = Builder.CreateFCmpOGT(LHS, RHS, "cmp");
1063  break;
1064  case Builtin::BI__builtin_isgreaterequal:
1065  LHS = Builder.CreateFCmpOGE(LHS, RHS, "cmp");
1066  break;
1067  case Builtin::BI__builtin_isless:
1068  LHS = Builder.CreateFCmpOLT(LHS, RHS, "cmp");
1069  break;
1070  case Builtin::BI__builtin_islessequal:
1071  LHS = Builder.CreateFCmpOLE(LHS, RHS, "cmp");
1072  break;
1073  case Builtin::BI__builtin_islessgreater:
1074  LHS = Builder.CreateFCmpONE(LHS, RHS, "cmp");
1075  break;
1076  case Builtin::BI__builtin_isunordered:
1077  LHS = Builder.CreateFCmpUNO(LHS, RHS, "cmp");
1078  break;
1079  }
1080  // ZExt bool to int type.
1081  return RValue::get(Builder.CreateZExt(LHS, ConvertType(E->getType())));
1082  }
1083  case Builtin::BI__builtin_isnan: {
1084  Value *V = EmitScalarExpr(E->getArg(0));
1085  V = Builder.CreateFCmpUNO(V, V, "cmp");
1086  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1087  }
1088 
1089  case Builtin::BIfinite:
1090  case Builtin::BI__finite:
1091  case Builtin::BIfinitef:
1092  case Builtin::BI__finitef:
1093  case Builtin::BIfinitel:
1094  case Builtin::BI__finitel:
1095  case Builtin::BI__builtin_isinf:
1096  case Builtin::BI__builtin_isfinite: {
1097  // isinf(x) --> fabs(x) == infinity
1098  // isfinite(x) --> fabs(x) != infinity
1099  // x != NaN via the ordered compare in either case.
1100  Value *V = EmitScalarExpr(E->getArg(0));
1101  Value *Fabs = EmitFAbs(*this, V);
1102  Constant *Infinity = ConstantFP::getInfinity(V->getType());
1103  CmpInst::Predicate Pred = (BuiltinID == Builtin::BI__builtin_isinf)
1104  ? CmpInst::FCMP_OEQ
1105  : CmpInst::FCMP_ONE;
1106  Value *FCmp = Builder.CreateFCmp(Pred, Fabs, Infinity, "cmpinf");
1107  return RValue::get(Builder.CreateZExt(FCmp, ConvertType(E->getType())));
1108  }
1109 
1110  case Builtin::BI__builtin_isinf_sign: {
1111  // isinf_sign(x) -> fabs(x) == infinity ? (signbit(x) ? -1 : 1) : 0
1112  Value *Arg = EmitScalarExpr(E->getArg(0));
1113  Value *AbsArg = EmitFAbs(*this, Arg);
1114  Value *IsInf = Builder.CreateFCmpOEQ(
1115  AbsArg, ConstantFP::getInfinity(Arg->getType()), "isinf");
1116  Value *IsNeg = EmitSignBit(*this, Arg);
1117 
1118  llvm::Type *IntTy = ConvertType(E->getType());
1119  Value *Zero = Constant::getNullValue(IntTy);
1120  Value *One = ConstantInt::get(IntTy, 1);
1121  Value *NegativeOne = ConstantInt::get(IntTy, -1);
1122  Value *SignResult = Builder.CreateSelect(IsNeg, NegativeOne, One);
1123  Value *Result = Builder.CreateSelect(IsInf, SignResult, Zero);
1124  return RValue::get(Result);
1125  }
1126 
1127  case Builtin::BI__builtin_isnormal: {
1128  // isnormal(x) --> x == x && fabsf(x) < infinity && fabsf(x) >= float_min
1129  Value *V = EmitScalarExpr(E->getArg(0));
1130  Value *Eq = Builder.CreateFCmpOEQ(V, V, "iseq");
1131 
1132  Value *Abs = EmitFAbs(*this, V);
1133  Value *IsLessThanInf =
1134  Builder.CreateFCmpULT(Abs, ConstantFP::getInfinity(V->getType()),"isinf");
1135  APFloat Smallest = APFloat::getSmallestNormalized(
1136  getContext().getFloatTypeSemantics(E->getArg(0)->getType()));
1137  Value *IsNormal =
1138  Builder.CreateFCmpUGE(Abs, ConstantFP::get(V->getContext(), Smallest),
1139  "isnormal");
1140  V = Builder.CreateAnd(Eq, IsLessThanInf, "and");
1141  V = Builder.CreateAnd(V, IsNormal, "and");
1142  return RValue::get(Builder.CreateZExt(V, ConvertType(E->getType())));
1143  }
1144 
1145  case Builtin::BI__builtin_fpclassify: {
1146  Value *V = EmitScalarExpr(E->getArg(5));
1147  llvm::Type *Ty = ConvertType(E->getArg(5)->getType());
1148 
1149  // Create Result
1150  BasicBlock *Begin = Builder.GetInsertBlock();
1151  BasicBlock *End = createBasicBlock("fpclassify_end", this->CurFn);
1152  Builder.SetInsertPoint(End);
1153  PHINode *Result =
1154  Builder.CreatePHI(ConvertType(E->getArg(0)->getType()), 4,
1155  "fpclassify_result");
1156 
1157  // if (V==0) return FP_ZERO
1158  Builder.SetInsertPoint(Begin);
1159  Value *IsZero = Builder.CreateFCmpOEQ(V, Constant::getNullValue(Ty),
1160  "iszero");
1161  Value *ZeroLiteral = EmitScalarExpr(E->getArg(4));
1162  BasicBlock *NotZero = createBasicBlock("fpclassify_not_zero", this->CurFn);
1163  Builder.CreateCondBr(IsZero, End, NotZero);
1164  Result->addIncoming(ZeroLiteral, Begin);
1165 
1166  // if (V != V) return FP_NAN
1167  Builder.SetInsertPoint(NotZero);
1168  Value *IsNan = Builder.CreateFCmpUNO(V, V, "cmp");
1169  Value *NanLiteral = EmitScalarExpr(E->getArg(0));
1170  BasicBlock *NotNan = createBasicBlock("fpclassify_not_nan", this->CurFn);
1171  Builder.CreateCondBr(IsNan, End, NotNan);
1172  Result->addIncoming(NanLiteral, NotZero);
1173 
1174  // if (fabs(V) == infinity) return FP_INFINITY
1175  Builder.SetInsertPoint(NotNan);
1176  Value *VAbs = EmitFAbs(*this, V);
1177  Value *IsInf =
1178  Builder.CreateFCmpOEQ(VAbs, ConstantFP::getInfinity(V->getType()),
1179  "isinf");
1180  Value *InfLiteral = EmitScalarExpr(E->getArg(1));
1181  BasicBlock *NotInf = createBasicBlock("fpclassify_not_inf", this->CurFn);
1182  Builder.CreateCondBr(IsInf, End, NotInf);
1183  Result->addIncoming(InfLiteral, NotNan);
1184 
1185  // if (fabs(V) >= MIN_NORMAL) return FP_NORMAL else FP_SUBNORMAL
1186  Builder.SetInsertPoint(NotInf);
1187  APFloat Smallest = APFloat::getSmallestNormalized(
1188  getContext().getFloatTypeSemantics(E->getArg(5)->getType()));
1189  Value *IsNormal =
1190  Builder.CreateFCmpUGE(VAbs, ConstantFP::get(V->getContext(), Smallest),
1191  "isnormal");
1192  Value *NormalResult =
1193  Builder.CreateSelect(IsNormal, EmitScalarExpr(E->getArg(2)),
1194  EmitScalarExpr(E->getArg(3)));
1195  Builder.CreateBr(End);
1196  Result->addIncoming(NormalResult, NotInf);
1197 
1198  // return Result
1199  Builder.SetInsertPoint(End);
1200  return RValue::get(Result);
1201  }
1202 
1203  case Builtin::BIalloca:
1204  case Builtin::BI_alloca:
1205  case Builtin::BI__builtin_alloca: {
1206  Value *Size = EmitScalarExpr(E->getArg(0));
1207  const TargetInfo &TI = getContext().getTargetInfo();
1208  // The alignment of the alloca should correspond to __BIGGEST_ALIGNMENT__.
1209  unsigned SuitableAlignmentInBytes =
1210  CGM.getContext()
1211  .toCharUnitsFromBits(TI.getSuitableAlign())
1212  .getQuantity();
1213  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1214  AI->setAlignment(SuitableAlignmentInBytes);
1215  return RValue::get(AI);
1216  }
1217 
1218  case Builtin::BI__builtin_alloca_with_align: {
1219  Value *Size = EmitScalarExpr(E->getArg(0));
1220  Value *AlignmentInBitsValue = EmitScalarExpr(E->getArg(1));
1221  auto *AlignmentInBitsCI = cast<ConstantInt>(AlignmentInBitsValue);
1222  unsigned AlignmentInBits = AlignmentInBitsCI->getZExtValue();
1223  unsigned AlignmentInBytes =
1224  CGM.getContext().toCharUnitsFromBits(AlignmentInBits).getQuantity();
1225  AllocaInst *AI = Builder.CreateAlloca(Builder.getInt8Ty(), Size);
1226  AI->setAlignment(AlignmentInBytes);
1227  return RValue::get(AI);
1228  }
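  // Usage sketch: __builtin_alloca_with_align(n, 64) yields roughly
  //   %1 = alloca i8, i64 %n, align 8
  // since the builtin's second argument is an alignment in *bits*, which is
  // converted to bytes above before being applied to the alloca.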
1229 
1230  case Builtin::BIbzero:
1231  case Builtin::BI__builtin_bzero: {
1232  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1233  Value *SizeVal = EmitScalarExpr(E->getArg(1));
1234  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1235  E->getArg(0)->getExprLoc(), FD, 0);
1236  Builder.CreateMemSet(Dest, Builder.getInt8(0), SizeVal, false);
1237  return RValue::get(Dest.getPointer());
1238  }
1239  case Builtin::BImemcpy:
1240  case Builtin::BI__builtin_memcpy: {
1241  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1242  Address Src = EmitPointerWithAlignment(E->getArg(1));
1243  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1244  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1245  E->getArg(0)->getExprLoc(), FD, 0);
1246  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1247  E->getArg(1)->getExprLoc(), FD, 1);
1248  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1249  return RValue::get(Dest.getPointer());
1250  }
1251 
1252  case Builtin::BI__builtin_char_memchr:
1253  BuiltinID = Builtin::BI__builtin_memchr;
1254  break;
1255 
1256  case Builtin::BI__builtin___memcpy_chk: {
1257  // fold __builtin_memcpy_chk(x, y, cst1, cst2) to memcpy iff cst1<=cst2.
1258  llvm::APSInt Size, DstSize;
1259  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1260  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1261  break;
1262  if (Size.ugt(DstSize))
1263  break;
1264  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1265  Address Src = EmitPointerWithAlignment(E->getArg(1));
1266  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1267  Builder.CreateMemCpy(Dest, Src, SizeVal, false);
1268  return RValue::get(Dest.getPointer());
1269  }
1270 
1271  case Builtin::BI__builtin_objc_memmove_collectable: {
1272  Address DestAddr = EmitPointerWithAlignment(E->getArg(0));
1273  Address SrcAddr = EmitPointerWithAlignment(E->getArg(1));
1274  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1275  CGM.getObjCRuntime().EmitGCMemmoveCollectable(*this,
1276  DestAddr, SrcAddr, SizeVal);
1277  return RValue::get(DestAddr.getPointer());
1278  }
1279 
1280  case Builtin::BI__builtin___memmove_chk: {
1281  // fold __builtin_memmove_chk(x, y, cst1, cst2) to memmove iff cst1<=cst2.
1282  llvm::APSInt Size, DstSize;
1283  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1284  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1285  break;
1286  if (Size.ugt(DstSize))
1287  break;
1288  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1289  Address Src = EmitPointerWithAlignment(E->getArg(1));
1290  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1291  Builder.CreateMemMove(Dest, Src, SizeVal, false);
1292  return RValue::get(Dest.getPointer());
1293  }
1294 
1295  case Builtin::BImemmove:
1296  case Builtin::BI__builtin_memmove: {
1297  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1298  Address Src = EmitPointerWithAlignment(E->getArg(1));
1299  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1300  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1301  E->getArg(0)->getExprLoc(), FD, 0);
1302  EmitNonNullArgCheck(RValue::get(Src.getPointer()), E->getArg(1)->getType(),
1303  E->getArg(1)->getExprLoc(), FD, 1);
1304  Builder.CreateMemMove(Dest, Src, SizeVal, false);
1305  return RValue::get(Dest.getPointer());
1306  }
1307  case Builtin::BImemset:
1308  case Builtin::BI__builtin_memset: {
1309  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1310  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1311  Builder.getInt8Ty());
1312  Value *SizeVal = EmitScalarExpr(E->getArg(2));
1313  EmitNonNullArgCheck(RValue::get(Dest.getPointer()), E->getArg(0)->getType(),
1314  E->getArg(0)->getExprLoc(), FD, 0);
1315  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1316  return RValue::get(Dest.getPointer());
1317  }
1318  case Builtin::BI__builtin___memset_chk: {
1319  // fold __builtin_memset_chk(x, y, cst1, cst2) to memset iff cst1<=cst2.
1320  llvm::APSInt Size, DstSize;
1321  if (!E->getArg(2)->EvaluateAsInt(Size, CGM.getContext()) ||
1322  !E->getArg(3)->EvaluateAsInt(DstSize, CGM.getContext()))
1323  break;
1324  if (Size.ugt(DstSize))
1325  break;
1326  Address Dest = EmitPointerWithAlignment(E->getArg(0));
1327  Value *ByteVal = Builder.CreateTrunc(EmitScalarExpr(E->getArg(1)),
1328  Builder.getInt8Ty());
1329  Value *SizeVal = llvm::ConstantInt::get(Builder.getContext(), Size);
1330  Builder.CreateMemSet(Dest, ByteVal, SizeVal, false);
1331  return RValue::get(Dest.getPointer());
1332  }
1333  case Builtin::BI__builtin_dwarf_cfa: {
1334  // The offset in bytes from the first argument to the CFA.
1335  //
1336  // Why on earth is this in the frontend? Is there any reason at
1337  // all that the backend can't reasonably determine this while
1338  // lowering llvm.eh.dwarf.cfa()?
1339  //
1340  // TODO: If there's a satisfactory reason, add a target hook for
1341  // this instead of hard-coding 0, which is correct for most targets.
1342  int32_t Offset = 0;
1343 
1344  Value *F = CGM.getIntrinsic(Intrinsic::eh_dwarf_cfa);
1345  return RValue::get(Builder.CreateCall(F,
1346  llvm::ConstantInt::get(Int32Ty, Offset)));
1347  }
1348  case Builtin::BI__builtin_return_address: {
1349  Value *Depth =
1350  CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1351  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1352  return RValue::get(Builder.CreateCall(F, Depth));
1353  }
1354  case Builtin::BI_ReturnAddress: {
1355  Value *F = CGM.getIntrinsic(Intrinsic::returnaddress);
1356  return RValue::get(Builder.CreateCall(F, Builder.getInt32(0)));
1357  }
1358  case Builtin::BI__builtin_frame_address: {
1359  Value *Depth =
1360  CGM.EmitConstantExpr(E->getArg(0), getContext().UnsignedIntTy, this);
1361  Value *F = CGM.getIntrinsic(Intrinsic::frameaddress);
1362  return RValue::get(Builder.CreateCall(F, Depth));
1363  }
1364  case Builtin::BI__builtin_extract_return_addr: {
1365  Value *Address = EmitScalarExpr(E->getArg(0));
1366  Value *Result = getTargetHooks().decodeReturnAddress(*this, Address);
1367  return RValue::get(Result);
1368  }
1369  case Builtin::BI__builtin_frob_return_addr: {
1370  Value *Address = EmitScalarExpr(E->getArg(0));
1371  Value *Result = getTargetHooks().encodeReturnAddress(*this, Address);
1372  return RValue::get(Result);
1373  }
1374  case Builtin::BI__builtin_dwarf_sp_column: {
1375  llvm::IntegerType *Ty
1376  = cast<llvm::IntegerType>(ConvertType(E->getType()));
1377  int Column = getTargetHooks().getDwarfEHStackPointer(CGM);
1378  if (Column == -1) {
1379  CGM.ErrorUnsupported(E, "__builtin_dwarf_sp_column");
1380  return RValue::get(llvm::UndefValue::get(Ty));
1381  }
1382  return RValue::get(llvm::ConstantInt::get(Ty, Column, true));
1383  }
1384  case Builtin::BI__builtin_init_dwarf_reg_size_table: {
1385  Value *Address = EmitScalarExpr(E->getArg(0));
1386  if (getTargetHooks().initDwarfEHRegSizeTable(*this, Address))
1387  CGM.ErrorUnsupported(E, "__builtin_init_dwarf_reg_size_table");
1388  return RValue::get(llvm::UndefValue::get(ConvertType(E->getType())));
1389  }
1390  case Builtin::BI__builtin_eh_return: {
1391  Value *Int = EmitScalarExpr(E->getArg(0));
1392  Value *Ptr = EmitScalarExpr(E->getArg(1));
1393 
1394  llvm::IntegerType *IntTy = cast<llvm::IntegerType>(Int->getType());
1395  assert((IntTy->getBitWidth() == 32 || IntTy->getBitWidth() == 64) &&
1396  "LLVM's __builtin_eh_return only supports 32- and 64-bit variants");
1397  Value *F = CGM.getIntrinsic(IntTy->getBitWidth() == 32
1398  ? Intrinsic::eh_return_i32
1399  : Intrinsic::eh_return_i64);
1400  Builder.CreateCall(F, {Int, Ptr});
1401  Builder.CreateUnreachable();
1402 
1403  // We do need to preserve an insertion point.
1404  EmitBlock(createBasicBlock("builtin_eh_return.cont"));
1405 
1406  return RValue::get(nullptr);
1407  }
1408  case Builtin::BI__builtin_unwind_init: {
1409  Value *F = CGM.getIntrinsic(Intrinsic::eh_unwind_init);
1410  return RValue::get(Builder.CreateCall(F));
1411  }
1412  case Builtin::BI__builtin_extend_pointer: {
1413  // Extends a pointer to the size of an _Unwind_Word, which is
1414  // uint64_t on all platforms. Generally this gets poked into a
1415  // register and eventually used as an address, so if the
1416  // addressing registers are wider than pointers and the platform
1417  // doesn't implicitly ignore high-order bits when doing
1418  // addressing, we need to make sure we zext / sext based on
1419  // the platform's expectations.
1420  //
1421  // See: http://gcc.gnu.org/ml/gcc-bugs/2002-02/msg00237.html
1422 
1423  // Cast the pointer to intptr_t.
1424  Value *Ptr = EmitScalarExpr(E->getArg(0));
1425  Value *Result = Builder.CreatePtrToInt(Ptr, IntPtrTy, "extend.cast");
1426 
1427  // If that's 64 bits, we're done.
1428  if (IntPtrTy->getBitWidth() == 64)
1429  return RValue::get(Result);
1430 
1431  // Otherwise, ask the codegen data what to do.
1432  if (getTargetHooks().extendPointerWithSExt())
1433  return RValue::get(Builder.CreateSExt(Result, Int64Ty, "extend.sext"));
1434  else
1435  return RValue::get(Builder.CreateZExt(Result, Int64Ty, "extend.zext"));
1436  }
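  // Illustrative sketch for a 32-bit pointer target with zero-extending
  // addressing (value names are made up for the example):
  //   %cast = ptrtoint i8* %p to i32        ; "extend.cast"
  //   %ext  = zext i32 %cast to i64         ; "extend.zext"
  // A target whose hook requests sign extension gets an sext instead; on a
  // 64-bit target only the ptrtoint is emitted.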
1437  case Builtin::BI__builtin_setjmp: {
1438  // Buffer is a void**.
1439  Address Buf = EmitPointerWithAlignment(E->getArg(0));
1440 
1441  // Store the frame pointer to the setjmp buffer.
1442  Value *FrameAddr =
1443  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
1444  ConstantInt::get(Int32Ty, 0));
1445  Builder.CreateStore(FrameAddr, Buf);
1446 
1447  // Store the stack pointer to the setjmp buffer.
1448  Value *StackAddr =
1449  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::stacksave));
1450  Address StackSaveSlot =
1451  Builder.CreateConstInBoundsGEP(Buf, 2, getPointerSize());
1452  Builder.CreateStore(StackAddr, StackSaveSlot);
1453 
1454  // Call LLVM's EH setjmp, which is lightweight.
1455  Value *F = CGM.getIntrinsic(Intrinsic::eh_sjlj_setjmp);
1456  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1457  return RValue::get(Builder.CreateCall(F, Buf.getPointer()));
1458  }
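  // Rough sketch of the jmp_buf layout assumed here (void* slots): slot 0
  // receives the frame address and slot 2 the saved stack pointer, while
  // slot 1 is left for the SjLj lowering to fill in later. Approximately:
  //   store i8* %frameaddr, i8** %buf        ; buf[0]
  //   store i8* %stacksave, i8** %buf.slot2  ; buf[2]
  //   %r = call i32 @llvm.eh.sjlj.setjmp(i8* %buf.i8)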
1459  case Builtin::BI__builtin_longjmp: {
1460  Value *Buf = EmitScalarExpr(E->getArg(0));
1461  Buf = Builder.CreateBitCast(Buf, Int8PtrTy);
1462 
1463  // Call LLVM's EH longjmp, which is lightweight.
1464  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::eh_sjlj_longjmp), Buf);
1465 
1466  // longjmp doesn't return; mark this as unreachable.
1467  Builder.CreateUnreachable();
1468 
1469  // We do need to preserve an insertion point.
1470  EmitBlock(createBasicBlock("longjmp.cont"));
1471 
1472  return RValue::get(nullptr);
1473  }
1474  case Builtin::BI__sync_fetch_and_add:
1475  case Builtin::BI__sync_fetch_and_sub:
1476  case Builtin::BI__sync_fetch_and_or:
1477  case Builtin::BI__sync_fetch_and_and:
1478  case Builtin::BI__sync_fetch_and_xor:
1479  case Builtin::BI__sync_fetch_and_nand:
1480  case Builtin::BI__sync_add_and_fetch:
1481  case Builtin::BI__sync_sub_and_fetch:
1482  case Builtin::BI__sync_and_and_fetch:
1483  case Builtin::BI__sync_or_and_fetch:
1484  case Builtin::BI__sync_xor_and_fetch:
1485  case Builtin::BI__sync_nand_and_fetch:
1486  case Builtin::BI__sync_val_compare_and_swap:
1487  case Builtin::BI__sync_bool_compare_and_swap:
1488  case Builtin::BI__sync_lock_test_and_set:
1489  case Builtin::BI__sync_lock_release:
1490  case Builtin::BI__sync_swap:
1491  llvm_unreachable("Shouldn't make it through sema");
1492  case Builtin::BI__sync_fetch_and_add_1:
1493  case Builtin::BI__sync_fetch_and_add_2:
1494  case Builtin::BI__sync_fetch_and_add_4:
1495  case Builtin::BI__sync_fetch_and_add_8:
1496  case Builtin::BI__sync_fetch_and_add_16:
1497  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Add, E);
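  // For example, __sync_fetch_and_add_4(p, v) becomes a sequentially
  // consistent read-modify-write that yields the old value, roughly:
  //   %old = atomicrmw add i32* %p, i32 %v seq_cst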
1498  case Builtin::BI__sync_fetch_and_sub_1:
1499  case Builtin::BI__sync_fetch_and_sub_2:
1500  case Builtin::BI__sync_fetch_and_sub_4:
1501  case Builtin::BI__sync_fetch_and_sub_8:
1502  case Builtin::BI__sync_fetch_and_sub_16:
1503  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Sub, E);
1504  case Builtin::BI__sync_fetch_and_or_1:
1505  case Builtin::BI__sync_fetch_and_or_2:
1506  case Builtin::BI__sync_fetch_and_or_4:
1507  case Builtin::BI__sync_fetch_and_or_8:
1508  case Builtin::BI__sync_fetch_and_or_16:
1509  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Or, E);
1510  case Builtin::BI__sync_fetch_and_and_1:
1511  case Builtin::BI__sync_fetch_and_and_2:
1512  case Builtin::BI__sync_fetch_and_and_4:
1513  case Builtin::BI__sync_fetch_and_and_8:
1514  case Builtin::BI__sync_fetch_and_and_16:
1515  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::And, E);
1516  case Builtin::BI__sync_fetch_and_xor_1:
1517  case Builtin::BI__sync_fetch_and_xor_2:
1518  case Builtin::BI__sync_fetch_and_xor_4:
1519  case Builtin::BI__sync_fetch_and_xor_8:
1520  case Builtin::BI__sync_fetch_and_xor_16:
1521  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xor, E);
1522  case Builtin::BI__sync_fetch_and_nand_1:
1523  case Builtin::BI__sync_fetch_and_nand_2:
1524  case Builtin::BI__sync_fetch_and_nand_4:
1525  case Builtin::BI__sync_fetch_and_nand_8:
1526  case Builtin::BI__sync_fetch_and_nand_16:
1527  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Nand, E);
1528 
1529  // Clang extensions: not overloaded yet.
1530  case Builtin::BI__sync_fetch_and_min:
1531  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Min, E);
1532  case Builtin::BI__sync_fetch_and_max:
1533  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Max, E);
1534  case Builtin::BI__sync_fetch_and_umin:
1535  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMin, E);
1536  case Builtin::BI__sync_fetch_and_umax:
1537  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::UMax, E);
1538 
1539  case Builtin::BI__sync_add_and_fetch_1:
1540  case Builtin::BI__sync_add_and_fetch_2:
1541  case Builtin::BI__sync_add_and_fetch_4:
1542  case Builtin::BI__sync_add_and_fetch_8:
1543  case Builtin::BI__sync_add_and_fetch_16:
1544  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Add, E,
1545  llvm::Instruction::Add);
1546  case Builtin::BI__sync_sub_and_fetch_1:
1547  case Builtin::BI__sync_sub_and_fetch_2:
1548  case Builtin::BI__sync_sub_and_fetch_4:
1549  case Builtin::BI__sync_sub_and_fetch_8:
1550  case Builtin::BI__sync_sub_and_fetch_16:
1551  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Sub, E,
1552  llvm::Instruction::Sub);
1553  case Builtin::BI__sync_and_and_fetch_1:
1554  case Builtin::BI__sync_and_and_fetch_2:
1555  case Builtin::BI__sync_and_and_fetch_4:
1556  case Builtin::BI__sync_and_and_fetch_8:
1557  case Builtin::BI__sync_and_and_fetch_16:
1558  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::And, E,
1559  llvm::Instruction::And);
1560  case Builtin::BI__sync_or_and_fetch_1:
1561  case Builtin::BI__sync_or_and_fetch_2:
1562  case Builtin::BI__sync_or_and_fetch_4:
1563  case Builtin::BI__sync_or_and_fetch_8:
1564  case Builtin::BI__sync_or_and_fetch_16:
1565  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Or, E,
1566  llvm::Instruction::Or);
1567  case Builtin::BI__sync_xor_and_fetch_1:
1568  case Builtin::BI__sync_xor_and_fetch_2:
1569  case Builtin::BI__sync_xor_and_fetch_4:
1570  case Builtin::BI__sync_xor_and_fetch_8:
1571  case Builtin::BI__sync_xor_and_fetch_16:
1572  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Xor, E,
1573  llvm::Instruction::Xor);
1574  case Builtin::BI__sync_nand_and_fetch_1:
1575  case Builtin::BI__sync_nand_and_fetch_2:
1576  case Builtin::BI__sync_nand_and_fetch_4:
1577  case Builtin::BI__sync_nand_and_fetch_8:
1578  case Builtin::BI__sync_nand_and_fetch_16:
1579  return EmitBinaryAtomicPost(*this, llvm::AtomicRMWInst::Nand, E,
1580  llvm::Instruction::And, true);
1581 
1582  case Builtin::BI__sync_val_compare_and_swap_1:
1583  case Builtin::BI__sync_val_compare_and_swap_2:
1584  case Builtin::BI__sync_val_compare_and_swap_4:
1585  case Builtin::BI__sync_val_compare_and_swap_8:
1586  case Builtin::BI__sync_val_compare_and_swap_16:
1587  return RValue::get(MakeAtomicCmpXchgValue(*this, E, false));
1588 
1589  case Builtin::BI__sync_bool_compare_and_swap_1:
1590  case Builtin::BI__sync_bool_compare_and_swap_2:
1591  case Builtin::BI__sync_bool_compare_and_swap_4:
1592  case Builtin::BI__sync_bool_compare_and_swap_8:
1593  case Builtin::BI__sync_bool_compare_and_swap_16:
1594  return RValue::get(MakeAtomicCmpXchgValue(*this, E, true));
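  // Both forms lower to a sequentially consistent cmpxchg; e.g. for the
  // 4-byte variants, roughly:
  //   %pair = cmpxchg i32* %p, i32 %expected, i32 %desired seq_cst seq_cst
  // The _val_ form returns element 0 of %pair (the old value) and the _bool_
  // form returns element 1 (the success flag).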
1595 
1596  case Builtin::BI__sync_swap_1:
1597  case Builtin::BI__sync_swap_2:
1598  case Builtin::BI__sync_swap_4:
1599  case Builtin::BI__sync_swap_8:
1600  case Builtin::BI__sync_swap_16:
1601  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1602 
1603  case Builtin::BI__sync_lock_test_and_set_1:
1604  case Builtin::BI__sync_lock_test_and_set_2:
1605  case Builtin::BI__sync_lock_test_and_set_4:
1606  case Builtin::BI__sync_lock_test_and_set_8:
1607  case Builtin::BI__sync_lock_test_and_set_16:
1608  return EmitBinaryAtomic(*this, llvm::AtomicRMWInst::Xchg, E);
1609 
1610  case Builtin::BI__sync_lock_release_1:
1611  case Builtin::BI__sync_lock_release_2:
1612  case Builtin::BI__sync_lock_release_4:
1613  case Builtin::BI__sync_lock_release_8:
1614  case Builtin::BI__sync_lock_release_16: {
1615  Value *Ptr = EmitScalarExpr(E->getArg(0));
1616  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
1617  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
1618  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
1619  StoreSize.getQuantity() * 8);
1620  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
1621  llvm::StoreInst *Store =
1622  Builder.CreateAlignedStore(llvm::Constant::getNullValue(ITy), Ptr,
1623  StoreSize);
1624  Store->setAtomic(llvm::AtomicOrdering::Release);
1625  return RValue::get(nullptr);
1626  }
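  // For example, __sync_lock_release_4(p) ends up as a plain atomic store of
  // zero with release ordering, roughly:
  //   store atomic i32 0, i32* %p release, align 4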
1627 
1628  case Builtin::BI__sync_synchronize: {
1629  // We assume this is supposed to correspond to a C++0x-style
1630  // sequentially-consistent fence (i.e. this is only usable for
1631  // synchronization, not device I/O or anything like that). This intrinsic
1632  // is really badly designed in the sense that in theory, there isn't
1633  // any way to safely use it... but in practice, it mostly works
1634  // to use it with non-atomic loads and stores to get acquire/release
1635  // semantics.
1636  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent);
1637  return RValue::get(nullptr);
1638  }
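  // That is, __sync_synchronize() simply becomes:
  //   fence seq_cst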
1639 
1640  case Builtin::BI__builtin_nontemporal_load:
1641  return RValue::get(EmitNontemporalLoad(*this, E));
1642  case Builtin::BI__builtin_nontemporal_store:
1643  return RValue::get(EmitNontemporalStore(*this, E));
1644  case Builtin::BI__c11_atomic_is_lock_free:
1645  case Builtin::BI__atomic_is_lock_free: {
1646  // Call "bool __atomic_is_lock_free(size_t size, void *ptr)". For the
1647  // __c11 builtin, ptr is 0 (indicating a properly-aligned object), since
1648  // _Atomic(T) is always properly-aligned.
1649  const char *LibCallName = "__atomic_is_lock_free";
1650  CallArgList Args;
1651  Args.add(RValue::get(EmitScalarExpr(E->getArg(0))),
1652  getContext().getSizeType());
1653  if (BuiltinID == Builtin::BI__atomic_is_lock_free)
1654  Args.add(RValue::get(EmitScalarExpr(E->getArg(1))),
1655  getContext().VoidPtrTy);
1656  else
1657  Args.add(RValue::get(llvm::Constant::getNullValue(VoidPtrTy)),
1658  getContext().VoidPtrTy);
1659  const CGFunctionInfo &FuncInfo =
1660  CGM.getTypes().arrangeBuiltinFunctionCall(E->getType(), Args);
1661  llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FuncInfo);
1662  llvm::Constant *Func = CGM.CreateRuntimeFunction(FTy, LibCallName);
1663  return EmitCall(FuncInfo, CGCallee::forDirect(Func),
1664  ReturnValueSlot(), Args);
1665  }
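  // Sketch of the resulting runtime call, for some type T of the queried
  // size (the size argument is passed as the target's size_t):
  //   __atomic_is_lock_free(sizeof(T), &obj)   // __atomic_* form
  //   __atomic_is_lock_free(sizeof(T), NULL)   // __c11_* form (null = aligned)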
1666 
1667  case Builtin::BI__atomic_test_and_set: {
1668  // Look at the argument type to determine whether this is a volatile
1669  // operation. The parameter type is always volatile.
1670  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1671  bool Volatile =
1672  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1673 
1674  Value *Ptr = EmitScalarExpr(E->getArg(0));
1675  unsigned AddrSpace = Ptr->getType()->getPointerAddressSpace();
1676  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1677  Value *NewVal = Builder.getInt8(1);
1678  Value *Order = EmitScalarExpr(E->getArg(1));
1679  if (isa<llvm::ConstantInt>(Order)) {
1680  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1681  AtomicRMWInst *Result = nullptr;
1682  switch (ord) {
1683  case 0: // memory_order_relaxed
1684  default: // invalid order
1685  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1686  llvm::AtomicOrdering::Monotonic);
1687  break;
1688  case 1: // memory_order_consume
1689  case 2: // memory_order_acquire
1690  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1691  llvm::AtomicOrdering::Acquire);
1692  break;
1693  case 3: // memory_order_release
1694  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1695  llvm::AtomicOrdering::Release);
1696  break;
1697  case 4: // memory_order_acq_rel
1698 
1699  Result = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1700  llvm::AtomicOrdering::AcquireRelease);
1701  break;
1702  case 5: // memory_order_seq_cst
1703  Result = Builder.CreateAtomicRMW(
1704  llvm::AtomicRMWInst::Xchg, Ptr, NewVal,
1705  llvm::AtomicOrdering::SequentiallyConsistent);
1706  break;
1707  }
1708  Result->setVolatile(Volatile);
1709  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1710  }
1711 
1712  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1713 
1714  llvm::BasicBlock *BBs[5] = {
1715  createBasicBlock("monotonic", CurFn),
1716  createBasicBlock("acquire", CurFn),
1717  createBasicBlock("release", CurFn),
1718  createBasicBlock("acqrel", CurFn),
1719  createBasicBlock("seqcst", CurFn)
1720  };
1721  llvm::AtomicOrdering Orders[5] = {
1722  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Acquire,
1723  llvm::AtomicOrdering::Release, llvm::AtomicOrdering::AcquireRelease,
1724  llvm::AtomicOrdering::SequentiallyConsistent};
1725 
1726  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1727  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1728 
1729  Builder.SetInsertPoint(ContBB);
1730  PHINode *Result = Builder.CreatePHI(Int8Ty, 5, "was_set");
1731 
1732  for (unsigned i = 0; i < 5; ++i) {
1733  Builder.SetInsertPoint(BBs[i]);
1734  AtomicRMWInst *RMW = Builder.CreateAtomicRMW(llvm::AtomicRMWInst::Xchg,
1735  Ptr, NewVal, Orders[i]);
1736  RMW->setVolatile(Volatile);
1737  Result->addIncoming(RMW, BBs[i]);
1738  Builder.CreateBr(ContBB);
1739  }
1740 
1741  SI->addCase(Builder.getInt32(0), BBs[0]);
1742  SI->addCase(Builder.getInt32(1), BBs[1]);
1743  SI->addCase(Builder.getInt32(2), BBs[1]);
1744  SI->addCase(Builder.getInt32(3), BBs[2]);
1745  SI->addCase(Builder.getInt32(4), BBs[3]);
1746  SI->addCase(Builder.getInt32(5), BBs[4]);
1747 
1748  Builder.SetInsertPoint(ContBB);
1749  return RValue::get(Builder.CreateIsNotNull(Result, "tobool"));
1750  }
1751 
1752  case Builtin::BI__atomic_clear: {
1753  QualType PtrTy = E->getArg(0)->IgnoreImpCasts()->getType();
1754  bool Volatile =
1755  PtrTy->castAs<PointerType>()->getPointeeType().isVolatileQualified();
1756 
1757  Address Ptr = EmitPointerWithAlignment(E->getArg(0));
1758  unsigned AddrSpace = Ptr.getPointer()->getType()->getPointerAddressSpace();
1759  Ptr = Builder.CreateBitCast(Ptr, Int8Ty->getPointerTo(AddrSpace));
1760  Value *NewVal = Builder.getInt8(0);
1761  Value *Order = EmitScalarExpr(E->getArg(1));
1762  if (isa<llvm::ConstantInt>(Order)) {
1763  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1764  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1765  switch (ord) {
1766  case 0: // memory_order_relaxed
1767  default: // invalid order
1768  Store->setOrdering(llvm::AtomicOrdering::Monotonic);
1769  break;
1770  case 3: // memory_order_release
1771  Store->setOrdering(llvm::AtomicOrdering::Release);
1772  break;
1773  case 5: // memory_order_seq_cst
1774  Store->setOrdering(llvm::AtomicOrdering::SequentiallyConsistent);
1775  break;
1776  }
1777  return RValue::get(nullptr);
1778  }
1779 
1780  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1781 
1782  llvm::BasicBlock *BBs[3] = {
1783  createBasicBlock("monotonic", CurFn),
1784  createBasicBlock("release", CurFn),
1785  createBasicBlock("seqcst", CurFn)
1786  };
1787  llvm::AtomicOrdering Orders[3] = {
1788  llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Release,
1789  llvm::AtomicOrdering::SequentiallyConsistent};
1790 
1791  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1792  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, BBs[0]);
1793 
1794  for (unsigned i = 0; i < 3; ++i) {
1795  Builder.SetInsertPoint(BBs[i]);
1796  StoreInst *Store = Builder.CreateStore(NewVal, Ptr, Volatile);
1797  Store->setOrdering(Orders[i]);
1798  Builder.CreateBr(ContBB);
1799  }
1800 
1801  SI->addCase(Builder.getInt32(0), BBs[0]);
1802  SI->addCase(Builder.getInt32(3), BBs[1]);
1803  SI->addCase(Builder.getInt32(5), BBs[2]);
1804 
1805  Builder.SetInsertPoint(ContBB);
1806  return RValue::get(nullptr);
1807  }
1808 
1809  case Builtin::BI__atomic_thread_fence:
1810  case Builtin::BI__atomic_signal_fence:
1811  case Builtin::BI__c11_atomic_thread_fence:
1812  case Builtin::BI__c11_atomic_signal_fence: {
1813  llvm::SyncScope::ID SSID;
1814  if (BuiltinID == Builtin::BI__atomic_signal_fence ||
1815  BuiltinID == Builtin::BI__c11_atomic_signal_fence)
1816  SSID = llvm::SyncScope::SingleThread;
1817  else
1818  SSID = llvm::SyncScope::System;
1819  Value *Order = EmitScalarExpr(E->getArg(0));
1820  if (isa<llvm::ConstantInt>(Order)) {
1821  int ord = cast<llvm::ConstantInt>(Order)->getZExtValue();
1822  switch (ord) {
1823  case 0: // memory_order_relaxed
1824  default: // invalid order
1825  break;
1826  case 1: // memory_order_consume
1827  case 2: // memory_order_acquire
1828  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
1829  break;
1830  case 3: // memory_order_release
1831  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
1832  break;
1833  case 4: // memory_order_acq_rel
1834  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
1835  break;
1836  case 5: // memory_order_seq_cst
1837  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
1838  break;
1839  }
1840  return RValue::get(nullptr);
1841  }
1842 
1843  llvm::BasicBlock *AcquireBB, *ReleaseBB, *AcqRelBB, *SeqCstBB;
1844  AcquireBB = createBasicBlock("acquire", CurFn);
1845  ReleaseBB = createBasicBlock("release", CurFn);
1846  AcqRelBB = createBasicBlock("acqrel", CurFn);
1847  SeqCstBB = createBasicBlock("seqcst", CurFn);
1848  llvm::BasicBlock *ContBB = createBasicBlock("atomic.continue", CurFn);
1849 
1850  Order = Builder.CreateIntCast(Order, Builder.getInt32Ty(), false);
1851  llvm::SwitchInst *SI = Builder.CreateSwitch(Order, ContBB);
1852 
1853  Builder.SetInsertPoint(AcquireBB);
1854  Builder.CreateFence(llvm::AtomicOrdering::Acquire, SSID);
1855  Builder.CreateBr(ContBB);
1856  SI->addCase(Builder.getInt32(1), AcquireBB);
1857  SI->addCase(Builder.getInt32(2), AcquireBB);
1858 
1859  Builder.SetInsertPoint(ReleaseBB);
1860  Builder.CreateFence(llvm::AtomicOrdering::Release, SSID);
1861  Builder.CreateBr(ContBB);
1862  SI->addCase(Builder.getInt32(3), ReleaseBB);
1863 
1864  Builder.SetInsertPoint(AcqRelBB);
1865  Builder.CreateFence(llvm::AtomicOrdering::AcquireRelease, SSID);
1866  Builder.CreateBr(ContBB);
1867  SI->addCase(Builder.getInt32(4), AcqRelBB);
1868 
1869  Builder.SetInsertPoint(SeqCstBB);
1870  Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent, SSID);
1871  Builder.CreateBr(ContBB);
1872  SI->addCase(Builder.getInt32(5), SeqCstBB);
1873 
1874  Builder.SetInsertPoint(ContBB);
1875  return RValue::get(nullptr);
1876  }
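  // With a constant ordering the whole thing collapses to a single fence;
  // sketch, using the LLVM 5 textual syntax for the single-thread scope:
  //   __c11_atomic_thread_fence(__ATOMIC_ACQUIRE)  ->  fence acquire
  //   __c11_atomic_signal_fence(__ATOMIC_SEQ_CST)  ->
  //       fence syncscope("singlethread") seq_cst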
1877 
1878  // Library functions with special handling.
1879  case Builtin::BIsqrt:
1880  case Builtin::BIsqrtf:
1881  case Builtin::BIsqrtl: {
1882  // Transform a call to sqrt* into a @llvm.sqrt.* intrinsic call, but only
1883  // in finite- or unsafe-math mode (the intrinsic has different semantics
1884  // for handling negative numbers compared to the library function, so
1885  // -fmath-errno=0 is not enough).
1886  if (!FD->hasAttr<ConstAttr>())
1887  break;
1888  if (!(CGM.getCodeGenOpts().UnsafeFPMath ||
1889  CGM.getCodeGenOpts().NoNaNsFPMath))
1890  break;
1891  Value *Arg0 = EmitScalarExpr(E->getArg(0));
1892  llvm::Type *ArgType = Arg0->getType();
1893  Value *F = CGM.getIntrinsic(Intrinsic::sqrt, ArgType);
1894  return RValue::get(Builder.CreateCall(F, Arg0));
1895  }
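  // For instance, with -ffast-math (so the declaration is const and the
  // unsafe-math flag is set), a call like sqrtf(x) is emitted as roughly:
  //   %r = call float @llvm.sqrt.f32(float %x)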
1896 
1897  case Builtin::BI__builtin_pow:
1898  case Builtin::BI__builtin_powf:
1899  case Builtin::BI__builtin_powl:
1900  case Builtin::BIpow:
1901  case Builtin::BIpowf:
1902  case Builtin::BIpowl: {
1903  // Transform a call to pow* into a @llvm.pow.* intrinsic call.
1904  if (!FD->hasAttr<ConstAttr>())
1905  break;
1906  Value *Base = EmitScalarExpr(E->getArg(0));
1907  Value *Exponent = EmitScalarExpr(E->getArg(1));
1908  llvm::Type *ArgType = Base->getType();
1909  Value *F = CGM.getIntrinsic(Intrinsic::pow, ArgType);
1910  return RValue::get(Builder.CreateCall(F, {Base, Exponent}));
1911  }
1912 
1913  case Builtin::BIfma:
1914  case Builtin::BIfmaf:
1915  case Builtin::BIfmal:
1916  case Builtin::BI__builtin_fma:
1917  case Builtin::BI__builtin_fmaf:
1918  case Builtin::BI__builtin_fmal: {
1919  // Rewrite fma to intrinsic.
1920  Value *FirstArg = EmitScalarExpr(E->getArg(0));
1921  llvm::Type *ArgType = FirstArg->getType();
1922  Value *F = CGM.getIntrinsic(Intrinsic::fma, ArgType);
1923  return RValue::get(
1924  Builder.CreateCall(F, {FirstArg, EmitScalarExpr(E->getArg(1)),
1925  EmitScalarExpr(E->getArg(2))}));
1926  }
1927 
1928  case Builtin::BI__builtin_signbit:
1929  case Builtin::BI__builtin_signbitf:
1930  case Builtin::BI__builtin_signbitl: {
1931  return RValue::get(
1932  Builder.CreateZExt(EmitSignBit(*this, EmitScalarExpr(E->getArg(0))),
1933  ConvertType(E->getType())));
1934  }
1935  case Builtin::BI__builtin_annotation: {
1936  llvm::Value *AnnVal = EmitScalarExpr(E->getArg(0));
1937  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::annotation,
1938  AnnVal->getType());
1939 
1940  // Get the annotation string, go through casts. Sema requires this to be a
1941  // non-wide string literal, potentially casted, so the cast<> is safe.
1942  const Expr *AnnotationStrExpr = E->getArg(1)->IgnoreParenCasts();
1943  StringRef Str = cast<StringLiteral>(AnnotationStrExpr)->getString();
1944  return RValue::get(EmitAnnotationCall(F, AnnVal, Str, E->getExprLoc()));
1945  }
1946  case Builtin::BI__builtin_addcb:
1947  case Builtin::BI__builtin_addcs:
1948  case Builtin::BI__builtin_addc:
1949  case Builtin::BI__builtin_addcl:
1950  case Builtin::BI__builtin_addcll:
1951  case Builtin::BI__builtin_subcb:
1952  case Builtin::BI__builtin_subcs:
1953  case Builtin::BI__builtin_subc:
1954  case Builtin::BI__builtin_subcl:
1955  case Builtin::BI__builtin_subcll: {
1956 
1957  // We translate all of these builtins from expressions of the form:
1958  // int x = ..., y = ..., carryin = ..., carryout, result;
1959  // result = __builtin_addc(x, y, carryin, &carryout);
1960  //
1961  // to LLVM IR of the form:
1962  //
1963  // %tmp1 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
1964  // %tmpsum1 = extractvalue {i32, i1} %tmp1, 0
1965  // %carry1 = extractvalue {i32, i1} %tmp1, 1
1966  // %tmp2 = call {i32, i1} @llvm.uadd.with.overflow.i32(i32 %tmpsum1,
1967  // i32 %carryin)
1968  // %result = extractvalue {i32, i1} %tmp2, 0
1969  // %carry2 = extractvalue {i32, i1} %tmp2, 1
1970  // %tmp3 = or i1 %carry1, %carry2
1971  // %tmp4 = zext i1 %tmp3 to i32
1972  // store i32 %tmp4, i32* %carryout
1973 
1974  // Scalarize our inputs.
1975  llvm::Value *X = EmitScalarExpr(E->getArg(0));
1976  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
1977  llvm::Value *Carryin = EmitScalarExpr(E->getArg(2));
1978  Address CarryOutPtr = EmitPointerWithAlignment(E->getArg(3));
1979 
1980  // Decide if we are lowering to a uadd.with.overflow or usub.with.overflow.
1981  llvm::Intrinsic::ID IntrinsicId;
1982  switch (BuiltinID) {
1983  default: llvm_unreachable("Unknown multiprecision builtin id.");
1984  case Builtin::BI__builtin_addcb:
1985  case Builtin::BI__builtin_addcs:
1986  case Builtin::BI__builtin_addc:
1987  case Builtin::BI__builtin_addcl:
1988  case Builtin::BI__builtin_addcll:
1989  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
1990  break;
1991  case Builtin::BI__builtin_subcb:
1992  case Builtin::BI__builtin_subcs:
1993  case Builtin::BI__builtin_subc:
1994  case Builtin::BI__builtin_subcl:
1995  case Builtin::BI__builtin_subcll:
1996  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
1997  break;
1998  }
1999 
2000  // Construct our resulting LLVM IR expression.
2001  llvm::Value *Carry1;
2002  llvm::Value *Sum1 = EmitOverflowIntrinsic(*this, IntrinsicId,
2003  X, Y, Carry1);
2004  llvm::Value *Carry2;
2005  llvm::Value *Sum2 = EmitOverflowIntrinsic(*this, IntrinsicId,
2006  Sum1, Carryin, Carry2);
2007  llvm::Value *CarryOut = Builder.CreateZExt(Builder.CreateOr(Carry1, Carry2),
2008  X->getType());
2009  Builder.CreateStore(CarryOut, CarryOutPtr);
2010  return RValue::get(Sum2);
2011  }
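  // A minimal usage sketch (hypothetical variable names): chaining the limbs
  // of a multi-precision addition, with the carry flowing from carryout back
  // into carryin:
  //   unsigned carry, r0, r1;
  //   r0 = __builtin_addc(a0, b0, 0, &carry);
  //   r1 = __builtin_addc(a1, b1, carry, &carry);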
2012 
2013  case Builtin::BI__builtin_add_overflow:
2014  case Builtin::BI__builtin_sub_overflow:
2015  case Builtin::BI__builtin_mul_overflow: {
2016  const clang::Expr *LeftArg = E->getArg(0);
2017  const clang::Expr *RightArg = E->getArg(1);
2018  const clang::Expr *ResultArg = E->getArg(2);
2019 
2020  clang::QualType ResultQTy =
2021  ResultArg->getType()->castAs<PointerType>()->getPointeeType();
2022 
2023  WidthAndSignedness LeftInfo =
2024  getIntegerWidthAndSignedness(CGM.getContext(), LeftArg->getType());
2025  WidthAndSignedness RightInfo =
2026  getIntegerWidthAndSignedness(CGM.getContext(), RightArg->getType());
2027  WidthAndSignedness ResultInfo =
2028  getIntegerWidthAndSignedness(CGM.getContext(), ResultQTy);
2029  WidthAndSignedness EncompassingInfo =
2030  EncompassingIntegerType({LeftInfo, RightInfo, ResultInfo});
2031 
2032  llvm::Type *EncompassingLLVMTy =
2033  llvm::IntegerType::get(CGM.getLLVMContext(), EncompassingInfo.Width);
2034 
2035  llvm::Type *ResultLLVMTy = CGM.getTypes().ConvertType(ResultQTy);
2036 
2037  llvm::Intrinsic::ID IntrinsicId;
2038  switch (BuiltinID) {
2039  default:
2040  llvm_unreachable("Unknown overflow builtin id.");
2041  case Builtin::BI__builtin_add_overflow:
2042  IntrinsicId = EncompassingInfo.Signed
2043  ? llvm::Intrinsic::sadd_with_overflow
2044  : llvm::Intrinsic::uadd_with_overflow;
2045  break;
2046  case Builtin::BI__builtin_sub_overflow:
2047  IntrinsicId = EncompassingInfo.Signed
2048  ? llvm::Intrinsic::ssub_with_overflow
2049  : llvm::Intrinsic::usub_with_overflow;
2050  break;
2051  case Builtin::BI__builtin_mul_overflow:
2052  IntrinsicId = EncompassingInfo.Signed
2053  ? llvm::Intrinsic::smul_with_overflow
2054  : llvm::Intrinsic::umul_with_overflow;
2055  break;
2056  }
2057 
2058  llvm::Value *Left = EmitScalarExpr(LeftArg);
2059  llvm::Value *Right = EmitScalarExpr(RightArg);
2060  Address ResultPtr = EmitPointerWithAlignment(ResultArg);
2061 
2062  // Extend each operand to the encompassing type.
2063  Left = Builder.CreateIntCast(Left, EncompassingLLVMTy, LeftInfo.Signed);
2064  Right = Builder.CreateIntCast(Right, EncompassingLLVMTy, RightInfo.Signed);
2065 
2066  // Perform the operation on the extended values.
2067  llvm::Value *Overflow, *Result;
2068  Result = EmitOverflowIntrinsic(*this, IntrinsicId, Left, Right, Overflow);
2069 
2070  if (EncompassingInfo.Width > ResultInfo.Width) {
2071  // The encompassing type is wider than the result type, so we need to
2072  // truncate it.
2073  llvm::Value *ResultTrunc = Builder.CreateTrunc(Result, ResultLLVMTy);
2074 
2075  // To see if the truncation caused an overflow, we will extend
2076  // the result and then compare it to the original result.
2077  llvm::Value *ResultTruncExt = Builder.CreateIntCast(
2078  ResultTrunc, EncompassingLLVMTy, ResultInfo.Signed);
2079  llvm::Value *TruncationOverflow =
2080  Builder.CreateICmpNE(Result, ResultTruncExt);
2081 
2082  Overflow = Builder.CreateOr(Overflow, TruncationOverflow);
2083  Result = ResultTrunc;
2084  }
2085 
2086  // Finally, store the result using the pointer.
2087  bool isVolatile =
2088  ResultArg->getType()->getPointeeType().isVolatileQualified();
2089  Builder.CreateStore(EmitToMemory(Result, ResultQTy), ResultPtr, isVolatile);
2090 
2091  return RValue::get(Overflow);
2092  }
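  // Worked example, assuming 32-bit int: for
  //   unsigned a, b; int r; bool ov = __builtin_add_overflow(a, b, &r);
  // the encompassing type is a signed 33-bit integer, so this emits
  // @llvm.sadd.with.overflow.i33 on the zero-extended operands, truncates the
  // sum to i32, and ORs the intrinsic's overflow bit with the "truncation
  // changed the value" check before storing the result.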
2093 
2094  case Builtin::BI__builtin_uadd_overflow:
2095  case Builtin::BI__builtin_uaddl_overflow:
2096  case Builtin::BI__builtin_uaddll_overflow:
2097  case Builtin::BI__builtin_usub_overflow:
2098  case Builtin::BI__builtin_usubl_overflow:
2099  case Builtin::BI__builtin_usubll_overflow:
2100  case Builtin::BI__builtin_umul_overflow:
2101  case Builtin::BI__builtin_umull_overflow:
2102  case Builtin::BI__builtin_umulll_overflow:
2103  case Builtin::BI__builtin_sadd_overflow:
2104  case Builtin::BI__builtin_saddl_overflow:
2105  case Builtin::BI__builtin_saddll_overflow:
2106  case Builtin::BI__builtin_ssub_overflow:
2107  case Builtin::BI__builtin_ssubl_overflow:
2108  case Builtin::BI__builtin_ssubll_overflow:
2109  case Builtin::BI__builtin_smul_overflow:
2110  case Builtin::BI__builtin_smull_overflow:
2111  case Builtin::BI__builtin_smulll_overflow: {
2112 
2113  // We translate all of these builtins directly to the relevant llvm IR node.
2114 
2115  // Scalarize our inputs.
2116  llvm::Value *X = EmitScalarExpr(E->getArg(0));
2117  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
2118  Address SumOutPtr = EmitPointerWithAlignment(E->getArg(2));
2119 
2120  // Decide which of the overflow intrinsics we are lowering to:
2121  llvm::Intrinsic::ID IntrinsicId;
2122  switch (BuiltinID) {
2123  default: llvm_unreachable("Unknown overflow builtin id.");
2124  case Builtin::BI__builtin_uadd_overflow:
2125  case Builtin::BI__builtin_uaddl_overflow:
2126  case Builtin::BI__builtin_uaddll_overflow:
2127  IntrinsicId = llvm::Intrinsic::uadd_with_overflow;
2128  break;
2129  case Builtin::BI__builtin_usub_overflow:
2130  case Builtin::BI__builtin_usubl_overflow:
2131  case Builtin::BI__builtin_usubll_overflow:
2132  IntrinsicId = llvm::Intrinsic::usub_with_overflow;
2133  break;
2134  case Builtin::BI__builtin_umul_overflow:
2135  case Builtin::BI__builtin_umull_overflow:
2136  case Builtin::BI__builtin_umulll_overflow:
2137  IntrinsicId = llvm::Intrinsic::umul_with_overflow;
2138  break;
2139  case Builtin::BI__builtin_sadd_overflow:
2140  case Builtin::BI__builtin_saddl_overflow:
2141  case Builtin::BI__builtin_saddll_overflow:
2142  IntrinsicId = llvm::Intrinsic::sadd_with_overflow;
2143  break;
2144  case Builtin::BI__builtin_ssub_overflow:
2145  case Builtin::BI__builtin_ssubl_overflow:
2146  case Builtin::BI__builtin_ssubll_overflow:
2147  IntrinsicId = llvm::Intrinsic::ssub_with_overflow;
2148  break;
2149  case Builtin::BI__builtin_smul_overflow:
2150  case Builtin::BI__builtin_smull_overflow:
2151  case Builtin::BI__builtin_smulll_overflow:
2152  IntrinsicId = llvm::Intrinsic::smul_with_overflow;
2153  break;
2154  }
2155 
2156 
2157  llvm::Value *Carry;
2158  llvm::Value *Sum = EmitOverflowIntrinsic(*this, IntrinsicId, X, Y, Carry);
2159  Builder.CreateStore(Sum, SumOutPtr);
2160 
2161  return RValue::get(Carry);
2162  }
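  // For example, __builtin_uadd_overflow(x, y, &sum) becomes roughly:
  //   %pair  = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %x, i32 %y)
  //   %value = extractvalue { i32, i1 } %pair, 0
  //   %carry = extractvalue { i32, i1 } %pair, 1
  //   store i32 %value, i32* %sum.ptr
  // and the i1 carry is the value returned to the caller.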
2163  case Builtin::BI__builtin_addressof:
2164  return RValue::get(EmitLValue(E->getArg(0)).getPointer());
2165  case Builtin::BI__builtin_operator_new:
2166  return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2167  E->getArg(0), false);
2168  case Builtin::BI__builtin_operator_delete:
2169  return EmitBuiltinNewDeleteCall(FD->getType()->castAs<FunctionProtoType>(),
2170  E->getArg(0), true);
2171  case Builtin::BI__noop:
2172  // __noop always evaluates to an integer literal zero.
2173  return RValue::get(ConstantInt::get(IntTy, 0));
2174  case Builtin::BI__builtin_call_with_static_chain: {
2175  const CallExpr *Call = cast<CallExpr>(E->getArg(0));
2176  const Expr *Chain = E->getArg(1);
2177  return EmitCall(Call->getCallee()->getType(),
2178  EmitCallee(Call->getCallee()), Call, ReturnValue,
2179  EmitScalarExpr(Chain));
2180  }
2181  case Builtin::BI_InterlockedExchange8:
2182  case Builtin::BI_InterlockedExchange16:
2183  case Builtin::BI_InterlockedExchange:
2184  case Builtin::BI_InterlockedExchangePointer:
2185  return RValue::get(
2186  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E));
2187  case Builtin::BI_InterlockedCompareExchangePointer: {
2188  llvm::Type *RTy;
2189  llvm::IntegerType *IntType =
2190  IntegerType::get(getLLVMContext(),
2191  getContext().getTypeSize(E->getType()));
2192  llvm::Type *IntPtrType = IntType->getPointerTo();
2193 
2194  llvm::Value *Destination =
2195  Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), IntPtrType);
2196 
2197  llvm::Value *Exchange = EmitScalarExpr(E->getArg(1));
2198  RTy = Exchange->getType();
2199  Exchange = Builder.CreatePtrToInt(Exchange, IntType);
2200 
2201  llvm::Value *Comparand =
2202  Builder.CreatePtrToInt(EmitScalarExpr(E->getArg(2)), IntType);
2203 
2204  auto Result =
2205  Builder.CreateAtomicCmpXchg(Destination, Comparand, Exchange,
2206  AtomicOrdering::SequentiallyConsistent,
2207  AtomicOrdering::SequentiallyConsistent);
2208  Result->setVolatile(true);
2209 
2210  return RValue::get(Builder.CreateIntToPtr(Builder.CreateExtractValue(Result,
2211  0),
2212  RTy));
2213  }
2214  case Builtin::BI_InterlockedCompareExchange8:
2215  case Builtin::BI_InterlockedCompareExchange16:
2216  case Builtin::BI_InterlockedCompareExchange:
2217  case Builtin::BI_InterlockedCompareExchange64: {
2218  AtomicCmpXchgInst *CXI = Builder.CreateAtomicCmpXchg(
2219  EmitScalarExpr(E->getArg(0)),
2220  EmitScalarExpr(E->getArg(2)),
2221  EmitScalarExpr(E->getArg(1)),
2222  AtomicOrdering::SequentiallyConsistent,
2223  AtomicOrdering::SequentiallyConsistent);
2224  CXI->setVolatile(true);
2225  return RValue::get(Builder.CreateExtractValue(CXI, 0));
2226  }
2227  case Builtin::BI_InterlockedIncrement16:
2228  case Builtin::BI_InterlockedIncrement:
2229  return RValue::get(
2230  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E));
2231  case Builtin::BI_InterlockedDecrement16:
2232  case Builtin::BI_InterlockedDecrement:
2233  return RValue::get(
2234  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E));
2235  case Builtin::BI_InterlockedAnd8:
2236  case Builtin::BI_InterlockedAnd16:
2237  case Builtin::BI_InterlockedAnd:
2238  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E));
2239  case Builtin::BI_InterlockedExchangeAdd8:
2240  case Builtin::BI_InterlockedExchangeAdd16:
2241  case Builtin::BI_InterlockedExchangeAdd:
2242  return RValue::get(
2243  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E));
2244  case Builtin::BI_InterlockedExchangeSub8:
2245  case Builtin::BI_InterlockedExchangeSub16:
2246  case Builtin::BI_InterlockedExchangeSub:
2247  return RValue::get(
2248  EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E));
2249  case Builtin::BI_InterlockedOr8:
2250  case Builtin::BI_InterlockedOr16:
2251  case Builtin::BI_InterlockedOr:
2252  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E));
2253  case Builtin::BI_InterlockedXor8:
2254  case Builtin::BI_InterlockedXor16:
2255  case Builtin::BI_InterlockedXor:
2256  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E));
2257  case Builtin::BI_interlockedbittestandset:
2258  return RValue::get(
2259  EmitMSVCBuiltinExpr(MSVCIntrin::_interlockedbittestandset, E));
2260 
2261  case Builtin::BI__exception_code:
2262  case Builtin::BI_exception_code:
2263  return RValue::get(EmitSEHExceptionCode());
2264  case Builtin::BI__exception_info:
2265  case Builtin::BI_exception_info:
2266  return RValue::get(EmitSEHExceptionInfo());
2267  case Builtin::BI__abnormal_termination:
2268  case Builtin::BI_abnormal_termination:
2269  return RValue::get(EmitSEHAbnormalTermination());
2270  case Builtin::BI_setjmpex: {
2271  if (getTarget().getTriple().isOSMSVCRT()) {
2272  llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2273  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2274  getLLVMContext(), llvm::AttributeList::FunctionIndex,
2275  llvm::Attribute::ReturnsTwice);
2276  llvm::Constant *SetJmpEx = CGM.CreateRuntimeFunction(
2277  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2278  "_setjmpex", ReturnsTwiceAttr, /*Local=*/true);
2279  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2280  EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2281  llvm::Value *FrameAddr =
2282  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2283  ConstantInt::get(Int32Ty, 0));
2284  llvm::Value *Args[] = {Buf, FrameAddr};
2285  llvm::CallSite CS = EmitRuntimeCallOrInvoke(SetJmpEx, Args);
2286  CS.setAttributes(ReturnsTwiceAttr);
2287  return RValue::get(CS.getInstruction());
2288  }
2289  break;
2290  }
2291  case Builtin::BI_setjmp: {
2292  if (getTarget().getTriple().isOSMSVCRT()) {
2293  llvm::AttributeList ReturnsTwiceAttr = llvm::AttributeList::get(
2294  getLLVMContext(), llvm::AttributeList::FunctionIndex,
2295  llvm::Attribute::ReturnsTwice);
2296  llvm::Value *Buf = Builder.CreateBitOrPointerCast(
2297  EmitScalarExpr(E->getArg(0)), Int8PtrTy);
2298  llvm::CallSite CS;
2299  if (getTarget().getTriple().getArch() == llvm::Triple::x86) {
2300  llvm::Type *ArgTypes[] = {Int8PtrTy, IntTy};
2301  llvm::Constant *SetJmp3 = CGM.CreateRuntimeFunction(
2302  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/true),
2303  "_setjmp3", ReturnsTwiceAttr, /*Local=*/true);
2304  llvm::Value *Count = ConstantInt::get(IntTy, 0);
2305  llvm::Value *Args[] = {Buf, Count};
2306  CS = EmitRuntimeCallOrInvoke(SetJmp3, Args);
2307  } else {
2308  llvm::Type *ArgTypes[] = {Int8PtrTy, Int8PtrTy};
2309  llvm::Constant *SetJmp = CGM.CreateRuntimeFunction(
2310  llvm::FunctionType::get(IntTy, ArgTypes, /*isVarArg=*/false),
2311  "_setjmp", ReturnsTwiceAttr, /*Local=*/true);
2312  llvm::Value *FrameAddr =
2313  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::frameaddress),
2314  ConstantInt::get(Int32Ty, 0));
2315  llvm::Value *Args[] = {Buf, FrameAddr};
2316  CS = EmitRuntimeCallOrInvoke(SetJmp, Args);
2317  }
2318  CS.setAttributes(ReturnsTwiceAttr);
2319  return RValue::get(CS.getInstruction());
2320  }
2321  break;
2322  }
2323 
2324  case Builtin::BI__GetExceptionInfo: {
2325  if (llvm::GlobalVariable *GV =
2326  CGM.getCXXABI().getThrowInfo(FD->getParamDecl(0)->getType()))
2327  return RValue::get(llvm::ConstantExpr::getBitCast(GV, CGM.Int8PtrTy));
2328  break;
2329  }
2330 
2331  case Builtin::BI__fastfail:
2332  return RValue::get(EmitMSVCBuiltinExpr(MSVCIntrin::__fastfail, E));
2333 
2334  case Builtin::BI__builtin_coro_size: {
2335  auto & Context = getContext();
2336  auto SizeTy = Context.getSizeType();
2337  auto T = Builder.getIntNTy(Context.getTypeSize(SizeTy));
2338  Value *F = CGM.getIntrinsic(Intrinsic::coro_size, T);
2339  return RValue::get(Builder.CreateCall(F));
2340  }
2341 
2342  case Builtin::BI__builtin_coro_id:
2343  return EmitCoroutineIntrinsic(E, Intrinsic::coro_id);
2344  case Builtin::BI__builtin_coro_promise:
2345  return EmitCoroutineIntrinsic(E, Intrinsic::coro_promise);
2346  case Builtin::BI__builtin_coro_resume:
2347  return EmitCoroutineIntrinsic(E, Intrinsic::coro_resume);
2348  case Builtin::BI__builtin_coro_frame:
2349  return EmitCoroutineIntrinsic(E, Intrinsic::coro_frame);
2350  case Builtin::BI__builtin_coro_free:
2351  return EmitCoroutineIntrinsic(E, Intrinsic::coro_free);
2352  case Builtin::BI__builtin_coro_destroy:
2353  return EmitCoroutineIntrinsic(E, Intrinsic::coro_destroy);
2354  case Builtin::BI__builtin_coro_done:
2355  return EmitCoroutineIntrinsic(E, Intrinsic::coro_done);
2356  case Builtin::BI__builtin_coro_alloc:
2357  return EmitCoroutineIntrinsic(E, Intrinsic::coro_alloc);
2358  case Builtin::BI__builtin_coro_begin:
2359  return EmitCoroutineIntrinsic(E, Intrinsic::coro_begin);
2360  case Builtin::BI__builtin_coro_end:
2361  return EmitCoroutineIntrinsic(E, Intrinsic::coro_end);
2362  case Builtin::BI__builtin_coro_suspend:
2363  return EmitCoroutineIntrinsic(E, Intrinsic::coro_suspend);
2364  case Builtin::BI__builtin_coro_param:
2365  return EmitCoroutineIntrinsic(E, Intrinsic::coro_param);
2366 
2367  // OpenCL v2.0 s6.13.16.2, Built-in pipe read and write functions
2368  case Builtin::BIread_pipe:
2369  case Builtin::BIwrite_pipe: {
2370  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2371  *Arg1 = EmitScalarExpr(E->getArg(1));
2372  CGOpenCLRuntime OpenCLRT(CGM);
2373  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2374  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2375 
2376  // Type of the generic packet parameter.
2377  unsigned GenericAS =
2378  getContext().getTargetAddressSpace(LangAS::opencl_generic);
2379  llvm::Type *I8PTy = llvm::PointerType::get(
2380  llvm::Type::getInt8Ty(getLLVMContext()), GenericAS);
2381 
2382  // Testing which overloaded version we should generate the call for.
2383  if (2U == E->getNumArgs()) {
2384  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_2"
2385  : "__write_pipe_2";
2386  // Creating a generic function type to be able to call with any builtin or
2387  // user defined type.
2388  llvm::Type *ArgTys[] = {Arg0->getType(), I8PTy, Int32Ty, Int32Ty};
2389  llvm::FunctionType *FTy = llvm::FunctionType::get(
2390  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2391  Value *BCast = Builder.CreatePointerCast(Arg1, I8PTy);
2392  return RValue::get(
2393  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2394  {Arg0, BCast, PacketSize, PacketAlign}));
2395  } else {
2396  assert(4 == E->getNumArgs() &&
2397  "Illegal number of parameters to pipe function");
2398  const char *Name = (BuiltinID == Builtin::BIread_pipe) ? "__read_pipe_4"
2399  : "__write_pipe_4";
2400 
2401  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, I8PTy,
2402  Int32Ty, Int32Ty};
2403  Value *Arg2 = EmitScalarExpr(E->getArg(2)),
2404  *Arg3 = EmitScalarExpr(E->getArg(3));
2405  llvm::FunctionType *FTy = llvm::FunctionType::get(
2406  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2407  Value *BCast = Builder.CreatePointerCast(Arg3, I8PTy);
2408  // We know the third argument is an integer type, but we may need to cast
2409  // it to i32.
2410  if (Arg2->getType() != Int32Ty)
2411  Arg2 = Builder.CreateZExtOrTrunc(Arg2, Int32Ty);
2412  return RValue::get(Builder.CreateCall(
2413  CGM.CreateRuntimeFunction(FTy, Name),
2414  {Arg0, Arg1, Arg2, BCast, PacketSize, PacketAlign}));
2415  }
2416  }
2417  // OpenCL v2.0 s6.13.16 ,s9.17.3.5 - Built-in pipe reserve read and write
2418  // functions
2419  case Builtin::BIreserve_read_pipe:
2420  case Builtin::BIreserve_write_pipe:
2421  case Builtin::BIwork_group_reserve_read_pipe:
2422  case Builtin::BIwork_group_reserve_write_pipe:
2423  case Builtin::BIsub_group_reserve_read_pipe:
2424  case Builtin::BIsub_group_reserve_write_pipe: {
2425  // Composing the mangled name for the function.
2426  const char *Name;
2427  if (BuiltinID == Builtin::BIreserve_read_pipe)
2428  Name = "__reserve_read_pipe";
2429  else if (BuiltinID == Builtin::BIreserve_write_pipe)
2430  Name = "__reserve_write_pipe";
2431  else if (BuiltinID == Builtin::BIwork_group_reserve_read_pipe)
2432  Name = "__work_group_reserve_read_pipe";
2433  else if (BuiltinID == Builtin::BIwork_group_reserve_write_pipe)
2434  Name = "__work_group_reserve_write_pipe";
2435  else if (BuiltinID == Builtin::BIsub_group_reserve_read_pipe)
2436  Name = "__sub_group_reserve_read_pipe";
2437  else
2438  Name = "__sub_group_reserve_write_pipe";
2439 
2440  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2441  *Arg1 = EmitScalarExpr(E->getArg(1));
2442  llvm::Type *ReservedIDTy = ConvertType(getContext().OCLReserveIDTy);
2443  CGOpenCLRuntime OpenCLRT(CGM);
2444  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2445  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2446 
2447  // Building the generic function prototype.
2448  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty, Int32Ty};
2449  llvm::FunctionType *FTy = llvm::FunctionType::get(
2450  ReservedIDTy, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2451  // We know the second argument is an integer type, but we may need to cast
2452  // it to i32.
2453  if (Arg1->getType() != Int32Ty)
2454  Arg1 = Builder.CreateZExtOrTrunc(Arg1, Int32Ty);
2455  return RValue::get(
2456  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2457  {Arg0, Arg1, PacketSize, PacketAlign}));
2458  }
2459  // OpenCL v2.0 s6.13.16, s9.17.3.5 - Built-in pipe commit read and write
2460  // functions
2461  case Builtin::BIcommit_read_pipe:
2462  case Builtin::BIcommit_write_pipe:
2463  case Builtin::BIwork_group_commit_read_pipe:
2464  case Builtin::BIwork_group_commit_write_pipe:
2465  case Builtin::BIsub_group_commit_read_pipe:
2466  case Builtin::BIsub_group_commit_write_pipe: {
2467  const char *Name;
2468  if (BuiltinID == Builtin::BIcommit_read_pipe)
2469  Name = "__commit_read_pipe";
2470  else if (BuiltinID == Builtin::BIcommit_write_pipe)
2471  Name = "__commit_write_pipe";
2472  else if (BuiltinID == Builtin::BIwork_group_commit_read_pipe)
2473  Name = "__work_group_commit_read_pipe";
2474  else if (BuiltinID == Builtin::BIwork_group_commit_write_pipe)
2475  Name = "__work_group_commit_write_pipe";
2476  else if (BuiltinID == Builtin::BIsub_group_commit_read_pipe)
2477  Name = "__sub_group_commit_read_pipe";
2478  else
2479  Name = "__sub_group_commit_write_pipe";
2480 
2481  Value *Arg0 = EmitScalarExpr(E->getArg(0)),
2482  *Arg1 = EmitScalarExpr(E->getArg(1));
2483  CGOpenCLRuntime OpenCLRT(CGM);
2484  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2485  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2486 
2487  // Building the generic function prototype.
2488  llvm::Type *ArgTys[] = {Arg0->getType(), Arg1->getType(), Int32Ty, Int32Ty};
2489  llvm::FunctionType *FTy =
2490  llvm::FunctionType::get(llvm::Type::getVoidTy(getLLVMContext()),
2491  llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2492 
2493  return RValue::get(
2494  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2495  {Arg0, Arg1, PacketSize, PacketAlign}));
2496  }
2497  // OpenCL v2.0 s6.13.16.4 Built-in pipe query functions
2498  case Builtin::BIget_pipe_num_packets:
2499  case Builtin::BIget_pipe_max_packets: {
2500  const char *Name;
2501  if (BuiltinID == Builtin::BIget_pipe_num_packets)
2502  Name = "__get_pipe_num_packets";
2503  else
2504  Name = "__get_pipe_max_packets";
2505 
2506  // Building the generic function prototype.
2507  Value *Arg0 = EmitScalarExpr(E->getArg(0));
2508  CGOpenCLRuntime OpenCLRT(CGM);
2509  Value *PacketSize = OpenCLRT.getPipeElemSize(E->getArg(0));
2510  Value *PacketAlign = OpenCLRT.getPipeElemAlign(E->getArg(0));
2511  llvm::Type *ArgTys[] = {Arg0->getType(), Int32Ty, Int32Ty};
2512  llvm::FunctionType *FTy = llvm::FunctionType::get(
2513  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2514 
2515  return RValue::get(Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2516  {Arg0, PacketSize, PacketAlign}));
2517  }
2518 
2519  // OpenCL v2.0 s6.13.9 - Address space qualifier functions.
2520  case Builtin::BIto_global:
2521  case Builtin::BIto_local:
2522  case Builtin::BIto_private: {
2523  auto Arg0 = EmitScalarExpr(E->getArg(0));
2524  auto NewArgT = llvm::PointerType::get(Int8Ty,
2525  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2526  auto NewRetT = llvm::PointerType::get(Int8Ty,
2527  CGM.getContext().getTargetAddressSpace(
2528  E->getType()->getPointeeType().getQualifiers().getAddressSpace()));
2529  auto FTy = llvm::FunctionType::get(NewRetT, {NewArgT}, false);
2530  llvm::Value *NewArg;
2531  if (Arg0->getType()->getPointerAddressSpace() !=
2532  NewArgT->getPointerAddressSpace())
2533  NewArg = Builder.CreateAddrSpaceCast(Arg0, NewArgT);
2534  else
2535  NewArg = Builder.CreateBitOrPointerCast(Arg0, NewArgT);
2536  auto NewName = std::string("__") + E->getDirectCallee()->getName().str();
2537  auto NewCall =
2538  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, NewName), {NewArg});
2539  return RValue::get(Builder.CreateBitOrPointerCast(NewCall,
2540  ConvertType(E->getType())));
2541  }
2542 
2543  // OpenCL v2.0, s6.13.17 - Enqueue kernel function.
2544  // It contains four different overload formats specified in Table 6.13.17.1.
2545  case Builtin::BIenqueue_kernel: {
2546  StringRef Name; // Generated function call name
2547  unsigned NumArgs = E->getNumArgs();
2548 
2549  llvm::Type *QueueTy = ConvertType(getContext().OCLQueueTy);
2550  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2551  getContext().getTargetAddressSpace(LangAS::opencl_generic));
2552 
2553  llvm::Value *Queue = EmitScalarExpr(E->getArg(0));
2554  llvm::Value *Flags = EmitScalarExpr(E->getArg(1));
2555  LValue NDRangeL = EmitAggExprToLValue(E->getArg(2));
2556  llvm::Value *Range = NDRangeL.getAddress().getPointer();
2557  llvm::Type *RangeTy = NDRangeL.getAddress().getType();
2558 
2559  if (NumArgs == 4) {
2560  // The most basic form of the call with parameters:
2561  // queue_t, kernel_enqueue_flags_t, ndrange_t, block(void)
2562  Name = "__enqueue_kernel_basic";
2563  llvm::Type *ArgTys[] = {QueueTy, Int32Ty, RangeTy, GenericVoidPtrTy};
2564  llvm::FunctionType *FTy = llvm::FunctionType::get(
2565  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys, 4), false);
2566 
2567  llvm::Value *Block = Builder.CreatePointerCast(
2568  EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2569 
2570  AttrBuilder B;
2571  B.addAttribute(Attribute::ByVal);
2572  llvm::AttributeList ByValAttrSet =
2573  llvm::AttributeList::get(CGM.getModule().getContext(), 3U, B);
2574 
2575  auto RTCall =
2576  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name, ByValAttrSet),
2577  {Queue, Flags, Range, Block});
2578  RTCall->setAttributes(ByValAttrSet);
2579  return RValue::get(RTCall);
2580  }
2581  assert(NumArgs >= 5 && "Invalid enqueue_kernel signature");
2582 
2583  // Could have events and/or vaargs.
2584  if (E->getArg(3)->getType()->isBlockPointerType()) {
2585  // No events passed, but has variadic arguments.
2586  Name = "__enqueue_kernel_vaargs";
2587  llvm::Value *Block = Builder.CreatePointerCast(
2588  EmitScalarExpr(E->getArg(3)), GenericVoidPtrTy);
2589  // Create a vector of the arguments, as well as a constant value to
2590  // express to the runtime the number of variadic arguments.
2591  std::vector<llvm::Value *> Args = {Queue, Flags, Range, Block,
2592  ConstantInt::get(IntTy, NumArgs - 4)};
2593  std::vector<llvm::Type *> ArgTys = {QueueTy, IntTy, RangeTy,
2594  GenericVoidPtrTy, IntTy};
2595 
2596  // Each of the following arguments specifies the size of the corresponding
2597  // argument passed to the enqueued block.
2598  for (unsigned I = 4/*Position of the first size arg*/; I < NumArgs; ++I)
2599  Args.push_back(
2600  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2601 
2602  llvm::FunctionType *FTy = llvm::FunctionType::get(
2603  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2604  return RValue::get(
2605  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2606  llvm::ArrayRef<llvm::Value *>(Args)));
2607  }
2608  // Any calls now have event arguments passed.
2609  if (NumArgs >= 7) {
2610  llvm::Type *EventTy = ConvertType(getContext().OCLClkEventTy);
2611  llvm::Type *EventPtrTy = EventTy->getPointerTo(
2612  CGM.getContext().getTargetAddressSpace(LangAS::opencl_generic));
2613 
2614  llvm::Value *NumEvents =
2615  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(3)), Int32Ty);
2616  llvm::Value *EventList =
2617  E->getArg(4)->getType()->isArrayType()
2618  ? EmitArrayToPointerDecay(E->getArg(4)).getPointer()
2619  : EmitScalarExpr(E->getArg(4));
2620  llvm::Value *ClkEvent = EmitScalarExpr(E->getArg(5));
2621  // Convert to generic address space.
2622  EventList = Builder.CreatePointerCast(EventList, EventPtrTy);
2623  ClkEvent = Builder.CreatePointerCast(ClkEvent, EventPtrTy);
2624  llvm::Value *Block = Builder.CreatePointerCast(
2625  EmitScalarExpr(E->getArg(6)), GenericVoidPtrTy);
2626 
2627  std::vector<llvm::Type *> ArgTys = {
2628  QueueTy, Int32Ty, RangeTy, Int32Ty,
2629  EventPtrTy, EventPtrTy, GenericVoidPtrTy};
2630 
2631  std::vector<llvm::Value *> Args = {Queue, Flags, Range, NumEvents,
2632  EventList, ClkEvent, Block};
2633 
2634  if (NumArgs == 7) {
2635  // Has events but no variadics.
2636  Name = "__enqueue_kernel_basic_events";
2637  llvm::FunctionType *FTy = llvm::FunctionType::get(
2638  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), false);
2639  return RValue::get(
2640  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2641  llvm::ArrayRef<llvm::Value *>(Args)));
2642  }
2643  // Has event info and variadics
2644  // Pass the number of variadics to the runtime function too.
2645  Args.push_back(ConstantInt::get(Int32Ty, NumArgs - 7));
2646  ArgTys.push_back(Int32Ty);
2647  Name = "__enqueue_kernel_events_vaargs";
2648 
2649  // Each of the following arguments specifies the size of the corresponding
2650  // argument passed to the enqueued block.
2651  for (unsigned I = 7/*Position of the first size arg*/; I < NumArgs; ++I)
2652  Args.push_back(
2653  Builder.CreateZExtOrTrunc(EmitScalarExpr(E->getArg(I)), SizeTy));
2654 
2655  llvm::FunctionType *FTy = llvm::FunctionType::get(
2656  Int32Ty, llvm::ArrayRef<llvm::Type *>(ArgTys), true);
2657  return RValue::get(
2658  Builder.CreateCall(CGM.CreateRuntimeFunction(FTy, Name),
2659  llvm::ArrayRef<llvm::Value *>(Args)));
2660  }
2661  LLVM_FALLTHROUGH;
2662  }
2663  // OpenCL v2.0 s6.13.17.6 - Kernel query functions need bitcast of block
2664  // parameter.
2665  case Builtin::BIget_kernel_work_group_size: {
2666  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2667  getContext().getTargetAddressSpace(LangAS::opencl_generic));
2668  Value *Arg = EmitScalarExpr(E->getArg(0));
2669  Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2670  return RValue::get(Builder.CreateCall(
2671  CGM.CreateRuntimeFunction(
2672  llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2673  "__get_kernel_work_group_size_impl"),
2674  Arg));
2675  }
2676  case Builtin::BIget_kernel_preferred_work_group_size_multiple: {
2677  llvm::Type *GenericVoidPtrTy = Builder.getInt8PtrTy(
2678  getContext().getTargetAddressSpace(LangAS::opencl_generic));
2679  Value *Arg = EmitScalarExpr(E->getArg(0));
2680  Arg = Builder.CreatePointerCast(Arg, GenericVoidPtrTy);
2681  return RValue::get(Builder.CreateCall(
2682  CGM.CreateRuntimeFunction(
2683  llvm::FunctionType::get(IntTy, GenericVoidPtrTy, false),
2684  "__get_kernel_preferred_work_group_multiple_impl"),
2685  Arg));
2686  }
2687  case Builtin::BIprintf:
2688  if (getTarget().getTriple().isNVPTX())
2689  return EmitNVPTXDevicePrintfCallExpr(E, ReturnValue);
2690  break;
2691  case Builtin::BI__builtin_canonicalize:
2692  case Builtin::BI__builtin_canonicalizef:
2693  case Builtin::BI__builtin_canonicalizel:
2694  return RValue::get(emitUnaryBuiltin(*this, E, Intrinsic::canonicalize));
2695 
2696  case Builtin::BI__builtin_thread_pointer: {
2697  if (!getContext().getTargetInfo().isTLSSupported())
2698  CGM.ErrorUnsupported(E, "__builtin_thread_pointer");
2699  // Fall through - it's already mapped to the intrinsic by GCCBuiltin.
2700  break;
2701  }
2702  case Builtin::BI__builtin_os_log_format: {
2703  assert(E->getNumArgs() >= 2 &&
2704  "__builtin_os_log_format takes at least 2 arguments");
2706  analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2707  Address BufAddr = EmitPointerWithAlignment(E->getArg(0));
2708  // Ignore argument 1, the format string. It is not currently used.
2709  CharUnits Offset;
2710  Builder.CreateStore(
2711  Builder.getInt8(Layout.getSummaryByte()),
2712  Builder.CreateConstByteGEP(BufAddr, Offset++, "summary"));
2713  Builder.CreateStore(
2714  Builder.getInt8(Layout.getNumArgsByte()),
2715  Builder.CreateConstByteGEP(BufAddr, Offset++, "numArgs"));
2716 
2717  llvm::SmallVector<llvm::Value *, 4> RetainableOperands;
2718  for (const auto &Item : Layout.Items) {
2719  Builder.CreateStore(
2720  Builder.getInt8(Item.getDescriptorByte()),
2721  Builder.CreateConstByteGEP(BufAddr, Offset++, "argDescriptor"));
2722  Builder.CreateStore(
2723  Builder.getInt8(Item.getSizeByte()),
2724  Builder.CreateConstByteGEP(BufAddr, Offset++, "argSize"));
2725  Address Addr = Builder.CreateConstByteGEP(BufAddr, Offset);
2726  if (const Expr *TheExpr = Item.getExpr()) {
2727  Addr = Builder.CreateElementBitCast(
2728  Addr, ConvertTypeForMem(TheExpr->getType()));
2729  // Check if this is a retainable type.
2730  if (TheExpr->getType()->isObjCRetainableType()) {
2731  assert(getEvaluationKind(TheExpr->getType()) == TEK_Scalar &&
2732  "Only scalar can be a ObjC retainable type");
2733  llvm::Value *SV = EmitScalarExpr(TheExpr, /*Ignore*/ false);
2734  RValue RV = RValue::get(SV);
2735  LValue LV = MakeAddrLValue(Addr, TheExpr->getType());
2736  EmitStoreThroughLValue(RV, LV);
2737  // Check if the object is constant; if not, save it in
2738  // RetainableOperands.
2739  if (!isa<Constant>(SV))
2740  RetainableOperands.push_back(SV);
2741  } else {
2742  EmitAnyExprToMem(TheExpr, Addr, Qualifiers(), /*isInit*/ true);
2743  }
2744  } else {
2745  Addr = Builder.CreateElementBitCast(Addr, Int32Ty);
2746  Builder.CreateStore(
2747  Builder.getInt32(Item.getConstValue().getQuantity()), Addr);
2748  }
2749  Offset += Item.size();
2750  }
2751 
2752  // Push a clang.arc.use cleanup for each object in RetainableOperands. The
2753  // cleanup will cause the use to appear after the final log call, keeping
2754  // the object valid while it's held in the log buffer. Note that if there's
2755  // a release cleanup on the object, it will already be active; since
2756  // cleanups are emitted in reverse order, the use will occur before the
2757  // object is released.
2758  if (!RetainableOperands.empty() && getLangOpts().ObjCAutoRefCount &&
2759  CGM.getCodeGenOpts().OptimizationLevel != 0)
2760  for (llvm::Value *object : RetainableOperands)
2761  pushFullExprCleanup<CallObjCArcUse>(getARCCleanupKind(), object);
2762 
2763  return RValue::get(BufAddr.getPointer());
2764  }
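Summarizing the stores above, the buffer produced for __builtin_os_log_format is a two-byte header followed by one record per format item; a rough sketch of the layout (read directly from the code above, not a formal ABI statement):

    //   byte 0              summary byte            (Layout.getSummaryByte())
    //   byte 1              argument-count byte     (Layout.getNumArgsByte())
    //   for each item:
    //     1 byte            argument descriptor     (Item.getDescriptorByte())
    //     1 byte            argument size           (Item.getSizeByte())
    //     Item.size() bytes the argument value (a scalar, a pointer, or a
    //                       constant written as an i32)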
2765 
2766  case Builtin::BI__builtin_os_log_format_buffer_size: {
2767  analyze_os_log::OSLogBufferLayout Layout;
2768  analyze_os_log::computeOSLogBufferLayout(CGM.getContext(), E, Layout);
2769  return RValue::get(ConstantInt::get(ConvertType(E->getType()),
2770  Layout.size().getQuantity()));
2771  }
2772 
2773  case Builtin::BI__xray_customevent: {
2774  if (!ShouldXRayInstrumentFunction())
2775  return RValue::getIgnored();
2776  if (const auto *XRayAttr = CurFuncDecl->getAttr<XRayInstrumentAttr>()) {
2777  if (XRayAttr->neverXRayInstrument())
2778  return RValue::getIgnored();
2779  }
2780  Function *F = CGM.getIntrinsic(Intrinsic::xray_customevent);
2781  auto FTy = F->getFunctionType();
2782  auto Arg0 = E->getArg(0);
2783  auto Arg0Val = EmitScalarExpr(Arg0);
2784  auto Arg0Ty = Arg0->getType();
2785  auto PTy0 = FTy->getParamType(0);
2786  if (PTy0 != Arg0Val->getType()) {
2787  if (Arg0Ty->isArrayType())
2788  Arg0Val = EmitArrayToPointerDecay(Arg0).getPointer();
2789  else
2790  Arg0Val = Builder.CreatePointerCast(Arg0Val, PTy0);
2791  }
2792  auto Arg1 = EmitScalarExpr(E->getArg(1));
2793  auto PTy1 = FTy->getParamType(1);
2794  if (PTy1 != Arg1->getType())
2795  Arg1 = Builder.CreateTruncOrBitCast(Arg1, PTy1);
2796  return RValue::get(Builder.CreateCall(F, {Arg0Val, Arg1}));
2797  }
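A minimal (assumed) source-level use of this builtin; when the enclosing function is XRay-instrumented, the case above turns the call into the llvm.xray.customevent intrinsic, and when instrumentation is disabled the call is simply dropped:

    #include <stddef.h>

    void record_phase(const char *msg, size_t len) {
      // Emits an XRay custom-event sled only when instrumentation is enabled.
      __xray_customevent(msg, len);
    }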
2798 
2799  case Builtin::BI__builtin_ms_va_start:
2800  case Builtin::BI__builtin_ms_va_end:
2801  return RValue::get(
2802  EmitVAStartEnd(EmitMSVAListRef(E->getArg(0)).getPointer(),
2803  BuiltinID == Builtin::BI__builtin_ms_va_start));
2804 
2805  case Builtin::BI__builtin_ms_va_copy: {
2806  // Lower this manually. We can't reliably determine whether or not any
2807  // given va_copy() is for a Win64 va_list from the calling convention
2808  // alone, because it's legal to do this from a System V ABI function.
2809  // With opaque pointer types, we won't have enough information in LLVM
2810  // IR to determine this from the argument types, either. Best to do it
2811  // now, while we have enough information.
2812  Address DestAddr = EmitMSVAListRef(E->getArg(0));
2813  Address SrcAddr = EmitMSVAListRef(E->getArg(1));
2814 
2815  llvm::Type *BPP = Int8PtrPtrTy;
2816 
2817  DestAddr = Address(Builder.CreateBitCast(DestAddr.getPointer(), BPP, "cp"),
2818  DestAddr.getAlignment());
2819  SrcAddr = Address(Builder.CreateBitCast(SrcAddr.getPointer(), BPP, "ap"),
2820  SrcAddr.getAlignment());
2821 
2822  Value *ArgPtr = Builder.CreateLoad(SrcAddr, "ap.val");
2823  return RValue::get(Builder.CreateStore(ArgPtr, DestAddr));
2824  }
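As a usage sketch (an assumption about typical use on an x86-64 target, not code from this file): the MS va_list is just a pointer into the argument save area, so the manual lowering above reduces __builtin_ms_va_copy to a single pointer load and store:

    __attribute__((ms_abi)) void consume_win64_varargs(int n, ...) {
      __builtin_ms_va_list ap, ap2;
      __builtin_ms_va_start(ap, n);
      __builtin_ms_va_copy(ap2, ap);   // becomes the load/store emitted above
      /* ... read arguments from ap2 ... */
      __builtin_ms_va_end(ap2);
      __builtin_ms_va_end(ap);
    }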
2825  }
2826 
2827  // If this is an alias for a lib function (e.g. __builtin_sin), emit
2828  // the call using the normal call path, but using the unmangled
2829  // version of the function name.
2830  if (getContext().BuiltinInfo.isLibFunction(BuiltinID))
2831  return emitLibraryCall(*this, FD, E,
2832  CGM.getBuiltinLibFunction(FD, BuiltinID));
2833 
2834  // If this is a predefined lib function (e.g. malloc), emit the call
2835  // using exactly the normal call path.
2836  if (getContext().BuiltinInfo.isPredefinedLibFunction(BuiltinID))
2837  return emitLibraryCall(*this, FD, E,
2838  cast<llvm::Constant>(EmitScalarExpr(E->getCallee())));
2839 
2840  // Check that a call to a target specific builtin has the correct target
2841  // features.
2842  // This check is done down here so that it is skipped for non-target-specific
2843  // builtins; however, if generic builtins ever start to require generic target
2844  // features, it can move up to the beginning of the function.
2845  checkTargetFeatures(E, FD);
2846 
2847  // See if we have a target specific intrinsic.
2848  const char *Name = getContext().BuiltinInfo.getName(BuiltinID);
2849  Intrinsic::ID IntrinsicID = Intrinsic::not_intrinsic;
2850  StringRef Prefix =
2851  llvm::Triple::getArchTypePrefix(getTarget().getTriple().getArch());
2852  if (!Prefix.empty()) {
2853  IntrinsicID = Intrinsic::getIntrinsicForGCCBuiltin(Prefix.data(), Name);
2854  // NOTE: we don't need to perform a compatibility flag check here, since the
2855  // intrinsics are declared in Builtins*.def via LANGBUILTIN, which filters
2856  // the MS builtins via ALL_MS_LANGUAGES, and they are filtered out earlier.
2857  if (IntrinsicID == Intrinsic::not_intrinsic)
2858  IntrinsicID = Intrinsic::getIntrinsicForMSBuiltin(Prefix.data(), Name);
2859  }
2860 
2861  if (IntrinsicID != Intrinsic::not_intrinsic) {
2862  SmallVector<Value*, 16> Args;
2863 
2864  // Find out if any arguments are required to be integer constant
2865  // expressions.
2866  unsigned ICEArguments = 0;
2867  ASTContext::GetBuiltinTypeError Error;
2868  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
2869  assert(Error == ASTContext::GE_None && "Should not codegen an error");
2870 
2871  Function *F = CGM.getIntrinsic(IntrinsicID);
2872  llvm::FunctionType *FTy = F->getFunctionType();
2873 
2874  for (unsigned i = 0, e = E->getNumArgs(); i != e; ++i) {
2875  Value *ArgValue;
2876  // If this is a normal argument, just emit it as a scalar.
2877  if ((ICEArguments & (1 << i)) == 0) {
2878  ArgValue = EmitScalarExpr(E->getArg(i));
2879  } else {
2880  // If this is required to be a constant, constant fold it so that we
2881  // know that the generated intrinsic gets a ConstantInt.
2882  llvm::APSInt Result;
2883  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result,getContext());
2884  assert(IsConst && "Constant arg isn't actually constant?");
2885  (void)IsConst;
2886  ArgValue = llvm::ConstantInt::get(getLLVMContext(), Result);
2887  }
2888 
2889  // If the intrinsic arg type is different from the builtin arg type
2890  // we need to do a bit cast.
2891  llvm::Type *PTy = FTy->getParamType(i);
2892  if (PTy != ArgValue->getType()) {
2893  assert(PTy->canLosslesslyBitCastTo(FTy->getParamType(i)) &&
2894  "Must be able to losslessly bit cast to param");
2895  ArgValue = Builder.CreateBitCast(ArgValue, PTy);
2896  }
2897 
2898  Args.push_back(ArgValue);
2899  }
2900 
2901  Value *V = Builder.CreateCall(F, Args);
2902  QualType BuiltinRetType = E->getType();
2903 
2904  llvm::Type *RetTy = VoidTy;
2905  if (!BuiltinRetType->isVoidType())
2906  RetTy = ConvertType(BuiltinRetType);
2907 
2908  if (RetTy != V->getType()) {
2909  assert(V->getType()->canLosslesslyBitCastTo(RetTy) &&
2910  "Must be able to losslessly bit cast result type");
2911  V = Builder.CreateBitCast(V, RetTy);
2912  }
2913 
2914  return RValue::get(V);
2915  }
2916 
2917  // See if we have a target specific builtin that needs to be lowered.
2918  if (Value *V = EmitTargetBuiltinExpr(BuiltinID, E))
2919  return RValue::get(V);
2920 
2921  ErrorUnsupported(E, "builtin function");
2922 
2923  // Unknown builtin, for now just dump it out and return undef.
2924  return GetUndefRValue(E->getType());
2925 }
2926 
2927 static Value *EmitTargetArchBuiltinExpr(CodeGenFunction *CGF,
2928  unsigned BuiltinID, const CallExpr *E,
2929  llvm::Triple::ArchType Arch) {
2930  switch (Arch) {
2931  case llvm::Triple::arm:
2932  case llvm::Triple::armeb:
2933  case llvm::Triple::thumb:
2934  case llvm::Triple::thumbeb:
2935  return CGF->EmitARMBuiltinExpr(BuiltinID, E);
2936  case llvm::Triple::aarch64:
2937  case llvm::Triple::aarch64_be:
2938  return CGF->EmitAArch64BuiltinExpr(BuiltinID, E);
2939  case llvm::Triple::x86:
2940  case llvm::Triple::x86_64:
2941  return CGF->EmitX86BuiltinExpr(BuiltinID, E);
2942  case llvm::Triple::ppc:
2943  case llvm::Triple::ppc64:
2944  case llvm::Triple::ppc64le:
2945  return CGF->EmitPPCBuiltinExpr(BuiltinID, E);
2946  case llvm::Triple::r600:
2947  case llvm::Triple::amdgcn:
2948  return CGF->EmitAMDGPUBuiltinExpr(BuiltinID, E);
2949  case llvm::Triple::systemz:
2950  return CGF->EmitSystemZBuiltinExpr(BuiltinID, E);
2951  case llvm::Triple::nvptx:
2952  case llvm::Triple::nvptx64:
2953  return CGF->EmitNVPTXBuiltinExpr(BuiltinID, E);
2954  case llvm::Triple::wasm32:
2955  case llvm::Triple::wasm64:
2956  return CGF->EmitWebAssemblyBuiltinExpr(BuiltinID, E);
2957  default:
2958  return nullptr;
2959  }
2960 }
2961 
2962 Value *CodeGenFunction::EmitTargetBuiltinExpr(unsigned BuiltinID,
2963  const CallExpr *E) {
2964  if (getContext().BuiltinInfo.isAuxBuiltinID(BuiltinID)) {
2965  assert(getContext().getAuxTargetInfo() && "Missing aux target info");
2966  return EmitTargetArchBuiltinExpr(
2967  this, getContext().BuiltinInfo.getAuxBuiltinID(BuiltinID), E,
2968  getContext().getAuxTargetInfo()->getTriple().getArch());
2969  }
2970 
2971  return EmitTargetArchBuiltinExpr(this, BuiltinID, E,
2972  getTarget().getTriple().getArch());
2973 }
2974 
2975 static llvm::VectorType *GetNeonType(CodeGenFunction *CGF,
2976  NeonTypeFlags TypeFlags,
2977  bool V1Ty=false) {
2978  int IsQuad = TypeFlags.isQuad();
2979  switch (TypeFlags.getEltType()) {
2980  case NeonTypeFlags::Int8:
2981  case NeonTypeFlags::Poly8:
2982  return llvm::VectorType::get(CGF->Int8Ty, V1Ty ? 1 : (8 << IsQuad));
2983  case NeonTypeFlags::Int16:
2984  case NeonTypeFlags::Poly16:
2985  case NeonTypeFlags::Float16:
2986  return llvm::VectorType::get(CGF->Int16Ty, V1Ty ? 1 : (4 << IsQuad));
2987  case NeonTypeFlags::Int32:
2988  return llvm::VectorType::get(CGF->Int32Ty, V1Ty ? 1 : (2 << IsQuad));
2989  case NeonTypeFlags::Int64:
2990  case NeonTypeFlags::Poly64:
2991  return llvm::VectorType::get(CGF->Int64Ty, V1Ty ? 1 : (1 << IsQuad));
2992  case NeonTypeFlags::Poly128:
2993  // FIXME: i128 and f128 don't get full support in Clang and LLVM.
2994  // A lot of the i128 and f128 API is missing,
2995  // so we use v16i8 to represent poly128 and get it pattern matched.
2996  return llvm::VectorType::get(CGF->Int8Ty, 16);
2997  case NeonTypeFlags::Float32:
2998  return llvm::VectorType::get(CGF->FloatTy, V1Ty ? 1 : (2 << IsQuad));
2999  case NeonTypeFlags::Float64:
3000  return llvm::VectorType::get(CGF->DoubleTy, V1Ty ? 1 : (1 << IsQuad));
3001  }
3002  llvm_unreachable("Unknown vector element type!");
3003 }
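For orientation, a few example mappings produced by GetNeonType, derived from the switch above (the element counts follow the 64-bit D-register / 128-bit Q-register split encoded in IsQuad):

    //   NeonTypeFlags(Int8,    quad=false)  ->  <8 x i8>
    //   NeonTypeFlags(Int32,   quad=true)   ->  <4 x i32>
    //   NeonTypeFlags(Float32, quad=false)  ->  <2 x float>
    //   NeonTypeFlags(Poly128, any)         ->  <16 x i8>   (poly128 workaround)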
3004 
3005 static llvm::VectorType *GetFloatNeonType(CodeGenFunction *CGF,
3006  NeonTypeFlags IntTypeFlags) {
3007  int IsQuad = IntTypeFlags.isQuad();
3008  switch (IntTypeFlags.getEltType()) {
3009  case NeonTypeFlags::Int32:
3010  return llvm::VectorType::get(CGF->FloatTy, (2 << IsQuad));
3011  case NeonTypeFlags::Int64:
3012  return llvm::VectorType::get(CGF->DoubleTy, (1 << IsQuad));
3013  default:
3014  llvm_unreachable("Type can't be converted to floating-point!");
3015  }
3016 }
3017 
3018 Value *CodeGenFunction::EmitNeonSplat(Value *V, Constant *C) {
3019  unsigned nElts = V->getType()->getVectorNumElements();
3020  Value* SV = llvm::ConstantVector::getSplat(nElts, C);
3021  return Builder.CreateShuffleVector(V, V, SV, "lane");
3022 }
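A sketch of the IR this produces: splatting lane 1 of a <4 x i32> value uses a shufflevector whose mask repeats the lane index once per element:

    //   %lane = shufflevector <4 x i32> %v, <4 x i32> %v,
    //                         <4 x i32> <i32 1, i32 1, i32 1, i32 1>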
3023 
3024 Value *CodeGenFunction::EmitNeonCall(Function *F, SmallVectorImpl<Value*> &Ops,
3025  const char *name,
3026  unsigned shift, bool rightshift) {
3027  unsigned j = 0;
3028  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3029  ai != ae; ++ai, ++j)
3030  if (shift > 0 && shift == j)
3031  Ops[j] = EmitNeonShiftVector(Ops[j], ai->getType(), rightshift);
3032  else
3033  Ops[j] = Builder.CreateBitCast(Ops[j], ai->getType(), name);
3034 
3035  return Builder.CreateCall(F, Ops, name);
3036 }
3037 
3038 Value *CodeGenFunction::EmitNeonShiftVector(Value *V, llvm::Type *Ty,
3039  bool neg) {
3040  int SV = cast<ConstantInt>(V)->getSExtValue();
3041  return ConstantInt::get(Ty, neg ? -SV : SV);
3042 }
3043 
3044 // \brief Right-shift a vector by a constant.
3045 Value *CodeGenFunction::EmitNeonRShiftImm(Value *Vec, Value *Shift,
3046  llvm::Type *Ty, bool usgn,
3047  const char *name) {
3048  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
3049 
3050  int ShiftAmt = cast<ConstantInt>(Shift)->getSExtValue();
3051  int EltSize = VTy->getScalarSizeInBits();
3052 
3053  Vec = Builder.CreateBitCast(Vec, Ty);
3054 
3055  // lshr/ashr are undefined when the shift amount is equal to the vector
3056  // element size.
3057  if (ShiftAmt == EltSize) {
3058  if (usgn) {
3059  // Right-shifting an unsigned value by its size yields 0.
3060  return llvm::ConstantAggregateZero::get(VTy);
3061  } else {
3062  // Right-shifting a signed value by its size is equivalent
3063  // to a shift of size-1.
3064  --ShiftAmt;
3065  Shift = ConstantInt::get(VTy->getElementType(), ShiftAmt);
3066  }
3067  }
3068 
3069  Shift = EmitNeonShiftVector(Shift, Ty, false);
3070  if (usgn)
3071  return Builder.CreateLShr(Vec, Shift, name);
3072  else
3073  return Builder.CreateAShr(Vec, Shift, name);
3074 }
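Two concrete consequences of the element-size special case above, roughly (a sketch; the vector type shown is a Q register of i32 lanes):

    //   vshrq_n_u32(v, 32)  ->  zero vector            (lshr by 32 is undefined)
    //   vshrq_n_s32(v, 32)  ->  ashr <4 x i32> %v, 31  (sign-fill, same result)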
3075 
3076 enum {
3077  AddRetType = (1 << 0),
3078  Add1ArgType = (1 << 1),
3079  Add2ArgTypes = (1 << 2),
3080 
3081  VectorizeRetType = (1 << 3),
3082  VectorizeArgTypes = (1 << 4),
3083 
3084  InventFloatType = (1 << 5),
3085  UnsignedAlts = (1 << 6),
3086 
3087  Use64BitVectors = (1 << 7),
3088  Use128BitVectors = (1 << 8),
3089 
3089 
3090  Vectorize1ArgType = Add1ArgType | VectorizeArgTypes,
3091  VectorRet = AddRetType | VectorizeRetType,
3092  VectorRetGetArgs01 =
3093  AddRetType | Add2ArgTypes | VectorizeRetType | VectorizeArgTypes,
3094  FpCmpzModifiers =
3095  AddRetType | VectorizeRetType | Add1ArgType | InventFloatType
3096 };
3097 
3098 namespace {
3099 struct NeonIntrinsicInfo {
3100  const char *NameHint;
3101  unsigned BuiltinID;
3102  unsigned LLVMIntrinsic;
3103  unsigned AltLLVMIntrinsic;
3104  unsigned TypeModifier;
3105 
3106  bool operator<(unsigned RHSBuiltinID) const {
3107  return BuiltinID < RHSBuiltinID;
3108  }
3109  bool operator<(const NeonIntrinsicInfo &TE) const {
3110  return BuiltinID < TE.BuiltinID;
3111  }
3112 };
3113 } // end anonymous namespace
3114 
3115 #define NEONMAP0(NameBase) \
3116  { #NameBase, NEON::BI__builtin_neon_ ## NameBase, 0, 0, 0 }
3117 
3118 #define NEONMAP1(NameBase, LLVMIntrinsic, TypeModifier) \
3119  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3120  Intrinsic::LLVMIntrinsic, 0, TypeModifier }
3121 
3122 #define NEONMAP2(NameBase, LLVMIntrinsic, AltLLVMIntrinsic, TypeModifier) \
3123  { #NameBase, NEON:: BI__builtin_neon_ ## NameBase, \
3124  Intrinsic::LLVMIntrinsic, Intrinsic::AltLLVMIntrinsic, \
3125  TypeModifier }
3126 
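As an illustration, the first entry of the ARM table below expands, after macro substitution, to an initializer of the NeonIntrinsicInfo struct declared above:

    //   NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts)
    // expands to the initializer
    //   { "vabd_v", NEON::BI__builtin_neon_vabd_v,
    //     Intrinsic::arm_neon_vabdu, Intrinsic::arm_neon_vabds,
    //     Add1ArgType | UnsignedAlts }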
3127 static const NeonIntrinsicInfo ARMSIMDIntrinsicMap [] = {
3128  NEONMAP2(vabd_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3129  NEONMAP2(vabdq_v, arm_neon_vabdu, arm_neon_vabds, Add1ArgType | UnsignedAlts),
3130  NEONMAP1(vabs_v, arm_neon_vabs, 0),
3131  NEONMAP1(vabsq_v, arm_neon_vabs, 0),
3132  NEONMAP0(vaddhn_v),
3133  NEONMAP1(vaesdq_v, arm_neon_aesd, 0),
3134  NEONMAP1(vaeseq_v, arm_neon_aese, 0),
3135  NEONMAP1(vaesimcq_v, arm_neon_aesimc, 0),
3136  NEONMAP1(vaesmcq_v, arm_neon_aesmc, 0),
3137  NEONMAP1(vbsl_v, arm_neon_vbsl, AddRetType),
3138  NEONMAP1(vbslq_v, arm_neon_vbsl, AddRetType),
3139  NEONMAP1(vcage_v, arm_neon_vacge, 0),
3140  NEONMAP1(vcageq_v, arm_neon_vacge, 0),
3141  NEONMAP1(vcagt_v, arm_neon_vacgt, 0),
3142  NEONMAP1(vcagtq_v, arm_neon_vacgt, 0),
3143  NEONMAP1(vcale_v, arm_neon_vacge, 0),
3144  NEONMAP1(vcaleq_v, arm_neon_vacge, 0),
3145  NEONMAP1(vcalt_v, arm_neon_vacgt, 0),
3146  NEONMAP1(vcaltq_v, arm_neon_vacgt, 0),
3147  NEONMAP1(vcls_v, arm_neon_vcls, Add1ArgType),
3148  NEONMAP1(vclsq_v, arm_neon_vcls, Add1ArgType),
3149  NEONMAP1(vclz_v, ctlz, Add1ArgType),
3150  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3151  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3152  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3153  NEONMAP1(vcvt_f16_f32, arm_neon_vcvtfp2hf, 0),
3154  NEONMAP1(vcvt_f32_f16, arm_neon_vcvthf2fp, 0),
3155  NEONMAP0(vcvt_f32_v),
3156  NEONMAP2(vcvt_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3157  NEONMAP1(vcvt_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3158  NEONMAP1(vcvt_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3159  NEONMAP1(vcvt_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3160  NEONMAP1(vcvt_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3161  NEONMAP0(vcvt_s32_v),
3162  NEONMAP0(vcvt_s64_v),
3163  NEONMAP0(vcvt_u32_v),
3164  NEONMAP0(vcvt_u64_v),
3165  NEONMAP1(vcvta_s32_v, arm_neon_vcvtas, 0),
3166  NEONMAP1(vcvta_s64_v, arm_neon_vcvtas, 0),
3167  NEONMAP1(vcvta_u32_v, arm_neon_vcvtau, 0),
3168  NEONMAP1(vcvta_u64_v, arm_neon_vcvtau, 0),
3169  NEONMAP1(vcvtaq_s32_v, arm_neon_vcvtas, 0),
3170  NEONMAP1(vcvtaq_s64_v, arm_neon_vcvtas, 0),
3171  NEONMAP1(vcvtaq_u32_v, arm_neon_vcvtau, 0),
3172  NEONMAP1(vcvtaq_u64_v, arm_neon_vcvtau, 0),
3173  NEONMAP1(vcvtm_s32_v, arm_neon_vcvtms, 0),
3174  NEONMAP1(vcvtm_s64_v, arm_neon_vcvtms, 0),
3175  NEONMAP1(vcvtm_u32_v, arm_neon_vcvtmu, 0),
3176  NEONMAP1(vcvtm_u64_v, arm_neon_vcvtmu, 0),
3177  NEONMAP1(vcvtmq_s32_v, arm_neon_vcvtms, 0),
3178  NEONMAP1(vcvtmq_s64_v, arm_neon_vcvtms, 0),
3179  NEONMAP1(vcvtmq_u32_v, arm_neon_vcvtmu, 0),
3180  NEONMAP1(vcvtmq_u64_v, arm_neon_vcvtmu, 0),
3181  NEONMAP1(vcvtn_s32_v, arm_neon_vcvtns, 0),
3182  NEONMAP1(vcvtn_s64_v, arm_neon_vcvtns, 0),
3183  NEONMAP1(vcvtn_u32_v, arm_neon_vcvtnu, 0),
3184  NEONMAP1(vcvtn_u64_v, arm_neon_vcvtnu, 0),
3185  NEONMAP1(vcvtnq_s32_v, arm_neon_vcvtns, 0),
3186  NEONMAP1(vcvtnq_s64_v, arm_neon_vcvtns, 0),
3187  NEONMAP1(vcvtnq_u32_v, arm_neon_vcvtnu, 0),
3188  NEONMAP1(vcvtnq_u64_v, arm_neon_vcvtnu, 0),
3189  NEONMAP1(vcvtp_s32_v, arm_neon_vcvtps, 0),
3190  NEONMAP1(vcvtp_s64_v, arm_neon_vcvtps, 0),
3191  NEONMAP1(vcvtp_u32_v, arm_neon_vcvtpu, 0),
3192  NEONMAP1(vcvtp_u64_v, arm_neon_vcvtpu, 0),
3193  NEONMAP1(vcvtpq_s32_v, arm_neon_vcvtps, 0),
3194  NEONMAP1(vcvtpq_s64_v, arm_neon_vcvtps, 0),
3195  NEONMAP1(vcvtpq_u32_v, arm_neon_vcvtpu, 0),
3196  NEONMAP1(vcvtpq_u64_v, arm_neon_vcvtpu, 0),
3197  NEONMAP0(vcvtq_f32_v),
3198  NEONMAP2(vcvtq_n_f32_v, arm_neon_vcvtfxu2fp, arm_neon_vcvtfxs2fp, 0),
3199  NEONMAP1(vcvtq_n_s32_v, arm_neon_vcvtfp2fxs, 0),
3200  NEONMAP1(vcvtq_n_s64_v, arm_neon_vcvtfp2fxs, 0),
3201  NEONMAP1(vcvtq_n_u32_v, arm_neon_vcvtfp2fxu, 0),
3202  NEONMAP1(vcvtq_n_u64_v, arm_neon_vcvtfp2fxu, 0),
3203  NEONMAP0(vcvtq_s32_v),
3204  NEONMAP0(vcvtq_s64_v),
3205  NEONMAP0(vcvtq_u32_v),
3206  NEONMAP0(vcvtq_u64_v),
3207  NEONMAP0(vext_v),
3208  NEONMAP0(vextq_v),
3209  NEONMAP0(vfma_v),
3210  NEONMAP0(vfmaq_v),
3211  NEONMAP2(vhadd_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3212  NEONMAP2(vhaddq_v, arm_neon_vhaddu, arm_neon_vhadds, Add1ArgType | UnsignedAlts),
3213  NEONMAP2(vhsub_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3214  NEONMAP2(vhsubq_v, arm_neon_vhsubu, arm_neon_vhsubs, Add1ArgType | UnsignedAlts),
3215  NEONMAP0(vld1_dup_v),
3216  NEONMAP1(vld1_v, arm_neon_vld1, 0),
3217  NEONMAP0(vld1q_dup_v),
3218  NEONMAP1(vld1q_v, arm_neon_vld1, 0),
3219  NEONMAP1(vld2_lane_v, arm_neon_vld2lane, 0),
3220  NEONMAP1(vld2_v, arm_neon_vld2, 0),
3221  NEONMAP1(vld2q_lane_v, arm_neon_vld2lane, 0),
3222  NEONMAP1(vld2q_v, arm_neon_vld2, 0),
3223  NEONMAP1(vld3_lane_v, arm_neon_vld3lane, 0),
3224  NEONMAP1(vld3_v, arm_neon_vld3, 0),
3225  NEONMAP1(vld3q_lane_v, arm_neon_vld3lane, 0),
3226  NEONMAP1(vld3q_v, arm_neon_vld3, 0),
3227  NEONMAP1(vld4_lane_v, arm_neon_vld4lane, 0),
3228  NEONMAP1(vld4_v, arm_neon_vld4, 0),
3229  NEONMAP1(vld4q_lane_v, arm_neon_vld4lane, 0),
3230  NEONMAP1(vld4q_v, arm_neon_vld4, 0),
3231  NEONMAP2(vmax_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3232  NEONMAP1(vmaxnm_v, arm_neon_vmaxnm, Add1ArgType),
3233  NEONMAP1(vmaxnmq_v, arm_neon_vmaxnm, Add1ArgType),
3234  NEONMAP2(vmaxq_v, arm_neon_vmaxu, arm_neon_vmaxs, Add1ArgType | UnsignedAlts),
3235  NEONMAP2(vmin_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3236  NEONMAP1(vminnm_v, arm_neon_vminnm, Add1ArgType),
3237  NEONMAP1(vminnmq_v, arm_neon_vminnm, Add1ArgType),
3238  NEONMAP2(vminq_v, arm_neon_vminu, arm_neon_vmins, Add1ArgType | UnsignedAlts),
3239  NEONMAP0(vmovl_v),
3240  NEONMAP0(vmovn_v),
3241  NEONMAP1(vmul_v, arm_neon_vmulp, Add1ArgType),
3242  NEONMAP0(vmull_v),
3243  NEONMAP1(vmulq_v, arm_neon_vmulp, Add1ArgType),
3244  NEONMAP2(vpadal_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3245  NEONMAP2(vpadalq_v, arm_neon_vpadalu, arm_neon_vpadals, UnsignedAlts),
3246  NEONMAP1(vpadd_v, arm_neon_vpadd, Add1ArgType),
3247  NEONMAP2(vpaddl_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3248  NEONMAP2(vpaddlq_v, arm_neon_vpaddlu, arm_neon_vpaddls, UnsignedAlts),
3249  NEONMAP1(vpaddq_v, arm_neon_vpadd, Add1ArgType),
3250  NEONMAP2(vpmax_v, arm_neon_vpmaxu, arm_neon_vpmaxs, Add1ArgType | UnsignedAlts),
3251  NEONMAP2(vpmin_v, arm_neon_vpminu, arm_neon_vpmins, Add1ArgType | UnsignedAlts),
3252  NEONMAP1(vqabs_v, arm_neon_vqabs, Add1ArgType),
3253  NEONMAP1(vqabsq_v, arm_neon_vqabs, Add1ArgType),
3254  NEONMAP2(vqadd_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3255  NEONMAP2(vqaddq_v, arm_neon_vqaddu, arm_neon_vqadds, Add1ArgType | UnsignedAlts),
3256  NEONMAP2(vqdmlal_v, arm_neon_vqdmull, arm_neon_vqadds, 0),
3257  NEONMAP2(vqdmlsl_v, arm_neon_vqdmull, arm_neon_vqsubs, 0),
3258  NEONMAP1(vqdmulh_v, arm_neon_vqdmulh, Add1ArgType),
3259  NEONMAP1(vqdmulhq_v, arm_neon_vqdmulh, Add1ArgType),
3260  NEONMAP1(vqdmull_v, arm_neon_vqdmull, Add1ArgType),
3261  NEONMAP2(vqmovn_v, arm_neon_vqmovnu, arm_neon_vqmovns, Add1ArgType | UnsignedAlts),
3262  NEONMAP1(vqmovun_v, arm_neon_vqmovnsu, Add1ArgType),
3263  NEONMAP1(vqneg_v, arm_neon_vqneg, Add1ArgType),
3264  NEONMAP1(vqnegq_v, arm_neon_vqneg, Add1ArgType),
3265  NEONMAP1(vqrdmulh_v, arm_neon_vqrdmulh, Add1ArgType),
3266  NEONMAP1(vqrdmulhq_v, arm_neon_vqrdmulh, Add1ArgType),
3267  NEONMAP2(vqrshl_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3268  NEONMAP2(vqrshlq_v, arm_neon_vqrshiftu, arm_neon_vqrshifts, Add1ArgType | UnsignedAlts),
3269  NEONMAP2(vqshl_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3270  NEONMAP2(vqshl_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3271  NEONMAP2(vqshlq_n_v, arm_neon_vqshiftu, arm_neon_vqshifts, UnsignedAlts),
3272  NEONMAP2(vqshlq_v, arm_neon_vqshiftu, arm_neon_vqshifts, Add1ArgType | UnsignedAlts),
3273  NEONMAP1(vqshlu_n_v, arm_neon_vqshiftsu, 0),
3274  NEONMAP1(vqshluq_n_v, arm_neon_vqshiftsu, 0),
3275  NEONMAP2(vqsub_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3276  NEONMAP2(vqsubq_v, arm_neon_vqsubu, arm_neon_vqsubs, Add1ArgType | UnsignedAlts),
3277  NEONMAP1(vraddhn_v, arm_neon_vraddhn, Add1ArgType),
3278  NEONMAP2(vrecpe_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3279  NEONMAP2(vrecpeq_v, arm_neon_vrecpe, arm_neon_vrecpe, 0),
3280  NEONMAP1(vrecps_v, arm_neon_vrecps, Add1ArgType),
3281  NEONMAP1(vrecpsq_v, arm_neon_vrecps, Add1ArgType),
3282  NEONMAP2(vrhadd_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3283  NEONMAP2(vrhaddq_v, arm_neon_vrhaddu, arm_neon_vrhadds, Add1ArgType | UnsignedAlts),
3284  NEONMAP1(vrnd_v, arm_neon_vrintz, Add1ArgType),
3285  NEONMAP1(vrnda_v, arm_neon_vrinta, Add1ArgType),
3286  NEONMAP1(vrndaq_v, arm_neon_vrinta, Add1ArgType),
3287  NEONMAP1(vrndm_v, arm_neon_vrintm, Add1ArgType),
3288  NEONMAP1(vrndmq_v, arm_neon_vrintm, Add1ArgType),
3289  NEONMAP1(vrndn_v, arm_neon_vrintn, Add1ArgType),
3290  NEONMAP1(vrndnq_v, arm_neon_vrintn, Add1ArgType),
3291  NEONMAP1(vrndp_v, arm_neon_vrintp, Add1ArgType),
3292  NEONMAP1(vrndpq_v, arm_neon_vrintp, Add1ArgType),
3293  NEONMAP1(vrndq_v, arm_neon_vrintz, Add1ArgType),
3294  NEONMAP1(vrndx_v, arm_neon_vrintx, Add1ArgType),
3295  NEONMAP1(vrndxq_v, arm_neon_vrintx, Add1ArgType),
3296  NEONMAP2(vrshl_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3297  NEONMAP2(vrshlq_v, arm_neon_vrshiftu, arm_neon_vrshifts, Add1ArgType | UnsignedAlts),
3298  NEONMAP2(vrshr_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3299  NEONMAP2(vrshrq_n_v, arm_neon_vrshiftu, arm_neon_vrshifts, UnsignedAlts),
3300  NEONMAP2(vrsqrte_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3301  NEONMAP2(vrsqrteq_v, arm_neon_vrsqrte, arm_neon_vrsqrte, 0),
3302  NEONMAP1(vrsqrts_v, arm_neon_vrsqrts, Add1ArgType),
3303  NEONMAP1(vrsqrtsq_v, arm_neon_vrsqrts, Add1ArgType),
3304  NEONMAP1(vrsubhn_v, arm_neon_vrsubhn, Add1ArgType),
3305  NEONMAP1(vsha1su0q_v, arm_neon_sha1su0, 0),
3306  NEONMAP1(vsha1su1q_v, arm_neon_sha1su1, 0),
3307  NEONMAP1(vsha256h2q_v, arm_neon_sha256h2, 0),
3308  NEONMAP1(vsha256hq_v, arm_neon_sha256h, 0),
3309  NEONMAP1(vsha256su0q_v, arm_neon_sha256su0, 0),
3310  NEONMAP1(vsha256su1q_v, arm_neon_sha256su1, 0),
3311  NEONMAP0(vshl_n_v),
3312  NEONMAP2(vshl_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3313  NEONMAP0(vshll_n_v),
3314  NEONMAP0(vshlq_n_v),
3315  NEONMAP2(vshlq_v, arm_neon_vshiftu, arm_neon_vshifts, Add1ArgType | UnsignedAlts),
3316  NEONMAP0(vshr_n_v),
3317  NEONMAP0(vshrn_n_v),
3318  NEONMAP0(vshrq_n_v),
3319  NEONMAP1(vst1_v, arm_neon_vst1, 0),
3320  NEONMAP1(vst1q_v, arm_neon_vst1, 0),
3321  NEONMAP1(vst2_lane_v, arm_neon_vst2lane, 0),
3322  NEONMAP1(vst2_v, arm_neon_vst2, 0),
3323  NEONMAP1(vst2q_lane_v, arm_neon_vst2lane, 0),
3324  NEONMAP1(vst2q_v, arm_neon_vst2, 0),
3325  NEONMAP1(vst3_lane_v, arm_neon_vst3lane, 0),
3326  NEONMAP1(vst3_v, arm_neon_vst3, 0),
3327  NEONMAP1(vst3q_lane_v, arm_neon_vst3lane, 0),
3328  NEONMAP1(vst3q_v, arm_neon_vst3, 0),
3329  NEONMAP1(vst4_lane_v, arm_neon_vst4lane, 0),
3330  NEONMAP1(vst4_v, arm_neon_vst4, 0),
3331  NEONMAP1(vst4q_lane_v, arm_neon_vst4lane, 0),
3332  NEONMAP1(vst4q_v, arm_neon_vst4, 0),
3333  NEONMAP0(vsubhn_v),
3334  NEONMAP0(vtrn_v),
3335  NEONMAP0(vtrnq_v),
3336  NEONMAP0(vtst_v),
3337  NEONMAP0(vtstq_v),
3338  NEONMAP0(vuzp_v),
3339  NEONMAP0(vuzpq_v),
3340  NEONMAP0(vzip_v),
3341  NEONMAP0(vzipq_v)
3342 };
3343 
3344 static const NeonIntrinsicInfo AArch64SIMDIntrinsicMap[] = {
3345  NEONMAP1(vabs_v, aarch64_neon_abs, 0),
3346  NEONMAP1(vabsq_v, aarch64_neon_abs, 0),
3347  NEONMAP0(vaddhn_v),
3348  NEONMAP1(vaesdq_v, aarch64_crypto_aesd, 0),
3349  NEONMAP1(vaeseq_v, aarch64_crypto_aese, 0),
3350  NEONMAP1(vaesimcq_v, aarch64_crypto_aesimc, 0),
3351  NEONMAP1(vaesmcq_v, aarch64_crypto_aesmc, 0),
3352  NEONMAP1(vcage_v, aarch64_neon_facge, 0),
3353  NEONMAP1(vcageq_v, aarch64_neon_facge, 0),
3354  NEONMAP1(vcagt_v, aarch64_neon_facgt, 0),
3355  NEONMAP1(vcagtq_v, aarch64_neon_facgt, 0),
3356  NEONMAP1(vcale_v, aarch64_neon_facge, 0),
3357  NEONMAP1(vcaleq_v, aarch64_neon_facge, 0),
3358  NEONMAP1(vcalt_v, aarch64_neon_facgt, 0),
3359  NEONMAP1(vcaltq_v, aarch64_neon_facgt, 0),
3360  NEONMAP1(vcls_v, aarch64_neon_cls, Add1ArgType),
3361  NEONMAP1(vclsq_v, aarch64_neon_cls, Add1ArgType),
3362  NEONMAP1(vclz_v, ctlz, Add1ArgType),
3363  NEONMAP1(vclzq_v, ctlz, Add1ArgType),
3364  NEONMAP1(vcnt_v, ctpop, Add1ArgType),
3365  NEONMAP1(vcntq_v, ctpop, Add1ArgType),
3366  NEONMAP1(vcvt_f16_f32, aarch64_neon_vcvtfp2hf, 0),
3367  NEONMAP1(vcvt_f32_f16, aarch64_neon_vcvthf2fp, 0),
3368  NEONMAP0(vcvt_f32_v),
3369  NEONMAP2(vcvt_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3370  NEONMAP2(vcvt_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3371  NEONMAP1(vcvt_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3372  NEONMAP1(vcvt_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3373  NEONMAP1(vcvt_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3374  NEONMAP1(vcvt_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3375  NEONMAP0(vcvtq_f32_v),
3376  NEONMAP2(vcvtq_n_f32_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3377  NEONMAP2(vcvtq_n_f64_v, aarch64_neon_vcvtfxu2fp, aarch64_neon_vcvtfxs2fp, 0),
3378  NEONMAP1(vcvtq_n_s32_v, aarch64_neon_vcvtfp2fxs, 0),
3379  NEONMAP1(vcvtq_n_s64_v, aarch64_neon_vcvtfp2fxs, 0),
3380  NEONMAP1(vcvtq_n_u32_v, aarch64_neon_vcvtfp2fxu, 0),
3381  NEONMAP1(vcvtq_n_u64_v, aarch64_neon_vcvtfp2fxu, 0),
3382  NEONMAP1(vcvtx_f32_v, aarch64_neon_fcvtxn, AddRetType | Add1ArgType),
3383  NEONMAP0(vext_v),
3384  NEONMAP0(vextq_v),
3385  NEONMAP0(vfma_v),
3386  NEONMAP0(vfmaq_v),
3387  NEONMAP2(vhadd_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3388  NEONMAP2(vhaddq_v, aarch64_neon_uhadd, aarch64_neon_shadd, Add1ArgType | UnsignedAlts),
3389  NEONMAP2(vhsub_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3390  NEONMAP2(vhsubq_v, aarch64_neon_uhsub, aarch64_neon_shsub, Add1ArgType | UnsignedAlts),
3391  NEONMAP0(vmovl_v),
3392  NEONMAP0(vmovn_v),
3393  NEONMAP1(vmul_v, aarch64_neon_pmul, Add1ArgType),
3394  NEONMAP1(vmulq_v, aarch64_neon_pmul, Add1ArgType),
3395  NEONMAP1(vpadd_v, aarch64_neon_addp, Add1ArgType),
3396  NEONMAP2(vpaddl_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3397  NEONMAP2(vpaddlq_v, aarch64_neon_uaddlp, aarch64_neon_saddlp, UnsignedAlts),
3398  NEONMAP1(vpaddq_v, aarch64_neon_addp, Add1ArgType),
3399  NEONMAP1(vqabs_v, aarch64_neon_sqabs, Add1ArgType),
3400  NEONMAP1(vqabsq_v, aarch64_neon_sqabs, Add1ArgType),
3401  NEONMAP2(vqadd_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3402  NEONMAP2(vqaddq_v, aarch64_neon_uqadd, aarch64_neon_sqadd, Add1ArgType | UnsignedAlts),
3403  NEONMAP2(vqdmlal_v, aarch64_neon_sqdmull, aarch64_neon_sqadd, 0),
3404  NEONMAP2(vqdmlsl_v, aarch64_neon_sqdmull, aarch64_neon_sqsub, 0),
3405  NEONMAP1(vqdmulh_v, aarch64_neon_sqdmulh, Add1ArgType),
3406  NEONMAP1(vqdmulhq_v, aarch64_neon_sqdmulh, Add1ArgType),
3407  NEONMAP1(vqdmull_v, aarch64_neon_sqdmull, Add1ArgType),
3408  NEONMAP2(vqmovn_v, aarch64_neon_uqxtn, aarch64_neon_sqxtn, Add1ArgType | UnsignedAlts),
3409  NEONMAP1(vqmovun_v, aarch64_neon_sqxtun, Add1ArgType),
3410  NEONMAP1(vqneg_v, aarch64_neon_sqneg, Add1ArgType),
3411  NEONMAP1(vqnegq_v, aarch64_neon_sqneg, Add1ArgType),
3412  NEONMAP1(vqrdmulh_v, aarch64_neon_sqrdmulh, Add1ArgType),
3413  NEONMAP1(vqrdmulhq_v, aarch64_neon_sqrdmulh, Add1ArgType),
3414  NEONMAP2(vqrshl_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3415  NEONMAP2(vqrshlq_v, aarch64_neon_uqrshl, aarch64_neon_sqrshl, Add1ArgType | UnsignedAlts),
3416  NEONMAP2(vqshl_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl, UnsignedAlts),
3417  NEONMAP2(vqshl_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3418  NEONMAP2(vqshlq_n_v, aarch64_neon_uqshl, aarch64_neon_sqshl,UnsignedAlts),
3419  NEONMAP2(vqshlq_v, aarch64_neon_uqshl, aarch64_neon_sqshl, Add1ArgType | UnsignedAlts),
3420  NEONMAP1(vqshlu_n_v, aarch64_neon_sqshlu, 0),
3421  NEONMAP1(vqshluq_n_v, aarch64_neon_sqshlu, 0),
3422  NEONMAP2(vqsub_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3423  NEONMAP2(vqsubq_v, aarch64_neon_uqsub, aarch64_neon_sqsub, Add1ArgType | UnsignedAlts),
3424  NEONMAP1(vraddhn_v, aarch64_neon_raddhn, Add1ArgType),
3425  NEONMAP2(vrecpe_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3426  NEONMAP2(vrecpeq_v, aarch64_neon_frecpe, aarch64_neon_urecpe, 0),
3427  NEONMAP1(vrecps_v, aarch64_neon_frecps, Add1ArgType),
3428  NEONMAP1(vrecpsq_v, aarch64_neon_frecps, Add1ArgType),
3429  NEONMAP2(vrhadd_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3430  NEONMAP2(vrhaddq_v, aarch64_neon_urhadd, aarch64_neon_srhadd, Add1ArgType | UnsignedAlts),
3431  NEONMAP2(vrshl_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3432  NEONMAP2(vrshlq_v, aarch64_neon_urshl, aarch64_neon_srshl, Add1ArgType | UnsignedAlts),
3433  NEONMAP2(vrshr_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3434  NEONMAP2(vrshrq_n_v, aarch64_neon_urshl, aarch64_neon_srshl, UnsignedAlts),
3435  NEONMAP2(vrsqrte_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3436  NEONMAP2(vrsqrteq_v, aarch64_neon_frsqrte, aarch64_neon_ursqrte, 0),
3437  NEONMAP1(vrsqrts_v, aarch64_neon_frsqrts, Add1ArgType),
3438  NEONMAP1(vrsqrtsq_v, aarch64_neon_frsqrts, Add1ArgType),
3439  NEONMAP1(vrsubhn_v, aarch64_neon_rsubhn, Add1ArgType),
3440  NEONMAP1(vsha1su0q_v, aarch64_crypto_sha1su0, 0),
3441  NEONMAP1(vsha1su1q_v, aarch64_crypto_sha1su1, 0),
3442  NEONMAP1(vsha256h2q_v, aarch64_crypto_sha256h2, 0),
3443  NEONMAP1(vsha256hq_v, aarch64_crypto_sha256h, 0),
3444  NEONMAP1(vsha256su0q_v, aarch64_crypto_sha256su0, 0),
3445  NEONMAP1(vsha256su1q_v, aarch64_crypto_sha256su1, 0),
3446  NEONMAP0(vshl_n_v),
3447  NEONMAP2(vshl_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3448  NEONMAP0(vshll_n_v),
3449  NEONMAP0(vshlq_n_v),
3450  NEONMAP2(vshlq_v, aarch64_neon_ushl, aarch64_neon_sshl, Add1ArgType | UnsignedAlts),
3451  NEONMAP0(vshr_n_v),
3452  NEONMAP0(vshrn_n_v),
3453  NEONMAP0(vshrq_n_v),
3454  NEONMAP0(vsubhn_v),
3455  NEONMAP0(vtst_v),
3456  NEONMAP0(vtstq_v),
3457 };
3458 
3459 static const NeonIntrinsicInfo AArch64SISDIntrinsicMap[] = {
3460  NEONMAP1(vabdd_f64, aarch64_sisd_fabd, Add1ArgType),
3461  NEONMAP1(vabds_f32, aarch64_sisd_fabd, Add1ArgType),
3462  NEONMAP1(vabsd_s64, aarch64_neon_abs, Add1ArgType),
3463  NEONMAP1(vaddlv_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3464  NEONMAP1(vaddlv_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3465  NEONMAP1(vaddlvq_s32, aarch64_neon_saddlv, AddRetType | Add1ArgType),
3466  NEONMAP1(vaddlvq_u32, aarch64_neon_uaddlv, AddRetType | Add1ArgType),
3467  NEONMAP1(vaddv_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3468  NEONMAP1(vaddv_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3469  NEONMAP1(vaddv_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3470  NEONMAP1(vaddvq_f32, aarch64_neon_faddv, AddRetType | Add1ArgType),
3471  NEONMAP1(vaddvq_f64, aarch64_neon_faddv, AddRetType | Add1ArgType),
3472  NEONMAP1(vaddvq_s32, aarch64_neon_saddv, AddRetType | Add1ArgType),
3473  NEONMAP1(vaddvq_s64, aarch64_neon_saddv, AddRetType | Add1ArgType),
3474  NEONMAP1(vaddvq_u32, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3475  NEONMAP1(vaddvq_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3476  NEONMAP1(vcaged_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3477  NEONMAP1(vcages_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3478  NEONMAP1(vcagtd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3479  NEONMAP1(vcagts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3480  NEONMAP1(vcaled_f64, aarch64_neon_facge, AddRetType | Add1ArgType),
3481  NEONMAP1(vcales_f32, aarch64_neon_facge, AddRetType | Add1ArgType),
3482  NEONMAP1(vcaltd_f64, aarch64_neon_facgt, AddRetType | Add1ArgType),
3483  NEONMAP1(vcalts_f32, aarch64_neon_facgt, AddRetType | Add1ArgType),
3484  NEONMAP1(vcvtad_s64_f64, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3485  NEONMAP1(vcvtad_u64_f64, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3486  NEONMAP1(vcvtas_s32_f32, aarch64_neon_fcvtas, AddRetType | Add1ArgType),
3487  NEONMAP1(vcvtas_u32_f32, aarch64_neon_fcvtau, AddRetType | Add1ArgType),
3488  NEONMAP1(vcvtd_n_f64_s64, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3489  NEONMAP1(vcvtd_n_f64_u64, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3490  NEONMAP1(vcvtd_n_s64_f64, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3491  NEONMAP1(vcvtd_n_u64_f64, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3492  NEONMAP1(vcvtmd_s64_f64, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3493  NEONMAP1(vcvtmd_u64_f64, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3494  NEONMAP1(vcvtms_s32_f32, aarch64_neon_fcvtms, AddRetType | Add1ArgType),
3495  NEONMAP1(vcvtms_u32_f32, aarch64_neon_fcvtmu, AddRetType | Add1ArgType),
3496  NEONMAP1(vcvtnd_s64_f64, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3497  NEONMAP1(vcvtnd_u64_f64, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3498  NEONMAP1(vcvtns_s32_f32, aarch64_neon_fcvtns, AddRetType | Add1ArgType),
3499  NEONMAP1(vcvtns_u32_f32, aarch64_neon_fcvtnu, AddRetType | Add1ArgType),
3500  NEONMAP1(vcvtpd_s64_f64, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3501  NEONMAP1(vcvtpd_u64_f64, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3502  NEONMAP1(vcvtps_s32_f32, aarch64_neon_fcvtps, AddRetType | Add1ArgType),
3503  NEONMAP1(vcvtps_u32_f32, aarch64_neon_fcvtpu, AddRetType | Add1ArgType),
3504  NEONMAP1(vcvts_n_f32_s32, aarch64_neon_vcvtfxs2fp, AddRetType | Add1ArgType),
3505  NEONMAP1(vcvts_n_f32_u32, aarch64_neon_vcvtfxu2fp, AddRetType | Add1ArgType),
3506  NEONMAP1(vcvts_n_s32_f32, aarch64_neon_vcvtfp2fxs, AddRetType | Add1ArgType),
3507  NEONMAP1(vcvts_n_u32_f32, aarch64_neon_vcvtfp2fxu, AddRetType | Add1ArgType),
3508  NEONMAP1(vcvtxd_f32_f64, aarch64_sisd_fcvtxn, 0),
3509  NEONMAP1(vmaxnmv_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3510  NEONMAP1(vmaxnmvq_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3511  NEONMAP1(vmaxnmvq_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3512  NEONMAP1(vmaxv_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3513  NEONMAP1(vmaxv_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3514  NEONMAP1(vmaxv_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3515  NEONMAP1(vmaxvq_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3516  NEONMAP1(vmaxvq_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3517  NEONMAP1(vmaxvq_s32, aarch64_neon_smaxv, AddRetType | Add1ArgType),
3518  NEONMAP1(vmaxvq_u32, aarch64_neon_umaxv, AddRetType | Add1ArgType),
3519  NEONMAP1(vminnmv_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3520  NEONMAP1(vminnmvq_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3521  NEONMAP1(vminnmvq_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3522  NEONMAP1(vminv_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3523  NEONMAP1(vminv_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3524  NEONMAP1(vminv_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3525  NEONMAP1(vminvq_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3526  NEONMAP1(vminvq_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3527  NEONMAP1(vminvq_s32, aarch64_neon_sminv, AddRetType | Add1ArgType),
3528  NEONMAP1(vminvq_u32, aarch64_neon_uminv, AddRetType | Add1ArgType),
3529  NEONMAP1(vmull_p64, aarch64_neon_pmull64, 0),
3530  NEONMAP1(vmulxd_f64, aarch64_neon_fmulx, Add1ArgType),
3531  NEONMAP1(vmulxs_f32, aarch64_neon_fmulx, Add1ArgType),
3532  NEONMAP1(vpaddd_s64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3533  NEONMAP1(vpaddd_u64, aarch64_neon_uaddv, AddRetType | Add1ArgType),
3534  NEONMAP1(vpmaxnmqd_f64, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3535  NEONMAP1(vpmaxnms_f32, aarch64_neon_fmaxnmv, AddRetType | Add1ArgType),
3536  NEONMAP1(vpmaxqd_f64, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3537  NEONMAP1(vpmaxs_f32, aarch64_neon_fmaxv, AddRetType | Add1ArgType),
3538  NEONMAP1(vpminnmqd_f64, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3539  NEONMAP1(vpminnms_f32, aarch64_neon_fminnmv, AddRetType | Add1ArgType),
3540  NEONMAP1(vpminqd_f64, aarch64_neon_fminv, AddRetType | Add1ArgType),
3541  NEONMAP1(vpmins_f32, aarch64_neon_fminv, AddRetType | Add1ArgType),
3542  NEONMAP1(vqabsb_s8, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3543  NEONMAP1(vqabsd_s64, aarch64_neon_sqabs, Add1ArgType),
3544  NEONMAP1(vqabsh_s16, aarch64_neon_sqabs, Vectorize1ArgType | Use64BitVectors),
3545  NEONMAP1(vqabss_s32, aarch64_neon_sqabs, Add1ArgType),
3546  NEONMAP1(vqaddb_s8, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3547  NEONMAP1(vqaddb_u8, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3548  NEONMAP1(vqaddd_s64, aarch64_neon_sqadd, Add1ArgType),
3549  NEONMAP1(vqaddd_u64, aarch64_neon_uqadd, Add1ArgType),
3550  NEONMAP1(vqaddh_s16, aarch64_neon_sqadd, Vectorize1ArgType | Use64BitVectors),
3551  NEONMAP1(vqaddh_u16, aarch64_neon_uqadd, Vectorize1ArgType | Use64BitVectors),
3552  NEONMAP1(vqadds_s32, aarch64_neon_sqadd, Add1ArgType),
3553  NEONMAP1(vqadds_u32, aarch64_neon_uqadd, Add1ArgType),
3554  NEONMAP1(vqdmulhh_s16, aarch64_neon_sqdmulh, Vectorize1ArgType | Use64BitVectors),
3555  NEONMAP1(vqdmulhs_s32, aarch64_neon_sqdmulh, Add1ArgType),
3556  NEONMAP1(vqdmullh_s16, aarch64_neon_sqdmull, VectorRet | Use128BitVectors),
3557  NEONMAP1(vqdmulls_s32, aarch64_neon_sqdmulls_scalar, 0),
3558  NEONMAP1(vqmovnd_s64, aarch64_neon_scalar_sqxtn, AddRetType | Add1ArgType),
3559  NEONMAP1(vqmovnd_u64, aarch64_neon_scalar_uqxtn, AddRetType | Add1ArgType),
3560  NEONMAP1(vqmovnh_s16, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3561  NEONMAP1(vqmovnh_u16, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3562  NEONMAP1(vqmovns_s32, aarch64_neon_sqxtn, VectorRet | Use64BitVectors),
3563  NEONMAP1(vqmovns_u32, aarch64_neon_uqxtn, VectorRet | Use64BitVectors),
3564  NEONMAP1(vqmovund_s64, aarch64_neon_scalar_sqxtun, AddRetType | Add1ArgType),
3565  NEONMAP1(vqmovunh_s16, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3566  NEONMAP1(vqmovuns_s32, aarch64_neon_sqxtun, VectorRet | Use64BitVectors),
3567  NEONMAP1(vqnegb_s8, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3568  NEONMAP1(vqnegd_s64, aarch64_neon_sqneg, Add1ArgType),
3569  NEONMAP1(vqnegh_s16, aarch64_neon_sqneg, Vectorize1ArgType | Use64BitVectors),
3570  NEONMAP1(vqnegs_s32, aarch64_neon_sqneg, Add1ArgType),
3571  NEONMAP1(vqrdmulhh_s16, aarch64_neon_sqrdmulh, Vectorize1ArgType | Use64BitVectors),
3572  NEONMAP1(vqrdmulhs_s32, aarch64_neon_sqrdmulh, Add1ArgType),
3573  NEONMAP1(vqrshlb_s8, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3574  NEONMAP1(vqrshlb_u8, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3575  NEONMAP1(vqrshld_s64, aarch64_neon_sqrshl, Add1ArgType),
3576  NEONMAP1(vqrshld_u64, aarch64_neon_uqrshl, Add1ArgType),
3577  NEONMAP1(vqrshlh_s16, aarch64_neon_sqrshl, Vectorize1ArgType | Use64BitVectors),
3578  NEONMAP1(vqrshlh_u16, aarch64_neon_uqrshl, Vectorize1ArgType | Use64BitVectors),
3579  NEONMAP1(vqrshls_s32, aarch64_neon_sqrshl, Add1ArgType),
3580  NEONMAP1(vqrshls_u32, aarch64_neon_uqrshl, Add1ArgType),
3581  NEONMAP1(vqrshrnd_n_s64, aarch64_neon_sqrshrn, AddRetType),
3582  NEONMAP1(vqrshrnd_n_u64, aarch64_neon_uqrshrn, AddRetType),
3583  NEONMAP1(vqrshrnh_n_s16, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3584  NEONMAP1(vqrshrnh_n_u16, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3585  NEONMAP1(vqrshrns_n_s32, aarch64_neon_sqrshrn, VectorRet | Use64BitVectors),
3586  NEONMAP1(vqrshrns_n_u32, aarch64_neon_uqrshrn, VectorRet | Use64BitVectors),
3587  NEONMAP1(vqrshrund_n_s64, aarch64_neon_sqrshrun, AddRetType),
3588  NEONMAP1(vqrshrunh_n_s16, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3589  NEONMAP1(vqrshruns_n_s32, aarch64_neon_sqrshrun, VectorRet | Use64BitVectors),
3590  NEONMAP1(vqshlb_n_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3591  NEONMAP1(vqshlb_n_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3592  NEONMAP1(vqshlb_s8, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3593  NEONMAP1(vqshlb_u8, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3594  NEONMAP1(vqshld_s64, aarch64_neon_sqshl, Add1ArgType),
3595  NEONMAP1(vqshld_u64, aarch64_neon_uqshl, Add1ArgType),
3596  NEONMAP1(vqshlh_n_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3597  NEONMAP1(vqshlh_n_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3598  NEONMAP1(vqshlh_s16, aarch64_neon_sqshl, Vectorize1ArgType | Use64BitVectors),
3599  NEONMAP1(vqshlh_u16, aarch64_neon_uqshl, Vectorize1ArgType | Use64BitVectors),
3600  NEONMAP1(vqshls_n_s32, aarch64_neon_sqshl, Add1ArgType),
3601  NEONMAP1(vqshls_n_u32, aarch64_neon_uqshl, Add1ArgType),
3602  NEONMAP1(vqshls_s32, aarch64_neon_sqshl, Add1ArgType),
3603  NEONMAP1(vqshls_u32, aarch64_neon_uqshl, Add1ArgType),
3604  NEONMAP1(vqshlub_n_s8, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3605  NEONMAP1(vqshluh_n_s16, aarch64_neon_sqshlu, Vectorize1ArgType | Use64BitVectors),
3606  NEONMAP1(vqshlus_n_s32, aarch64_neon_sqshlu, Add1ArgType),
3607  NEONMAP1(vqshrnd_n_s64, aarch64_neon_sqshrn, AddRetType),
3608  NEONMAP1(vqshrnd_n_u64, aarch64_neon_uqshrn, AddRetType),
3609  NEONMAP1(vqshrnh_n_s16, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3610  NEONMAP1(vqshrnh_n_u16, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3611  NEONMAP1(vqshrns_n_s32, aarch64_neon_sqshrn, VectorRet | Use64BitVectors),
3612  NEONMAP1(vqshrns_n_u32, aarch64_neon_uqshrn, VectorRet | Use64BitVectors),
3613  NEONMAP1(vqshrund_n_s64, aarch64_neon_sqshrun, AddRetType),
3614  NEONMAP1(vqshrunh_n_s16, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3615  NEONMAP1(vqshruns_n_s32, aarch64_neon_sqshrun, VectorRet | Use64BitVectors),
3616  NEONMAP1(vqsubb_s8, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3617  NEONMAP1(vqsubb_u8, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3618  NEONMAP1(vqsubd_s64, aarch64_neon_sqsub, Add1ArgType),
3619  NEONMAP1(vqsubd_u64, aarch64_neon_uqsub, Add1ArgType),
3620  NEONMAP1(vqsubh_s16, aarch64_neon_sqsub, Vectorize1ArgType | Use64BitVectors),
3621  NEONMAP1(vqsubh_u16, aarch64_neon_uqsub, Vectorize1ArgType | Use64BitVectors),
3622  NEONMAP1(vqsubs_s32, aarch64_neon_sqsub, Add1ArgType),
3623  NEONMAP1(vqsubs_u32, aarch64_neon_uqsub, Add1ArgType),
3624  NEONMAP1(vrecped_f64, aarch64_neon_frecpe, Add1ArgType),
3625  NEONMAP1(vrecpes_f32, aarch64_neon_frecpe, Add1ArgType),
3626  NEONMAP1(vrecpxd_f64, aarch64_neon_frecpx, Add1ArgType),
3627  NEONMAP1(vrecpxs_f32, aarch64_neon_frecpx, Add1ArgType),
3628  NEONMAP1(vrshld_s64, aarch64_neon_srshl, Add1ArgType),
3629  NEONMAP1(vrshld_u64, aarch64_neon_urshl, Add1ArgType),
3630  NEONMAP1(vrsqrted_f64, aarch64_neon_frsqrte, Add1ArgType),
3631  NEONMAP1(vrsqrtes_f32, aarch64_neon_frsqrte, Add1ArgType),
3632  NEONMAP1(vrsqrtsd_f64, aarch64_neon_frsqrts, Add1ArgType),
3633  NEONMAP1(vrsqrtss_f32, aarch64_neon_frsqrts, Add1ArgType),
3634  NEONMAP1(vsha1cq_u32, aarch64_crypto_sha1c, 0),
3635  NEONMAP1(vsha1h_u32, aarch64_crypto_sha1h, 0),
3636  NEONMAP1(vsha1mq_u32, aarch64_crypto_sha1m, 0),
3637  NEONMAP1(vsha1pq_u32, aarch64_crypto_sha1p, 0),
3638  NEONMAP1(vshld_s64, aarch64_neon_sshl, Add1ArgType),
3639  NEONMAP1(vshld_u64, aarch64_neon_ushl, Add1ArgType),
3640  NEONMAP1(vslid_n_s64, aarch64_neon_vsli, Vectorize1ArgType),
3641  NEONMAP1(vslid_n_u64, aarch64_neon_vsli, Vectorize1ArgType),
3642  NEONMAP1(vsqaddb_u8, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3643  NEONMAP1(vsqaddd_u64, aarch64_neon_usqadd, Add1ArgType),
3644  NEONMAP1(vsqaddh_u16, aarch64_neon_usqadd, Vectorize1ArgType | Use64BitVectors),
3645  NEONMAP1(vsqadds_u32, aarch64_neon_usqadd, Add1ArgType),
3646  NEONMAP1(vsrid_n_s64, aarch64_neon_vsri, Vectorize1ArgType),
3647  NEONMAP1(vsrid_n_u64, aarch64_neon_vsri, Vectorize1ArgType),
3648  NEONMAP1(vuqaddb_s8, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3649  NEONMAP1(vuqaddd_s64, aarch64_neon_suqadd, Add1ArgType),
3650  NEONMAP1(vuqaddh_s16, aarch64_neon_suqadd, Vectorize1ArgType | Use64BitVectors),
3651  NEONMAP1(vuqadds_s32, aarch64_neon_suqadd, Add1ArgType),
3652 };
3653 
3654 #undef NEONMAP0
3655 #undef NEONMAP1
3656 #undef NEONMAP2
3657 
3658 static bool NEONSIMDIntrinsicsProvenSorted = false;
3659 
3660 static bool AArch64SIMDIntrinsicsProvenSorted = false;
3661 static bool AArch64SISDIntrinsicsProvenSorted = false;
3662 
3663 
3664 static const NeonIntrinsicInfo *
3665 findNeonIntrinsicInMap(ArrayRef<NeonIntrinsicInfo> IntrinsicMap,
3666  unsigned BuiltinID, bool &MapProvenSorted) {
3667 
3668 #ifndef NDEBUG
3669  if (!MapProvenSorted) {
3670  assert(std::is_sorted(std::begin(IntrinsicMap), std::end(IntrinsicMap)));
3671  MapProvenSorted = true;
3672  }
3673 #endif
3674 
3675  const NeonIntrinsicInfo *Builtin =
3676  std::lower_bound(IntrinsicMap.begin(), IntrinsicMap.end(), BuiltinID);
3677 
3678  if (Builtin != IntrinsicMap.end() && Builtin->BuiltinID == BuiltinID)
3679  return Builtin;
3680 
3681  return nullptr;
3682 }
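A sketch of how a caller is expected to use this lookup; the real call sites live in the per-target Emit*BuiltinExpr functions later in this file, and the surrounding variables (E, Ops, PtrOp0, PtrOp1) are assumed to come from that context:

    // Binary-search the table, which is sorted by BuiltinID; a null result
    // means the builtin is not table-driven and needs bespoke handling.
    if (const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
            ARMSIMDIntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted))
      return EmitCommonNeonBuiltinExpr(
          Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
          Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);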
3683 
3684 Function *CodeGenFunction::LookupNeonLLVMIntrinsic(unsigned IntrinsicID,
3685  unsigned Modifier,
3686  llvm::Type *ArgType,
3687  const CallExpr *E) {
3688  int VectorSize = 0;
3689  if (Modifier & Use64BitVectors)
3690  VectorSize = 64;
3691  else if (Modifier & Use128BitVectors)
3692  VectorSize = 128;
3693 
3694  // Return type.
3695  SmallVector<llvm::Type *, 3> Tys;
3696  if (Modifier & AddRetType) {
3697  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
3698  if (Modifier & VectorizeRetType)
3699  Ty = llvm::VectorType::get(
3700  Ty, VectorSize ? VectorSize / Ty->getPrimitiveSizeInBits() : 1);
3701 
3702  Tys.push_back(Ty);
3703  }
3704 
3705  // Arguments.
3706  if (Modifier & VectorizeArgTypes) {
3707  int Elts = VectorSize ? VectorSize / ArgType->getPrimitiveSizeInBits() : 1;
3708  ArgType = llvm::VectorType::get(ArgType, Elts);
3709  }
3710 
3711  if (Modifier & (Add1ArgType | Add2ArgTypes))
3712  Tys.push_back(ArgType);
3713 
3714  if (Modifier & Add2ArgTypes)
3715  Tys.push_back(ArgType);
3716 
3717  if (Modifier & InventFloatType)
3718  Tys.push_back(FloatTy);
3719 
3720  return CGM.getIntrinsic(IntrinsicID, Tys);
3721 }
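As a worked example (a sketch; the concrete types are assumptions): an entry tagged AddRetType | Add1ArgType such as vaddlv_u32, with an i64 call return type and a <2 x i32> argument type, pushes both onto Tys, so the overloaded declaration returned by CGM.getIntrinsic is the i64/v2i32 instantiation:

    //   Modifier = AddRetType | Add1ArgType, ret = i64, ArgType = <2 x i32>
    //   Tys      = { i64, <2 x i32> }
    //   result   = declaration of @llvm.aarch64.neon.uaddlv.i64.v2i32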
3722 
3723 static Value *EmitCommonNeonSISDBuiltinExpr(CodeGenFunction &CGF,
3724  const NeonIntrinsicInfo &SISDInfo,
3725  SmallVectorImpl<Value *> &Ops,
3726  const CallExpr *E) {
3727  unsigned BuiltinID = SISDInfo.BuiltinID;
3728  unsigned int Int = SISDInfo.LLVMIntrinsic;
3729  unsigned Modifier = SISDInfo.TypeModifier;
3730  const char *s = SISDInfo.NameHint;
3731 
3732  switch (BuiltinID) {
3733  case NEON::BI__builtin_neon_vcled_s64:
3734  case NEON::BI__builtin_neon_vcled_u64:
3735  case NEON::BI__builtin_neon_vcles_f32:
3736  case NEON::BI__builtin_neon_vcled_f64:
3737  case NEON::BI__builtin_neon_vcltd_s64:
3738  case NEON::BI__builtin_neon_vcltd_u64:
3739  case NEON::BI__builtin_neon_vclts_f32:
3740  case NEON::BI__builtin_neon_vcltd_f64:
3741  case NEON::BI__builtin_neon_vcales_f32:
3742  case NEON::BI__builtin_neon_vcaled_f64:
3743  case NEON::BI__builtin_neon_vcalts_f32:
3744  case NEON::BI__builtin_neon_vcaltd_f64:
3745  // Only one direction of the comparisons actually exists: cmle is really a
3746  // cmge with swapped operands. The table gives us the right intrinsic, but we
3747  // still need to do the swap.
3748  std::swap(Ops[0], Ops[1]);
3749  break;
3750  }
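    // For example, vcaled_f64(a, b) maps to aarch64_neon_facge in the SISD
    // table above; after the operand swap it is emitted as facge(b, a),
    // i.e. |b| >= |a|, which is the meaning of vcale.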
3751 
3752  assert(Int && "Generic code assumes a valid intrinsic");
3753 
3754  // Determine the type(s) of this overloaded AArch64 intrinsic.
3755  const Expr *Arg = E->getArg(0);
3756  llvm::Type *ArgTy = CGF.ConvertType(Arg->getType());
3757  Function *F = CGF.LookupNeonLLVMIntrinsic(Int, Modifier, ArgTy, E);
3758 
3759  int j = 0;
3760  ConstantInt *C0 = ConstantInt::get(CGF.SizeTy, 0);
3761  for (Function::const_arg_iterator ai = F->arg_begin(), ae = F->arg_end();
3762  ai != ae; ++ai, ++j) {
3763  llvm::Type *ArgTy = ai->getType();
3764  if (Ops[j]->getType()->getPrimitiveSizeInBits() ==
3765  ArgTy->getPrimitiveSizeInBits())
3766  continue;
3767 
3768  assert(ArgTy->isVectorTy() && !Ops[j]->getType()->isVectorTy());
3769  // The constant argument to an _n_ intrinsic always has Int32Ty, so truncate
3770  // it before inserting.
3771  Ops[j] =
3772  CGF.Builder.CreateTruncOrBitCast(Ops[j], ArgTy->getVectorElementType());
3773  Ops[j] =
3774  CGF.Builder.CreateInsertElement(UndefValue::get(ArgTy), Ops[j], C0);
3775  }
3776 
3777  Value *Result = CGF.EmitNeonCall(F, Ops, s);
3778  llvm::Type *ResultType = CGF.ConvertType(E->getType());
3779  if (ResultType->getPrimitiveSizeInBits() <
3780  Result->getType()->getPrimitiveSizeInBits())
3781  return CGF.Builder.CreateExtractElement(Result, C0);
3782 
3783  return CGF.Builder.CreateBitCast(Result, ResultType, s);
3784 }
3785 
3786 Value *CodeGenFunction::EmitCommonNeonBuiltinExpr(
3787  unsigned BuiltinID, unsigned LLVMIntrinsic, unsigned AltLLVMIntrinsic,
3788  const char *NameHint, unsigned Modifier, const CallExpr *E,
3789  SmallVectorImpl<llvm::Value *> &Ops, Address PtrOp0, Address PtrOp1) {
3790  // Get the last argument, which specifies the vector type.
3791  llvm::APSInt NeonTypeConst;
3792  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
3793  if (!Arg->isIntegerConstantExpr(NeonTypeConst, getContext()))
3794  return nullptr;
3795 
3796  // Determine the type of this overloaded NEON intrinsic.
3797  NeonTypeFlags Type(NeonTypeConst.getZExtValue());
3798  bool Usgn = Type.isUnsigned();
3799  bool Quad = Type.isQuad();
3800 
3801  llvm::VectorType *VTy = GetNeonType(this, Type);
3802  llvm::Type *Ty = VTy;
3803  if (!Ty)
3804  return nullptr;
3805 
3806  auto getAlignmentValue32 = [&](Address addr) -> Value* {
3807  return Builder.getInt32(addr.getAlignment().getQuantity());
3808  };
3809 
3810  unsigned Int = LLVMIntrinsic;
3811  if ((Modifier & UnsignedAlts) && !Usgn)
3812  Int = AltLLVMIntrinsic;
3813 
3814  switch (BuiltinID) {
3815  default: break;
3816  case NEON::BI__builtin_neon_vabs_v:
3817  case NEON::BI__builtin_neon_vabsq_v:
3818  if (VTy->getElementType()->isFloatingPointTy())
3819  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::fabs, Ty), Ops, "vabs");
3820  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), Ops, "vabs");
3821  case NEON::BI__builtin_neon_vaddhn_v: {
3822  llvm::VectorType *SrcTy =
3823  llvm::VectorType::getExtendedElementVectorType(VTy);
3824 
3825  // %sum = add <4 x i32> %lhs, %rhs
3826  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
3827  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
3828  Ops[0] = Builder.CreateAdd(Ops[0], Ops[1], "vaddhn");
3829 
3830  // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
3831  Constant *ShiftAmt =
3832  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
3833  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vaddhn");
3834 
3835  // %res = trunc <4 x i32> %high to <4 x i16>
3836  return Builder.CreateTrunc(Ops[0], VTy, "vaddhn");
3837  }
3838  case NEON::BI__builtin_neon_vcale_v:
3839  case NEON::BI__builtin_neon_vcaleq_v:
3840  case NEON::BI__builtin_neon_vcalt_v:
3841  case NEON::BI__builtin_neon_vcaltq_v:
3842  std::swap(Ops[0], Ops[1]);
3843  LLVM_FALLTHROUGH;
3844  case NEON::BI__builtin_neon_vcage_v:
3845  case NEON::BI__builtin_neon_vcageq_v:
3846  case NEON::BI__builtin_neon_vcagt_v:
3847  case NEON::BI__builtin_neon_vcagtq_v: {
3848  llvm::Type *VecFlt = llvm::VectorType::get(
3849  VTy->getScalarSizeInBits() == 32 ? FloatTy : DoubleTy,
3850  VTy->getNumElements());
3851  llvm::Type *Tys[] = { VTy, VecFlt };
3852  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3853  return EmitNeonCall(F, Ops, NameHint);
3854  }
3855  case NEON::BI__builtin_neon_vclz_v:
3856  case NEON::BI__builtin_neon_vclzq_v:
3857  // We generate a target-independent intrinsic, which needs a second argument
3858  // specifying whether or not clz of zero is undefined; on ARM it isn't.
3859  Ops.push_back(Builder.getInt1(getTarget().isCLZForZeroUndef()));
3860  break;
3861  case NEON::BI__builtin_neon_vcvt_f32_v:
3862  case NEON::BI__builtin_neon_vcvtq_f32_v:
3863  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3864  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float32, false, Quad));
3865  return Usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
3866  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
3867  case NEON::BI__builtin_neon_vcvt_n_f32_v:
3868  case NEON::BI__builtin_neon_vcvt_n_f64_v:
3869  case NEON::BI__builtin_neon_vcvtq_n_f32_v:
3870  case NEON::BI__builtin_neon_vcvtq_n_f64_v: {
3871  llvm::Type *Tys[2] = { GetFloatNeonType(this, Type), Ty };
3872  Int = Usgn ? LLVMIntrinsic : AltLLVMIntrinsic;
3873  Function *F = CGM.getIntrinsic(Int, Tys);
3874  return EmitNeonCall(F, Ops, "vcvt_n");
3875  }
3876  case NEON::BI__builtin_neon_vcvt_n_s32_v:
3877  case NEON::BI__builtin_neon_vcvt_n_u32_v:
3878  case NEON::BI__builtin_neon_vcvt_n_s64_v:
3879  case NEON::BI__builtin_neon_vcvt_n_u64_v:
3880  case NEON::BI__builtin_neon_vcvtq_n_s32_v:
3881  case NEON::BI__builtin_neon_vcvtq_n_u32_v:
3882  case NEON::BI__builtin_neon_vcvtq_n_s64_v:
3883  case NEON::BI__builtin_neon_vcvtq_n_u64_v: {
3884  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3885  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3886  return EmitNeonCall(F, Ops, "vcvt_n");
3887  }
3888  case NEON::BI__builtin_neon_vcvt_s32_v:
3889  case NEON::BI__builtin_neon_vcvt_u32_v:
3890  case NEON::BI__builtin_neon_vcvt_s64_v:
3891  case NEON::BI__builtin_neon_vcvt_u64_v:
3892  case NEON::BI__builtin_neon_vcvtq_s32_v:
3893  case NEON::BI__builtin_neon_vcvtq_u32_v:
3894  case NEON::BI__builtin_neon_vcvtq_s64_v:
3895  case NEON::BI__builtin_neon_vcvtq_u64_v: {
3896  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
3897  return Usgn ? Builder.CreateFPToUI(Ops[0], Ty, "vcvt")
3898  : Builder.CreateFPToSI(Ops[0], Ty, "vcvt");
3899  }
3900  case NEON::BI__builtin_neon_vcvta_s32_v:
3901  case NEON::BI__builtin_neon_vcvta_s64_v:
3902  case NEON::BI__builtin_neon_vcvta_u32_v:
3903  case NEON::BI__builtin_neon_vcvta_u64_v:
3904  case NEON::BI__builtin_neon_vcvtaq_s32_v:
3905  case NEON::BI__builtin_neon_vcvtaq_s64_v:
3906  case NEON::BI__builtin_neon_vcvtaq_u32_v:
3907  case NEON::BI__builtin_neon_vcvtaq_u64_v:
3908  case NEON::BI__builtin_neon_vcvtn_s32_v:
3909  case NEON::BI__builtin_neon_vcvtn_s64_v:
3910  case NEON::BI__builtin_neon_vcvtn_u32_v:
3911  case NEON::BI__builtin_neon_vcvtn_u64_v:
3912  case NEON::BI__builtin_neon_vcvtnq_s32_v:
3913  case NEON::BI__builtin_neon_vcvtnq_s64_v:
3914  case NEON::BI__builtin_neon_vcvtnq_u32_v:
3915  case NEON::BI__builtin_neon_vcvtnq_u64_v:
3916  case NEON::BI__builtin_neon_vcvtp_s32_v:
3917  case NEON::BI__builtin_neon_vcvtp_s64_v:
3918  case NEON::BI__builtin_neon_vcvtp_u32_v:
3919  case NEON::BI__builtin_neon_vcvtp_u64_v:
3920  case NEON::BI__builtin_neon_vcvtpq_s32_v:
3921  case NEON::BI__builtin_neon_vcvtpq_s64_v:
3922  case NEON::BI__builtin_neon_vcvtpq_u32_v:
3923  case NEON::BI__builtin_neon_vcvtpq_u64_v:
3924  case NEON::BI__builtin_neon_vcvtm_s32_v:
3925  case NEON::BI__builtin_neon_vcvtm_s64_v:
3926  case NEON::BI__builtin_neon_vcvtm_u32_v:
3927  case NEON::BI__builtin_neon_vcvtm_u64_v:
3928  case NEON::BI__builtin_neon_vcvtmq_s32_v:
3929  case NEON::BI__builtin_neon_vcvtmq_s64_v:
3930  case NEON::BI__builtin_neon_vcvtmq_u32_v:
3931  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
3932  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
3933  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, NameHint);
3934  }
3935  case NEON::BI__builtin_neon_vext_v:
3936  case NEON::BI__builtin_neon_vextq_v: {
3937  int CV = cast<ConstantInt>(Ops[2])->getSExtValue();
3938  SmallVector<uint32_t, 16> Indices;
3939  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
3940  Indices.push_back(i+CV);
3941 
3942  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3943  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3944  return Builder.CreateShuffleVector(Ops[0], Ops[1], Indices, "vext");
3945  }
3946  case NEON::BI__builtin_neon_vfma_v:
3947  case NEON::BI__builtin_neon_vfmaq_v: {
3948  Function *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
3949  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3950  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
3951  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
3952 
3953  // NEON intrinsic puts accumulator first, unlike the LLVM fma.
3954  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
3955  }
3956  case NEON::BI__builtin_neon_vld1_v:
3957  case NEON::BI__builtin_neon_vld1q_v: {
3958  llvm::Type *Tys[] = {Ty, Int8PtrTy};
3959  Ops.push_back(getAlignmentValue32(PtrOp0));
3960  return EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Tys), Ops, "vld1");
3961  }
3962  case NEON::BI__builtin_neon_vld2_v:
3963  case NEON::BI__builtin_neon_vld2q_v:
3964  case NEON::BI__builtin_neon_vld3_v:
3965  case NEON::BI__builtin_neon_vld3q_v:
3966  case NEON::BI__builtin_neon_vld4_v:
3967  case NEON::BI__builtin_neon_vld4q_v: {
3968  llvm::Type *Tys[] = {Ty, Int8PtrTy};
3969  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3970  Value *Align = getAlignmentValue32(PtrOp1);
3971  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, NameHint);
3972  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3973  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
3974  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
3975  }
3976  case NEON::BI__builtin_neon_vld1_dup_v:
3977  case NEON::BI__builtin_neon_vld1q_dup_v: {
3978  Value *V = UndefValue::get(Ty);
3979  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
3980  PtrOp0 = Builder.CreateBitCast(PtrOp0, Ty);
3981  LoadInst *Ld = Builder.CreateLoad(PtrOp0);
3982  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
3983  Ops[0] = Builder.CreateInsertElement(V, Ld, CI);
3984  return EmitNeonSplat(Ops[0], CI);
3985  }
3986  case NEON::BI__builtin_neon_vld2_lane_v:
3987  case NEON::BI__builtin_neon_vld2q_lane_v:
3988  case NEON::BI__builtin_neon_vld3_lane_v:
3989  case NEON::BI__builtin_neon_vld3q_lane_v:
3990  case NEON::BI__builtin_neon_vld4_lane_v:
3991  case NEON::BI__builtin_neon_vld4q_lane_v: {
3992  llvm::Type *Tys[] = {Ty, Int8PtrTy};
3993  Function *F = CGM.getIntrinsic(LLVMIntrinsic, Tys);
3994  for (unsigned I = 2; I < Ops.size() - 1; ++I)
3995  Ops[I] = Builder.CreateBitCast(Ops[I], Ty);
3996  Ops.push_back(getAlignmentValue32(PtrOp1));
3997  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), NameHint);
3998  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
3999  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4000  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
4001  }
4002  case NEON::BI__builtin_neon_vmovl_v: {
4003  llvm::Type *DTy =llvm::VectorType::getTruncatedElementVectorType(VTy);
4004  Ops[0] = Builder.CreateBitCast(Ops[0], DTy);
4005  if (Usgn)
4006  return Builder.CreateZExt(Ops[0], Ty, "vmovl");
4007  return Builder.CreateSExt(Ops[0], Ty, "vmovl");
4008  }
4009  case NEON::BI__builtin_neon_vmovn_v: {
4010  llvm::Type *QTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4011  Ops[0] = Builder.CreateBitCast(Ops[0], QTy);
4012  return Builder.CreateTrunc(Ops[0], Ty, "vmovn");
4013  }
4014  case NEON::BI__builtin_neon_vmull_v:
4015  // FIXME: the integer vmull operations could be emitted in terms of pure
4016  // LLVM IR (2 exts followed by a mul). Unfortunately LLVM has a habit of
4017  // hoisting the exts outside loops. Until global ISel comes along that can
4018  // see through such movement, this leads to bad CodeGen. So we need an
4019  // intrinsic for now.
4020  Int = Usgn ? Intrinsic::arm_neon_vmullu : Intrinsic::arm_neon_vmulls;
4021  Int = Type.isPoly() ? (unsigned)Intrinsic::arm_neon_vmullp : Int;
4022  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
4023  case NEON::BI__builtin_neon_vpadal_v:
4024  case NEON::BI__builtin_neon_vpadalq_v: {
4025  // The source operand type has twice as many elements, each half the size.
4026  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4027  llvm::Type *EltTy =
4028  llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4029  llvm::Type *NarrowTy =
4030  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4031  llvm::Type *Tys[2] = { Ty, NarrowTy };
4032  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, NameHint);
4033  }
4034  case NEON::BI__builtin_neon_vpaddl_v:
4035  case NEON::BI__builtin_neon_vpaddlq_v: {
4036  // The source operand type has twice as many elements, each half the size.
4037  unsigned EltBits = VTy->getElementType()->getPrimitiveSizeInBits();
4038  llvm::Type *EltTy = llvm::IntegerType::get(getLLVMContext(), EltBits / 2);
4039  llvm::Type *NarrowTy =
4040  llvm::VectorType::get(EltTy, VTy->getNumElements() * 2);
4041  llvm::Type *Tys[2] = { Ty, NarrowTy };
4042  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vpaddl");
4043  }
4044  case NEON::BI__builtin_neon_vqdmlal_v:
4045  case NEON::BI__builtin_neon_vqdmlsl_v: {
4046  SmallVector<Value *, 2> MulOps(Ops.begin() + 1, Ops.end());
4047  Ops[1] =
4048  EmitNeonCall(CGM.getIntrinsic(LLVMIntrinsic, Ty), MulOps, "vqdmlal");
4049  Ops.resize(2);
4050  return EmitNeonCall(CGM.getIntrinsic(AltLLVMIntrinsic, Ty), Ops, NameHint);
4051  }
4052  case NEON::BI__builtin_neon_vqshl_n_v:
4053  case NEON::BI__builtin_neon_vqshlq_n_v:
4054  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshl_n",
4055  1, false);
4056  case NEON::BI__builtin_neon_vqshlu_n_v:
4057  case NEON::BI__builtin_neon_vqshluq_n_v:
4058  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshlu_n",
4059  1, false);
4060  case NEON::BI__builtin_neon_vrecpe_v:
4061  case NEON::BI__builtin_neon_vrecpeq_v:
4062  case NEON::BI__builtin_neon_vrsqrte_v:
4063  case NEON::BI__builtin_neon_vrsqrteq_v:
4064  Int = Ty->isFPOrFPVectorTy() ? LLVMIntrinsic : AltLLVMIntrinsic;
4065  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, NameHint);
4066 
4067  case NEON::BI__builtin_neon_vrshr_n_v:
4068  case NEON::BI__builtin_neon_vrshrq_n_v:
4069  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshr_n",
4070  1, true);
4071  case NEON::BI__builtin_neon_vshl_n_v:
4072  case NEON::BI__builtin_neon_vshlq_n_v:
4073  Ops[1] = EmitNeonShiftVector(Ops[1], Ty, false);
4074  return Builder.CreateShl(Builder.CreateBitCast(Ops[0],Ty), Ops[1],
4075  "vshl_n");
4076  case NEON::BI__builtin_neon_vshll_n_v: {
4077  llvm::Type *SrcTy = llvm::VectorType::getTruncatedElementVectorType(VTy);
4078  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4079  if (Usgn)
4080  Ops[0] = Builder.CreateZExt(Ops[0], VTy);
4081  else
4082  Ops[0] = Builder.CreateSExt(Ops[0], VTy);
4083  Ops[1] = EmitNeonShiftVector(Ops[1], VTy, false);
4084  return Builder.CreateShl(Ops[0], Ops[1], "vshll_n");
4085  }
4086  case NEON::BI__builtin_neon_vshrn_n_v: {
4087  llvm::Type *SrcTy = llvm::VectorType::getExtendedElementVectorType(VTy);
4088  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4089  Ops[1] = EmitNeonShiftVector(Ops[1], SrcTy, false);
4090  if (Usgn)
4091  Ops[0] = Builder.CreateLShr(Ops[0], Ops[1]);
4092  else
4093  Ops[0] = Builder.CreateAShr(Ops[0], Ops[1]);
4094  return Builder.CreateTrunc(Ops[0], Ty, "vshrn_n");
4095  }
4096  case NEON::BI__builtin_neon_vshr_n_v:
4097  case NEON::BI__builtin_neon_vshrq_n_v:
4098  return EmitNeonRShiftImm(Ops[0], Ops[1], Ty, Usgn, "vshr_n");
4099  case NEON::BI__builtin_neon_vst1_v:
4100  case NEON::BI__builtin_neon_vst1q_v:
4101  case NEON::BI__builtin_neon_vst2_v:
4102  case NEON::BI__builtin_neon_vst2q_v:
4103  case NEON::BI__builtin_neon_vst3_v:
4104  case NEON::BI__builtin_neon_vst3q_v:
4105  case NEON::BI__builtin_neon_vst4_v:
4106  case NEON::BI__builtin_neon_vst4q_v:
4107  case NEON::BI__builtin_neon_vst2_lane_v:
4108  case NEON::BI__builtin_neon_vst2q_lane_v:
4109  case NEON::BI__builtin_neon_vst3_lane_v:
4110  case NEON::BI__builtin_neon_vst3q_lane_v:
4111  case NEON::BI__builtin_neon_vst4_lane_v:
4112  case NEON::BI__builtin_neon_vst4q_lane_v: {
4113  llvm::Type *Tys[] = {Int8PtrTy, Ty};
4114  Ops.push_back(getAlignmentValue32(PtrOp0));
4115  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
4116  }
4117  case NEON::BI__builtin_neon_vsubhn_v: {
4118  llvm::VectorType *SrcTy =
4119  llvm::VectorType::getExtendedElementVectorType(VTy);
4120 
4121  // %sum = add <4 x i32> %lhs, %rhs
4122  Ops[0] = Builder.CreateBitCast(Ops[0], SrcTy);
4123  Ops[1] = Builder.CreateBitCast(Ops[1], SrcTy);
4124  Ops[0] = Builder.CreateSub(Ops[0], Ops[1], "vsubhn");
4125 
4126  // %high = lshr <4 x i32> %sum, <i32 16, i32 16, i32 16, i32 16>
4127  Constant *ShiftAmt =
4128  ConstantInt::get(SrcTy, SrcTy->getScalarSizeInBits() / 2);
4129  Ops[0] = Builder.CreateLShr(Ops[0], ShiftAmt, "vsubhn");
4130 
4131  // %res = trunc <4 x i32> %high to <4 x i16>
4132  return Builder.CreateTrunc(Ops[0], VTy, "vsubhn");
4133  }
4134  case NEON::BI__builtin_neon_vtrn_v:
4135  case NEON::BI__builtin_neon_vtrnq_v: {
4136  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4137  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4138  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4139  Value *SV = nullptr;
4140 
4141  for (unsigned vi = 0; vi != 2; ++vi) {
4142  SmallVector<uint32_t, 16> Indices;
4143  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4144  Indices.push_back(i+vi);
4145  Indices.push_back(i+e+vi);
4146  }
4147  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4148  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
4149  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4150  }
4151  return SV;
4152  }
4153  case NEON::BI__builtin_neon_vtst_v:
4154  case NEON::BI__builtin_neon_vtstq_v: {
4155  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
4156  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4157  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
4158  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
4159  ConstantAggregateZero::get(Ty));
4160  return Builder.CreateSExt(Ops[0], Ty, "vtst");
4161  }
4162  case NEON::BI__builtin_neon_vuzp_v:
4163  case NEON::BI__builtin_neon_vuzpq_v: {
4164  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4165  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4166  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4167  Value *SV = nullptr;
4168 
4169  for (unsigned vi = 0; vi != 2; ++vi) {
4170  SmallVector<uint32_t, 16> Indices;
4171  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
4172  Indices.push_back(2*i+vi);
4173 
4174  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4175  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
4176  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4177  }
4178  return SV;
4179  }
4180  case NEON::BI__builtin_neon_vzip_v:
4181  case NEON::BI__builtin_neon_vzipq_v: {
4182  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
4183  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4184  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
4185  Value *SV = nullptr;
4186 
4187  for (unsigned vi = 0; vi != 2; ++vi) {
4188  SmallVector<uint32_t, 16> Indices;
4189  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
4190  Indices.push_back((i + vi*e) >> 1);
4191  Indices.push_back(((i + vi*e) >> 1)+e);
4192  }
4193  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
4194  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
4195  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
4196  }
4197  return SV;
4198  }
4199  }
4200 
4201  assert(Int && "Expected valid intrinsic number");
4202 
4203  // Determine the type(s) of this overloaded AArch64 intrinsic.
4204  Function *F = LookupNeonLLVMIntrinsic(Int, Modifier, Ty, E);
4205 
4206  Value *Result = EmitNeonCall(F, Ops, NameHint);
4207  llvm::Type *ResultType = ConvertType(E->getType());
4208  // Cast the AArch64 intrinsic's one-element vector result to the
4209  // scalar type expected by the builtin.
4210  return Builder.CreateBitCast(Result, ResultType, NameHint);
4211 }
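For the vtrn/vuzp/vzip cases above, the shuffle masks are easiest to see with a concrete element count. A minimal standalone sketch (illustrative only, assuming 4-element input vectors) that prints the two vzip masks built by the loop above:

    #include <cstdio>

    int main() {
      const unsigned e = 4; // elements per input vector (assumption for this example)
      for (unsigned vi = 0; vi != 2; ++vi) {
        std::printf("vzip part %u:", vi);
        for (unsigned i = 0; i != e; i += 2)
          std::printf(" %u %u", (i + vi * e) >> 1, ((i + vi * e) >> 1) + e);
        std::printf("\n"); // part 0 -> 0 4 1 5, part 1 -> 2 6 3 7
      }
      return 0;
    }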
4212 
4213 Value *CodeGenFunction::EmitAArch64CompareBuiltinExpr(
4214  Value *Op, llvm::Type *Ty, const CmpInst::Predicate Fp,
4215  const CmpInst::Predicate Ip, const Twine &Name) {
4216  llvm::Type *OTy = Op->getType();
4217 
4218  // FIXME: this is utterly horrific. We should not be looking at previous
4219  // codegen context to find out what needs doing. Unfortunately TableGen
4220  // currently gives us exactly the same calls for vceqz_f32 and vceqz_s32
4221  // (etc).
4222  if (BitCastInst *BI = dyn_cast<BitCastInst>(Op))
4223  OTy = BI->getOperand(0)->getType();
4224 
4225  Op = Builder.CreateBitCast(Op, OTy);
4226  if (OTy->getScalarType()->isFloatingPointTy()) {
4227  Op = Builder.CreateFCmp(Fp, Op, Constant::getNullValue(OTy));
4228  } else {
4229  Op = Builder.CreateICmp(Ip, Op, Constant::getNullValue(OTy));
4230  }
4231  return Builder.CreateSExt(Op, Ty, Name);
4232 }
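A minimal per-lane model of what the compare-then-sext sequence above produces (a sketch, not code from this file): the integer or floating-point comparison yields an i1, and the sign extension turns it into an all-ones or all-zeros lane.

    #include <cstdint>

    // Hypothetical scalar model of e.g. vceqz_s32 on one lane: sext(i1) gives
    // -1 (all bits set) when the lane is zero, and 0 otherwise.
    static int32_t vceqz_lane(int32_t X) {
      return (X == 0) ? -1 : 0;
    }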
4233 
4234 static Value *packTBLDVectorList(CodeGenFunction &CGF, ArrayRef<Value *> Ops,
4235  Value *ExtOp, Value *IndexOp,
4236  llvm::Type *ResTy, unsigned IntID,
4237  const char *Name) {
4238  SmallVector<Value *, 2> TblOps;
4239  if (ExtOp)
4240  TblOps.push_back(ExtOp);
4241 
4242  // Build a vector containing sequential numbers like (0, 1, 2, ..., 15).
4243  SmallVector<uint32_t, 16> Indices;
4244  llvm::VectorType *TblTy = cast<llvm::VectorType>(Ops[0]->getType());
4245  for (unsigned i = 0, e = TblTy->getNumElements(); i != e; ++i) {
4246  Indices.push_back(2*i);
4247  Indices.push_back(2*i+1);
4248  }
4249 
4250  int PairPos = 0, End = Ops.size() - 1;
4251  while (PairPos < End) {
4252  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4253  Ops[PairPos+1], Indices,
4254  Name));
4255  PairPos += 2;
4256  }
4257 
4258  // If there's an odd number of 64-bit lookup tables, fill the high 64 bits
4259  // of the last 128-bit lookup table with zero.
4260  if (PairPos == End) {
4261  Value *ZeroTbl = ConstantAggregateZero::get(TblTy);
4262  TblOps.push_back(CGF.Builder.CreateShuffleVector(Ops[PairPos],
4263  ZeroTbl, Indices, Name));
4264  }
4265 
4266  Function *TblF;
4267  TblOps.push_back(IndexOp);
4268  TblF = CGF.CGM.getIntrinsic(IntID, ResTy);
4269 
4270  return CGF.EmitNeonCall(TblF, TblOps, Name);
4271 }
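As a rough model of the pairing done above (a sketch assuming each table operand is an 8-byte half), consecutive 64-bit table registers are concatenated into 128-bit tables, and an odd trailing half is padded with zeros, matching the ZeroTbl case:

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <cstdint>
    #include <vector>

    std::vector<std::array<uint8_t, 16>>
    packTables(const std::vector<std::array<uint8_t, 8>> &Halves) {
      std::vector<std::array<uint8_t, 16>> Tables;
      for (std::size_t I = 0; I < Halves.size(); I += 2) {
        std::array<uint8_t, 16> T{}; // zero-initialized 128-bit table
        std::copy(Halves[I].begin(), Halves[I].end(), T.begin());
        if (I + 1 < Halves.size())
          std::copy(Halves[I + 1].begin(), Halves[I + 1].end(), T.begin() + 8);
        Tables.push_back(T);
      }
      return Tables;
    }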
4272 
4273 Value *CodeGenFunction::GetValueForARMHint(unsigned BuiltinID) {
4274  unsigned Value;
4275  switch (BuiltinID) {
4276  default:
4277  return nullptr;
4278  case ARM::BI__builtin_arm_nop:
4279  Value = 0;
4280  break;
4281  case ARM::BI__builtin_arm_yield:
4282  case ARM::BI__yield:
4283  Value = 1;
4284  break;
4285  case ARM::BI__builtin_arm_wfe:
4286  case ARM::BI__wfe:
4287  Value = 2;
4288  break;
4289  case ARM::BI__builtin_arm_wfi:
4290  case ARM::BI__wfi:
4291  Value = 3;
4292  break;
4293  case ARM::BI__builtin_arm_sev:
4294  case ARM::BI__sev:
4295  Value = 4;
4296  break;
4297  case ARM::BI__builtin_arm_sevl:
4298  case ARM::BI__sevl:
4299  Value = 5;
4300  break;
4301  }
4302 
4303  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_hint),
4304  llvm::ConstantInt::get(Int32Ty, Value));
4305 }
4306 
4307 // Generates the IR for the read/write special register builtin.
4308 // ValueType is the type of the value that is to be written or read;
4309 // RegisterType is the type of the register being written to or read from.
4310 static Value *EmitSpecialRegisterBuiltin(CodeGenFunction &CGF,
4311  const CallExpr *E,
4312  llvm::Type *RegisterType,
4313  llvm::Type *ValueType,
4314  bool IsRead,
4315  StringRef SysReg = "") {
4316  // Read and write register intrinsics only support 32- and 64-bit operations.
4317  assert((RegisterType->isIntegerTy(32) || RegisterType->isIntegerTy(64))
4318  && "Unsupported size for register.");
4319 
4320  CodeGen::CGBuilderTy &Builder = CGF.Builder;
4321  CodeGen::CodeGenModule &CGM = CGF.CGM;
4322  LLVMContext &Context = CGM.getLLVMContext();
4323 
4324  if (SysReg.empty()) {
4325  const Expr *SysRegStrExpr = E->getArg(0)->IgnoreParenCasts();
4326  SysReg = cast<clang::StringLiteral>(SysRegStrExpr)->getString();
4327  }
4328 
4329  llvm::Metadata *Ops[] = { llvm::MDString::get(Context, SysReg) };
4330  llvm::MDNode *RegName = llvm::MDNode::get(Context, Ops);
4331  llvm::Value *Metadata = llvm::MetadataAsValue::get(Context, RegName);
4332 
4333  llvm::Type *Types[] = { RegisterType };
4334 
4335  bool MixedTypes = RegisterType->isIntegerTy(64) && ValueType->isIntegerTy(32);
4336  assert(!(RegisterType->isIntegerTy(32) && ValueType->isIntegerTy(64))
4337  && "Can't fit 64-bit value in 32-bit register");
4338 
4339  if (IsRead) {
4340  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::read_register, Types);
4341  llvm::Value *Call = Builder.CreateCall(F, Metadata);
4342 
4343  if (MixedTypes)
4344  // Read into 64 bit register and then truncate result to 32 bit.
4345  return Builder.CreateTrunc(Call, ValueType);
4346 
4347  if (ValueType->isPointerTy())
4348  // Have i32/i64 result (Call) but want to return a VoidPtrTy (i8*).
4349  return Builder.CreateIntToPtr(Call, ValueType);
4350 
4351  return Call;
4352  }
4353 
4354  llvm::Value *F = CGM.getIntrinsic(llvm::Intrinsic::write_register, Types);
4355  llvm::Value *ArgValue = CGF.EmitScalarExpr(E->getArg(1));
4356  if (MixedTypes) {
4357  // Extend 32 bit write value to 64 bit to pass to write.
4358  ArgValue = Builder.CreateZExt(ArgValue, RegisterType);
4359  return Builder.CreateCall(F, { Metadata, ArgValue });
4360  }
4361 
4362  if (ValueType->isPointerTy()) {
4363  // Have VoidPtrTy ArgValue but want to return an i32/i64.
4364  ArgValue = Builder.CreatePtrToInt(ArgValue, RegisterType);
4365  return Builder.CreateCall(F, { Metadata, ArgValue });
4366  }
4367 
4368  return Builder.CreateCall(F, { Metadata, ArgValue });
4369 }
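A small scalar sketch of the MixedTypes handling above, assuming the one mixed case the assertions allow (a 64-bit register paired with a 32-bit value type): reads truncate the register value, writes zero-extend the argument.

    #include <cstdint>

    // Read: 64-bit register value narrowed to the 32-bit value type (trunc).
    static uint32_t readNarrow(uint64_t RegValue) {
      return static_cast<uint32_t>(RegValue);
    }

    // Write: 32-bit argument widened to the 64-bit register type (zext).
    static uint64_t writeWiden(uint32_t ArgValue) {
      return ArgValue;
    }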
4370 
4371 /// Return true if BuiltinID is an overloaded Neon intrinsic with an extra
4372 /// argument that specifies the vector type.
4373 static bool HasExtraNeonArgument(unsigned BuiltinID) {
4374  switch (BuiltinID) {
4375  default: break;
4376  case NEON::BI__builtin_neon_vget_lane_i8:
4377  case NEON::BI__builtin_neon_vget_lane_i16:
4378  case NEON::BI__builtin_neon_vget_lane_i32:
4379  case NEON::BI__builtin_neon_vget_lane_i64:
4380  case NEON::BI__builtin_neon_vget_lane_f32:
4381  case NEON::BI__builtin_neon_vgetq_lane_i8:
4382  case NEON::BI__builtin_neon_vgetq_lane_i16:
4383  case NEON::BI__builtin_neon_vgetq_lane_i32:
4384  case NEON::BI__builtin_neon_vgetq_lane_i64:
4385  case NEON::BI__builtin_neon_vgetq_lane_f32:
4386  case NEON::BI__builtin_neon_vset_lane_i8:
4387  case NEON::BI__builtin_neon_vset_lane_i16:
4388  case NEON::BI__builtin_neon_vset_lane_i32:
4389  case NEON::BI__builtin_neon_vset_lane_i64:
4390  case NEON::BI__builtin_neon_vset_lane_f32:
4391  case NEON::BI__builtin_neon_vsetq_lane_i8:
4392  case NEON::BI__builtin_neon_vsetq_lane_i16:
4393  case NEON::BI__builtin_neon_vsetq_lane_i32:
4394  case NEON::BI__builtin_neon_vsetq_lane_i64:
4395  case NEON::BI__builtin_neon_vsetq_lane_f32:
4396  case NEON::BI__builtin_neon_vsha1h_u32:
4397  case NEON::BI__builtin_neon_vsha1cq_u32:
4398  case NEON::BI__builtin_neon_vsha1pq_u32:
4399  case NEON::BI__builtin_neon_vsha1mq_u32:
4400  case ARM::BI_MoveToCoprocessor:
4401  case ARM::BI_MoveToCoprocessor2:
4402  return false;
4403  }
4404  return true;
4405 }
4406 
4407 Value *CodeGenFunction::EmitARMBuiltinExpr(unsigned BuiltinID,
4408  const CallExpr *E) {
4409  if (auto Hint = GetValueForARMHint(BuiltinID))
4410  return Hint;
4411 
4412  if (BuiltinID == ARM::BI__emit) {
4413  bool IsThumb = getTarget().getTriple().getArch() == llvm::Triple::thumb;
4414  llvm::FunctionType *FTy =
4415  llvm::FunctionType::get(VoidTy, /*Variadic=*/false);
4416 
4417  APSInt Value;
4418  if (!E->getArg(0)->EvaluateAsInt(Value, CGM.getContext()))
4419  llvm_unreachable("Sema will ensure that the parameter is constant");
4420 
4421  uint64_t ZExtValue = Value.zextOrTrunc(IsThumb ? 16 : 32).getZExtValue();
4422 
4423  llvm::InlineAsm *Emit =
4424  IsThumb ? InlineAsm::get(FTy, ".inst.n 0x" + utohexstr(ZExtValue), "",
4425  /*SideEffects=*/true)
4426  : InlineAsm::get(FTy, ".inst 0x" + utohexstr(ZExtValue), "",
4427  /*SideEffects=*/true);
4428 
4429  return Builder.CreateCall(Emit);
4430  }
4431 
4432  if (BuiltinID == ARM::BI__builtin_arm_dbg) {
4433  Value *Option = EmitScalarExpr(E->getArg(0));
4434  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_dbg), Option);
4435  }
4436 
4437  if (BuiltinID == ARM::BI__builtin_arm_prefetch) {
4438  Value *Address = EmitScalarExpr(E->getArg(0));
4439  Value *RW = EmitScalarExpr(E->getArg(1));
4440  Value *IsData = EmitScalarExpr(E->getArg(2));
4441 
4442  // Locality is not supported on the ARM target.
4443  Value *Locality = llvm::ConstantInt::get(Int32Ty, 3);
4444 
4445  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
4446  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
4447  }
4448 
4449  if (BuiltinID == ARM::BI__builtin_arm_rbit) {
4450  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
4451  return Builder.CreateCall(
4452  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
4453  }
4454 
4455  if (BuiltinID == ARM::BI__clear_cache) {
4456  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
4457  const FunctionDecl *FD = E->getDirectCallee();
4458  Value *Ops[2];
4459  for (unsigned i = 0; i < 2; i++)
4460  Ops[i] = EmitScalarExpr(E->getArg(i));
4461  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
4462  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
4463  StringRef Name = FD->getName();
4464  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
4465  }
4466 
4467  if (BuiltinID == ARM::BI__builtin_arm_mcrr ||
4468  BuiltinID == ARM::BI__builtin_arm_mcrr2) {
4469  Function *F;
4470 
4471  switch (BuiltinID) {
4472  default: llvm_unreachable("unexpected builtin");
4473  case ARM::BI__builtin_arm_mcrr:
4474  F = CGM.getIntrinsic(Intrinsic::arm_mcrr);
4475  break;
4476  case ARM::BI__builtin_arm_mcrr2:
4477  F = CGM.getIntrinsic(Intrinsic::arm_mcrr2);
4478  break;
4479  }
4480 
4481  // The MCRR{2} instruction has 5 operands, but
4482  // the intrinsic has only 4 because Rt and Rt2
4483  // are represented as a single unsigned 64-bit
4484  // integer in the intrinsic definition, while
4485  // internally they are represented as two
4486  // 32-bit integers.
4487 
4488  Value *Coproc = EmitScalarExpr(E->getArg(0));
4489  Value *Opc1 = EmitScalarExpr(E->getArg(1));
4490  Value *RtAndRt2 = EmitScalarExpr(E->getArg(2));
4491  Value *CRm = EmitScalarExpr(E->getArg(3));
4492 
4493  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4494  Value *Rt = Builder.CreateTruncOrBitCast(RtAndRt2, Int32Ty);
4495  Value *Rt2 = Builder.CreateLShr(RtAndRt2, C1);
4496  Rt2 = Builder.CreateTruncOrBitCast(Rt2, Int32Ty);
4497 
4498  return Builder.CreateCall(F, {Coproc, Opc1, Rt, Rt2, CRm});
4499  }
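The Rt/Rt2 computation above amounts to splitting the builtin's single 64-bit operand into two 32-bit halves; a sketch of the same arithmetic in plain C++ (illustrative only):

    #include <cstdint>

    static void splitRtPair(uint64_t RtAndRt2, uint32_t &Rt, uint32_t &Rt2) {
      Rt  = static_cast<uint32_t>(RtAndRt2);       // trunc: low half
      Rt2 = static_cast<uint32_t>(RtAndRt2 >> 32); // lshr + trunc: high half
    }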
4500 
4501  if (BuiltinID == ARM::BI__builtin_arm_mrrc ||
4502  BuiltinID == ARM::BI__builtin_arm_mrrc2) {
4503  Function *F;
4504 
4505  switch (BuiltinID) {
4506  default: llvm_unreachable("unexpected builtin");
4507  case ARM::BI__builtin_arm_mrrc:
4508  F = CGM.getIntrinsic(Intrinsic::arm_mrrc);
4509  break;
4510  case ARM::BI__builtin_arm_mrrc2:
4511  F = CGM.getIntrinsic(Intrinsic::arm_mrrc2);
4512  break;
4513  }
4514 
4515  Value *Coproc = EmitScalarExpr(E->getArg(0));
4516  Value *Opc1 = EmitScalarExpr(E->getArg(1));
4517  Value *CRm = EmitScalarExpr(E->getArg(2));
4518  Value *RtAndRt2 = Builder.CreateCall(F, {Coproc, Opc1, CRm});
4519 
4520  // Returns an unsigned 64 bit integer, represented
4521  // as two 32 bit integers.
4522 
4523  Value *Rt = Builder.CreateExtractValue(RtAndRt2, 1);
4524  Value *Rt1 = Builder.CreateExtractValue(RtAndRt2, 0);
4525  Rt = Builder.CreateZExt(Rt, Int64Ty);
4526  Rt1 = Builder.CreateZExt(Rt1, Int64Ty);
4527 
4528  Value *ShiftCast = llvm::ConstantInt::get(Int64Ty, 32);
4529  RtAndRt2 = Builder.CreateShl(Rt, ShiftCast, "shl", true);
4530  RtAndRt2 = Builder.CreateOr(RtAndRt2, Rt1);
4531 
4532  return Builder.CreateBitCast(RtAndRt2, ConvertType(E->getType()));
4533  }
4534 
4535  if (BuiltinID == ARM::BI__builtin_arm_ldrexd ||
4536  ((BuiltinID == ARM::BI__builtin_arm_ldrex ||
4537  BuiltinID == ARM::BI__builtin_arm_ldaex) &&
4538  getContext().getTypeSize(E->getType()) == 64) ||
4539  BuiltinID == ARM::BI__ldrexd) {
4540  Function *F;
4541 
4542  switch (BuiltinID) {
4543  default: llvm_unreachable("unexpected builtin");
4544  case ARM::BI__builtin_arm_ldaex:
4545  F = CGM.getIntrinsic(Intrinsic::arm_ldaexd);
4546  break;
4547  case ARM::BI__builtin_arm_ldrexd:
4548  case ARM::BI__builtin_arm_ldrex:
4549  case ARM::BI__ldrexd:
4550  F = CGM.getIntrinsic(Intrinsic::arm_ldrexd);
4551  break;
4552  }
4553 
4554  Value *LdPtr = EmitScalarExpr(E->getArg(0));
4555  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
4556  "ldrexd");
4557 
4558  Value *Val0 = Builder.CreateExtractValue(Val, 1);
4559  Value *Val1 = Builder.CreateExtractValue(Val, 0);
4560  Val0 = Builder.CreateZExt(Val0, Int64Ty);
4561  Val1 = Builder.CreateZExt(Val1, Int64Ty);
4562 
4563  Value *ShiftCst = llvm::ConstantInt::get(Int64Ty, 32);
4564  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
4565  Val = Builder.CreateOr(Val, Val1);
4566  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
4567  }
4568 
4569  if (BuiltinID == ARM::BI__builtin_arm_ldrex ||
4570  BuiltinID == ARM::BI__builtin_arm_ldaex) {
4571  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
4572 
4573  QualType Ty = E->getType();
4574  llvm::Type *RealResTy = ConvertType(Ty);
4575  llvm::Type *PtrTy = llvm::IntegerType::get(
4576  getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
4577  LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
4578 
4579  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_ldaex
4580  ? Intrinsic::arm_ldaex
4581  : Intrinsic::arm_ldrex,
4582  PtrTy);
4583  Value *Val = Builder.CreateCall(F, LoadAddr, "ldrex");
4584 
4585  if (RealResTy->isPointerTy())
4586  return Builder.CreateIntToPtr(Val, RealResTy);
4587  else {
4588  llvm::Type *IntResTy = llvm::IntegerType::get(
4589  getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
4590  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
4591  return Builder.CreateBitCast(Val, RealResTy);
4592  }
4593  }
4594 
4595  if (BuiltinID == ARM::BI__builtin_arm_strexd ||
4596  ((BuiltinID == ARM::BI__builtin_arm_stlex ||
4597  BuiltinID == ARM::BI__builtin_arm_strex) &&
4598  getContext().getTypeSize(E->getArg(0)->getType()) == 64)) {
4599  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4600  ? Intrinsic::arm_stlexd
4601  : Intrinsic::arm_strexd);
4602  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty);
4603 
4604  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
4605  Value *Val = EmitScalarExpr(E->getArg(0));
4606  Builder.CreateStore(Val, Tmp);
4607 
4608  Address LdPtr = Builder.CreateBitCast(Tmp,llvm::PointerType::getUnqual(STy));
4609  Val = Builder.CreateLoad(LdPtr);
4610 
4611  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
4612  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
4613  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), Int8PtrTy);
4614  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "strexd");
4615  }
4616 
4617  if (BuiltinID == ARM::BI__builtin_arm_strex ||
4618  BuiltinID == ARM::BI__builtin_arm_stlex) {
4619  Value *StoreVal = EmitScalarExpr(E->getArg(0));
4620  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
4621 
4622  QualType Ty = E->getArg(0)->getType();
4623  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
4624  getContext().getTypeSize(Ty));
4625  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
4626 
4627  if (StoreVal->getType()->isPointerTy())
4628  StoreVal = Builder.CreatePtrToInt(StoreVal, Int32Ty);
4629  else {
4630  llvm::Type *IntTy = llvm::IntegerType::get(
4631  getLLVMContext(),
4632  CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
4633  StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
4634  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int32Ty);
4635  }
4636 
4637  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI__builtin_arm_stlex
4638  ? Intrinsic::arm_stlex
4639  : Intrinsic::arm_strex,
4640  StoreAddr->getType());
4641  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "strex");
4642  }
4643 
4644  switch (BuiltinID) {
4645  case ARM::BI__iso_volatile_load8:
4646  case ARM::BI__iso_volatile_load16:
4647  case ARM::BI__iso_volatile_load32:
4648  case ARM::BI__iso_volatile_load64: {
4649  Value *Ptr = EmitScalarExpr(E->getArg(0));
4650  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4651  CharUnits LoadSize = getContext().getTypeSizeInChars(ElTy);
4652  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4653  LoadSize.getQuantity() * 8);
4654  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4655  llvm::LoadInst *Load =
4656  Builder.CreateAlignedLoad(Ptr, LoadSize);
4657  Load->setVolatile(true);
4658  return Load;
4659  }
4660  case ARM::BI__iso_volatile_store8:
4661  case ARM::BI__iso_volatile_store16:
4662  case ARM::BI__iso_volatile_store32:
4663  case ARM::BI__iso_volatile_store64: {
4664  Value *Ptr = EmitScalarExpr(E->getArg(0));
4665  Value *Value = EmitScalarExpr(E->getArg(1));
4666  QualType ElTy = E->getArg(0)->getType()->getPointeeType();
4667  CharUnits StoreSize = getContext().getTypeSizeInChars(ElTy);
4668  llvm::Type *ITy = llvm::IntegerType::get(getLLVMContext(),
4669  StoreSize.getQuantity() * 8);
4670  Ptr = Builder.CreateBitCast(Ptr, ITy->getPointerTo());
4671  llvm::StoreInst *Store =
4672  Builder.CreateAlignedStore(Value, Ptr,
4673  StoreSize);
4674  Store->setVolatile(true);
4675  return Store;
4676  }
4677  }
4678 
4679  if (BuiltinID == ARM::BI__builtin_arm_clrex) {
4680  Function *F = CGM.getIntrinsic(Intrinsic::arm_clrex);
4681  return Builder.CreateCall(F);
4682  }
4683 
4684  // CRC32
4685  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
4686  switch (BuiltinID) {
4687  case ARM::BI__builtin_arm_crc32b:
4688  CRCIntrinsicID = Intrinsic::arm_crc32b; break;
4689  case ARM::BI__builtin_arm_crc32cb:
4690  CRCIntrinsicID = Intrinsic::arm_crc32cb; break;
4691  case ARM::BI__builtin_arm_crc32h:
4692  CRCIntrinsicID = Intrinsic::arm_crc32h; break;
4693  case ARM::BI__builtin_arm_crc32ch:
4694  CRCIntrinsicID = Intrinsic::arm_crc32ch; break;
4695  case ARM::BI__builtin_arm_crc32w:
4696  case ARM::BI__builtin_arm_crc32d:
4697  CRCIntrinsicID = Intrinsic::arm_crc32w; break;
4698  case ARM::BI__builtin_arm_crc32cw:
4699  case ARM::BI__builtin_arm_crc32cd:
4700  CRCIntrinsicID = Intrinsic::arm_crc32cw; break;
4701  }
4702 
4703  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
4704  Value *Arg0 = EmitScalarExpr(E->getArg(0));
4705  Value *Arg1 = EmitScalarExpr(E->getArg(1));
4706 
4707  // crc32{c,}d intrinsics are implemented as two calls to crc32{c,}w
4708  // intrinsics, hence we need different codegen for these cases.
4709  if (BuiltinID == ARM::BI__builtin_arm_crc32d ||
4710  BuiltinID == ARM::BI__builtin_arm_crc32cd) {
4711  Value *C1 = llvm::ConstantInt::get(Int64Ty, 32);
4712  Value *Arg1a = Builder.CreateTruncOrBitCast(Arg1, Int32Ty);
4713  Value *Arg1b = Builder.CreateLShr(Arg1, C1);
4714  Arg1b = Builder.CreateTruncOrBitCast(Arg1b, Int32Ty);
4715 
4716  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4717  Value *Res = Builder.CreateCall(F, {Arg0, Arg1a});
4718  return Builder.CreateCall(F, {Res, Arg1b});
4719  } else {
4720  Arg1 = Builder.CreateZExtOrBitCast(Arg1, Int32Ty);
4721 
4722  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
4723  return Builder.CreateCall(F, {Arg0, Arg1});
4724  }
4725  }
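A scalar sketch of the crc32{c,}d decomposition above, with crc32w standing in for the 32-bit CRC intrinsic (an assumed helper for illustration, not a real API): the 64-bit data operand is consumed as two chained 32-bit steps, low word first.

    #include <cstdint>

    // Assumed stand-in for one 32-bit CRC step performed by the intrinsic.
    uint32_t crc32w(uint32_t Crc, uint32_t Data);

    uint32_t crc32d(uint32_t Crc, uint64_t Data) {
      uint32_t Lo = static_cast<uint32_t>(Data);
      uint32_t Hi = static_cast<uint32_t>(Data >> 32);
      return crc32w(crc32w(Crc, Lo), Hi); // low word first, then high word
    }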
4726 
4727  if (BuiltinID == ARM::BI__builtin_arm_rsr ||
4728  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4729  BuiltinID == ARM::BI__builtin_arm_rsrp ||
4730  BuiltinID == ARM::BI__builtin_arm_wsr ||
4731  BuiltinID == ARM::BI__builtin_arm_wsr64 ||
4732  BuiltinID == ARM::BI__builtin_arm_wsrp) {
4733 
4734  bool IsRead = BuiltinID == ARM::BI__builtin_arm_rsr ||
4735  BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4736  BuiltinID == ARM::BI__builtin_arm_rsrp;
4737 
4738  bool IsPointerBuiltin = BuiltinID == ARM::BI__builtin_arm_rsrp ||
4739  BuiltinID == ARM::BI__builtin_arm_wsrp;
4740 
4741  bool Is64Bit = BuiltinID == ARM::BI__builtin_arm_rsr64 ||
4742  BuiltinID == ARM::BI__builtin_arm_wsr64;
4743 
4744  llvm::Type *ValueType;
4745  llvm::Type *RegisterType;
4746  if (IsPointerBuiltin) {
4747  ValueType = VoidPtrTy;
4748  RegisterType = Int32Ty;
4749  } else if (Is64Bit) {
4750  ValueType = RegisterType = Int64Ty;
4751  } else {
4752  ValueType = RegisterType = Int32Ty;
4753  }
4754 
4755  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
4756  }
4757 
4758  // Find out if any arguments are required to be integer constant
4759  // expressions.
4760  unsigned ICEArguments = 0;
4761  ASTContext::GetBuiltinTypeError Error;
4762  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
4763  assert(Error == ASTContext::GE_None && "Should not codegen an error");
4764 
4765  auto getAlignmentValue32 = [&](Address addr) -> Value* {
4766  return Builder.getInt32(addr.getAlignment().getQuantity());
4767  };
4768 
4769  Address PtrOp0 = Address::invalid();
4770  Address PtrOp1 = Address::invalid();
4771  SmallVector<Value*, 4> Ops;
4772  bool HasExtraArg = HasExtraNeonArgument(BuiltinID);
4773  unsigned NumArgs = E->getNumArgs() - (HasExtraArg ? 1 : 0);
4774  for (unsigned i = 0, e = NumArgs; i != e; i++) {
4775  if (i == 0) {
4776  switch (BuiltinID) {
4777  case NEON::BI__builtin_neon_vld1_v:
4778  case NEON::BI__builtin_neon_vld1q_v:
4779  case NEON::BI__builtin_neon_vld1q_lane_v:
4780  case NEON::BI__builtin_neon_vld1_lane_v:
4781  case NEON::BI__builtin_neon_vld1_dup_v:
4782  case NEON::BI__builtin_neon_vld1q_dup_v:
4783  case NEON::BI__builtin_neon_vst1_v:
4784  case NEON::BI__builtin_neon_vst1q_v:
4785  case NEON::BI__builtin_neon_vst1q_lane_v:
4786  case NEON::BI__builtin_neon_vst1_lane_v:
4787  case NEON::BI__builtin_neon_vst2_v:
4788  case NEON::BI__builtin_neon_vst2q_v:
4789  case NEON::BI__builtin_neon_vst2_lane_v:
4790  case NEON::BI__builtin_neon_vst2q_lane_v:
4791  case NEON::BI__builtin_neon_vst3_v:
4792  case NEON::BI__builtin_neon_vst3q_v:
4793  case NEON::BI__builtin_neon_vst3_lane_v:
4794  case NEON::BI__builtin_neon_vst3q_lane_v:
4795  case NEON::BI__builtin_neon_vst4_v:
4796  case NEON::BI__builtin_neon_vst4q_v:
4797  case NEON::BI__builtin_neon_vst4_lane_v:
4798  case NEON::BI__builtin_neon_vst4q_lane_v:
4799  // Get the alignment for the argument in addition to the value;
4800  // we'll use it later.
4801  PtrOp0 = EmitPointerWithAlignment(E->getArg(0));
4802  Ops.push_back(PtrOp0.getPointer());
4803  continue;
4804  }
4805  }
4806  if (i == 1) {
4807  switch (BuiltinID) {
4808  case NEON::BI__builtin_neon_vld2_v:
4809  case NEON::BI__builtin_neon_vld2q_v:
4810  case NEON::BI__builtin_neon_vld3_v:
4811  case NEON::BI__builtin_neon_vld3q_v:
4812  case NEON::BI__builtin_neon_vld4_v:
4813  case NEON::BI__builtin_neon_vld4q_v:
4814  case NEON::BI__builtin_neon_vld2_lane_v:
4815  case NEON::BI__builtin_neon_vld2q_lane_v:
4816  case NEON::BI__builtin_neon_vld3_lane_v:
4817  case NEON::BI__builtin_neon_vld3q_lane_v:
4818  case NEON::BI__builtin_neon_vld4_lane_v:
4819  case NEON::BI__builtin_neon_vld4q_lane_v:
4820  case NEON::BI__builtin_neon_vld2_dup_v:
4821  case NEON::BI__builtin_neon_vld3_dup_v:
4822  case NEON::BI__builtin_neon_vld4_dup_v:
4823  // Get the alignment for the argument in addition to the value;
4824  // we'll use it later.
4825  PtrOp1 = EmitPointerWithAlignment(E->getArg(1));
4826  Ops.push_back(PtrOp1.getPointer());
4827  continue;
4828  }
4829  }
4830 
4831  if ((ICEArguments & (1 << i)) == 0) {
4832  Ops.push_back(EmitScalarExpr(E->getArg(i)));
4833  } else {
4834  // If this is required to be a constant, constant fold it so that we know
4835  // that the generated intrinsic gets a ConstantInt.
4836  llvm::APSInt Result;
4837  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
4838  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
4839  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
4840  }
4841  }
4842 
4843  switch (BuiltinID) {
4844  default: break;
4845 
4846  case NEON::BI__builtin_neon_vget_lane_i8:
4847  case NEON::BI__builtin_neon_vget_lane_i16:
4848  case NEON::BI__builtin_neon_vget_lane_i32:
4849  case NEON::BI__builtin_neon_vget_lane_i64:
4850  case NEON::BI__builtin_neon_vget_lane_f32:
4851  case NEON::BI__builtin_neon_vgetq_lane_i8:
4852  case NEON::BI__builtin_neon_vgetq_lane_i16:
4853  case NEON::BI__builtin_neon_vgetq_lane_i32:
4854  case NEON::BI__builtin_neon_vgetq_lane_i64:
4855  case NEON::BI__builtin_neon_vgetq_lane_f32:
4856  return Builder.CreateExtractElement(Ops[0], Ops[1], "vget_lane");
4857 
4858  case NEON::BI__builtin_neon_vset_lane_i8:
4859  case NEON::BI__builtin_neon_vset_lane_i16:
4860  case NEON::BI__builtin_neon_vset_lane_i32:
4861  case NEON::BI__builtin_neon_vset_lane_i64:
4862  case NEON::BI__builtin_neon_vset_lane_f32:
4863  case NEON::BI__builtin_neon_vsetq_lane_i8:
4864  case NEON::BI__builtin_neon_vsetq_lane_i16:
4865  case NEON::BI__builtin_neon_vsetq_lane_i32:
4866  case NEON::BI__builtin_neon_vsetq_lane_i64:
4867  case NEON::BI__builtin_neon_vsetq_lane_f32:
4868  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
4869 
4870  case NEON::BI__builtin_neon_vsha1h_u32:
4871  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1h), Ops,
4872  "vsha1h");
4873  case NEON::BI__builtin_neon_vsha1cq_u32:
4874  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1c), Ops,
4875  "vsha1h");
4876  case NEON::BI__builtin_neon_vsha1pq_u32:
4877  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1p), Ops,
4878  "vsha1h");
4879  case NEON::BI__builtin_neon_vsha1mq_u32:
4880  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_sha1m), Ops,
4881  "vsha1h");
4882 
4883  // The ARM _MoveToCoprocessor builtins put the input register value as
4884  // the first argument, but the LLVM intrinsic expects it as the third one.
4885  case ARM::BI_MoveToCoprocessor:
4886  case ARM::BI_MoveToCoprocessor2: {
4887  Function *F = CGM.getIntrinsic(BuiltinID == ARM::BI_MoveToCoprocessor ?
4888  Intrinsic::arm_mcr : Intrinsic::arm_mcr2);
4889  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0],
4890  Ops[3], Ops[4], Ops[5]});
4891  }
4892  case ARM::BI_BitScanForward:
4893  case ARM::BI_BitScanForward64:
4894  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
4895  case ARM::BI_BitScanReverse:
4896  case ARM::BI_BitScanReverse64:
4897  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
4898 
4899  case ARM::BI_InterlockedAnd64:
4900  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
4901  case ARM::BI_InterlockedExchange64:
4902  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
4903  case ARM::BI_InterlockedExchangeAdd64:
4904  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
4905  case ARM::BI_InterlockedExchangeSub64:
4906  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
4907  case ARM::BI_InterlockedOr64:
4908  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
4909  case ARM::BI_InterlockedXor64:
4910  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
4911  case ARM::BI_InterlockedDecrement64:
4912  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
4913  case ARM::BI_InterlockedIncrement64:
4914  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
4915  }
4916 
4917  // Get the last argument, which specifies the vector type.
4918  assert(HasExtraArg);
4919  llvm::APSInt Result;
4920  const Expr *Arg = E->getArg(E->getNumArgs()-1);
4921  if (!Arg->isIntegerConstantExpr(Result, getContext()))
4922  return nullptr;
4923 
4924  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f ||
4925  BuiltinID == ARM::BI__builtin_arm_vcvtr_d) {
4926  // Determine the overloaded type of this builtin.
4927  llvm::Type *Ty;
4928  if (BuiltinID == ARM::BI__builtin_arm_vcvtr_f)
4929  Ty = FloatTy;
4930  else
4931  Ty = DoubleTy;
4932 
4933  // Determine whether this is an unsigned conversion or not.
4934  bool usgn = Result.getZExtValue() == 1;
4935  unsigned Int = usgn ? Intrinsic::arm_vcvtru : Intrinsic::arm_vcvtr;
4936 
4937  // Call the appropriate intrinsic.
4938  Function *F = CGM.getIntrinsic(Int, Ty);
4939  return Builder.CreateCall(F, Ops, "vcvtr");
4940  }
4941 
4942  // Determine the type of this overloaded NEON intrinsic.
4943  NeonTypeFlags Type(Result.getZExtValue());
4944  bool usgn = Type.isUnsigned();
4945  bool rightShift = false;
4946 
4947  llvm::VectorType *VTy = GetNeonType(this, Type);
4948  llvm::Type *Ty = VTy;
4949  if (!Ty)
4950  return nullptr;
4951 
4952  // Many NEON builtins have identical semantics and uses in ARM and
4953  // AArch64. Emit these in a single function.
4954  auto IntrinsicMap = makeArrayRef(ARMSIMDIntrinsicMap);
4955  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
4956  IntrinsicMap, BuiltinID, NEONSIMDIntrinsicsProvenSorted);
4957  if (Builtin)
4958  return EmitCommonNeonBuiltinExpr(
4959  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
4960  Builtin->NameHint, Builtin->TypeModifier, E, Ops, PtrOp0, PtrOp1);
4961 
4962  unsigned Int;
4963  switch (BuiltinID) {
4964  default: return nullptr;
4965  case NEON::BI__builtin_neon_vld1q_lane_v:
4966  // Handle 64-bit integer elements as a special case. Use shuffles of
4967  // one-element vectors to avoid poor code for i64 in the backend.
4968  if (VTy->getElementType()->isIntegerTy(64)) {
4969  // Extract the other lane.
4970  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4971  uint32_t Lane = cast<ConstantInt>(Ops[2])->getZExtValue();
4972  Value *SV = llvm::ConstantVector::get(ConstantInt::get(Int32Ty, 1-Lane));
4973  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
4974  // Load the value as a one-element vector.
4975  Ty = llvm::VectorType::get(VTy->getElementType(), 1);
4976  llvm::Type *Tys[] = {Ty, Int8PtrTy};
4977  Function *F = CGM.getIntrinsic(Intrinsic::arm_neon_vld1, Tys);
4978  Value *Align = getAlignmentValue32(PtrOp0);
4979  Value *Ld = Builder.CreateCall(F, {Ops[0], Align});
4980  // Combine them.
4981  uint32_t Indices[] = {1 - Lane, Lane};
4982  SV = llvm::ConstantDataVector::get(getLLVMContext(), Indices);
4983  return Builder.CreateShuffleVector(Ops[1], Ld, SV, "vld1q_lane");
4984  }
4985  // fall through
4986  case NEON::BI__builtin_neon_vld1_lane_v: {
4987  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
4988  PtrOp0 = Builder.CreateElementBitCast(PtrOp0, VTy->getElementType());
4989  Value *Ld = Builder.CreateLoad(PtrOp0);
4990  return Builder.CreateInsertElement(Ops[1], Ld, Ops[2], "vld1_lane");
4991  }
4992  case NEON::BI__builtin_neon_vld2_dup_v:
4993  case NEON::BI__builtin_neon_vld3_dup_v:
4994  case NEON::BI__builtin_neon_vld4_dup_v: {
4995  // Handle 64-bit elements as a special-case. There is no "dup" needed.
4996  if (VTy->getElementType()->getPrimitiveSizeInBits() == 64) {
4997  switch (BuiltinID) {
4998  case NEON::BI__builtin_neon_vld2_dup_v:
4999  Int = Intrinsic::arm_neon_vld2;
5000  break;
5001  case NEON::BI__builtin_neon_vld3_dup_v:
5002  Int = Intrinsic::arm_neon_vld3;
5003  break;
5004  case NEON::BI__builtin_neon_vld4_dup_v:
5005  Int = Intrinsic::arm_neon_vld4;
5006  break;
5007  default: llvm_unreachable("unknown vld_dup intrinsic?");
5008  }
5009  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5010  Function *F = CGM.getIntrinsic(Int, Tys);
5011  llvm::Value *Align = getAlignmentValue32(PtrOp1);
5012  Ops[1] = Builder.CreateCall(F, {Ops[1], Align}, "vld_dup");
5013  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5014  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5015  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5016  }
5017  switch (BuiltinID) {
5018  case NEON::BI__builtin_neon_vld2_dup_v:
5019  Int = Intrinsic::arm_neon_vld2lane;
5020  break;
5021  case NEON::BI__builtin_neon_vld3_dup_v:
5022  Int = Intrinsic::arm_neon_vld3lane;
5023  break;
5024  case NEON::BI__builtin_neon_vld4_dup_v:
5025  Int = Intrinsic::arm_neon_vld4lane;
5026  break;
5027  default: llvm_unreachable("unknown vld_dup intrinsic?");
5028  }
5029  llvm::Type *Tys[] = {Ty, Int8PtrTy};
5030  Function *F = CGM.getIntrinsic(Int, Tys);
5031  llvm::StructType *STy = cast<llvm::StructType>(F->getReturnType());
5032 
5033  SmallVector<Value*, 6> Args;
5034  Args.push_back(Ops[1]);
5035  Args.append(STy->getNumElements(), UndefValue::get(Ty));
5036 
5037  llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
5038  Args.push_back(CI);
5039  Args.push_back(getAlignmentValue32(PtrOp1));
5040 
5041  Ops[1] = Builder.CreateCall(F, Args, "vld_dup");
5042  // splat lane 0 to all elts in each vector of the result.
5043  for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
5044  Value *Val = Builder.CreateExtractValue(Ops[1], i);
5045  Value *Elt = Builder.CreateBitCast(Val, Ty);
5046  Elt = EmitNeonSplat(Elt, CI);
5047  Elt = Builder.CreateBitCast(Elt, Val->getType());
5048  Ops[1] = Builder.CreateInsertValue(Ops[1], Elt, i);
5049  }
5050  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5051  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5052  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
5053  }
5054  case NEON::BI__builtin_neon_vqrshrn_n_v:
5055  Int =
5056  usgn ? Intrinsic::arm_neon_vqrshiftnu : Intrinsic::arm_neon_vqrshiftns;
5057  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n",
5058  1, true);
5059  case NEON::BI__builtin_neon_vqrshrun_n_v:
5060  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqrshiftnsu, Ty),
5061  Ops, "vqrshrun_n", 1, true);
5062  case NEON::BI__builtin_neon_vqshrn_n_v:
5063  Int = usgn ? Intrinsic::arm_neon_vqshiftnu : Intrinsic::arm_neon_vqshiftns;
5064  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n",
5065  1, true);
5066  case NEON::BI__builtin_neon_vqshrun_n_v:
5067  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vqshiftnsu, Ty),
5068  Ops, "vqshrun_n", 1, true);
5069  case NEON::BI__builtin_neon_vrecpe_v:
5070  case NEON::BI__builtin_neon_vrecpeq_v:
5071  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrecpe, Ty),
5072  Ops, "vrecpe");
5073  case NEON::BI__builtin_neon_vrshrn_n_v:
5074  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vrshiftn, Ty),
5075  Ops, "vrshrn_n", 1, true);
5076  case NEON::BI__builtin_neon_vrsra_n_v:
5077  case NEON::BI__builtin_neon_vrsraq_n_v:
5078  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5079  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5080  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, true);
5081  Int = usgn ? Intrinsic::arm_neon_vrshiftu : Intrinsic::arm_neon_vrshifts;
5082  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Ty), {Ops[1], Ops[2]});
5083  return Builder.CreateAdd(Ops[0], Ops[1], "vrsra_n");
5084  case NEON::BI__builtin_neon_vsri_n_v:
5085  case NEON::BI__builtin_neon_vsriq_n_v:
5086  rightShift = true;
5087  LLVM_FALLTHROUGH;
5088  case NEON::BI__builtin_neon_vsli_n_v:
5089  case NEON::BI__builtin_neon_vsliq_n_v:
5090  Ops[2] = EmitNeonShiftVector(Ops[2], Ty, rightShift);
5091  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vshiftins, Ty),
5092  Ops, "vsli_n");
5093  case NEON::BI__builtin_neon_vsra_n_v:
5094  case NEON::BI__builtin_neon_vsraq_n_v:
5095  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
5096  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
5097  return Builder.CreateAdd(Ops[0], Ops[1]);
5098  case NEON::BI__builtin_neon_vst1q_lane_v:
5099  // Handle 64-bit integer elements as a special case. Use a shuffle to get
5100  // a one-element vector and avoid poor code for i64 in the backend.
5101  if (VTy->getElementType()->isIntegerTy(64)) {
5102  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5103  Value *SV = llvm::ConstantVector::get(cast<llvm::Constant>(Ops[2]));
5104  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV);
5105  Ops[2] = getAlignmentValue32(PtrOp0);
5106  llvm::Type *Tys[] = {Int8PtrTy, Ops[1]->getType()};
5107  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::arm_neon_vst1,
5108  Tys), Ops);
5109  }
5110  // fall through
5111  case NEON::BI__builtin_neon_vst1_lane_v: {
5112  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
5113  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
5114  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
5115  auto St = Builder.CreateStore(Ops[1], Builder.CreateBitCast(PtrOp0, Ty));
5116  return St;
5117  }
5118  case NEON::BI__builtin_neon_vtbl1_v:
5119  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl1),
5120  Ops, "vtbl1");
5121  case NEON::BI__builtin_neon_vtbl2_v:
5122  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl2),
5123  Ops, "vtbl2");
5124  case NEON::BI__builtin_neon_vtbl3_v:
5125  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl3),
5126  Ops, "vtbl3");
5127  case NEON::BI__builtin_neon_vtbl4_v:
5128  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbl4),
5129  Ops, "vtbl4");
5130  case NEON::BI__builtin_neon_vtbx1_v:
5131  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx1),
5132  Ops, "vtbx1");
5133  case NEON::BI__builtin_neon_vtbx2_v:
5134  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx2),
5135  Ops, "vtbx2");
5136  case NEON::BI__builtin_neon_vtbx3_v:
5137  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx3),
5138  Ops, "vtbx3");
5139  case NEON::BI__builtin_neon_vtbx4_v:
5140  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::arm_neon_vtbx4),
5141  Ops, "vtbx4");
5142  }
5143 }
5144 
5145 static Value *EmitAArch64TblBuiltinExpr(CodeGenFunction &CGF, unsigned BuiltinID,
5146  const CallExpr *E,
5147  SmallVectorImpl<Value *> &Ops) {
5148  unsigned int Int = 0;
5149  const char *s = nullptr;
5150 
5151  switch (BuiltinID) {
5152  default:
5153  return nullptr;
5154  case NEON::BI__builtin_neon_vtbl1_v:
5155  case NEON::BI__builtin_neon_vqtbl1_v:
5156  case NEON::BI__builtin_neon_vqtbl1q_v:
5157  case NEON::BI__builtin_neon_vtbl2_v:
5158  case NEON::BI__builtin_neon_vqtbl2_v:
5159  case NEON::BI__builtin_neon_vqtbl2q_v:
5160  case NEON::BI__builtin_neon_vtbl3_v:
5161  case NEON::BI__builtin_neon_vqtbl3_v:
5162  case NEON::BI__builtin_neon_vqtbl3q_v:
5163  case NEON::BI__builtin_neon_vtbl4_v:
5164  case NEON::BI__builtin_neon_vqtbl4_v:
5165  case NEON::BI__builtin_neon_vqtbl4q_v:
5166  break;
5167  case NEON::BI__builtin_neon_vtbx1_v:
5168  case NEON::BI__builtin_neon_vqtbx1_v:
5169  case NEON::BI__builtin_neon_vqtbx1q_v:
5170  case NEON::BI__builtin_neon_vtbx2_v:
5171  case NEON::BI__builtin_neon_vqtbx2_v:
5172  case NEON::BI__builtin_neon_vqtbx2q_v:
5173  case NEON::BI__builtin_neon_vtbx3_v:
5174  case NEON::BI__builtin_neon_vqtbx3_v:
5175  case NEON::BI__builtin_neon_vqtbx3q_v:
5176  case NEON::BI__builtin_neon_vtbx4_v:
5177  case NEON::BI__builtin_neon_vqtbx4_v:
5178  case NEON::BI__builtin_neon_vqtbx4q_v:
5179  break;
5180  }
5181 
5182  assert(E->getNumArgs() >= 3);
5183 
5184  // Get the last argument, which specifies the vector type.
5185  llvm::APSInt Result;
5186  const Expr *Arg = E->getArg(E->getNumArgs() - 1);
5187  if (!Arg->isIntegerConstantExpr(Result, CGF.getContext()))
5188  return nullptr;
5189 
5190  // Determine the type of this overloaded NEON intrinsic.
5191  NeonTypeFlags Type(Result.getZExtValue());
5192  llvm::VectorType *Ty = GetNeonType(&CGF, Type);
5193  if (!Ty)
5194  return nullptr;
5195 
5196  CodeGen::CGBuilderTy &Builder = CGF.Builder;
5197 
5198  // AArch64 scalar builtins are not overloaded; they do not have an extra
5199  // argument that specifies the vector type, so we need to handle each case.
5200  switch (BuiltinID) {
5201  case NEON::BI__builtin_neon_vtbl1_v: {
5202  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 1), nullptr,
5203  Ops[1], Ty, Intrinsic::aarch64_neon_tbl1,
5204  "vtbl1");
5205  }
5206  case NEON::BI__builtin_neon_vtbl2_v: {
5207  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 2), nullptr,
5208  Ops[2], Ty, Intrinsic::aarch64_neon_tbl1,
5209  "vtbl1");
5210  }
5211  case NEON::BI__builtin_neon_vtbl3_v: {
5212  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 3), nullptr,
5213  Ops[3], Ty, Intrinsic::aarch64_neon_tbl2,
5214  "vtbl2");
5215  }
5216  case NEON::BI__builtin_neon_vtbl4_v: {
5217  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(0, 4), nullptr,
5218  Ops[4], Ty, Intrinsic::aarch64_neon_tbl2,
5219  "vtbl2");
5220  }
5221  case NEON::BI__builtin_neon_vtbx1_v: {
5222  Value *TblRes =
5223  packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 1), nullptr, Ops[2],
5224  Ty, Intrinsic::aarch64_neon_tbl1, "vtbl1");
5225 
5226  llvm::Constant *EightV = ConstantInt::get(Ty, 8);
5227  Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[2], EightV);
5228  CmpRes = Builder.CreateSExt(CmpRes, Ty);
5229 
5230  Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5231  Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5232  return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5233  }
5234  case NEON::BI__builtin_neon_vtbx2_v: {
5235  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 2), Ops[0],
5236  Ops[3], Ty, Intrinsic::aarch64_neon_tbx1,
5237  "vtbx1");
5238  }
5239  case NEON::BI__builtin_neon_vtbx3_v: {
5240  Value *TblRes =
5241  packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 3), nullptr, Ops[4],
5242  Ty, Intrinsic::aarch64_neon_tbl2, "vtbl2");
5243 
5244  llvm::Constant *TwentyFourV = ConstantInt::get(Ty, 24);
5245  Value *CmpRes = Builder.CreateICmp(ICmpInst::ICMP_UGE, Ops[4],
5246  TwentyFourV);
5247  CmpRes = Builder.CreateSExt(CmpRes, Ty);
5248 
5249  Value *EltsFromInput = Builder.CreateAnd(CmpRes, Ops[0]);
5250  Value *EltsFromTbl = Builder.CreateAnd(Builder.CreateNot(CmpRes), TblRes);
5251  return Builder.CreateOr(EltsFromInput, EltsFromTbl, "vtbx");
5252  }
5253  case NEON::BI__builtin_neon_vtbx4_v: {
5254  return packTBLDVectorList(CGF, makeArrayRef(Ops).slice(1, 4), Ops[0],
5255  Ops[5], Ty, Intrinsic::aarch64_neon_tbx2,
5256  "vtbx2");
5257  }
5258  case NEON::BI__builtin_neon_vqtbl1_v:
5259  case NEON::BI__builtin_neon_vqtbl1q_v:
5260  Int = Intrinsic::aarch64_neon_tbl1; s = "vtbl1"; break;
5261  case NEON::BI__builtin_neon_vqtbl2_v:
5262  case NEON::BI__builtin_neon_vqtbl2q_v: {
5263  Int = Intrinsic::aarch64_neon_tbl2; s = "vtbl2"; break;
5264  case NEON::BI__builtin_neon_vqtbl3_v:
5265  case NEON::BI__builtin_neon_vqtbl3q_v:
5266  Int = Intrinsic::aarch64_neon_tbl3; s = "vtbl3"; break;
5267  case NEON::BI__builtin_neon_vqtbl4_v:
5268  case NEON::BI__builtin_neon_vqtbl4q_v:
5269  Int = Intrinsic::aarch64_neon_tbl4; s = "vtbl4"; break;
5270  case NEON::BI__builtin_neon_vqtbx1_v:
5271  case NEON::BI__builtin_neon_vqtbx1q_v:
5272  Int = Intrinsic::aarch64_neon_tbx1; s = "vtbx1"; break;
5273  case NEON::BI__builtin_neon_vqtbx2_v:
5274  case NEON::BI__builtin_neon_vqtbx2q_v:
5275  Int = Intrinsic::aarch64_neon_tbx2; s = "vtbx2"; break;
5276  case NEON::BI__builtin_neon_vqtbx3_v:
5277  case NEON::BI__builtin_neon_vqtbx3q_v:
5278  Int = Intrinsic::aarch64_neon_tbx3; s = "vtbx3"; break;
5279  case NEON::BI__builtin_neon_vqtbx4_v:
5280  case NEON::BI__builtin_neon_vqtbx4q_v:
5281  Int = Intrinsic::aarch64_neon_tbx4; s = "vtbx4"; break;
5282  }
5283  }
5284 
5285  if (!Int)
5286  return nullptr;
5287 
5288  Function *F = CGF.CGM.getIntrinsic(Int, Ty);
5289  return CGF.EmitNeonCall(F, Ops, s);
5290 }
5291 
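// Widen a scalar i16 into lane 0 of a v4i16 vector so that the scalar
// saturating builtins below (e.g. vqdmlalh_s16) can reuse the vector NEON
// intrinsics.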
5292 Value *CodeGenFunction::vectorWrapScalar16(Value *Op) {
5293  llvm::Type *VTy = llvm::VectorType::get(Int16Ty, 4);
5294  Op = Builder.CreateBitCast(Op, Int16Ty);
5295  Value *V = UndefValue::get(VTy);
5296  llvm::Constant *CI = ConstantInt::get(SizeTy, 0);
5297  Op = Builder.CreateInsertElement(V, Op, CI);
5298  return Op;
5299 }
5300 
5301 Value *CodeGenFunction::EmitAArch64BuiltinExpr(unsigned BuiltinID,
5302  const CallExpr *E) {
5303  unsigned HintID = static_cast<unsigned>(-1);
5304  switch (BuiltinID) {
5305  default: break;
5306  case AArch64::BI__builtin_arm_nop:
5307  HintID = 0;
5308  break;
5309  case AArch64::BI__builtin_arm_yield:
5310  HintID = 1;
5311  break;
5312  case AArch64::BI__builtin_arm_wfe:
5313  HintID = 2;
5314  break;
5315  case AArch64::BI__builtin_arm_wfi:
5316  HintID = 3;
5317  break;
5318  case AArch64::BI__builtin_arm_sev:
5319  HintID = 4;
5320  break;
5321  case AArch64::BI__builtin_arm_sevl:
5322  HintID = 5;
5323  break;
5324  }
5325 
5326  if (HintID != static_cast<unsigned>(-1)) {
5327  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_hint);
5328  return Builder.CreateCall(F, llvm::ConstantInt::get(Int32Ty, HintID));
5329  }
5330 
5331  if (BuiltinID == AArch64::BI__builtin_arm_prefetch) {
5332  Value *Address = EmitScalarExpr(E->getArg(0));
5333  Value *RW = EmitScalarExpr(E->getArg(1));
5334  Value *CacheLevel = EmitScalarExpr(E->getArg(2));
5335  Value *RetentionPolicy = EmitScalarExpr(E->getArg(3));
5336  Value *IsData = EmitScalarExpr(E->getArg(4));
5337 
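  // The llvm.prefetch locality argument is derived from the builtin's cache
  // level: a temporal fetch of cache level N becomes locality 3 - N (closer
  // caches get higher locality), while a streaming fetch uses locality 0.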
5338  Value *Locality = nullptr;
5339  if (cast<llvm::ConstantInt>(RetentionPolicy)->isZero()) {
5340  // Temporal fetch, needs to convert cache level to locality.
5341  Locality = llvm::ConstantInt::get(Int32Ty,
5342  -cast<llvm::ConstantInt>(CacheLevel)->getValue() + 3);
5343  } else {
5344  // Streaming fetch.
5345  Locality = llvm::ConstantInt::get(Int32Ty, 0);
5346  }
5347 
5348  // FIXME: We need AArch64 specific LLVM intrinsic if we want to specify
5349  // PLDL3STRM or PLDL2STRM.
5350  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
5351  return Builder.CreateCall(F, {Address, RW, Locality, IsData});
5352  }
5353 
5354  if (BuiltinID == AArch64::BI__builtin_arm_rbit) {
5355  assert((getContext().getTypeSize(E->getType()) == 32) &&
5356  "rbit of unusual size!");
5357  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5358  return Builder.CreateCall(
5359  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5360  }
5361  if (BuiltinID == AArch64::BI__builtin_arm_rbit64) {
5362  assert((getContext().getTypeSize(E->getType()) == 64) &&
5363  "rbit of unusual size!");
5364  llvm::Value *Arg = EmitScalarExpr(E->getArg(0));
5365  return Builder.CreateCall(
5366  CGM.getIntrinsic(Intrinsic::bitreverse, Arg->getType()), Arg, "rbit");
5367  }
5368 
5369  if (BuiltinID == AArch64::BI__clear_cache) {
5370  assert(E->getNumArgs() == 2 && "__clear_cache takes 2 arguments");
5371  const FunctionDecl *FD = E->getDirectCallee();
5372  Value *Ops[2];
5373  for (unsigned i = 0; i < 2; i++)
5374  Ops[i] = EmitScalarExpr(E->getArg(i));
5375  llvm::Type *Ty = CGM.getTypes().ConvertType(FD->getType());
5376  llvm::FunctionType *FTy = cast<llvm::FunctionType>(Ty);
5377  StringRef Name = FD->getName();
5378  return EmitNounwindRuntimeCall(CGM.CreateRuntimeFunction(FTy, Name), Ops);
5379  }
5380 
5381  if ((BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5382  BuiltinID == AArch64::BI__builtin_arm_ldaex) &&
5383  getContext().getTypeSize(E->getType()) == 128) {
5384  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5385  ? Intrinsic::aarch64_ldaxp
5386  : Intrinsic::aarch64_ldxp);
5387 
5388  Value *LdPtr = EmitScalarExpr(E->getArg(0));
5389  Value *Val = Builder.CreateCall(F, Builder.CreateBitCast(LdPtr, Int8PtrTy),
5390  "ldxp");
5391 
5392  Value *Val0 = Builder.CreateExtractValue(Val, 1);
5393  Value *Val1 = Builder.CreateExtractValue(Val, 0);
5394  llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
5395  Val0 = Builder.CreateZExt(Val0, Int128Ty);
5396  Val1 = Builder.CreateZExt(Val1, Int128Ty);
5397 
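  // Reassemble the register pair returned by ldxp/ldaxp into one i128:
  // element 1 supplies the high 64 bits, element 0 the low 64 bits.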
5398  Value *ShiftCst = llvm::ConstantInt::get(Int128Ty, 64);
5399  Val = Builder.CreateShl(Val0, ShiftCst, "shl", true /* nuw */);
5400  Val = Builder.CreateOr(Val, Val1);
5401  return Builder.CreateBitCast(Val, ConvertType(E->getType()));
5402  } else if (BuiltinID == AArch64::BI__builtin_arm_ldrex ||
5403  BuiltinID == AArch64::BI__builtin_arm_ldaex) {
5404  Value *LoadAddr = EmitScalarExpr(E->getArg(0));
5405 
5406  QualType Ty = E->getType();
5407  llvm::Type *RealResTy = ConvertType(Ty);
5408  llvm::Type *PtrTy = llvm::IntegerType::get(
5409  getLLVMContext(), getContext().getTypeSize(Ty))->getPointerTo();
5410  LoadAddr = Builder.CreateBitCast(LoadAddr, PtrTy);
5411 
5412  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_ldaex
5413  ? Intrinsic::aarch64_ldaxr
5414  : Intrinsic::aarch64_ldxr,
5415  PtrTy);
5416  Value *Val = Builder.CreateCall(F, LoadAddr, "ldxr");
5417 
5418  if (RealResTy->isPointerTy())
5419  return Builder.CreateIntToPtr(Val, RealResTy);
5420 
5421  llvm::Type *IntResTy = llvm::IntegerType::get(
5422  getLLVMContext(), CGM.getDataLayout().getTypeSizeInBits(RealResTy));
5423  Val = Builder.CreateTruncOrBitCast(Val, IntResTy);
5424  return Builder.CreateBitCast(Val, RealResTy);
5425  }
5426 
5427  if ((BuiltinID == AArch64::BI__builtin_arm_strex ||
5428  BuiltinID == AArch64::BI__builtin_arm_stlex) &&
5429  getContext().getTypeSize(E->getArg(0)->getType()) == 128) {
5430  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5431  ? Intrinsic::aarch64_stlxp
5432  : Intrinsic::aarch64_stxp);
5433  llvm::Type *STy = llvm::StructType::get(Int64Ty, Int64Ty);
5434 
5435  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
5436  EmitAnyExprToMem(E->getArg(0), Tmp, Qualifiers(), /*init*/ true);
5437 
5438  Tmp = Builder.CreateBitCast(Tmp, llvm::PointerType::getUnqual(STy));
5439  llvm::Value *Val = Builder.CreateLoad(Tmp);
5440 
5441  Value *Arg0 = Builder.CreateExtractValue(Val, 0);
5442  Value *Arg1 = Builder.CreateExtractValue(Val, 1);
5443  Value *StPtr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)),
5444  Int8PtrTy);
5445  return Builder.CreateCall(F, {Arg0, Arg1, StPtr}, "stxp");
5446  }
5447 
5448  if (BuiltinID == AArch64::BI__builtin_arm_strex ||
5449  BuiltinID == AArch64::BI__builtin_arm_stlex) {
5450  Value *StoreVal = EmitScalarExpr(E->getArg(0));
5451  Value *StoreAddr = EmitScalarExpr(E->getArg(1));
5452 
5453  QualType Ty = E->getArg(0)->getType();
5454  llvm::Type *StoreTy = llvm::IntegerType::get(getLLVMContext(),
5455  getContext().getTypeSize(Ty));
5456  StoreAddr = Builder.CreateBitCast(StoreAddr, StoreTy->getPointerTo());
5457 
5458  if (StoreVal->getType()->isPointerTy())
5459  StoreVal = Builder.CreatePtrToInt(StoreVal, Int64Ty);
5460  else {
5461  llvm::Type *IntTy = llvm::IntegerType::get(
5462  getLLVMContext(),
5463  CGM.getDataLayout().getTypeSizeInBits(StoreVal->getType()));
5464  StoreVal = Builder.CreateBitCast(StoreVal, IntTy);
5465  StoreVal = Builder.CreateZExtOrBitCast(StoreVal, Int64Ty);
5466  }
5467 
5468  Function *F = CGM.getIntrinsic(BuiltinID == AArch64::BI__builtin_arm_stlex
5469  ? Intrinsic::aarch64_stlxr
5470  : Intrinsic::aarch64_stxr,
5471  StoreAddr->getType());
5472  return Builder.CreateCall(F, {StoreVal, StoreAddr}, "stxr");
5473  }
5474 
5475  if (BuiltinID == AArch64::BI__builtin_arm_clrex) {
5476  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_clrex);
5477  return Builder.CreateCall(F);
5478  }
5479 
5480  // CRC32
5481  Intrinsic::ID CRCIntrinsicID = Intrinsic::not_intrinsic;
5482  switch (BuiltinID) {
5483  case AArch64::BI__builtin_arm_crc32b:
5484  CRCIntrinsicID = Intrinsic::aarch64_crc32b; break;
5485  case AArch64::BI__builtin_arm_crc32cb:
5486  CRCIntrinsicID = Intrinsic::aarch64_crc32cb; break;
5487  case AArch64::BI__builtin_arm_crc32h:
5488  CRCIntrinsicID = Intrinsic::aarch64_crc32h; break;
5489  case AArch64::BI__builtin_arm_crc32ch:
5490  CRCIntrinsicID = Intrinsic::aarch64_crc32ch; break;
5491  case AArch64::BI__builtin_arm_crc32w:
5492  CRCIntrinsicID = Intrinsic::aarch64_crc32w; break;
5493  case AArch64::BI__builtin_arm_crc32cw:
5494  CRCIntrinsicID = Intrinsic::aarch64_crc32cw; break;
5495  case AArch64::BI__builtin_arm_crc32d:
5496  CRCIntrinsicID = Intrinsic::aarch64_crc32x; break;
5497  case AArch64::BI__builtin_arm_crc32cd:
5498  CRCIntrinsicID = Intrinsic::aarch64_crc32cx; break;
5499  }
5500 
5501  if (CRCIntrinsicID != Intrinsic::not_intrinsic) {
5502  Value *Arg0 = EmitScalarExpr(E->getArg(0));
5503  Value *Arg1 = EmitScalarExpr(E->getArg(1));
5504  Function *F = CGM.getIntrinsic(CRCIntrinsicID);
5505 
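  // The data operand must match the intrinsic's declared parameter type; the
  // byte and halfword variants take their data as i32, so narrower values are
  // zero-extended here.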
5506  llvm::Type *DataTy = F->getFunctionType()->getParamType(1);
5507  Arg1 = Builder.CreateZExtOrBitCast(Arg1, DataTy);
5508 
5509  return Builder.CreateCall(F, {Arg0, Arg1});
5510  }
5511 
5512  if (BuiltinID == AArch64::BI__builtin_arm_rsr ||
5513  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5514  BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5515  BuiltinID == AArch64::BI__builtin_arm_wsr ||
5516  BuiltinID == AArch64::BI__builtin_arm_wsr64 ||
5517  BuiltinID == AArch64::BI__builtin_arm_wsrp) {
5518 
5519  bool IsRead = BuiltinID == AArch64::BI__builtin_arm_rsr ||
5520  BuiltinID == AArch64::BI__builtin_arm_rsr64 ||
5521  BuiltinID == AArch64::BI__builtin_arm_rsrp;
5522 
5523  bool IsPointerBuiltin = BuiltinID == AArch64::BI__builtin_arm_rsrp ||
5524  BuiltinID == AArch64::BI__builtin_arm_wsrp;
5525 
5526  bool Is64Bit = BuiltinID != AArch64::BI__builtin_arm_rsr &&
5527  BuiltinID != AArch64::BI__builtin_arm_wsr;
5528 
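  // AArch64 system registers are read and written through 64-bit registers,
  // so RegisterType is always Int64Ty; only the value type varies (i32, i64,
  // or a pointer for the rsrp/wsrp forms).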
5529  llvm::Type *ValueType;
5530  llvm::Type *RegisterType = Int64Ty;
5531  if (IsPointerBuiltin) {
5532  ValueType = VoidPtrTy;
5533  } else if (Is64Bit) {
5534  ValueType = Int64Ty;
5535  } else {
5536  ValueType = Int32Ty;
5537  }
5538 
5539  return EmitSpecialRegisterBuiltin(*this, E, RegisterType, ValueType, IsRead);
5540  }
5541 
5542  // Find out if any arguments are required to be integer constant
5543  // expressions.
5544  unsigned ICEArguments = 0;
5545  ASTContext::GetBuiltinTypeError Error;
5546  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
5547  assert(Error == ASTContext::GE_None && "Should not codegen an error");
5548 
5549  llvm::SmallVector<Value*, 4> Ops;
5550  for (unsigned i = 0, e = E->getNumArgs() - 1; i != e; i++) {
5551  if ((ICEArguments & (1 << i)) == 0) {
5552  Ops.push_back(EmitScalarExpr(E->getArg(i)));
5553  } else {
5554  // If this is required to be a constant, constant fold it so that we know
5555  // that the generated intrinsic gets a ConstantInt.
5556  llvm::APSInt Result;
5557  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
5558  assert(IsConst && "Constant arg isn't actually constant?");
5559  (void)IsConst;
5560  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
5561  }
5562  }
5563 
5564  auto SISDMap = makeArrayRef(AArch64SISDIntrinsicMap);
5565  const NeonIntrinsicInfo *Builtin = findNeonIntrinsicInMap(
5566  SISDMap, BuiltinID, AArch64SISDIntrinsicsProvenSorted);
5567 
5568  if (Builtin) {
5569  Ops.push_back(EmitScalarExpr(E->getArg(E->getNumArgs() - 1)));
5570  Value *Result = EmitCommonNeonSISDBuiltinExpr(*this, *Builtin, Ops, E);
5571  assert(Result && "SISD intrinsic should have been handled");
5572  return Result;
5573  }
5574 
5575  llvm::APSInt Result;
5576  const Expr *Arg = E->getArg(E->getNumArgs()-1);
5577  NeonTypeFlags Type(0);
5578  if (Arg->isIntegerConstantExpr(Result, getContext()))
5579  // Determine the type of this overloaded NEON intrinsic.
5580  Type = NeonTypeFlags(Result.getZExtValue());
5581 
5582  bool usgn = Type.isUnsigned();
5583  bool quad = Type.isQuad();
5584 
5585  // Handle non-overloaded intrinsics first.
5586  switch (BuiltinID) {
5587  default: break;
5588  case NEON::BI__builtin_neon_vldrq_p128: {
5589  llvm::Type *Int128Ty = llvm::Type::getIntNTy(getLLVMContext(), 128);
5590  llvm::Type *Int128PTy = llvm::PointerType::get(Int128Ty, 0);
5591  Value *Ptr = Builder.CreateBitCast(EmitScalarExpr(E->getArg(0)), Int128PTy);
5592  return Builder.CreateAlignedLoad(Int128Ty, Ptr,
5593  CharUnits::fromQuantity(16));
5594  }
5595  case NEON::BI__builtin_neon_vstrq_p128: {
5596  llvm::Type *Int128PTy = llvm::Type::getIntNPtrTy(getLLVMContext(), 128);
5597  Value *Ptr = Builder.CreateBitCast(Ops[0], Int128PTy);
5598  return Builder.CreateDefaultAlignedStore(EmitScalarExpr(E->getArg(1)), Ptr);
5599  }
5600  case NEON::BI__builtin_neon_vcvts_u32_f32:
5601  case NEON::BI__builtin_neon_vcvtd_u64_f64:
5602  usgn = true;
5603  // FALL THROUGH
5604  case NEON::BI__builtin_neon_vcvts_s32_f32:
5605  case NEON::BI__builtin_neon_vcvtd_s64_f64: {
5606  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5607  bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5608  llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5609  llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5610  Ops[0] = Builder.CreateBitCast(Ops[0], FTy);
5611  if (usgn)
5612  return Builder.CreateFPToUI(Ops[0], InTy);
5613  return Builder.CreateFPToSI(Ops[0], InTy);
5614  }
5615  case NEON::BI__builtin_neon_vcvts_f32_u32:
5616  case NEON::BI__builtin_neon_vcvtd_f64_u64:
5617  usgn = true;
5618  // FALL THROUGH
5619  case NEON::BI__builtin_neon_vcvts_f32_s32:
5620  case NEON::BI__builtin_neon_vcvtd_f64_s64: {
5621  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5622  bool Is64 = Ops[0]->getType()->getPrimitiveSizeInBits() == 64;
5623  llvm::Type *InTy = Is64 ? Int64Ty : Int32Ty;
5624  llvm::Type *FTy = Is64 ? DoubleTy : FloatTy;
5625  Ops[0] = Builder.CreateBitCast(Ops[0], InTy);
5626  if (usgn)
5627  return Builder.CreateUIToFP(Ops[0], FTy);
5628  return Builder.CreateSIToFP(Ops[0], FTy);
5629  }
5630  case NEON::BI__builtin_neon_vpaddd_s64: {
5631  llvm::Type *Ty = llvm::VectorType::get(Int64Ty, 2);
5632  Value *Vec = EmitScalarExpr(E->getArg(0));
5633  // The vector is v2i64, so make sure it's bitcast to that.
5634  Vec = Builder.CreateBitCast(Vec, Ty, "v2i64");
5635  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5636  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5637  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5638  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5639  // Pairwise addition of a v2i64 into a scalar i64.
5640  return Builder.CreateAdd(Op0, Op1, "vpaddd");
5641  }
5642  case NEON::BI__builtin_neon_vpaddd_f64: {
5643  llvm::Type *Ty =
5644  llvm::VectorType::get(DoubleTy, 2);
5645  Value *Vec = EmitScalarExpr(E->getArg(0));
5646  // The vector is v2f64, so make sure it's bitcast to that.
5647  Vec = Builder.CreateBitCast(Vec, Ty, "v2f64");
5648  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5649  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5650  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5651  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5652  // Pairwise addition of a v2f64 into a scalar f64.
5653  return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5654  }
5655  case NEON::BI__builtin_neon_vpadds_f32: {
5656  llvm::Type *Ty =
5657  llvm::VectorType::get(FloatTy, 2);
5658  Value *Vec = EmitScalarExpr(E->getArg(0));
5659  // The vector is v2f32, so make sure it's bitcast to that.
5660  Vec = Builder.CreateBitCast(Vec, Ty, "v2f32");
5661  llvm::Value *Idx0 = llvm::ConstantInt::get(SizeTy, 0);
5662  llvm::Value *Idx1 = llvm::ConstantInt::get(SizeTy, 1);
5663  Value *Op0 = Builder.CreateExtractElement(Vec, Idx0, "lane0");
5664  Value *Op1 = Builder.CreateExtractElement(Vec, Idx1, "lane1");
5665  // Pairwise addition of a v2f32 into a scalar f32.
5666  return Builder.CreateFAdd(Op0, Op1, "vpaddd");
5667  }
5668  case NEON::BI__builtin_neon_vceqzd_s64:
5669  case NEON::BI__builtin_neon_vceqzd_f64:
5670  case NEON::BI__builtin_neon_vceqzs_f32:
5671  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5672  return EmitAArch64CompareBuiltinExpr(
5673  Ops[0], ConvertType(E->getCallReturnType(getContext())),
5674  ICmpInst::FCMP_OEQ, ICmpInst::ICMP_EQ, "vceqz");
5675  case NEON::BI__builtin_neon_vcgezd_s64:
5676  case NEON::BI__builtin_neon_vcgezd_f64:
5677  case NEON::BI__builtin_neon_vcgezs_f32:
5678  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5679  return EmitAArch64CompareBuiltinExpr(
5680  Ops[0], ConvertType(E->getCallReturnType(getContext())),
5681  ICmpInst::FCMP_OGE, ICmpInst::ICMP_SGE, "vcgez");
5682  case NEON::BI__builtin_neon_vclezd_s64:
5683  case NEON::BI__builtin_neon_vclezd_f64:
5684  case NEON::BI__builtin_neon_vclezs_f32:
5685  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5686  return EmitAArch64CompareBuiltinExpr(
5687  Ops[0], ConvertType(E->getCallReturnType(getContext())),
5688  ICmpInst::FCMP_OLE, ICmpInst::ICMP_SLE, "vclez");
5689  case NEON::BI__builtin_neon_vcgtzd_s64:
5690  case NEON::BI__builtin_neon_vcgtzd_f64:
5691  case NEON::BI__builtin_neon_vcgtzs_f32:
5692  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5693  return EmitAArch64CompareBuiltinExpr(
5694  Ops[0], ConvertType(E->getCallReturnType(getContext())),
5695  ICmpInst::FCMP_OGT, ICmpInst::ICMP_SGT, "vcgtz");
5696  case NEON::BI__builtin_neon_vcltzd_s64:
5697  case NEON::BI__builtin_neon_vcltzd_f64:
5698  case NEON::BI__builtin_neon_vcltzs_f32:
5699  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5700  return EmitAArch64CompareBuiltinExpr(
5701  Ops[0], ConvertType(E->getCallReturnType(getContext())),
5702  ICmpInst::FCMP_OLT, ICmpInst::ICMP_SLT, "vcltz");
5703 
5704  case NEON::BI__builtin_neon_vceqzd_u64: {
5705  Ops.push_back(EmitScalarExpr(E->getArg(0)));
5706  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5707  Ops[0] =
5708  Builder.CreateICmpEQ(Ops[0], llvm::Constant::getNullValue(Int64Ty));
5709  return Builder.CreateSExt(Ops[0], Int64Ty, "vceqzd");
5710  }
5711  case NEON::BI__builtin_neon_vceqd_f64:
5712  case NEON::BI__builtin_neon_vcled_f64:
5713  case NEON::BI__builtin_neon_vcltd_f64:
5714  case NEON::BI__builtin_neon_vcged_f64:
5715  case NEON::BI__builtin_neon_vcgtd_f64: {
5716  llvm::CmpInst::Predicate P;
5717  switch (BuiltinID) {
5718  default: llvm_unreachable("missing builtin ID in switch!");
5719  case NEON::BI__builtin_neon_vceqd_f64: P = llvm::FCmpInst::FCMP_OEQ; break;
5720  case NEON::BI__builtin_neon_vcled_f64: P = llvm::FCmpInst::FCMP_OLE; break;
5721  case NEON::BI__builtin_neon_vcltd_f64: P = llvm::FCmpInst::FCMP_OLT; break;
5722  case NEON::BI__builtin_neon_vcged_f64: P = llvm::FCmpInst::FCMP_OGE; break;
5723  case NEON::BI__builtin_neon_vcgtd_f64: P = llvm::FCmpInst::FCMP_OGT; break;
5724  }
5725  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5726  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
5727  Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
5728  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5729  return Builder.CreateSExt(Ops[0], Int64Ty, "vcmpd");
5730  }
5731  case NEON::BI__builtin_neon_vceqs_f32:
5732  case NEON::BI__builtin_neon_vcles_f32:
5733  case NEON::BI__builtin_neon_vclts_f32:
5734  case NEON::BI__builtin_neon_vcges_f32:
5735  case NEON::BI__builtin_neon_vcgts_f32: {
5736  llvm::CmpInst::Predicate P;
5737  switch (BuiltinID) {
5738  default: llvm_unreachable("missing builtin ID in switch!");
5739  case NEON::BI__builtin_neon_vceqs_f32: P = llvm::FCmpInst::FCMP_OEQ; break;
5740  case NEON::BI__builtin_neon_vcles_f32: P = llvm::FCmpInst::FCMP_OLE; break;
5741  case NEON::BI__builtin_neon_vclts_f32: P = llvm::FCmpInst::FCMP_OLT; break;
5742  case NEON::BI__builtin_neon_vcges_f32: P = llvm::FCmpInst::FCMP_OGE; break;
5743  case NEON::BI__builtin_neon_vcgts_f32: P = llvm::FCmpInst::FCMP_OGT; break;
5744  }
5745  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5746  Ops[0] = Builder.CreateBitCast(Ops[0], FloatTy);
5747  Ops[1] = Builder.CreateBitCast(Ops[1], FloatTy);
5748  Ops[0] = Builder.CreateFCmp(P, Ops[0], Ops[1]);
5749  return Builder.CreateSExt(Ops[0], Int32Ty, "vcmpd");
5750  }
5751  case NEON::BI__builtin_neon_vceqd_s64:
5752  case NEON::BI__builtin_neon_vceqd_u64:
5753  case NEON::BI__builtin_neon_vcgtd_s64:
5754  case NEON::BI__builtin_neon_vcgtd_u64:
5755  case NEON::BI__builtin_neon_vcltd_s64:
5756  case NEON::BI__builtin_neon_vcltd_u64:
5757  case NEON::BI__builtin_neon_vcged_u64:
5758  case NEON::BI__builtin_neon_vcged_s64:
5759  case NEON::BI__builtin_neon_vcled_u64:
5760  case NEON::BI__builtin_neon_vcled_s64: {
5761  llvm::CmpInst::Predicate P;
5762  switch (BuiltinID) {
5763  default: llvm_unreachable("missing builtin ID in switch!");
5764  case NEON::BI__builtin_neon_vceqd_s64:
5765  case NEON::BI__builtin_neon_vceqd_u64:P = llvm::ICmpInst::ICMP_EQ;break;
5766  case NEON::BI__builtin_neon_vcgtd_s64:P = llvm::ICmpInst::ICMP_SGT;break;
5767  case NEON::BI__builtin_neon_vcgtd_u64:P = llvm::ICmpInst::ICMP_UGT;break;
5768  case NEON::BI__builtin_neon_vcltd_s64:P = llvm::ICmpInst::ICMP_SLT;break;
5769  case NEON::BI__builtin_neon_vcltd_u64:P = llvm::ICmpInst::ICMP_ULT;break;
5770  case NEON::BI__builtin_neon_vcged_u64:P = llvm::ICmpInst::ICMP_UGE;break;
5771  case NEON::BI__builtin_neon_vcged_s64:P = llvm::ICmpInst::ICMP_SGE;break;
5772  case NEON::BI__builtin_neon_vcled_u64:P = llvm::ICmpInst::ICMP_ULE;break;
5773  case NEON::BI__builtin_neon_vcled_s64:P = llvm::ICmpInst::ICMP_SLE;break;
5774  }
5775  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5776  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5777  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5778  Ops[0] = Builder.CreateICmp(P, Ops[0], Ops[1]);
5779  return Builder.CreateSExt(Ops[0], Int64Ty, "vceqd");
5780  }
5781  case NEON::BI__builtin_neon_vtstd_s64:
5782  case NEON::BI__builtin_neon_vtstd_u64: {
5783  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5784  Ops[0] = Builder.CreateBitCast(Ops[0], Int64Ty);
5785  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5786  Ops[0] = Builder.CreateAnd(Ops[0], Ops[1]);
5787  Ops[0] = Builder.CreateICmp(ICmpInst::ICMP_NE, Ops[0],
5788  llvm::Constant::getNullValue(Int64Ty));
5789  return Builder.CreateSExt(Ops[0], Int64Ty, "vtstd");
5790  }
5791  case NEON::BI__builtin_neon_vset_lane_i8:
5792  case NEON::BI__builtin_neon_vset_lane_i16:
5793  case NEON::BI__builtin_neon_vset_lane_i32:
5794  case NEON::BI__builtin_neon_vset_lane_i64:
5795  case NEON::BI__builtin_neon_vset_lane_f32:
5796  case NEON::BI__builtin_neon_vsetq_lane_i8:
5797  case NEON::BI__builtin_neon_vsetq_lane_i16:
5798  case NEON::BI__builtin_neon_vsetq_lane_i32:
5799  case NEON::BI__builtin_neon_vsetq_lane_i64:
5800  case NEON::BI__builtin_neon_vsetq_lane_f32:
5801  Ops.push_back(EmitScalarExpr(E->getArg(2)));
5802  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5803  case NEON::BI__builtin_neon_vset_lane_f64:
5804  // The vector type needs a cast for the v1f64 variant.
5805  Ops[1] = Builder.CreateBitCast(Ops[1],
5806  llvm::VectorType::get(DoubleTy, 1));
5807  Ops.push_back(EmitScalarExpr(E->getArg(2)));
5808  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5809  case NEON::BI__builtin_neon_vsetq_lane_f64:
5810  // The vector type needs a cast for the v2f64 variant.
5811  Ops[1] = Builder.CreateBitCast(Ops[1],
5812  llvm::VectorType::get(DoubleTy, 2));
5813  Ops.push_back(EmitScalarExpr(E->getArg(2)));
5814  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vset_lane");
5815 
5816  case NEON::BI__builtin_neon_vget_lane_i8:
5817  case NEON::BI__builtin_neon_vdupb_lane_i8:
5818  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 8));
5819  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5820  "vget_lane");
5821  case NEON::BI__builtin_neon_vgetq_lane_i8:
5822  case NEON::BI__builtin_neon_vdupb_laneq_i8:
5823  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int8Ty, 16));
5824  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5825  "vgetq_lane");
5826  case NEON::BI__builtin_neon_vget_lane_i16:
5827  case NEON::BI__builtin_neon_vduph_lane_i16:
5828  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 4));
5829  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5830  "vget_lane");
5831  case NEON::BI__builtin_neon_vgetq_lane_i16:
5832  case NEON::BI__builtin_neon_vduph_laneq_i16:
5833  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int16Ty, 8));
5834  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5835  "vgetq_lane");
5836  case NEON::BI__builtin_neon_vget_lane_i32:
5837  case NEON::BI__builtin_neon_vdups_lane_i32:
5838  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 2));
5839  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5840  "vget_lane");
5841  case NEON::BI__builtin_neon_vdups_lane_f32:
5842  Ops[0] = Builder.CreateBitCast(Ops[0],
5843  llvm::VectorType::get(FloatTy, 2));
5844  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5845  "vdups_lane");
5846  case NEON::BI__builtin_neon_vgetq_lane_i32:
5847  case NEON::BI__builtin_neon_vdups_laneq_i32:
5848  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
5849  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5850  "vgetq_lane");
5851  case NEON::BI__builtin_neon_vget_lane_i64:
5852  case NEON::BI__builtin_neon_vdupd_lane_i64:
5853  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 1));
5854  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5855  "vget_lane");
5856  case NEON::BI__builtin_neon_vdupd_lane_f64:
5857  Ops[0] = Builder.CreateBitCast(Ops[0],
5858  llvm::VectorType::get(DoubleTy, 1));
5859  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5860  "vdupd_lane");
5861  case NEON::BI__builtin_neon_vgetq_lane_i64:
5862  case NEON::BI__builtin_neon_vdupd_laneq_i64:
5863  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
5864  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5865  "vgetq_lane");
5866  case NEON::BI__builtin_neon_vget_lane_f32:
5867  Ops[0] = Builder.CreateBitCast(Ops[0],
5868  llvm::VectorType::get(FloatTy, 2));
5869  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5870  "vget_lane");
5871  case NEON::BI__builtin_neon_vget_lane_f64:
5872  Ops[0] = Builder.CreateBitCast(Ops[0],
5873  llvm::VectorType::get(DoubleTy, 1));
5874  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5875  "vget_lane");
5876  case NEON::BI__builtin_neon_vgetq_lane_f32:
5877  case NEON::BI__builtin_neon_vdups_laneq_f32:
5878  Ops[0] = Builder.CreateBitCast(Ops[0],
5879  llvm::VectorType::get(FloatTy, 4));
5880  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5881  "vgetq_lane");
5882  case NEON::BI__builtin_neon_vgetq_lane_f64:
5883  case NEON::BI__builtin_neon_vdupd_laneq_f64:
5884  Ops[0] = Builder.CreateBitCast(Ops[0],
5885  llvm::VectorType::get(DoubleTy, 2));
5886  return Builder.CreateExtractElement(Ops[0], EmitScalarExpr(E->getArg(1)),
5887  "vgetq_lane");
5888  case NEON::BI__builtin_neon_vaddd_s64:
5889  case NEON::BI__builtin_neon_vaddd_u64:
5890  return Builder.CreateAdd(Ops[0], EmitScalarExpr(E->getArg(1)), "vaddd");
5891  case NEON::BI__builtin_neon_vsubd_s64:
5892  case NEON::BI__builtin_neon_vsubd_u64:
5893  return Builder.CreateSub(Ops[0], EmitScalarExpr(E->getArg(1)), "vsubd");
5894  case NEON::BI__builtin_neon_vqdmlalh_s16:
5895  case NEON::BI__builtin_neon_vqdmlslh_s16: {
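  // Scalar saturating doubling multiply-accumulate: widen the i16 operands to
  // lane 0 of v4i16 vectors, use the vector sqdmull intrinsic, extract the
  // i32 product from lane 0, then saturating-add or -subtract it into Ops[0].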
5896  SmallVector<Value *, 2> ProductOps;
5897  ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5898  ProductOps.push_back(vectorWrapScalar16(EmitScalarExpr(E->getArg(2))));
5899  llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5900  Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5901  ProductOps, "vqdmlXl");
5902  Constant *CI = ConstantInt::get(SizeTy, 0);
5903  Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
5904 
5905  unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlalh_s16
5906  ? Intrinsic::aarch64_neon_sqadd
5907  : Intrinsic::aarch64_neon_sqsub;
5908  return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int32Ty), Ops, "vqdmlXl");
5909  }
5910  case NEON::BI__builtin_neon_vqshlud_n_s64: {
5911  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5912  Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5913  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqshlu, Int64Ty),
5914  Ops, "vqshlu_n");
5915  }
5916  case NEON::BI__builtin_neon_vqshld_n_u64:
5917  case NEON::BI__builtin_neon_vqshld_n_s64: {
5918  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vqshld_n_u64
5919  ? Intrinsic::aarch64_neon_uqshl
5920  : Intrinsic::aarch64_neon_sqshl;
5921  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5922  Ops[1] = Builder.CreateZExt(Ops[1], Int64Ty);
5923  return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vqshl_n");
5924  }
5925  case NEON::BI__builtin_neon_vrshrd_n_u64:
5926  case NEON::BI__builtin_neon_vrshrd_n_s64: {
5927  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrshrd_n_u64
5928  ? Intrinsic::aarch64_neon_urshl
5929  : Intrinsic::aarch64_neon_srshl;
5930  Ops.push_back(EmitScalarExpr(E->getArg(1)));
5931  int SV = cast<ConstantInt>(Ops[1])->getSExtValue();
5932  Ops[1] = ConstantInt::get(Int64Ty, -SV);
5933  return EmitNeonCall(CGM.getIntrinsic(Int, Int64Ty), Ops, "vrshr_n");
5934  }
5935  case NEON::BI__builtin_neon_vrsrad_n_u64:
5936  case NEON::BI__builtin_neon_vrsrad_n_s64: {
5937  unsigned Int = BuiltinID == NEON::BI__builtin_neon_vrsrad_n_u64
5938  ? Intrinsic::aarch64_neon_urshl
5939  : Intrinsic::aarch64_neon_srshl;
5940  Ops[1] = Builder.CreateBitCast(Ops[1], Int64Ty);
5941  Ops.push_back(Builder.CreateNeg(EmitScalarExpr(E->getArg(2))));
5942  Ops[1] = Builder.CreateCall(CGM.getIntrinsic(Int, Int64Ty),
5943  {Ops[1], Builder.CreateSExt(Ops[2], Int64Ty)});
5944  return Builder.CreateAdd(Ops[0], Builder.CreateBitCast(Ops[1], Int64Ty));
5945  }
5946  case NEON::BI__builtin_neon_vshld_n_s64:
5947  case NEON::BI__builtin_neon_vshld_n_u64: {
5948  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5949  return Builder.CreateShl(
5950  Ops[0], ConstantInt::get(Int64Ty, Amt->getZExtValue()), "shld_n");
5951  }
5952  case NEON::BI__builtin_neon_vshrd_n_s64: {
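  // An IR ashr by 64 would be undefined, so a shift amount of 64 (allowed by
  // the builtin) is clamped to 63, which yields the same all-sign-bits result.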
5953  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5954  return Builder.CreateAShr(
5955  Ops[0], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5956  Amt->getZExtValue())),
5957  "shrd_n");
5958  }
5959  case NEON::BI__builtin_neon_vshrd_n_u64: {
5960  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(1)));
5961  uint64_t ShiftAmt = Amt->getZExtValue();
5962  // Right-shifting an unsigned value by its size yields 0.
5963  if (ShiftAmt == 64)
5964  return ConstantInt::get(Int64Ty, 0);
5965  return Builder.CreateLShr(Ops[0], ConstantInt::get(Int64Ty, ShiftAmt),
5966  "shrd_n");
5967  }
5968  case NEON::BI__builtin_neon_vsrad_n_s64: {
5969  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5970  Ops[1] = Builder.CreateAShr(
5971  Ops[1], ConstantInt::get(Int64Ty, std::min(static_cast<uint64_t>(63),
5972  Amt->getZExtValue())),
5973  "shrd_n");
5974  return Builder.CreateAdd(Ops[0], Ops[1]);
5975  }
5976  case NEON::BI__builtin_neon_vsrad_n_u64: {
5977  llvm::ConstantInt *Amt = cast<ConstantInt>(EmitScalarExpr(E->getArg(2)));
5978  uint64_t ShiftAmt = Amt->getZExtValue();
5979  // Right-shifting an unsigned value by its size yields 0.
5980  // As Op + 0 = Op, return Ops[0] directly.
5981  if (ShiftAmt == 64)
5982  return Ops[0];
5983  Ops[1] = Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, ShiftAmt),
5984  "shrd_n");
5985  return Builder.CreateAdd(Ops[0], Ops[1]);
5986  }
5987  case NEON::BI__builtin_neon_vqdmlalh_lane_s16:
5988  case NEON::BI__builtin_neon_vqdmlalh_laneq_s16:
5989  case NEON::BI__builtin_neon_vqdmlslh_lane_s16:
5990  case NEON::BI__builtin_neon_vqdmlslh_laneq_s16: {
5991  Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
5992  "lane");
5993  SmallVector<Value *, 2> ProductOps;
5994  ProductOps.push_back(vectorWrapScalar16(Ops[1]));
5995  ProductOps.push_back(vectorWrapScalar16(Ops[2]));
5996  llvm::Type *VTy = llvm::VectorType::get(Int32Ty, 4);
5997  Ops[1] = EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmull, VTy),
5998  ProductOps, "vqdmlXl");
5999  Constant *CI = ConstantInt::get(SizeTy, 0);
6000  Ops[1] = Builder.CreateExtractElement(Ops[1], CI, "lane0");
6001  Ops.pop_back();
6002 
6003  unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlalh_lane_s16 ||
6004  BuiltinID == NEON::BI__builtin_neon_vqdmlalh_laneq_s16)
6005  ? Intrinsic::aarch64_neon_sqadd
6006  : Intrinsic::aarch64_neon_sqsub;
6007  return EmitNeonCall(CGM.getIntrinsic(AccInt, Int32Ty), Ops, "vqdmlXl");
6008  }
6009  case NEON::BI__builtin_neon_vqdmlals_s32:
6010  case NEON::BI__builtin_neon_vqdmlsls_s32: {
6011  SmallVector<Value *, 2> ProductOps;
6012  ProductOps.push_back(Ops[1]);
6013  ProductOps.push_back(EmitScalarExpr(E->getArg(2)));
6014  Ops[1] =
6015  EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6016  ProductOps, "vqdmlXl");
6017 
6018  unsigned AccumInt = BuiltinID == NEON::BI__builtin_neon_vqdmlals_s32
6019  ? Intrinsic::aarch64_neon_sqadd
6020  : Intrinsic::aarch64_neon_sqsub;
6021  return EmitNeonCall(CGM.getIntrinsic(AccumInt, Int64Ty), Ops, "vqdmlXl");
6022  }
6023  case NEON::BI__builtin_neon_vqdmlals_lane_s32:
6024  case NEON::BI__builtin_neon_vqdmlals_laneq_s32:
6025  case NEON::BI__builtin_neon_vqdmlsls_lane_s32:
6026  case NEON::BI__builtin_neon_vqdmlsls_laneq_s32: {
6027  Ops[2] = Builder.CreateExtractElement(Ops[2], EmitScalarExpr(E->getArg(3)),
6028  "lane");
6029  SmallVector<Value *, 2> ProductOps;
6030  ProductOps.push_back(Ops[1]);
6031  ProductOps.push_back(Ops[2]);
6032  Ops[1] =
6033  EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_sqdmulls_scalar),
6034  ProductOps, "vqdmlXl");
6035  Ops.pop_back();
6036 
6037  unsigned AccInt = (BuiltinID == NEON::BI__builtin_neon_vqdmlals_lane_s32 ||
6038  BuiltinID == NEON::BI__builtin_neon_vqdmlals_laneq_s32)
6039  ? Intrinsic::aarch64_neon_sqadd
6040  : Intrinsic::aarch64_neon_sqsub;
6041  return EmitNeonCall(CGM.getIntrinsic(AccInt, Int64Ty), Ops, "vqdmlXl");
6042  }
6043  }
6044 
6045  llvm::VectorType *VTy = GetNeonType(this, Type);
6046  llvm::Type *Ty = VTy;
6047  if (!Ty)
6048  return nullptr;
6049 
6050  // Not all intrinsics handled by the common case work for AArch64 yet, so only
6051  // defer to common code if it's been added to our special map.
6052  Builtin = findNeonIntrinsicInMap(AArch64SIMDIntrinsicMap, BuiltinID,
6053  AArch64SIMDIntrinsicsProvenSorted);
6054 
6055  if (Builtin)
6056  return EmitCommonNeonBuiltinExpr(
6057  Builtin->BuiltinID, Builtin->LLVMIntrinsic, Builtin->AltLLVMIntrinsic,
6058  Builtin->NameHint, Builtin->TypeModifier, E, Ops,
6059  /*never use addresses*/ Address::invalid(), Address::invalid());
6060 
6061  if (Value *V = EmitAArch64TblBuiltinExpr(*this, BuiltinID, E, Ops))
6062  return V;
6063 
6064  unsigned Int;
6065  switch (BuiltinID) {
6066  default: return nullptr;
6067  case NEON::BI__builtin_neon_vbsl_v:
6068  case NEON::BI__builtin_neon_vbslq_v: {
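  // Bitwise select: result = (Ops[0] & Ops[1]) | (~Ops[0] & Ops[2]), done on
  // the integer form of the vector so it also covers FP element types.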
6069  llvm::Type *BitTy = llvm::VectorType::getInteger(VTy);
6070  Ops[0] = Builder.CreateBitCast(Ops[0], BitTy, "vbsl");
6071  Ops[1] = Builder.CreateBitCast(Ops[1], BitTy, "vbsl");
6072  Ops[2] = Builder.CreateBitCast(Ops[2], BitTy, "vbsl");
6073 
6074  Ops[1] = Builder.CreateAnd(Ops[0], Ops[1], "vbsl");
6075  Ops[2] = Builder.CreateAnd(Builder.CreateNot(Ops[0]), Ops[2], "vbsl");
6076  Ops[0] = Builder.CreateOr(Ops[1], Ops[2], "vbsl");
6077  return Builder.CreateBitCast(Ops[0], Ty);
6078  }
6079  case NEON::BI__builtin_neon_vfma_lane_v:
6080  case NEON::BI__builtin_neon_vfmaq_lane_v: { // Only used for FP types
6081  // The ARM builtins (and instructions) have the addend as the first
6082  // operand, but the 'fma' intrinsics have it last. Swap it around here.
6083  Value *Addend = Ops[0];
6084  Value *Multiplicand = Ops[1];
6085  Value *LaneSource = Ops[2];
6086  Ops[0] = Multiplicand;
6087  Ops[1] = LaneSource;
6088  Ops[2] = Addend;
6089 
6090  // Now adjust things to handle the lane access.
6091  llvm::Type *SourceTy = BuiltinID == NEON::BI__builtin_neon_vfmaq_lane_v ?
6092  llvm::VectorType::get(VTy->getElementType(), VTy->getNumElements() / 2) :
6093  VTy;
6094  llvm::Constant *cst = cast<Constant>(Ops[3]);
6095  Value *SV = llvm::ConstantVector::getSplat(VTy->getNumElements(), cst);
6096  Ops[1] = Builder.CreateBitCast(Ops[1], SourceTy);
6097  Ops[1] = Builder.CreateShuffleVector(Ops[1], Ops[1], SV, "lane");
6098 
6099  Ops.pop_back();
6100  Int = Intrinsic::fma;
6101  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "fmla");
6102  }
6103  case NEON::BI__builtin_neon_vfma_laneq_v: {
6104  llvm::VectorType *VTy = cast<llvm::VectorType>(Ty);
6105  // v1f64 fma should be mapped to Neon scalar f64 fma
6106  if (VTy && VTy->getElementType() == DoubleTy) {
6107  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6108  Ops[1] = Builder.CreateBitCast(Ops[1], DoubleTy);
6109  llvm::Type *VTy = GetNeonType(this,
6110  NeonTypeFlags(NeonTypeFlags::Float64, false, true));
6111  Ops[2] = Builder.CreateBitCast(Ops[2], VTy);
6112  Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6113  Value *F = CGM.getIntrinsic(Intrinsic::fma, DoubleTy);
6114  Value *Result = Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6115  return Builder.CreateBitCast(Result, Ty);
6116  }
6117  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6118  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6119  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6120 
6121  llvm::Type *STy = llvm::VectorType::get(VTy->getElementType(),
6122  VTy->getNumElements() * 2);
6123  Ops[2] = Builder.CreateBitCast(Ops[2], STy);
6124  Value* SV = llvm::ConstantVector::getSplat(VTy->getNumElements(),
6125  cast<ConstantInt>(Ops[3]));
6126  Ops[2] = Builder.CreateShuffleVector(Ops[2], Ops[2], SV, "lane");
6127 
6128  return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6129  }
6130  case NEON::BI__builtin_neon_vfmaq_laneq_v: {
6131  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6132  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6133  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6134 
6135  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6136  Ops[2] = EmitNeonSplat(Ops[2], cast<ConstantInt>(Ops[3]));
6137  return Builder.CreateCall(F, {Ops[2], Ops[1], Ops[0]});
6138  }
6139  case NEON::BI__builtin_neon_vfmas_lane_f32:
6140  case NEON::BI__builtin_neon_vfmas_laneq_f32:
6141  case NEON::BI__builtin_neon_vfmad_lane_f64:
6142  case NEON::BI__builtin_neon_vfmad_laneq_f64: {
6143  Ops.push_back(EmitScalarExpr(E->getArg(3)));
6144  llvm::Type *Ty = ConvertType(E->getCallReturnType(getContext()));
6145  Value *F = CGM.getIntrinsic(Intrinsic::fma, Ty);
6146  Ops[2] = Builder.CreateExtractElement(Ops[2], Ops[3], "extract");
6147  return Builder.CreateCall(F, {Ops[1], Ops[2], Ops[0]});
6148  }
6149  case NEON::BI__builtin_neon_vmull_v:
6150  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6151  Int = usgn ? Intrinsic::aarch64_neon_umull : Intrinsic::aarch64_neon_smull;
6152  if (Type.isPoly()) Int = Intrinsic::aarch64_neon_pmull;
6153  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmull");
6154  case NEON::BI__builtin_neon_vmax_v:
6155  case NEON::BI__builtin_neon_vmaxq_v:
6156  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6157  Int = usgn ? Intrinsic::aarch64_neon_umax : Intrinsic::aarch64_neon_smax;
6158  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmax;
6159  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmax");
6160  case NEON::BI__builtin_neon_vmin_v:
6161  case NEON::BI__builtin_neon_vminq_v:
6162  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6163  Int = usgn ? Intrinsic::aarch64_neon_umin : Intrinsic::aarch64_neon_smin;
6164  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmin;
6165  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmin");
6166  case NEON::BI__builtin_neon_vabd_v:
6167  case NEON::BI__builtin_neon_vabdq_v:
6168  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6169  Int = usgn ? Intrinsic::aarch64_neon_uabd : Intrinsic::aarch64_neon_sabd;
6170  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fabd;
6171  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vabd");
6172  case NEON::BI__builtin_neon_vpadal_v:
6173  case NEON::BI__builtin_neon_vpadalq_v: {
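  // Pairwise add-and-accumulate is built from two steps: a widening pairwise
  // add of Ops[1] ([su]addlp), followed by a plain vector add into the
  // accumulator Ops[0].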
6174  unsigned ArgElts = VTy->getNumElements();
6175  llvm::IntegerType *EltTy = cast<IntegerType>(VTy->getElementType());
6176  unsigned BitWidth = EltTy->getBitWidth();
6177  llvm::Type *ArgTy = llvm::VectorType::get(
6178  llvm::IntegerType::get(getLLVMContext(), BitWidth/2), 2*ArgElts);
6179  llvm::Type* Tys[2] = { VTy, ArgTy };
6180  Int = usgn ? Intrinsic::aarch64_neon_uaddlp : Intrinsic::aarch64_neon_saddlp;
6181  SmallVector<llvm::Value*, 1> TmpOps;
6182  TmpOps.push_back(Ops[1]);
6183  Function *F = CGM.getIntrinsic(Int, Tys);
6184  llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vpadal");
6185  llvm::Value *addend = Builder.CreateBitCast(Ops[0], tmp->getType());
6186  return Builder.CreateAdd(tmp, addend);
6187  }
6188  case NEON::BI__builtin_neon_vpmin_v:
6189  case NEON::BI__builtin_neon_vpminq_v:
6190  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6191  Int = usgn ? Intrinsic::aarch64_neon_uminp : Intrinsic::aarch64_neon_sminp;
6192  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fminp;
6193  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmin");
6194  case NEON::BI__builtin_neon_vpmax_v:
6195  case NEON::BI__builtin_neon_vpmaxq_v:
6196  // FIXME: improve sharing scheme to cope with 3 alternative LLVM intrinsics.
6197  Int = usgn ? Intrinsic::aarch64_neon_umaxp : Intrinsic::aarch64_neon_smaxp;
6198  if (Ty->isFPOrFPVectorTy()) Int = Intrinsic::aarch64_neon_fmaxp;
6199  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmax");
6200  case NEON::BI__builtin_neon_vminnm_v:
6201  case NEON::BI__builtin_neon_vminnmq_v:
6202  Int = Intrinsic::aarch64_neon_fminnm;
6203  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vminnm");
6204  case NEON::BI__builtin_neon_vmaxnm_v:
6205  case NEON::BI__builtin_neon_vmaxnmq_v:
6206  Int = Intrinsic::aarch64_neon_fmaxnm;
6207  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmaxnm");
6208  case NEON::BI__builtin_neon_vrecpss_f32: {
6209  Ops.push_back(EmitScalarExpr(E->getArg(1)));
6210  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, FloatTy),
6211  Ops, "vrecps");
6212  }
6213  case NEON::BI__builtin_neon_vrecpsd_f64: {
6214  Ops.push_back(EmitScalarExpr(E->getArg(1)));
6215  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_frecps, DoubleTy),
6216  Ops, "vrecps");
6217  }
6218  case NEON::BI__builtin_neon_vqshrun_n_v:
6219  Int = Intrinsic::aarch64_neon_sqshrun;
6220  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrun_n");
6221  case NEON::BI__builtin_neon_vqrshrun_n_v:
6222  Int = Intrinsic::aarch64_neon_sqrshrun;
6223  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrun_n");
6224  case NEON::BI__builtin_neon_vqshrn_n_v:
6225  Int = usgn ? Intrinsic::aarch64_neon_uqshrn : Intrinsic::aarch64_neon_sqshrn;
6226  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqshrn_n");
6227  case NEON::BI__builtin_neon_vrshrn_n_v:
6228  Int = Intrinsic::aarch64_neon_rshrn;
6229  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrshrn_n");
6230  case NEON::BI__builtin_neon_vqrshrn_n_v:
6231  Int = usgn ? Intrinsic::aarch64_neon_uqrshrn : Intrinsic::aarch64_neon_sqrshrn;
6232  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vqrshrn_n");
6233  case NEON::BI__builtin_neon_vrnda_v:
6234  case NEON::BI__builtin_neon_vrndaq_v: {
6235  Int = Intrinsic::round;
6236  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrnda");
6237  }
6238  case NEON::BI__builtin_neon_vrndi_v:
6239  case NEON::BI__builtin_neon_vrndiq_v: {
6240  Int = Intrinsic::nearbyint;
6241  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndi");
6242  }
6243  case NEON::BI__builtin_neon_vrndm_v:
6244  case NEON::BI__builtin_neon_vrndmq_v: {
6245  Int = Intrinsic::floor;
6246  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndm");
6247  }
6248  case NEON::BI__builtin_neon_vrndn_v:
6249  case NEON::BI__builtin_neon_vrndnq_v: {
6250  Int = Intrinsic::aarch64_neon_frintn;
6251  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndn");
6252  }
6253  case NEON::BI__builtin_neon_vrndp_v:
6254  case NEON::BI__builtin_neon_vrndpq_v: {
6255  Int = Intrinsic::ceil;
6256  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndp");
6257  }
6258  case NEON::BI__builtin_neon_vrndx_v:
6259  case NEON::BI__builtin_neon_vrndxq_v: {
6260  Int = Intrinsic::rint;
6261  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndx");
6262  }
6263  case NEON::BI__builtin_neon_vrnd_v:
6264  case NEON::BI__builtin_neon_vrndq_v: {
6265  Int = Intrinsic::trunc;
6266  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrndz");
6267  }
6268  case NEON::BI__builtin_neon_vceqz_v:
6269  case NEON::BI__builtin_neon_vceqzq_v:
6270  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OEQ,
6271  ICmpInst::ICMP_EQ, "vceqz");
6272  case NEON::BI__builtin_neon_vcgez_v:
6273  case NEON::BI__builtin_neon_vcgezq_v:
6274  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGE,
6275  ICmpInst::ICMP_SGE, "vcgez");
6276  case NEON::BI__builtin_neon_vclez_v:
6277  case NEON::BI__builtin_neon_vclezq_v:
6278  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLE,
6279  ICmpInst::ICMP_SLE, "vclez");
6280  case NEON::BI__builtin_neon_vcgtz_v:
6281  case NEON::BI__builtin_neon_vcgtzq_v:
6282  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OGT,
6283  ICmpInst::ICMP_SGT, "vcgtz");
6284  case NEON::BI__builtin_neon_vcltz_v:
6285  case NEON::BI__builtin_neon_vcltzq_v:
6286  return EmitAArch64CompareBuiltinExpr(Ops[0], Ty, ICmpInst::FCMP_OLT,
6287  ICmpInst::ICMP_SLT, "vcltz");
6288  case NEON::BI__builtin_neon_vcvt_f64_v:
6289  case NEON::BI__builtin_neon_vcvtq_f64_v:
6290  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6291  Ty = GetNeonType(this, NeonTypeFlags(NeonTypeFlags::Float64, false, quad));
6292  return usgn ? Builder.CreateUIToFP(Ops[0], Ty, "vcvt")
6293  : Builder.CreateSIToFP(Ops[0], Ty, "vcvt");
6294  case NEON::BI__builtin_neon_vcvt_f64_f32: {
6295  assert(Type.getEltType() == NeonTypeFlags::Float64 && quad &&
6296  "unexpected vcvt_f64_f32 builtin");
6297  NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float32, false, false);
6298  Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6299 
6300  return Builder.CreateFPExt(Ops[0], Ty, "vcvt");
6301  }
6302  case NEON::BI__builtin_neon_vcvt_f32_f64: {
6303  assert(Type.getEltType() == NeonTypeFlags::Float32 &&
6304  "unexpected vcvt_f32_f64 builtin");
6305  NeonTypeFlags SrcFlag = NeonTypeFlags(NeonTypeFlags::Float64, false, true);
6306  Ops[0] = Builder.CreateBitCast(Ops[0], GetNeonType(this, SrcFlag));
6307 
6308  return Builder.CreateFPTrunc(Ops[0], Ty, "vcvt");
6309  }
6310  case NEON::BI__builtin_neon_vcvt_s32_v:
6311  case NEON::BI__builtin_neon_vcvt_u32_v:
6312  case NEON::BI__builtin_neon_vcvt_s64_v:
6313  case NEON::BI__builtin_neon_vcvt_u64_v:
6314  case NEON::BI__builtin_neon_vcvtq_s32_v:
6315  case NEON::BI__builtin_neon_vcvtq_u32_v:
6316  case NEON::BI__builtin_neon_vcvtq_s64_v:
6317  case NEON::BI__builtin_neon_vcvtq_u64_v: {
6318  Ops[0] = Builder.CreateBitCast(Ops[0], GetFloatNeonType(this, Type));
6319  if (usgn)
6320  return Builder.CreateFPToUI(Ops[0], Ty);
6321  return Builder.CreateFPToSI(Ops[0], Ty);
6322  }
6323  case NEON::BI__builtin_neon_vcvta_s32_v:
6324  case NEON::BI__builtin_neon_vcvtaq_s32_v:
6325  case NEON::BI__builtin_neon_vcvta_u32_v:
6326  case NEON::BI__builtin_neon_vcvtaq_u32_v:
6327  case NEON::BI__builtin_neon_vcvta_s64_v:
6328  case NEON::BI__builtin_neon_vcvtaq_s64_v:
6329  case NEON::BI__builtin_neon_vcvta_u64_v:
6330  case NEON::BI__builtin_neon_vcvtaq_u64_v: {
6331  Int = usgn ? Intrinsic::aarch64_neon_fcvtau : Intrinsic::aarch64_neon_fcvtas;
6332  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6333  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvta");
6334  }
6335  case NEON::BI__builtin_neon_vcvtm_s32_v:
6336  case NEON::BI__builtin_neon_vcvtmq_s32_v:
6337  case NEON::BI__builtin_neon_vcvtm_u32_v:
6338  case NEON::BI__builtin_neon_vcvtmq_u32_v:
6339  case NEON::BI__builtin_neon_vcvtm_s64_v:
6340  case NEON::BI__builtin_neon_vcvtmq_s64_v:
6341  case NEON::BI__builtin_neon_vcvtm_u64_v:
6342  case NEON::BI__builtin_neon_vcvtmq_u64_v: {
6343  Int = usgn ? Intrinsic::aarch64_neon_fcvtmu : Intrinsic::aarch64_neon_fcvtms;
6344  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6345  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtm");
6346  }
6347  case NEON::BI__builtin_neon_vcvtn_s32_v:
6348  case NEON::BI__builtin_neon_vcvtnq_s32_v:
6349  case NEON::BI__builtin_neon_vcvtn_u32_v:
6350  case NEON::BI__builtin_neon_vcvtnq_u32_v:
6351  case NEON::BI__builtin_neon_vcvtn_s64_v:
6352  case NEON::BI__builtin_neon_vcvtnq_s64_v:
6353  case NEON::BI__builtin_neon_vcvtn_u64_v:
6354  case NEON::BI__builtin_neon_vcvtnq_u64_v: {
6355  Int = usgn ? Intrinsic::aarch64_neon_fcvtnu : Intrinsic::aarch64_neon_fcvtns;
6356  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6357  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtn");
6358  }
6359  case NEON::BI__builtin_neon_vcvtp_s32_v:
6360  case NEON::BI__builtin_neon_vcvtpq_s32_v:
6361  case NEON::BI__builtin_neon_vcvtp_u32_v:
6362  case NEON::BI__builtin_neon_vcvtpq_u32_v:
6363  case NEON::BI__builtin_neon_vcvtp_s64_v:
6364  case NEON::BI__builtin_neon_vcvtpq_s64_v:
6365  case NEON::BI__builtin_neon_vcvtp_u64_v:
6366  case NEON::BI__builtin_neon_vcvtpq_u64_v: {
6367  Int = usgn ? Intrinsic::aarch64_neon_fcvtpu : Intrinsic::aarch64_neon_fcvtps;
6368  llvm::Type *Tys[2] = { Ty, GetFloatNeonType(this, Type) };
6369  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vcvtp");
6370  }
6371  case NEON::BI__builtin_neon_vmulx_v:
6372  case NEON::BI__builtin_neon_vmulxq_v: {
6373  Int = Intrinsic::aarch64_neon_fmulx;
6374  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vmulx");
6375  }
6376  case NEON::BI__builtin_neon_vmul_lane_v:
6377  case NEON::BI__builtin_neon_vmul_laneq_v: {
6378  // v1f64 vmul_lane should be mapped to Neon scalar mul lane
6379  bool Quad = false;
6380  if (BuiltinID == NEON::BI__builtin_neon_vmul_laneq_v)
6381  Quad = true;
6382  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6383  llvm::Type *VTy = GetNeonType(this,
6384  NeonTypeFlags(NeonTypeFlags::Float64, false, Quad));
6385  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6386  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2], "extract");
6387  Value *Result = Builder.CreateFMul(Ops[0], Ops[1]);
6388  return Builder.CreateBitCast(Result, Ty);
6389  }
6390  case NEON::BI__builtin_neon_vnegd_s64:
6391  return Builder.CreateNeg(EmitScalarExpr(E->getArg(0)), "vnegd");
6392  case NEON::BI__builtin_neon_vpmaxnm_v:
6393  case NEON::BI__builtin_neon_vpmaxnmq_v: {
6394  Int = Intrinsic::aarch64_neon_fmaxnmp;
6395  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpmaxnm");
6396  }
6397  case NEON::BI__builtin_neon_vpminnm_v:
6398  case NEON::BI__builtin_neon_vpminnmq_v: {
6399  Int = Intrinsic::aarch64_neon_fminnmp;
6400  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vpminnm");
6401  }
6402  case NEON::BI__builtin_neon_vsqrt_v:
6403  case NEON::BI__builtin_neon_vsqrtq_v: {
6404  Int = Intrinsic::sqrt;
6405  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6406  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqrt");
6407  }
6408  case NEON::BI__builtin_neon_vrbit_v:
6409  case NEON::BI__builtin_neon_vrbitq_v: {
6410  Int = Intrinsic::aarch64_neon_rbit;
6411  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vrbit");
6412  }
6413  case NEON::BI__builtin_neon_vaddv_u8:
6414  // FIXME: These are handled by the AArch64 scalar code.
6415  usgn = true;
6416  // FALLTHROUGH
6417  case NEON::BI__builtin_neon_vaddv_s8: {
6418  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6419  Ty = Int32Ty;
6420  VTy = llvm::VectorType::get(Int8Ty, 8);
6421  llvm::Type *Tys[2] = { Ty, VTy };
6422  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6423  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6424  return Builder.CreateTrunc(Ops[0], Int8Ty);
6425  }
6426  case NEON::BI__builtin_neon_vaddv_u16:
6427  usgn = true;
6428  // FALLTHROUGH
6429  case NEON::BI__builtin_neon_vaddv_s16: {
6430  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6431  Ty = Int32Ty;
6432  VTy = llvm::VectorType::get(Int16Ty, 4);
6433  llvm::Type *Tys[2] = { Ty, VTy };
6434  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6435  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6436  return Builder.CreateTrunc(Ops[0], Int16Ty);
6437  }
6438  case NEON::BI__builtin_neon_vaddvq_u8:
6439  usgn = true;
6440  // FALLTHROUGH
6441  case NEON::BI__builtin_neon_vaddvq_s8: {
6442  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6443  Ty = Int32Ty;
6444  VTy = llvm::VectorType::get(Int8Ty, 16);
6445  llvm::Type *Tys[2] = { Ty, VTy };
6446  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6447  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6448  return Builder.CreateTrunc(Ops[0], Int8Ty);
6449  }
6450  case NEON::BI__builtin_neon_vaddvq_u16:
6451  usgn = true;
6452  // FALLTHROUGH
6453  case NEON::BI__builtin_neon_vaddvq_s16: {
6454  Int = usgn ? Intrinsic::aarch64_neon_uaddv : Intrinsic::aarch64_neon_saddv;
6455  Ty = Int32Ty;
6456  VTy = llvm::VectorType::get(Int16Ty, 8);
6457  llvm::Type *Tys[2] = { Ty, VTy };
6458  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6459  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddv");
6460  return Builder.CreateTrunc(Ops[0], Int16Ty);
6461  }
6462  case NEON::BI__builtin_neon_vmaxv_u8: {
6463  Int = Intrinsic::aarch64_neon_umaxv;
6464  Ty = Int32Ty;
6465  VTy = llvm::VectorType::get(Int8Ty, 8);
6466  llvm::Type *Tys[2] = { Ty, VTy };
6467  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6468  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6469  return Builder.CreateTrunc(Ops[0], Int8Ty);
6470  }
6471  case NEON::BI__builtin_neon_vmaxv_u16: {
6472  Int = Intrinsic::aarch64_neon_umaxv;
6473  Ty = Int32Ty;
6474  VTy = llvm::VectorType::get(Int16Ty, 4);
6475  llvm::Type *Tys[2] = { Ty, VTy };
6476  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6477  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6478  return Builder.CreateTrunc(Ops[0], Int16Ty);
6479  }
6480  case NEON::BI__builtin_neon_vmaxvq_u8: {
6481  Int = Intrinsic::aarch64_neon_umaxv;
6482  Ty = Int32Ty;
6483  VTy = llvm::VectorType::get(Int8Ty, 16);
6484  llvm::Type *Tys[2] = { Ty, VTy };
6485  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6486  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6487  return Builder.CreateTrunc(Ops[0], Int8Ty);
6488  }
6489  case NEON::BI__builtin_neon_vmaxvq_u16: {
6490  Int = Intrinsic::aarch64_neon_umaxv;
6491  Ty = Int32Ty;
6492  VTy = llvm::VectorType::get(Int16Ty, 8);
6493  llvm::Type *Tys[2] = { Ty, VTy };
6494  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6495  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6496  return Builder.CreateTrunc(Ops[0], Int16Ty);
6497  }
6498  case NEON::BI__builtin_neon_vmaxv_s8: {
6499  Int = Intrinsic::aarch64_neon_smaxv;
6500  Ty = Int32Ty;
6501  VTy = llvm::VectorType::get(Int8Ty, 8);
6502  llvm::Type *Tys[2] = { Ty, VTy };
6503  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6504  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6505  return Builder.CreateTrunc(Ops[0], Int8Ty);
6506  }
6507  case NEON::BI__builtin_neon_vmaxv_s16: {
6508  Int = Intrinsic::aarch64_neon_smaxv;
6509  Ty = Int32Ty;
6510  VTy = llvm::VectorType::get(Int16Ty, 4);
6511  llvm::Type *Tys[2] = { Ty, VTy };
6512  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6513  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6514  return Builder.CreateTrunc(Ops[0], Int16Ty);
6515  }
6516  case NEON::BI__builtin_neon_vmaxvq_s8: {
6517  Int = Intrinsic::aarch64_neon_smaxv;
6518  Ty = Int32Ty;
6519  VTy = llvm::VectorType::get(Int8Ty, 16);
6520  llvm::Type *Tys[2] = { Ty, VTy };
6521  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6522  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6523  return Builder.CreateTrunc(Ops[0], Int8Ty);
6524  }
6525  case NEON::BI__builtin_neon_vmaxvq_s16: {
6526  Int = Intrinsic::aarch64_neon_smaxv;
6527  Ty = Int32Ty;
6528  VTy = llvm::VectorType::get(Int16Ty, 8);
6529  llvm::Type *Tys[2] = { Ty, VTy };
6530  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6531  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vmaxv");
6532  return Builder.CreateTrunc(Ops[0], Int16Ty);
6533  }
6534  case NEON::BI__builtin_neon_vminv_u8: {
6535  Int = Intrinsic::aarch64_neon_uminv;
6536  Ty = Int32Ty;
6537  VTy = llvm::VectorType::get(Int8Ty, 8);
6538  llvm::Type *Tys[2] = { Ty, VTy };
6539  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6540  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6541  return Builder.CreateTrunc(Ops[0], Int8Ty);
6542  }
6543  case NEON::BI__builtin_neon_vminv_u16: {
6544  Int = Intrinsic::aarch64_neon_uminv;
6545  Ty = Int32Ty;
6546  VTy = llvm::VectorType::get(Int16Ty, 4);
6547  llvm::Type *Tys[2] = { Ty, VTy };
6548  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6549  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6550  return Builder.CreateTrunc(Ops[0], Int16Ty);
6551  }
6552  case NEON::BI__builtin_neon_vminvq_u8: {
6553  Int = Intrinsic::aarch64_neon_uminv;
6554  Ty = Int32Ty;
6555  VTy = llvm::VectorType::get(Int8Ty, 16);
6556  llvm::Type *Tys[2] = { Ty, VTy };
6557  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6558  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6559  return Builder.CreateTrunc(Ops[0], Int8Ty);
6560  }
6561  case NEON::BI__builtin_neon_vminvq_u16: {
6562  Int = Intrinsic::aarch64_neon_uminv;
6563  Ty = Int32Ty;
6564  VTy = llvm::VectorType::get(Int16Ty, 8);
6565  llvm::Type *Tys[2] = { Ty, VTy };
6566  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6567  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6568  return Builder.CreateTrunc(Ops[0], Int16Ty);
6569  }
6570  case NEON::BI__builtin_neon_vminv_s8: {
6571  Int = Intrinsic::aarch64_neon_sminv;
6572  Ty = Int32Ty;
6573  VTy = llvm::VectorType::get(Int8Ty, 8);
6574  llvm::Type *Tys[2] = { Ty, VTy };
6575  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6576  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6577  return Builder.CreateTrunc(Ops[0], Int8Ty);
6578  }
6579  case NEON::BI__builtin_neon_vminv_s16: {
6580  Int = Intrinsic::aarch64_neon_sminv;
6581  Ty = Int32Ty;
6582  VTy = llvm::VectorType::get(Int16Ty, 4);
6583  llvm::Type *Tys[2] = { Ty, VTy };
6584  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6585  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6586  return Builder.CreateTrunc(Ops[0], Int16Ty);
6587  }
6588  case NEON::BI__builtin_neon_vminvq_s8: {
6589  Int = Intrinsic::aarch64_neon_sminv;
6590  Ty = Int32Ty;
6591  VTy = llvm::VectorType::get(Int8Ty, 16);
6592  llvm::Type *Tys[2] = { Ty, VTy };
6593  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6594  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6595  return Builder.CreateTrunc(Ops[0], Int8Ty);
6596  }
6597  case NEON::BI__builtin_neon_vminvq_s16: {
6598  Int = Intrinsic::aarch64_neon_sminv;
6599  Ty = Int32Ty;
6600  VTy = llvm::VectorType::get(Int16Ty, 8);
6601  llvm::Type *Tys[2] = { Ty, VTy };
6602  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6603  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vminv");
6604  return Builder.CreateTrunc(Ops[0], Int16Ty);
6605  }
6606  case NEON::BI__builtin_neon_vmul_n_f64: {
6607  Ops[0] = Builder.CreateBitCast(Ops[0], DoubleTy);
6608  Value *RHS = Builder.CreateBitCast(EmitScalarExpr(E->getArg(1)), DoubleTy);
6609  return Builder.CreateFMul(Ops[0], RHS);
6610  }
6611  case NEON::BI__builtin_neon_vaddlv_u8: {
6612  Int = Intrinsic::aarch64_neon_uaddlv;
6613  Ty = Int32Ty;
6614  VTy = llvm::VectorType::get(Int8Ty, 8);
6615  llvm::Type *Tys[2] = { Ty, VTy };
6616  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6617  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6618  return Builder.CreateTrunc(Ops[0], Int16Ty);
6619  }
6620  case NEON::BI__builtin_neon_vaddlv_u16: {
6621  Int = Intrinsic::aarch64_neon_uaddlv;
6622  Ty = Int32Ty;
6623  VTy = llvm::VectorType::get(Int16Ty, 4);
6624  llvm::Type *Tys[2] = { Ty, VTy };
6625  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6626  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6627  }
6628  case NEON::BI__builtin_neon_vaddlvq_u8: {
6629  Int = Intrinsic::aarch64_neon_uaddlv;
6630  Ty = Int32Ty;
6631  VTy = llvm::VectorType::get(Int8Ty, 16);
6632  llvm::Type *Tys[2] = { Ty, VTy };
6633  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6634  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6635  return Builder.CreateTrunc(Ops[0], Int16Ty);
6636  }
6637  case NEON::BI__builtin_neon_vaddlvq_u16: {
6638  Int = Intrinsic::aarch64_neon_uaddlv;
6639  Ty = Int32Ty;
6640  VTy = llvm::VectorType::get(Int16Ty, 8);
6641  llvm::Type *Tys[2] = { Ty, VTy };
6642  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6643  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6644  }
6645  case NEON::BI__builtin_neon_vaddlv_s8: {
6646  Int = Intrinsic::aarch64_neon_saddlv;
6647  Ty = Int32Ty;
6648  VTy = llvm::VectorType::get(Int8Ty, 8);
6649  llvm::Type *Tys[2] = { Ty, VTy };
6650  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6651  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6652  return Builder.CreateTrunc(Ops[0], Int16Ty);
6653  }
6654  case NEON::BI__builtin_neon_vaddlv_s16: {
6655  Int = Intrinsic::aarch64_neon_saddlv;
6656  Ty = Int32Ty;
6657  VTy = llvm::VectorType::get(Int16Ty, 4);
6658  llvm::Type *Tys[2] = { Ty, VTy };
6659  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6660  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6661  }
6662  case NEON::BI__builtin_neon_vaddlvq_s8: {
6663  Int = Intrinsic::aarch64_neon_saddlv;
6664  Ty = Int32Ty;
6665  VTy = llvm::VectorType::get(Int8Ty, 16);
6666  llvm::Type *Tys[2] = { Ty, VTy };
6667  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6668  Ops[0] = EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6669  return Builder.CreateTrunc(Ops[0], Int16Ty);
6670  }
6671  case NEON::BI__builtin_neon_vaddlvq_s16: {
6672  Int = Intrinsic::aarch64_neon_saddlv;
6673  Ty = Int32Ty;
6674  VTy = llvm::VectorType::get(Int16Ty, 8);
6675  llvm::Type *Tys[2] = { Ty, VTy };
6676  Ops.push_back(EmitScalarExpr(E->getArg(0)));
6677  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "vaddlv");
6678  }
6679  case NEON::BI__builtin_neon_vsri_n_v:
6680  case NEON::BI__builtin_neon_vsriq_n_v: {
6681  Int = Intrinsic::aarch64_neon_vsri;
6682  llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6683  return EmitNeonCall(Intrin, Ops, "vsri_n");
6684  }
6685  case NEON::BI__builtin_neon_vsli_n_v:
6686  case NEON::BI__builtin_neon_vsliq_n_v: {
6687  Int = Intrinsic::aarch64_neon_vsli;
6688  llvm::Function *Intrin = CGM.getIntrinsic(Int, Ty);
6689  return EmitNeonCall(Intrin, Ops, "vsli_n");
6690  }
6691  case NEON::BI__builtin_neon_vsra_n_v:
6692  case NEON::BI__builtin_neon_vsraq_n_v:
6693  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6694  Ops[1] = EmitNeonRShiftImm(Ops[1], Ops[2], Ty, usgn, "vsra_n");
6695  return Builder.CreateAdd(Ops[0], Ops[1]);
6696  case NEON::BI__builtin_neon_vrsra_n_v:
6697  case NEON::BI__builtin_neon_vrsraq_n_v: {
6698  Int = usgn ? Intrinsic::aarch64_neon_urshl : Intrinsic::aarch64_neon_srshl;
6699  SmallVector<llvm::Value*,2> TmpOps;
6700  TmpOps.push_back(Ops[1]);
6701  TmpOps.push_back(Ops[2]);
6702  Function* F = CGM.getIntrinsic(Int, Ty);
6703  llvm::Value *tmp = EmitNeonCall(F, TmpOps, "vrshr_n", 1, true);
6704  Ops[0] = Builder.CreateBitCast(Ops[0], VTy);
6705  return Builder.CreateAdd(Ops[0], tmp);
6706  }
6707  // FIXME: Sharing loads & stores with 32-bit is complicated by the absence
6708  // of an Align parameter here.
6709  case NEON::BI__builtin_neon_vld1_x2_v:
6710  case NEON::BI__builtin_neon_vld1q_x2_v:
6711  case NEON::BI__builtin_neon_vld1_x3_v:
6712  case NEON::BI__builtin_neon_vld1q_x3_v:
6713  case NEON::BI__builtin_neon_vld1_x4_v:
6714  case NEON::BI__builtin_neon_vld1q_x4_v: {
6715  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6716  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6717  llvm::Type *Tys[2] = { VTy, PTy };
6718  unsigned Int;
6719  switch (BuiltinID) {
6720  case NEON::BI__builtin_neon_vld1_x2_v:
6721  case NEON::BI__builtin_neon_vld1q_x2_v:
6722  Int = Intrinsic::aarch64_neon_ld1x2;
6723  break;
6724  case NEON::BI__builtin_neon_vld1_x3_v:
6725  case NEON::BI__builtin_neon_vld1q_x3_v:
6726  Int = Intrinsic::aarch64_neon_ld1x3;
6727  break;
6728  case NEON::BI__builtin_neon_vld1_x4_v:
6729  case NEON::BI__builtin_neon_vld1q_x4_v:
6730  Int = Intrinsic::aarch64_neon_ld1x4;
6731  break;
6732  }
6733  Function *F = CGM.getIntrinsic(Int, Tys);
6734  Ops[1] = Builder.CreateCall(F, Ops[1], "vld1xN");
6735  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6736  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6737  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6738  }
6739  case NEON::BI__builtin_neon_vst1_x2_v:
6740  case NEON::BI__builtin_neon_vst1q_x2_v:
6741  case NEON::BI__builtin_neon_vst1_x3_v:
6742  case NEON::BI__builtin_neon_vst1q_x3_v:
6743  case NEON::BI__builtin_neon_vst1_x4_v:
6744  case NEON::BI__builtin_neon_vst1q_x4_v: {
6745  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy->getVectorElementType());
6746  llvm::Type *Tys[2] = { VTy, PTy };
6747  unsigned Int;
6748  switch (BuiltinID) {
6749  case NEON::BI__builtin_neon_vst1_x2_v:
6750  case NEON::BI__builtin_neon_vst1q_x2_v:
6751  Int = Intrinsic::aarch64_neon_st1x2;
6752  break;
6753  case NEON::BI__builtin_neon_vst1_x3_v:
6754  case NEON::BI__builtin_neon_vst1q_x3_v:
6755  Int = Intrinsic::aarch64_neon_st1x3;
6756  break;
6757  case NEON::BI__builtin_neon_vst1_x4_v:
6758  case NEON::BI__builtin_neon_vst1q_x4_v:
6759  Int = Intrinsic::aarch64_neon_st1x4;
6760  break;
6761  }
6762  std::rotate(Ops.begin(), Ops.begin() + 1, Ops.end());
6763  return EmitNeonCall(CGM.getIntrinsic(Int, Tys), Ops, "");
6764  }
6765  case NEON::BI__builtin_neon_vld1_v:
6766  case NEON::BI__builtin_neon_vld1q_v: {
6767  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6768  auto Alignment = CharUnits::fromQuantity(
6769  BuiltinID == NEON::BI__builtin_neon_vld1_v ? 8 : 16);
6770  return Builder.CreateAlignedLoad(VTy, Ops[0], Alignment);
6771  }
6772  case NEON::BI__builtin_neon_vst1_v:
6773  case NEON::BI__builtin_neon_vst1q_v:
6774  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(VTy));
6775  Ops[1] = Builder.CreateBitCast(Ops[1], VTy);
6776  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6777  case NEON::BI__builtin_neon_vld1_lane_v:
6778  case NEON::BI__builtin_neon_vld1q_lane_v: {
6779  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6780  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6781  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6782  auto Alignment = CharUnits::fromQuantity(
6783  BuiltinID == NEON::BI__builtin_neon_vld1_lane_v ? 8 : 16);
6784  Ops[0] =
6785  Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6786  return Builder.CreateInsertElement(Ops[1], Ops[0], Ops[2], "vld1_lane");
6787  }
6788  case NEON::BI__builtin_neon_vld1_dup_v:
6789  case NEON::BI__builtin_neon_vld1q_dup_v: {
6790  Value *V = UndefValue::get(Ty);
6791  Ty = llvm::PointerType::getUnqual(VTy->getElementType());
6792  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6793  auto Alignment = CharUnits::fromQuantity(
6794  BuiltinID == NEON::BI__builtin_neon_vld1_dup_v ? 8 : 16);
6795  Ops[0] =
6796  Builder.CreateAlignedLoad(VTy->getElementType(), Ops[0], Alignment);
6797  llvm::Constant *CI = ConstantInt::get(Int32Ty, 0);
6798  Ops[0] = Builder.CreateInsertElement(V, Ops[0], CI);
6799  return EmitNeonSplat(Ops[0], CI);
6800  }
6801  case NEON::BI__builtin_neon_vst1_lane_v:
6802  case NEON::BI__builtin_neon_vst1q_lane_v:
6803  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6804  Ops[1] = Builder.CreateExtractElement(Ops[1], Ops[2]);
6805  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6806  return Builder.CreateDefaultAlignedStore(Ops[1],
6807  Builder.CreateBitCast(Ops[0], Ty));
6808  case NEON::BI__builtin_neon_vld2_v:
6809  case NEON::BI__builtin_neon_vld2q_v: {
6810  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6811  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6812  llvm::Type *Tys[2] = { VTy, PTy };
6813  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2, Tys);
6814  Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6815  Ops[0] = Builder.CreateBitCast(Ops[0],
6816  llvm::PointerType::getUnqual(Ops[1]->getType()));
6817  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6818  }
6819  case NEON::BI__builtin_neon_vld3_v:
6820  case NEON::BI__builtin_neon_vld3q_v: {
6821  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6822  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6823  llvm::Type *Tys[2] = { VTy, PTy };
6824  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3, Tys);
6825  Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6826  Ops[0] = Builder.CreateBitCast(Ops[0],
6827  llvm::PointerType::getUnqual(Ops[1]->getType()));
6828  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6829  }
6830  case NEON::BI__builtin_neon_vld4_v:
6831  case NEON::BI__builtin_neon_vld4q_v: {
6832  llvm::Type *PTy = llvm::PointerType::getUnqual(VTy);
6833  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6834  llvm::Type *Tys[2] = { VTy, PTy };
6835  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4, Tys);
6836  Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6837  Ops[0] = Builder.CreateBitCast(Ops[0],
6838  llvm::PointerType::getUnqual(Ops[1]->getType()));
6839  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6840  }
6841  case NEON::BI__builtin_neon_vld2_dup_v:
6842  case NEON::BI__builtin_neon_vld2q_dup_v: {
6843  llvm::Type *PTy =
6844  llvm::PointerType::getUnqual(VTy->getElementType());
6845  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6846  llvm::Type *Tys[2] = { VTy, PTy };
6847  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2r, Tys);
6848  Ops[1] = Builder.CreateCall(F, Ops[1], "vld2");
6849  Ops[0] = Builder.CreateBitCast(Ops[0],
6850  llvm::PointerType::getUnqual(Ops[1]->getType()));
6851  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6852  }
6853  case NEON::BI__builtin_neon_vld3_dup_v:
6854  case NEON::BI__builtin_neon_vld3q_dup_v: {
6855  llvm::Type *PTy =
6856  llvm::PointerType::getUnqual(VTy->getElementType());
6857  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6858  llvm::Type *Tys[2] = { VTy, PTy };
6859  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3r, Tys);
6860  Ops[1] = Builder.CreateCall(F, Ops[1], "vld3");
6861  Ops[0] = Builder.CreateBitCast(Ops[0],
6862  llvm::PointerType::getUnqual(Ops[1]->getType()));
6863  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6864  }
6865  case NEON::BI__builtin_neon_vld4_dup_v:
6866  case NEON::BI__builtin_neon_vld4q_dup_v: {
6867  llvm::Type *PTy =
6868  llvm::PointerType::getUnqual(VTy->getElementType());
6869  Ops[1] = Builder.CreateBitCast(Ops[1], PTy);
6870  llvm::Type *Tys[2] = { VTy, PTy };
6871  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4r, Tys);
6872  Ops[1] = Builder.CreateCall(F, Ops[1], "vld4");
6873  Ops[0] = Builder.CreateBitCast(Ops[0],
6874  llvm::PointerType::getUnqual(Ops[1]->getType()));
6875  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6876  }
6877  case NEON::BI__builtin_neon_vld2_lane_v:
6878  case NEON::BI__builtin_neon_vld2q_lane_v: {
6879  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6880  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld2lane, Tys);
6881  Ops.push_back(Ops[1]);
6882  Ops.erase(Ops.begin()+1);
6883  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6884  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6885  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6886  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld2_lane");
6887  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6888  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6889  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6890  }
6891  case NEON::BI__builtin_neon_vld3_lane_v:
6892  case NEON::BI__builtin_neon_vld3q_lane_v: {
6893  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6894  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld3lane, Tys);
6895  Ops.push_back(Ops[1]);
6896  Ops.erase(Ops.begin()+1);
6897  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6898  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6899  Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6900  Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6901  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld3_lane");
6902  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6903  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6904  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6905  }
6906  case NEON::BI__builtin_neon_vld4_lane_v:
6907  case NEON::BI__builtin_neon_vld4q_lane_v: {
6908  llvm::Type *Tys[2] = { VTy, Ops[1]->getType() };
6909  Function *F = CGM.getIntrinsic(Intrinsic::aarch64_neon_ld4lane, Tys);
6910  Ops.push_back(Ops[1]);
6911  Ops.erase(Ops.begin()+1);
6912  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6913  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6914  Ops[3] = Builder.CreateBitCast(Ops[3], Ty);
6915  Ops[4] = Builder.CreateBitCast(Ops[4], Ty);
6916  Ops[5] = Builder.CreateZExt(Ops[5], Int64Ty);
6917  Ops[1] = Builder.CreateCall(F, makeArrayRef(Ops).slice(1), "vld4_lane");
6918  Ty = llvm::PointerType::getUnqual(Ops[1]->getType());
6919  Ops[0] = Builder.CreateBitCast(Ops[0], Ty);
6920  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
6921  }
6922  case NEON::BI__builtin_neon_vst2_v:
6923  case NEON::BI__builtin_neon_vst2q_v: {
6924  Ops.push_back(Ops[0]);
6925  Ops.erase(Ops.begin());
6926  llvm::Type *Tys[2] = { VTy, Ops[2]->getType() };
6927  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2, Tys),
6928  Ops, "");
6929  }
6930  case NEON::BI__builtin_neon_vst2_lane_v:
6931  case NEON::BI__builtin_neon_vst2q_lane_v: {
6932  Ops.push_back(Ops[0]);
6933  Ops.erase(Ops.begin());
6934  Ops[2] = Builder.CreateZExt(Ops[2], Int64Ty);
6935  llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6936  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st2lane, Tys),
6937  Ops, "");
6938  }
6939  case NEON::BI__builtin_neon_vst3_v:
6940  case NEON::BI__builtin_neon_vst3q_v: {
6941  Ops.push_back(Ops[0]);
6942  Ops.erase(Ops.begin());
6943  llvm::Type *Tys[2] = { VTy, Ops[3]->getType() };
6944  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3, Tys),
6945  Ops, "");
6946  }
6947  case NEON::BI__builtin_neon_vst3_lane_v:
6948  case NEON::BI__builtin_neon_vst3q_lane_v: {
6949  Ops.push_back(Ops[0]);
6950  Ops.erase(Ops.begin());
6951  Ops[3] = Builder.CreateZExt(Ops[3], Int64Ty);
6952  llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6953  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st3lane, Tys),
6954  Ops, "");
6955  }
6956  case NEON::BI__builtin_neon_vst4_v:
6957  case NEON::BI__builtin_neon_vst4q_v: {
6958  Ops.push_back(Ops[0]);
6959  Ops.erase(Ops.begin());
6960  llvm::Type *Tys[2] = { VTy, Ops[4]->getType() };
6961  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4, Tys),
6962  Ops, "");
6963  }
6964  case NEON::BI__builtin_neon_vst4_lane_v:
6965  case NEON::BI__builtin_neon_vst4q_lane_v: {
6966  Ops.push_back(Ops[0]);
6967  Ops.erase(Ops.begin());
6968  Ops[4] = Builder.CreateZExt(Ops[4], Int64Ty);
6969  llvm::Type *Tys[2] = { VTy, Ops[5]->getType() };
6970  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_st4lane, Tys),
6971  Ops, "");
6972  }
6973  case NEON::BI__builtin_neon_vtrn_v:
6974  case NEON::BI__builtin_neon_vtrnq_v: {
6975  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6976  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6977  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6978  Value *SV = nullptr;
6979 
6980  for (unsigned vi = 0; vi != 2; ++vi) {
6981  SmallVector<uint32_t, 16> Indices;
6982  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
6983  Indices.push_back(i+vi);
6984  Indices.push_back(i+e+vi);
6985  }
6986  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
6987  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vtrn");
6988  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
6989  }
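  // For example, with 4-element vectors a and b the two stored results use
  // shuffle masks {0,4,2,6} and {1,5,3,7}, i.e. {a0,b0,a2,b2} and
  // {a1,b1,a3,b3} (the TRN1/TRN2 transpose pairs).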
6990  return SV;
6991  }
6992  case NEON::BI__builtin_neon_vuzp_v:
6993  case NEON::BI__builtin_neon_vuzpq_v: {
6994  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
6995  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
6996  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
6997  Value *SV = nullptr;
6998 
6999  for (unsigned vi = 0; vi != 2; ++vi) {
7000  SmallVector<uint32_t, 16> Indices;
7001  for (unsigned i = 0, e = VTy->getNumElements(); i != e; ++i)
7002  Indices.push_back(2*i+vi);
7003 
7004  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7005  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vuzp");
7006  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7007  }
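  // For example, with 4-element vectors a and b the masks are {0,2,4,6} and
  // {1,3,5,7}, producing {a0,a2,b0,b2} (the even elements, UZP1) and
  // {a1,a3,b1,b3} (the odd elements, UZP2).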
7008  return SV;
7009  }
7010  case NEON::BI__builtin_neon_vzip_v:
7011  case NEON::BI__builtin_neon_vzipq_v: {
7012  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::PointerType::getUnqual(Ty));
7013  Ops[1] = Builder.CreateBitCast(Ops[1], Ty);
7014  Ops[2] = Builder.CreateBitCast(Ops[2], Ty);
7015  Value *SV = nullptr;
7016 
7017  for (unsigned vi = 0; vi != 2; ++vi) {
7018  SmallVector<uint32_t, 16> Indices;
7019  for (unsigned i = 0, e = VTy->getNumElements(); i != e; i += 2) {
7020  Indices.push_back((i + vi*e) >> 1);
7021  Indices.push_back(((i + vi*e) >> 1)+e);
7022  }
7023  Value *Addr = Builder.CreateConstInBoundsGEP1_32(Ty, Ops[0], vi);
7024  SV = Builder.CreateShuffleVector(Ops[1], Ops[2], Indices, "vzip");
7025  SV = Builder.CreateDefaultAlignedStore(SV, Addr);
7026  }
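  // For example, with 4-element vectors a and b the masks are {0,4,1,5} and
  // {2,6,3,7}, interleaving the low halves ({a0,b0,a1,b1}, ZIP1) and the
  // high halves ({a2,b2,a3,b3}, ZIP2).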
7027  return SV;
7028  }
7029  case NEON::BI__builtin_neon_vqtbl1q_v: {
7030  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl1, Ty),
7031  Ops, "vtbl1");
7032  }
7033  case NEON::BI__builtin_neon_vqtbl2q_v: {
7034  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl2, Ty),
7035  Ops, "vtbl2");
7036  }
7037  case NEON::BI__builtin_neon_vqtbl3q_v: {
7038  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl3, Ty),
7039  Ops, "vtbl3");
7040  }
7041  case NEON::BI__builtin_neon_vqtbl4q_v: {
7042  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbl4, Ty),
7043  Ops, "vtbl4");
7044  }
7045  case NEON::BI__builtin_neon_vqtbx1q_v: {
7046  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx1, Ty),
7047  Ops, "vtbx1");
7048  }
7049  case NEON::BI__builtin_neon_vqtbx2q_v: {
7050  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx2, Ty),
7051  Ops, "vtbx2");
7052  }
7053  case NEON::BI__builtin_neon_vqtbx3q_v: {
7054  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx3, Ty),
7055  Ops, "vtbx3");
7056  }
7057  case NEON::BI__builtin_neon_vqtbx4q_v: {
7058  return EmitNeonCall(CGM.getIntrinsic(Intrinsic::aarch64_neon_tbx4, Ty),
7059  Ops, "vtbx4");
7060  }
7061  case NEON::BI__builtin_neon_vsqadd_v:
7062  case NEON::BI__builtin_neon_vsqaddq_v: {
7063  Int = Intrinsic::aarch64_neon_usqadd;
7064  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vsqadd");
7065  }
7066  case NEON::BI__builtin_neon_vuqadd_v:
7067  case NEON::BI__builtin_neon_vuqaddq_v: {
7068  Int = Intrinsic::aarch64_neon_suqadd;
7069  return EmitNeonCall(CGM.getIntrinsic(Int, Ty), Ops, "vuqadd");
7070  }
7071  }
7072 }
7073 
7074 llvm::Value *CodeGenFunction::
7075 BuildVector(ArrayRef<llvm::Value*> Ops) {
7076  assert((Ops.size() & (Ops.size() - 1)) == 0 &&
7077  "Not a power-of-two sized vector!");
7078  bool AllConstants = true;
7079  for (unsigned i = 0, e = Ops.size(); i != e && AllConstants; ++i)
7080  AllConstants &= isa<Constant>(Ops[i]);
7081 
7082  // If this is a constant vector, create a ConstantVector.
7083  if (AllConstants) {
7084  SmallVector<llvm::Constant*, 16> CstOps;
7085  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7086  CstOps.push_back(cast<Constant>(Ops[i]));
7087  return llvm::ConstantVector::get(CstOps);
7088  }
7089 
7090  // Otherwise, insertelement the values to build the vector.
7091  Value *Result =
7092  llvm::UndefValue::get(llvm::VectorType::get(Ops[0]->getType(), Ops.size()));
7093 
7094  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
7095  Result = Builder.CreateInsertElement(Result, Ops[i], Builder.getInt32(i));
7096 
7097  return Result;
7098 }
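// For example, BuildVector({i16 1, i16 2, i16 3, i16 4}) folds directly to the
// constant <4 x i16> <1, 2, 3, 4>; with non-constant operands it instead emits
// a chain of insertelement instructions into an undef vector.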
7099 
7100 // Convert the mask from an integer type to a vector of i1.
7101 static Value *getMaskVecValue(CodeGenFunction &CGF, Value *Mask,
7102  unsigned NumElts) {
7103 
7104  llvm::VectorType *MaskTy = llvm::VectorType::get(CGF.Builder.getInt1Ty(),
7105  cast<IntegerType>(Mask->getType())->getBitWidth());
7106  Value *MaskVec = CGF.Builder.CreateBitCast(Mask, MaskTy);
7107 
7108  // If we have fewer than 8 elements, then the starting mask was an i8 and
7109  // we need to extract down to the right number of elements.
7110  if (NumElts < 8) {
7111  uint32_t Indices[4];
7112  for (unsigned i = 0; i != NumElts; ++i)
7113  Indices[i] = i;
7114  MaskVec = CGF.Builder.CreateShuffleVector(MaskVec, MaskVec,
7115  makeArrayRef(Indices, NumElts),
7116  "extract");
7117  }
7118  return MaskVec;
7119 }
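// For example, with an i8 mask of 0b00000101 and NumElts == 4, the bitcast
// gives <8 x i1> (on a little-endian target such as x86, lane i is bit i of
// the mask) and the shuffle keeps lanes 0-3, yielding <i1 1, i1 0, i1 1, i1 0>.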
7120 
7121 static Value *EmitX86MaskedStore(CodeGenFunction &CGF,
7122  SmallVectorImpl<Value *> &Ops,
7123  unsigned Align) {
7124  // Cast the pointer to the right type.
7125  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7126  llvm::PointerType::getUnqual(Ops[1]->getType()));
7127 
7128  // If the mask is all ones just emit a regular store.
7129  if (const auto *C = dyn_cast<Constant>(Ops[2]))
7130  if (C->isAllOnesValue())
7131  return CGF.Builder.CreateAlignedStore(Ops[1], Ops[0], Align);
7132 
7133  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7134  Ops[1]->getType()->getVectorNumElements());
7135 
7136  return CGF.Builder.CreateMaskedStore(Ops[1], Ops[0], Align, MaskVec);
7137 }
7138 
7139 static Value *EmitX86MaskedLoad(CodeGenFunction &CGF,
7140  SmallVectorImpl<Value *> &Ops, unsigned Align) {
7141  // Cast the pointer to the right type.
7142  Ops[0] = CGF.Builder.CreateBitCast(Ops[0],
7143  llvm::PointerType::getUnqual(Ops[1]->getType()));
7144 
7145  // If the mask is all ones just emit a regular load.
7146  if (const auto *C = dyn_cast<Constant>(Ops[2]))
7147  if (C->isAllOnesValue())
7148  return CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7149 
7150  Value *MaskVec = getMaskVecValue(CGF, Ops[2],
7151  Ops[1]->getType()->getVectorNumElements());
7152 
7153  return CGF.Builder.CreateMaskedLoad(Ops[0], Align, MaskVec, Ops[1]);
7154 }
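// For example, __builtin_ia32_loadups256_mask with a non-constant mask lowers
// to @llvm.masked.load with an <8 x i1> mask, the passthrough value Ops[1],
// and alignment 1; an all-ones constant mask becomes a plain unaligned load.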
7155 
7156 static Value *EmitX86SubVectorBroadcast(CodeGenFunction &CGF,
7157  SmallVectorImpl<Value *> &Ops,
7158  llvm::Type *DstTy,
7159  unsigned SrcSizeInBits,
7160  unsigned Align) {
7161  // Load the subvector.
7162  Ops[0] = CGF.Builder.CreateAlignedLoad(Ops[0], Align);
7163 
7164  // Create broadcast mask.
7165  unsigned NumDstElts = DstTy->getVectorNumElements();
7166  unsigned NumSrcElts = SrcSizeInBits / DstTy->getScalarSizeInBits();
7167 
7168  SmallVector<uint32_t, 32> Mask;
7169  for (unsigned i = 0; i != NumDstElts; i += NumSrcElts)
7170  for (unsigned j = 0; j != NumSrcElts; ++j)
7171  Mask.push_back(j);
7172 
7173  return CGF.Builder.CreateShuffleVector(Ops[0], Ops[0], Mask, "subvecbcst");
7174 }
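// For example, for __builtin_ia32_vbroadcastf128_ps256 the destination is
// <8 x float> and SrcSizeInBits is 128, so NumSrcElts == 4 and the shuffle
// mask is {0,1,2,3,0,1,2,3}: the loaded 128-bit subvector is repeated into
// both halves of the 256-bit result.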
7175 
7176 static Value *EmitX86Select(CodeGenFunction &CGF,
7177  Value *Mask, Value *Op0, Value *Op1) {
7178 
7179  // If the mask is all ones just return first argument.
7180  if (const auto *C = dyn_cast<Constant>(Mask))
7181  if (C->isAllOnesValue())
7182  return Op0;
7183 
7184  Mask = getMaskVecValue(CGF, Mask, Op0->getType()->getVectorNumElements());
7185 
7186  return CGF.Builder.CreateSelect(Mask, Op0, Op1);
7187 }
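// For example, with a non-constant i8 mask and <8 x i32> operands this becomes
// a single "select <8 x i1>, <8 x i32>, <8 x i32>"; an all-ones constant mask
// short-circuits to Op0.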
7188 
7189 static Value *EmitX86MaskedCompare(CodeGenFunction &CGF, unsigned CC,
7190  bool Signed, SmallVectorImpl<Value *> &Ops) {
7191  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7192  Value *Cmp;
7193 
7194  if (CC == 3) {
7195  Cmp = Constant::getNullValue(
7196  llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7197  } else if (CC == 7) {
7198  Cmp = Constant::getAllOnesValue(
7199  llvm::VectorType::get(CGF.Builder.getInt1Ty(), NumElts));
7200  } else {
7201  ICmpInst::Predicate Pred;
7202  switch (CC) {
7203  default: llvm_unreachable("Unknown condition code");
7204  case 0: Pred = ICmpInst::ICMP_EQ; break;
7205  case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
7206  case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
7207  case 4: Pred = ICmpInst::ICMP_NE; break;
7208  case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
7209  case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
7210  }
7211  Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7212  }
7213 
7214  const auto *C = dyn_cast<Constant>(Ops.back());
7215  if (!C || !C->isAllOnesValue())
7216  Cmp = CGF.Builder.CreateAnd(Cmp, getMaskVecValue(CGF, Ops.back(), NumElts));
7217 
7218  if (NumElts < 8) {
7219  uint32_t Indices[8];
7220  for (unsigned i = 0; i != NumElts; ++i)
7221  Indices[i] = i;
7222  for (unsigned i = NumElts; i != 8; ++i)
7223  Indices[i] = i % NumElts + NumElts;
7224  Cmp = CGF.Builder.CreateShuffleVector(
7225  Cmp, llvm::Constant::getNullValue(Cmp->getType()), Indices);
7226  }
7227  return CGF.Builder.CreateBitCast(Cmp,
7228  IntegerType::get(CGF.getLLVMContext(),
7229  std::max(NumElts, 8U)));
7230 }
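// For example, a 4-lane signed compare with CC == 6 emits an "icmp sgt", ANDs
// the result with the incoming mask operand (unless it is an all-ones
// constant), pads lanes 4-7 with zeros, and bitcasts the <8 x i1> to an i8.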
7231 
7232 static Value *EmitX86MinMax(CodeGenFunction &CGF, ICmpInst::Predicate Pred,
7233  ArrayRef<Value *> Ops) {
7234  Value *Cmp = CGF.Builder.CreateICmp(Pred, Ops[0], Ops[1]);
7235  Value *Res = CGF.Builder.CreateSelect(Cmp, Ops[0], Ops[1]);
7236 
7237  if (Ops.size() == 2)
7238  return Res;
7239 
7240  assert(Ops.size() == 4);
7241  return EmitX86Select(CGF, Ops[3], Res, Ops[2]);
7242 }
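// For example, ICMP_SGT followed by the select computes a signed max; the
// masked forms (four operands) additionally blend the result with the
// passthrough value Ops[2] under the mask Ops[3].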
7243 
7244 static Value *EmitX86SExtMask(CodeGenFunction &CGF, Value *Op,
7245  llvm::Type *DstTy) {
7246  unsigned NumberOfElements = DstTy->getVectorNumElements();
7247  Value *Mask = getMaskVecValue(CGF, Op, NumberOfElements);
7248  return CGF.Builder.CreateSExt(Mask, DstTy, "vpmovm2");
7249 }
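// For example, an 8-bit k-mask sign-extended into <8 x i16> yields -1 (all
// ones) in every lane whose mask bit is set and 0 elsewhere, matching the
// VPMOVM2* behaviour.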
7250 
7251 Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID,
7252  const CallExpr *E) {
7253  SmallVector<Value*, 4> Ops;
7254 
7255  // Find out if any arguments are required to be integer constant expressions.
7256  unsigned ICEArguments = 0;
7257  ASTContext::GetBuiltinTypeError Error;
7258  getContext().GetBuiltinType(BuiltinID, Error, &ICEArguments);
7259  assert(Error == ASTContext::GE_None && "Should not codegen an error");
7260 
7261  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++) {
7262  // If this is a normal argument, just emit it as a scalar.
7263  if ((ICEArguments & (1 << i)) == 0) {
7264  Ops.push_back(EmitScalarExpr(E->getArg(i)));
7265  continue;
7266  }
7267 
7268  // If this is required to be a constant, constant fold it so that we know
7269  // that the generated intrinsic gets a ConstantInt.
7270  llvm::APSInt Result;
7271  bool IsConst = E->getArg(i)->isIntegerConstantExpr(Result, getContext());
7272  assert(IsConst && "Constant arg isn't actually constant?"); (void)IsConst;
7273  Ops.push_back(llvm::ConstantInt::get(getLLVMContext(), Result));
7274  }
7275 
7276  // These exist so that the builtin that takes an immediate can be bounds
7277  // checked by clang to avoid passing bad immediates to the backend. Since
7278  // AVX has a larger immediate than SSE, we would need separate builtins to
7279  // do the different bounds checking. Rather than create a clang-specific
7280  // SSE-only builtin, this implements eight separate builtins to match the
7281  // gcc implementation.
7282  auto getCmpIntrinsicCall = [this, &Ops](Intrinsic::ID ID, unsigned Imm) {
7283  Ops.push_back(llvm::ConstantInt::get(Int8Ty, Imm));
7284  llvm::Function *F = CGM.getIntrinsic(ID);
7285  return Builder.CreateCall(F, Ops);
7286  };
7287 
7288  // For the vector forms of FP comparisons, translate the builtins directly to
7289  // IR.
7290  // TODO: The builtins could be removed if the SSE header files used vector
7291  // extension comparisons directly (vector ordered/unordered may need
7292  // additional support via __builtin_isnan()).
7293  auto getVectorFCmpIR = [this, &Ops](CmpInst::Predicate Pred) {
7294  Value *Cmp = Builder.CreateFCmp(Pred, Ops[0], Ops[1]);
7295  llvm::VectorType *FPVecTy = cast<llvm::VectorType>(Ops[0]->getType());
7296  llvm::VectorType *IntVecTy = llvm::VectorType::getInteger(FPVecTy);
7297  Value *Sext = Builder.CreateSExt(Cmp, IntVecTy);
7298  return Builder.CreateBitCast(Sext, FPVecTy);
7299  };
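  // For example, __builtin_ia32_cmpeqps lowers roughly to:
  //   %cmp  = fcmp oeq <4 x float> %a, %b
  //   %sext = sext <4 x i1> %cmp to <4 x i32>
  //   %res  = bitcast <4 x i32> %sext to <4 x float>
  // so each lane of the result is either all-zero or all-one bits.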
7300 
7301  switch (BuiltinID) {
7302  default: return nullptr;
7303  case X86::BI__builtin_cpu_supports: {
7304  const Expr *FeatureExpr = E->getArg(0)->IgnoreParenCasts();
7305  StringRef FeatureStr = cast<StringLiteral>(FeatureExpr)->getString();
7306 
7307  // TODO: When/if this becomes more than x86-specific, use a
7308  // TargetInfo-based mapping.
7309  // Processor features and mapping to processor feature value.
7310  enum X86Features {
7311  CMOV = 0,
7312  MMX,
7313  POPCNT,
7314  SSE,
7315  SSE2,
7316  SSE3,
7317  SSSE3,
7318  SSE4_1,
7319  SSE4_2,
7320  AVX,
7321  AVX2,
7322  SSE4_A,
7323  FMA4,
7324  XOP,
7325  FMA,
7326  AVX512F,
7327  BMI,
7328  BMI2,
7329  AES,
7330  PCLMUL,
7331  AVX512VL,
7332  AVX512BW,
7333  AVX512DQ,
7334  AVX512CD,
7335  AVX512ER,
7336  AVX512PF,
7337  AVX512VBMI,
7338  AVX512IFMA,
7339  AVX5124VNNIW, // TODO implement this fully
7340  AVX5124FMAPS, // TODO implement this fully
7341  AVX512VPOPCNTDQ,
7342  MAX
7343  };
7344 
7345  X86Features Feature =
7346  StringSwitch<X86Features>(FeatureStr)
7347  .Case("cmov", X86Features::CMOV)
7348  .Case("mmx", X86Features::MMX)
7349  .Case("popcnt", X86Features::POPCNT)
7350  .Case("sse", X86Features::SSE)
7351  .Case("sse2", X86Features::SSE2)
7352  .Case("sse3", X86Features::SSE3)
7353  .Case("ssse3", X86Features::SSSE3)
7354  .Case("sse4.1", X86Features::SSE4_1)
7355  .Case("sse4.2", X86Features::SSE4_2)
7356  .Case("avx", X86Features::AVX)
7357  .Case("avx2", X86Features::AVX2)
7358  .Case("sse4a", X86Features::SSE4_A)
7359  .Case("fma4", X86Features::FMA4)
7360  .Case("xop", X86Features::XOP)
7361  .Case("fma", X86Features::FMA)
7362  .Case("avx512f", X86Features::AVX512F)
7363  .Case("bmi", X86Features::BMI)
7364  .Case("bmi2", X86Features::BMI2)
7365  .Case("aes", X86Features::AES)
7366  .Case("pclmul", X86Features::PCLMUL)
7367  .Case("avx512vl", X86Features::AVX512VL)
7368  .Case("avx512bw", X86Features::AVX512BW)
7369  .Case("avx512dq", X86Features::AVX512DQ)
7370  .Case("avx512cd", X86Features::AVX512CD)
7371  .Case("avx512er", X86Features::AVX512ER)
7372  .Case("avx512pf", X86Features::AVX512PF)
7373  .Case("avx512vbmi", X86Features::AVX512VBMI)
7374  .Case("avx512ifma", X86Features::AVX512IFMA)
7375  .Case("avx512vpopcntdq", X86Features::AVX512VPOPCNTDQ)
7376  .Default(X86Features::MAX);
7377  assert(Feature != X86Features::MAX && "Invalid feature!");
7378 
7379  // Matching the struct layout from the compiler-rt/libgcc structure that is
7380  // filled in:
7381  // unsigned int __cpu_vendor;
7382  // unsigned int __cpu_type;
7383  // unsigned int __cpu_subtype;
7384  // unsigned int __cpu_features[1];
7385  llvm::Type *STy = llvm::StructType::get(Int32Ty, Int32Ty, Int32Ty,
7386  llvm::ArrayType::get(Int32Ty, 1));
7387 
7388  // Grab the global __cpu_model.
7389  llvm::Constant *CpuModel = CGM.CreateRuntimeVariable(STy, "__cpu_model");
7390 
7391  // Grab the first (0th) element of the __cpu_features field of the
7392  // __cpu_model global, whose layout is described by STy.
7393  Value *Idxs[] = {
7394  ConstantInt::get(Int32Ty, 0),
7395  ConstantInt::get(Int32Ty, 3),
7396  ConstantInt::get(Int32Ty, 0)
7397  };
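  // The indices {0, 3, 0} step through the __cpu_model pointer, select the
  // fourth field (__cpu_features) and pick its first element.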
7398  Value *CpuFeatures = Builder.CreateGEP(STy, CpuModel, Idxs);
7399  Value *Features = Builder.CreateAlignedLoad(CpuFeatures,
7400  CharUnits::fromQuantity(4));
7401 
7402  // Check the value of the bit corresponding to the feature requested.
7403  Value *Bitset = Builder.CreateAnd(
7404  Features, llvm::ConstantInt::get(Int32Ty, 1ULL << Feature));
7405  return Builder.CreateICmpNE(Bitset, llvm::ConstantInt::get(Int32Ty, 0));
7406  }
7407  case X86::BI_mm_prefetch: {
7408  Value *Address = Ops[0];
7409  Value *RW = ConstantInt::get(Int32Ty, 0);
7410  Value *Locality = Ops[1];
7411  Value *Data = ConstantInt::get(Int32Ty, 1);
7412  Value *F = CGM.getIntrinsic(Intrinsic::prefetch);
7413  return Builder.CreateCall(F, {Address, RW, Locality, Data});
7414  }
7415  case X86::BI_mm_clflush: {
7416  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_clflush),
7417  Ops[0]);
7418  }
7419  case X86::BI_mm_lfence: {
7420  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_lfence));
7421  }
7422  case X86::BI_mm_mfence: {
7423  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_mfence));
7424  }
7425  case X86::BI_mm_sfence: {
7426  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_sfence));
7427  }
7428  case X86::BI_mm_pause: {
7429  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse2_pause));
7430  }
7431  case X86::BI__rdtsc: {
7432  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_rdtsc));
7433  }
7434  case X86::BI__builtin_ia32_undef128:
7435  case X86::BI__builtin_ia32_undef256:
7436  case X86::BI__builtin_ia32_undef512:
7437  // The x86 definition of "undef" is not the same as the LLVM definition
7438  // (PR32176). We leave optimizing away an unnecessary zero constant to the
7439  // IR optimizer and backend.
7440  // TODO: If we had a "freeze" IR instruction to generate a fixed undef
7441  // value, we should use that here instead of a zero.
7442  return llvm::Constant::getNullValue(ConvertType(E->getType()));
7443  case X86::BI__builtin_ia32_vec_init_v8qi:
7444  case X86::BI__builtin_ia32_vec_init_v4hi:
7445  case X86::BI__builtin_ia32_vec_init_v2si:
7446  return Builder.CreateBitCast(BuildVector(Ops),
7447  llvm::Type::getX86_MMXTy(getLLVMContext()));
7448  case X86::BI__builtin_ia32_vec_ext_v2si:
7449  return Builder.CreateExtractElement(Ops[0],
7450  llvm::ConstantInt::get(Ops[1]->getType(), 0));
7451  case X86::BI_mm_setcsr:
7452  case X86::BI__builtin_ia32_ldmxcsr: {
7453  Address Tmp = CreateMemTemp(E->getArg(0)->getType());
7454  Builder.CreateStore(Ops[0], Tmp);
7455  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_ldmxcsr),
7456  Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7457  }
7458  case X86::BI_mm_getcsr:
7459  case X86::BI__builtin_ia32_stmxcsr: {
7460  Address Tmp = CreateMemTemp(E->getType());
7461  Builder.CreateCall(CGM.getIntrinsic(Intrinsic::x86_sse_stmxcsr),
7462  Builder.CreateBitCast(Tmp.getPointer(), Int8PtrTy));
7463  return Builder.CreateLoad(Tmp, "stmxcsr");
7464  }
7465  case X86::BI__builtin_ia32_xsave:
7466  case X86::BI__builtin_ia32_xsave64:
7467  case X86::BI__builtin_ia32_xrstor:
7468  case X86::BI__builtin_ia32_xrstor64:
7469  case X86::BI__builtin_ia32_xsaveopt:
7470  case X86::BI__builtin_ia32_xsaveopt64:
7471  case X86::BI__builtin_ia32_xrstors:
7472  case X86::BI__builtin_ia32_xrstors64:
7473  case X86::BI__builtin_ia32_xsavec:
7474  case X86::BI__builtin_ia32_xsavec64:
7475  case X86::BI__builtin_ia32_xsaves:
7476  case X86::BI__builtin_ia32_xsaves64: {
7477  Intrinsic::ID ID;
7478 #define INTRINSIC_X86_XSAVE_ID(NAME) \
7479  case X86::BI__builtin_ia32_##NAME: \
7480  ID = Intrinsic::x86_##NAME; \
7481  break
7482  switch (BuiltinID) {
7483  default: llvm_unreachable("Unsupported intrinsic!");
7484  INTRINSIC_X86_XSAVE_ID(xsave);
7485  INTRINSIC_X86_XSAVE_ID(xsave64);
7486  INTRINSIC_X86_XSAVE_ID(xrstor);
7487  INTRINSIC_X86_XSAVE_ID(xrstor64);
7488  INTRINSIC_X86_XSAVE_ID(xsaveopt);
7489  INTRINSIC_X86_XSAVE_ID(xsaveopt64);
7490  INTRINSIC_X86_XSAVE_ID(xrstors);
7491  INTRINSIC_X86_XSAVE_ID(xrstors64);
7492  INTRINSIC_X86_XSAVE_ID(xsavec);
7493  INTRINSIC_X86_XSAVE_ID(xsavec64);
7494  INTRINSIC_X86_XSAVE_ID(xsaves);
7495  INTRINSIC_X86_XSAVE_ID(xsaves64);
7496  }
7497 #undef INTRINSIC_X86_XSAVE_ID
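  // The x86 XSAVE-family instructions take the 64-bit feature mask in EDX:EAX,
  // so split it into its high and low 32-bit halves and pass them as separate
  // operands (pointer, high, low).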
7498  Value *Mhi = Builder.CreateTrunc(
7499  Builder.CreateLShr(Ops[1], ConstantInt::get(Int64Ty, 32)), Int32Ty);
7500  Value *Mlo = Builder.CreateTrunc(Ops[1], Int32Ty);
7501  Ops[1] = Mhi;
7502  Ops.push_back(Mlo);
7503  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7504  }
7505  case X86::BI__builtin_ia32_storedqudi128_mask:
7506  case X86::BI__builtin_ia32_storedqusi128_mask:
7507  case X86::BI__builtin_ia32_storedquhi128_mask:
7508  case X86::BI__builtin_ia32_storedquqi128_mask:
7509  case X86::BI__builtin_ia32_storeupd128_mask:
7510  case X86::BI__builtin_ia32_storeups128_mask:
7511  case X86::BI__builtin_ia32_storedqudi256_mask:
7512  case X86::BI__builtin_ia32_storedqusi256_mask:
7513  case X86::BI__builtin_ia32_storedquhi256_mask:
7514  case X86::BI__builtin_ia32_storedquqi256_mask:
7515  case X86::BI__builtin_ia32_storeupd256_mask:
7516  case X86::BI__builtin_ia32_storeups256_mask:
7517  case X86::BI__builtin_ia32_storedqudi512_mask:
7518  case X86::BI__builtin_ia32_storedqusi512_mask:
7519  case X86::BI__builtin_ia32_storedquhi512_mask:
7520  case X86::BI__builtin_ia32_storedquqi512_mask:
7521  case X86::BI__builtin_ia32_storeupd512_mask:
7522  case X86::BI__builtin_ia32_storeups512_mask:
7523  return EmitX86MaskedStore(*this, Ops, 1);
7524 
7525  case X86::BI__builtin_ia32_storess128_mask:
7526  case X86::BI__builtin_ia32_storesd128_mask: {
7527  return EmitX86MaskedStore(*this, Ops, 16);
7528  }
7529  case X86::BI__builtin_ia32_vpopcntd_512:
7530  case X86::BI__builtin_ia32_vpopcntq_512: {
7531  llvm::Type *ResultType = ConvertType(E->getType());
7532  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
7533  return Builder.CreateCall(F, Ops);
7534  }
7535  case X86::BI__builtin_ia32_cvtmask2b128:
7536  case X86::BI__builtin_ia32_cvtmask2b256:
7537  case X86::BI__builtin_ia32_cvtmask2b512:
7538  case X86::BI__builtin_ia32_cvtmask2w128:
7539  case X86::BI__builtin_ia32_cvtmask2w256:
7540  case X86::BI__builtin_ia32_cvtmask2w512:
7541  case X86::BI__builtin_ia32_cvtmask2d128:
7542  case X86::BI__builtin_ia32_cvtmask2d256:
7543  case X86::BI__builtin_ia32_cvtmask2d512:
7544  case X86::BI__builtin_ia32_cvtmask2q128:
7545  case X86::BI__builtin_ia32_cvtmask2q256:
7546  case X86::BI__builtin_ia32_cvtmask2q512:
7547  return EmitX86SExtMask(*this, Ops[0], ConvertType(E->getType()));
7548 
7549  case X86::BI__builtin_ia32_movdqa32store128_mask:
7550  case X86::BI__builtin_ia32_movdqa64store128_mask:
7551  case X86::BI__builtin_ia32_storeaps128_mask:
7552  case X86::BI__builtin_ia32_storeapd128_mask:
7553  case X86::BI__builtin_ia32_movdqa32store256_mask:
7554  case X86::BI__builtin_ia32_movdqa64store256_mask:
7555  case X86::BI__builtin_ia32_storeaps256_mask:
7556  case X86::BI__builtin_ia32_storeapd256_mask:
7557  case X86::BI__builtin_ia32_movdqa32store512_mask:
7558  case X86::BI__builtin_ia32_movdqa64store512_mask:
7559  case X86::BI__builtin_ia32_storeaps512_mask:
7560  case X86::BI__builtin_ia32_storeapd512_mask: {
7561  unsigned Align =
7562  getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7563  return EmitX86MaskedStore(*this, Ops, Align);
7564  }
7565  case X86::BI__builtin_ia32_loadups128_mask:
7566  case X86::BI__builtin_ia32_loadups256_mask:
7567  case X86::BI__builtin_ia32_loadups512_mask:
7568  case X86::BI__builtin_ia32_loadupd128_mask:
7569  case X86::BI__builtin_ia32_loadupd256_mask:
7570  case X86::BI__builtin_ia32_loadupd512_mask:
7571  case X86::BI__builtin_ia32_loaddquqi128_mask:
7572  case X86::BI__builtin_ia32_loaddquqi256_mask:
7573  case X86::BI__builtin_ia32_loaddquqi512_mask:
7574  case X86::BI__builtin_ia32_loaddquhi128_mask:
7575  case X86::BI__builtin_ia32_loaddquhi256_mask:
7576  case X86::BI__builtin_ia32_loaddquhi512_mask:
7577  case X86::BI__builtin_ia32_loaddqusi128_mask:
7578  case X86::BI__builtin_ia32_loaddqusi256_mask:
7579  case X86::BI__builtin_ia32_loaddqusi512_mask:
7580  case X86::BI__builtin_ia32_loaddqudi128_mask:
7581  case X86::BI__builtin_ia32_loaddqudi256_mask:
7582  case X86::BI__builtin_ia32_loaddqudi512_mask:
7583  return EmitX86MaskedLoad(*this, Ops, 1);
7584 
7585  case X86::BI__builtin_ia32_loadss128_mask:
7586  case X86::BI__builtin_ia32_loadsd128_mask:
7587  return EmitX86MaskedLoad(*this, Ops, 16);
7588 
7589  case X86::BI__builtin_ia32_loadaps128_mask:
7590  case X86::BI__builtin_ia32_loadaps256_mask:
7591  case X86::BI__builtin_ia32_loadaps512_mask:
7592  case X86::BI__builtin_ia32_loadapd128_mask:
7593  case X86::BI__builtin_ia32_loadapd256_mask:
7594  case X86::BI__builtin_ia32_loadapd512_mask:
7595  case X86::BI__builtin_ia32_movdqa32load128_mask:
7596  case X86::BI__builtin_ia32_movdqa32load256_mask:
7597  case X86::BI__builtin_ia32_movdqa32load512_mask:
7598  case X86::BI__builtin_ia32_movdqa64load128_mask:
7599  case X86::BI__builtin_ia32_movdqa64load256_mask:
7600  case X86::BI__builtin_ia32_movdqa64load512_mask: {
7601  unsigned Align =
7602  getContext().getTypeAlignInChars(E->getArg(1)->getType()).getQuantity();
7603  return EmitX86MaskedLoad(*this, Ops, Align);
7604  }
7605 
7606  case X86::BI__builtin_ia32_vbroadcastf128_pd256:
7607  case X86::BI__builtin_ia32_vbroadcastf128_ps256: {
7608  llvm::Type *DstTy = ConvertType(E->getType());
7609  return EmitX86SubVectorBroadcast(*this, Ops, DstTy, 128, 1);
7610  }
7611 
7612  case X86::BI__builtin_ia32_storehps:
7613  case X86::BI__builtin_ia32_storelps: {
7614  llvm::Type *PtrTy = llvm::PointerType::getUnqual(Int64Ty);
7615  llvm::Type *VecTy = llvm::VectorType::get(Int64Ty, 2);
7616 
7617  // cast val to v2i64
7618  Ops[1] = Builder.CreateBitCast(Ops[1], VecTy, "cast");
7619 
7620  // extract element 0 (storelps) or 1 (storehps)
7621  unsigned Index = BuiltinID == X86::BI__builtin_ia32_storelps ? 0 : 1;
7622  llvm::Value *Idx = llvm::ConstantInt::get(SizeTy, Index);
7623  Ops[1] = Builder.CreateExtractElement(Ops[1], Idx, "extract");
7624 
7625  // cast the pointer to i64* and store
7626  Ops[0] = Builder.CreateBitCast(Ops[0], PtrTy);
7627  return Builder.CreateDefaultAlignedStore(Ops[1], Ops[0]);
7628  }
7629  case X86::BI__builtin_ia32_palignr128:
7630  case X86::BI__builtin_ia32_palignr256:
7631  case X86::BI__builtin_ia32_palignr512_mask: {
7632  unsigned ShiftVal = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7633 
7634  unsigned NumElts = Ops[0]->getType()->getVectorNumElements();
7635  assert(NumElts % 16 == 0);
7636 
7637  // If palignr is shifting the pair of vectors by two full lanes or more,
7638  // emit zero.
7639  if (ShiftVal >= 32)
7640  return llvm::Constant::getNullValue(ConvertType(E->getType()));
7641 
7642  // If palignr is shifting the pair of input vectors more than one lane,
7643  // but less than two lanes, convert to shifting in zeroes.
7644  if (ShiftVal > 16) {
7645  ShiftVal -= 16;
7646  Ops[1] = Ops[0];
7647  Ops[0] = llvm::Constant::getNullValue(Ops[0]->getType());
7648  }
7649 
7650  uint32_t Indices[64];
7651  // 256-bit palignr operates on 128-bit lanes, so handle each lane separately.
7652  for (unsigned l = 0; l != NumElts; l += 16) {
7653  for (unsigned i = 0; i != 16; ++i) {
7654  unsigned Idx = ShiftVal + i;
7655  if (Idx >= 16)
7656  Idx += NumElts - 16; // End of lane, switch operand.
7657  Indices[l + i] = Idx + l;
7658  }
7659  }
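  // For example, with ShiftVal == 4 and a single 128-bit lane the indices are
  // {4..15, 16..19}: bytes 4-15 of Ops[1] followed by bytes 0-3 of Ops[0],
  // i.e. the concatenation Ops[0]:Ops[1] shifted right by 4 bytes.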
7660 
7661  Value *Align = Builder.CreateShuffleVector(Ops[1], Ops[0],
7662  makeArrayRef(Indices, NumElts),
7663  "palignr");
7664 
7665  // If this isn't a masked builtin, just return the align operation.
7666  if (Ops.size() == 3)
7667  return Align;
7668 
7669  return EmitX86Select(*this, Ops[4], Align, Ops[3]);
7670  }
7671 
7672  case X86::BI__builtin_ia32_movnti:
7673  case X86::BI__builtin_ia32_movnti64:
7674  case X86::BI__builtin_ia32_movntsd:
7675  case X86::BI__builtin_ia32_movntss: {
7676  llvm::MDNode *Node = llvm::MDNode::get(
7677  getLLVMContext(), llvm::ConstantAsMetadata::get(Builder.getInt32(1)));
7678 
7679  Value *Ptr = Ops[0];
7680  Value *Src = Ops[1];
7681 
7682  // Extract the 0th element of the source vector.
7683  if (BuiltinID == X86::BI__builtin_ia32_movntsd ||
7684  BuiltinID == X86::BI__builtin_ia32_movntss)
7685  Src = Builder.CreateExtractElement(Src, (uint64_t)0, "extract");
7686 
7687  // Convert the type of the pointer to a pointer to the stored type.
7688  Value *BC = Builder.CreateBitCast(
7689  Ptr, llvm::PointerType::getUnqual(Src->getType()), "cast");
7690 
7691  // Unaligned nontemporal store of the scalar value.
7692  StoreInst *SI = Builder.CreateDefaultAlignedStore(Src, BC);
7693  SI->setMetadata(CGM.getModule().getMDKindID("nontemporal"), Node);
7694  SI->setAlignment(1);
7695  return SI;
7696  }
7697 
7698  case X86::BI__builtin_ia32_selectb_128:
7699  case X86::BI__builtin_ia32_selectb_256:
7700  case X86::BI__builtin_ia32_selectb_512:
7701  case X86::BI__builtin_ia32_selectw_128:
7702  case X86::BI__builtin_ia32_selectw_256:
7703  case X86::BI__builtin_ia32_selectw_512:
7704  case X86::BI__builtin_ia32_selectd_128:
7705  case X86::BI__builtin_ia32_selectd_256:
7706  case X86::BI__builtin_ia32_selectd_512:
7707  case X86::BI__builtin_ia32_selectq_128:
7708  case X86::BI__builtin_ia32_selectq_256:
7709  case X86::BI__builtin_ia32_selectq_512:
7710  case X86::BI__builtin_ia32_selectps_128:
7711  case X86::BI__builtin_ia32_selectps_256:
7712  case X86::BI__builtin_ia32_selectps_512:
7713  case X86::BI__builtin_ia32_selectpd_128:
7714  case X86::BI__builtin_ia32_selectpd_256:
7715  case X86::BI__builtin_ia32_selectpd_512:
7716  return EmitX86Select(*this, Ops[0], Ops[1], Ops[2]);
7717  case X86::BI__builtin_ia32_pcmpeqb128_mask:
7718  case X86::BI__builtin_ia32_pcmpeqb256_mask:
7719  case X86::BI__builtin_ia32_pcmpeqb512_mask:
7720  case X86::BI__builtin_ia32_pcmpeqw128_mask:
7721  case X86::BI__builtin_ia32_pcmpeqw256_mask:
7722  case X86::BI__builtin_ia32_pcmpeqw512_mask:
7723  case X86::BI__builtin_ia32_pcmpeqd128_mask:
7724  case X86::BI__builtin_ia32_pcmpeqd256_mask:
7725  case X86::BI__builtin_ia32_pcmpeqd512_mask:
7726  case X86::BI__builtin_ia32_pcmpeqq128_mask:
7727  case X86::BI__builtin_ia32_pcmpeqq256_mask:
7728  case X86::BI__builtin_ia32_pcmpeqq512_mask:
7729  return EmitX86MaskedCompare(*this, 0, false, Ops);
7730  case X86::BI__builtin_ia32_pcmpgtb128_mask:
7731  case X86::BI__builtin_ia32_pcmpgtb256_mask:
7732  case X86::BI__builtin_ia32_pcmpgtb512_mask:
7733  case X86::BI__builtin_ia32_pcmpgtw128_mask:
7734  case X86::BI__builtin_ia32_pcmpgtw256_mask:
7735  case X86::BI__builtin_ia32_pcmpgtw512_mask:
7736  case X86::BI__builtin_ia32_pcmpgtd128_mask:
7737  case X86::BI__builtin_ia32_pcmpgtd256_mask:
7738  case X86::BI__builtin_ia32_pcmpgtd512_mask:
7739  case X86::BI__builtin_ia32_pcmpgtq128_mask:
7740  case X86::BI__builtin_ia32_pcmpgtq256_mask:
7741  case X86::BI__builtin_ia32_pcmpgtq512_mask:
7742  return EmitX86MaskedCompare(*this, 6, true, Ops);
7743  case X86::BI__builtin_ia32_cmpb128_mask:
7744  case X86::BI__builtin_ia32_cmpb256_mask:
7745  case X86::BI__builtin_ia32_cmpb512_mask:
7746  case X86::BI__builtin_ia32_cmpw128_mask:
7747  case X86::BI__builtin_ia32_cmpw256_mask:
7748  case X86::BI__builtin_ia32_cmpw512_mask:
7749  case X86::BI__builtin_ia32_cmpd128_mask:
7750  case X86::BI__builtin_ia32_cmpd256_mask:
7751  case X86::BI__builtin_ia32_cmpd512_mask:
7752  case X86::BI__builtin_ia32_cmpq128_mask:
7753  case X86::BI__builtin_ia32_cmpq256_mask:
7754  case X86::BI__builtin_ia32_cmpq512_mask: {
7755  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7756  return EmitX86MaskedCompare(*this, CC, true, Ops);
7757  }
7758  case X86::BI__builtin_ia32_ucmpb128_mask:
7759  case X86::BI__builtin_ia32_ucmpb256_mask:
7760  case X86::BI__builtin_ia32_ucmpb512_mask:
7761  case X86::BI__builtin_ia32_ucmpw128_mask:
7762  case X86::BI__builtin_ia32_ucmpw256_mask:
7763  case X86::BI__builtin_ia32_ucmpw512_mask:
7764  case X86::BI__builtin_ia32_ucmpd128_mask:
7765  case X86::BI__builtin_ia32_ucmpd256_mask:
7766  case X86::BI__builtin_ia32_ucmpd512_mask:
7767  case X86::BI__builtin_ia32_ucmpq128_mask:
7768  case X86::BI__builtin_ia32_ucmpq256_mask:
7769  case X86::BI__builtin_ia32_ucmpq512_mask: {
7770  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue() & 0x7;
7771  return EmitX86MaskedCompare(*this, CC, false, Ops);
7772  }
7773 
7774  case X86::BI__builtin_ia32_vplzcntd_128_mask:
7775  case X86::BI__builtin_ia32_vplzcntd_256_mask:
7776  case X86::BI__builtin_ia32_vplzcntd_512_mask:
7777  case X86::BI__builtin_ia32_vplzcntq_128_mask:
7778  case X86::BI__builtin_ia32_vplzcntq_256_mask:
7779  case X86::BI__builtin_ia32_vplzcntq_512_mask: {
7780  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, Ops[0]->getType());
7781  return EmitX86Select(*this, Ops[2],
7782  Builder.CreateCall(F, {Ops[0],Builder.getInt1(false)}),
7783  Ops[1]);
7784  }
7785 
7786  case X86::BI__builtin_ia32_pmaxsb128:
7787  case X86::BI__builtin_ia32_pmaxsw128:
7788  case X86::BI__builtin_ia32_pmaxsd128:
7789  case X86::BI__builtin_ia32_pmaxsq128_mask:
7790  case X86::BI__builtin_ia32_pmaxsb256:
7791  case X86::BI__builtin_ia32_pmaxsw256:
7792  case X86::BI__builtin_ia32_pmaxsd256:
7793  case X86::BI__builtin_ia32_pmaxsq256_mask:
7794  case X86::BI__builtin_ia32_pmaxsb512_mask:
7795  case X86::BI__builtin_ia32_pmaxsw512_mask:
7796  case X86::BI__builtin_ia32_pmaxsd512_mask:
7797  case X86::BI__builtin_ia32_pmaxsq512_mask:
7798  return EmitX86MinMax(*this, ICmpInst::ICMP_SGT, Ops);
7799  case X86::BI__builtin_ia32_pmaxub128:
7800  case X86::BI__builtin_ia32_pmaxuw128:
7801  case X86::BI__builtin_ia32_pmaxud128:
7802  case X86::BI__builtin_ia32_pmaxuq128_mask:
7803  case X86::BI__builtin_ia32_pmaxub256:
7804  case X86::BI__builtin_ia32_pmaxuw256:
7805  case X86::BI__builtin_ia32_pmaxud256:
7806  case X86::BI__builtin_ia32_pmaxuq256_mask:
7807  case X86::BI__builtin_ia32_pmaxub512_mask:
7808  case X86::BI__builtin_ia32_pmaxuw512_mask:
7809  case X86::BI__builtin_ia32_pmaxud512_mask:
7810  case X86::BI__builtin_ia32_pmaxuq512_mask:
7811  return EmitX86MinMax(*this, ICmpInst::ICMP_UGT, Ops);
7812  case X86::BI__builtin_ia32_pminsb128:
7813  case X86::BI__builtin_ia32_pminsw128:
7814  case X86::BI__builtin_ia32_pminsd128:
7815  case X86::BI__builtin_ia32_pminsq128_mask:
7816  case X86::BI__builtin_ia32_pminsb256:
7817  case X86::BI__builtin_ia32_pminsw256:
7818  case X86::BI__builtin_ia32_pminsd256:
7819  case X86::BI__builtin_ia32_pminsq256_mask:
7820  case X86::BI__builtin_ia32_pminsb512_mask:
7821  case X86::BI__builtin_ia32_pminsw512_mask:
7822  case X86::BI__builtin_ia32_pminsd512_mask:
7823  case X86::BI__builtin_ia32_pminsq512_mask:
7824  return EmitX86MinMax(*this, ICmpInst::ICMP_SLT, Ops);
7825  case X86::BI__builtin_ia32_pminub128:
7826  case X86::BI__builtin_ia32_pminuw128:
7827  case X86::BI__builtin_ia32_pminud128:
7828  case X86::BI__builtin_ia32_pminuq128_mask:
7829  case X86::BI__builtin_ia32_pminub256:
7830  case X86::BI__builtin_ia32_pminuw256:
7831  case X86::BI__builtin_ia32_pminud256:
7832  case X86::BI__builtin_ia32_pminuq256_mask:
7833  case X86::BI__builtin_ia32_pminub512_mask:
7834  case X86::BI__builtin_ia32_pminuw512_mask:
7835  case X86::BI__builtin_ia32_pminud512_mask:
7836  case X86::BI__builtin_ia32_pminuq512_mask:
7837  return EmitX86MinMax(*this, ICmpInst::ICMP_ULT, Ops);
7838 
7839  // 3DNow!
7840  case X86::BI__builtin_ia32_pswapdsf:
7841  case X86::BI__builtin_ia32_pswapdsi: {
7842  llvm::Type *MMXTy = llvm::Type::getX86_MMXTy(getLLVMContext());
7843  Ops[0] = Builder.CreateBitCast(Ops[0], MMXTy, "cast");
7844  llvm::Function *F = CGM.getIntrinsic(Intrinsic::x86_3dnowa_pswapd);
7845  return Builder.CreateCall(F, Ops, "pswapd");
7846  }
7847  case X86::BI__builtin_ia32_rdrand16_step:
7848  case X86::BI__builtin_ia32_rdrand32_step:
7849  case X86::BI__builtin_ia32_rdrand64_step:
7850  case X86::BI__builtin_ia32_rdseed16_step:
7851  case X86::BI__builtin_ia32_rdseed32_step:
7852  case X86::BI__builtin_ia32_rdseed64_step: {
7853  Intrinsic::ID ID;
7854  switch (BuiltinID) {
7855  default: llvm_unreachable("Unsupported intrinsic!");
7856  case X86::BI__builtin_ia32_rdrand16_step:
7857  ID = Intrinsic::x86_rdrand_16;
7858  break;
7859  case X86::BI__builtin_ia32_rdrand32_step:
7860  ID = Intrinsic::x86_rdrand_32;
7861  break;
7862  case X86::BI__builtin_ia32_rdrand64_step:
7863  ID = Intrinsic::x86_rdrand_64;
7864  break;
7865  case X86::BI__builtin_ia32_rdseed16_step:
7866  ID = Intrinsic::x86_rdseed_16;
7867  break;
7868  case X86::BI__builtin_ia32_rdseed32_step:
7869  ID = Intrinsic::x86_rdseed_32;
7870  break;
7871  case X86::BI__builtin_ia32_rdseed64_step:
7872  ID = Intrinsic::x86_rdseed_64;
7873  break;
7874  }
7875 
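  // Each of these intrinsics returns a {value, i32 flag} pair: store the
  // random value through the pointer argument and return the flag (the
  // instruction's carry-flag result, 1 on success).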
7876  Value *Call = Builder.CreateCall(CGM.getIntrinsic(ID));
7877  Builder.CreateDefaultAlignedStore(Builder.CreateExtractValue(Call, 0),
7878  Ops[0]);
7879  return Builder.CreateExtractValue(Call, 1);
7880  }
7881 
7882  // SSE packed comparison intrinsics
7883  case X86::BI__builtin_ia32_cmpeqps:
7884  case X86::BI__builtin_ia32_cmpeqpd:
7885  return getVectorFCmpIR(CmpInst::FCMP_OEQ);
7886  case X86::BI__builtin_ia32_cmpltps:
7887  case X86::BI__builtin_ia32_cmpltpd:
7888  return getVectorFCmpIR(CmpInst::FCMP_OLT);
7889  case X86::BI__builtin_ia32_cmpleps:
7890  case X86::BI__builtin_ia32_cmplepd:
7891  return getVectorFCmpIR(CmpInst::FCMP_OLE);
7892  case X86::BI__builtin_ia32_cmpunordps:
7893  case X86::BI__builtin_ia32_cmpunordpd:
7894  return getVectorFCmpIR(CmpInst::FCMP_UNO);
7895  case X86::BI__builtin_ia32_cmpneqps:
7896  case X86::BI__builtin_ia32_cmpneqpd:
7897  return getVectorFCmpIR(CmpInst::FCMP_UNE);
7898  case X86::BI__builtin_ia32_cmpnltps:
7899  case X86::BI__builtin_ia32_cmpnltpd:
7900  return getVectorFCmpIR(CmpInst::FCMP_UGE);
7901  case X86::BI__builtin_ia32_cmpnleps:
7902  case X86::BI__builtin_ia32_cmpnlepd:
7903  return getVectorFCmpIR(CmpInst::FCMP_UGT);
7904  case X86::BI__builtin_ia32_cmpordps:
7905  case X86::BI__builtin_ia32_cmpordpd:
7906  return getVectorFCmpIR(CmpInst::FCMP_ORD);
7907  case X86::BI__builtin_ia32_cmpps:
7908  case X86::BI__builtin_ia32_cmpps256:
7909  case X86::BI__builtin_ia32_cmppd:
7910  case X86::BI__builtin_ia32_cmppd256: {
7911  unsigned CC = cast<llvm::ConstantInt>(Ops[2])->getZExtValue();
7912  // If this is one of the SSE immediates, we can use native IR.
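  // (CC values 0-7 are the classic SSE predicates _CMP_EQ_OQ, _CMP_LT_OS,
  // _CMP_LE_OS, _CMP_UNORD_Q, _CMP_NEQ_UQ, _CMP_NLT_US, _CMP_NLE_US and
  // _CMP_ORD_Q.)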
7913  if (CC < 8) {
7914  FCmpInst::Predicate Pred;
7915  switch (CC) {
7916  case 0: Pred = FCmpInst::FCMP_OEQ; break;
7917  case 1: Pred = FCmpInst::FCMP_OLT; break;
7918  case 2: Pred = FCmpInst::FCMP_OLE; break;
7919  case 3: Pred = FCmpInst::FCMP_UNO; break;
7920  case 4: Pred = FCmpInst::FCMP_UNE; break;
7921  case 5: Pred = FCmpInst::FCMP_UGE; break;
7922  case 6: Pred = FCmpInst::FCMP_UGT; break;
7923  case 7: Pred = FCmpInst::FCMP_ORD; break;
7924  }
7925  return getVectorFCmpIR(Pred);
7926  }
7927 
7928  // We can't handle the 8-31 immediates with native IR, so use the intrinsic,
7929  // except for the predicates that simply fold to constants.
7930  Intrinsic::ID ID;
7931  switch (BuiltinID) {
7932  default: llvm_unreachable("Unsupported intrinsic!");
7933  case X86::BI__builtin_ia32_cmpps:
7934  ID = Intrinsic::x86_sse_cmp_ps;
7935  break;
7936  case X86::BI__builtin_ia32_cmpps256:
7937  // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
7938  // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
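  // (_CMP_FALSE_OQ = 0x0b, _CMP_TRUE_UQ = 0x0f, _CMP_FALSE_OS = 0x1b,
  // _CMP_TRUE_US = 0x1f.)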
7939  if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
7940  Value *Constant = (CC == 0xf || CC == 0x1f) ?
7941  llvm::Constant::getAllOnesValue(Builder.getInt32Ty()) :
7942  llvm::Constant::getNullValue(Builder.getInt32Ty());
7943  Value *Vec = Builder.CreateVectorSplat(
7944  Ops[0]->getType()->getVectorNumElements(), Constant);
7945  return Builder.CreateBitCast(Vec, Ops[0]->getType());
7946  }
7947  ID = Intrinsic::x86_avx_cmp_ps_256;
7948  break;
7949  case X86::BI__builtin_ia32_cmppd:
7950  ID = Intrinsic::x86_sse2_cmp_pd;
7951  break;
7952  case X86::BI__builtin_ia32_cmppd256:
7953  // _CMP_TRUE_UQ, _CMP_TRUE_US produce -1,-1... vector
7954  // on any input and _CMP_FALSE_OQ, _CMP_FALSE_OS produce 0, 0...
7955  if (CC == 0xf || CC == 0xb || CC == 0x1b || CC == 0x1f) {
7956  Value *Constant = (CC == 0xf || CC == 0x1f) ?
7957  llvm::Constant::getAllOnesValue(Builder.getInt64Ty()) :
7958  llvm::Constant::getNullValue(Builder.getInt64Ty());
7959  Value *Vec = Builder.CreateVectorSplat(
7960  Ops[0]->getType()->getVectorNumElements(), Constant);
7961  return Builder.CreateBitCast(Vec, Ops[0]->getType());
7962  }
7963  ID = Intrinsic::x86_avx_cmp_pd_256;
7964  break;
7965  }
7966 
7967  return Builder.CreateCall(CGM.getIntrinsic(ID), Ops);
7968  }
7969 
7970  // SSE scalar comparison intrinsics
7971  case X86::BI__builtin_ia32_cmpeqss:
7972  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 0);
7973  case X86::BI__builtin_ia32_cmpltss:
7974  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 1);
7975  case X86::BI__builtin_ia32_cmpless:
7976  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 2);
7977  case X86::BI__builtin_ia32_cmpunordss:
7978  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 3);
7979  case X86::BI__builtin_ia32_cmpneqss:
7980  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 4);
7981  case X86::BI__builtin_ia32_cmpnltss:
7982  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 5);
7983  case X86::BI__builtin_ia32_cmpnless:
7984  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 6);
7985  case X86::BI__builtin_ia32_cmpordss:
7986  return getCmpIntrinsicCall(Intrinsic::x86_sse_cmp_ss, 7);
7987  case X86::BI__builtin_ia32_cmpeqsd:
7988  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 0);
7989  case X86::BI__builtin_ia32_cmpltsd:
7990  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 1);
7991  case X86::BI__builtin_ia32_cmplesd:
7992  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 2);
7993  case X86::BI__builtin_ia32_cmpunordsd:
7994  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 3);
7995  case X86::BI__builtin_ia32_cmpneqsd:
7996  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 4);
7997  case X86::BI__builtin_ia32_cmpnltsd:
7998  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 5);
7999  case X86::BI__builtin_ia32_cmpnlesd:
8000  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 6);
8001  case X86::BI__builtin_ia32_cmpordsd:
8002  return getCmpIntrinsicCall(Intrinsic::x86_sse2_cmp_sd, 7);
8003 
8004  case X86::BI__emul:
8005  case X86::BI__emulu: {
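  // __emul/__emulu return the full 64-bit product of two 32-bit operands, so
  // widen both operands first and emit a single 64-bit multiply.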
8006  llvm::Type *Int64Ty = llvm::IntegerType::get(getLLVMContext(), 64);
8007  bool isSigned = (BuiltinID == X86::BI__emul);
8008  Value *LHS = Builder.CreateIntCast(Ops[0], Int64Ty, isSigned);
8009  Value *RHS = Builder.CreateIntCast(Ops[1], Int64Ty, isSigned);
8010  return Builder.CreateMul(LHS, RHS, "", !isSigned, isSigned);
8011  }
8012  case X86::BI__mulh:
8013  case X86::BI__umulh:
8014  case X86::BI_mul128:
8015  case X86::BI_umul128: {
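  // __mulh/__umulh return the high 64 bits of the 128-bit product of two
  // 64-bit operands; _mul128/_umul128 additionally store the high half through
  // the third argument and return the low half.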
8016  llvm::Type *ResType = ConvertType(E->getType());
8017  llvm::Type *Int128Ty = llvm::IntegerType::get(getLLVMContext(), 128);
8018 
8019  bool IsSigned = (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI_mul128);
8020  Value *LHS = Builder.CreateIntCast(Ops[0], Int128Ty, IsSigned);
8021  Value *RHS = Builder.CreateIntCast(Ops[1], Int128Ty, IsSigned);
8022 
8023  Value *MulResult, *HigherBits;
8024  if (IsSigned) {
8025  MulResult = Builder.CreateNSWMul(LHS, RHS);
8026  HigherBits = Builder.CreateAShr(MulResult, 64);
8027  } else {
8028  MulResult = Builder.CreateNUWMul(LHS, RHS);
8029  HigherBits = Builder.CreateLShr(MulResult, 64);
8030  }
8031  HigherBits = Builder.CreateIntCast(HigherBits, ResType, IsSigned);
8032 
8033  if (BuiltinID == X86::BI__mulh || BuiltinID == X86::BI__umulh)
8034  return HigherBits;
8035 
8036  Address HighBitsAddress = EmitPointerWithAlignment(E->getArg(2));
8037  Builder.CreateStore(HigherBits, HighBitsAddress);
8038  return Builder.CreateIntCast(MulResult, ResType, IsSigned);
8039  }
8040 
8041  case X86::BI__faststorefence: {
8042  return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
 8043  llvm::SyncScope::System);
 8044  }
8045  case X86::BI_ReadWriteBarrier:
8046  case X86::BI_ReadBarrier:
8047  case X86::BI_WriteBarrier: {
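  // These MSVC barriers only constrain compiler reordering, so a
  // single-thread fence is sufficient; no fence instruction is emitted.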
8048  return Builder.CreateFence(llvm::AtomicOrdering::SequentiallyConsistent,
8049  llvm::SyncScope::SingleThread);
8050  }
8051  case X86::BI_BitScanForward:
8052  case X86::BI_BitScanForward64:
8053  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanForward, E);
8054  case X86::BI_BitScanReverse:
8055  case X86::BI_BitScanReverse64:
8056  return EmitMSVCBuiltinExpr(MSVCIntrin::_BitScanReverse, E);
8057 
8058  case X86::BI_InterlockedAnd64:
8059  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedAnd, E);
8060  case X86::BI_InterlockedExchange64:
8061  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchange, E);
8062  case X86::BI_InterlockedExchangeAdd64:
8063  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeAdd, E);
8064  case X86::BI_InterlockedExchangeSub64:
8065  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedExchangeSub, E);
8066  case X86::BI_InterlockedOr64:
8067  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedOr, E);
8068  case X86::BI_InterlockedXor64:
8069  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedXor, E);
8070  case X86::BI_InterlockedDecrement64:
8071  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedDecrement, E);
8072  case X86::BI_InterlockedIncrement64:
8073  return EmitMSVCBuiltinExpr(MSVCIntrin::_InterlockedIncrement, E);
8074 
8075  case X86::BI_AddressOfReturnAddress: {
8076  Value *F = CGM.getIntrinsic(Intrinsic::addressofreturnaddress);
8077  return Builder.CreateCall(F);
8078  }
8079  case X86::BI__stosb: {
8080  // We treat __stosb as a volatile memset - it may not generate a "rep stosb"
8081  // instruction, but it will create a memset that won't be optimized away.
8082  return Builder.CreateMemSet(Ops[0], Ops[1], Ops[2], 1, true);
8083  }
8084  case X86::BI__ud2:
8085  // llvm.trap makes a ud2a instruction on x86.
8086  return EmitTrapCall(Intrinsic::trap);
8087  case X86::BI__int2c: {
8088  // This syscall signals a driver assertion failure in x86 NT kernels.
8089  llvm::FunctionType *FTy = llvm::FunctionType::get(VoidTy, false);
8090  llvm::InlineAsm *IA =
8091  llvm::InlineAsm::get(FTy, "int $$0x2c", "", /*SideEffects=*/true);
8092  llvm::AttributeList NoReturnAttr = llvm::AttributeList::get(
8093  getLLVMContext(), llvm::AttributeList::FunctionIndex,
8094  llvm::Attribute::NoReturn);
8095  CallSite CS = Builder.CreateCall(IA);
8096  CS.setAttributes(NoReturnAttr);
8097  return CS.getInstruction();
8098  }
8099  case X86::BI__readfsbyte:
8100  case X86::BI__readfsword:
8101  case X86::BI__readfsdword:
8102  case X86::BI__readfsqword: {
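  // Address space 257 is how the x86 backend models FS-relative addressing
  // (address space 256, used below, models GS); the load is volatile so it is
  // never folded away.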
8103  llvm::Type *IntTy = ConvertType(E->getType());
8104  Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8105  llvm::PointerType::get(IntTy, 257));
8106  LoadInst *Load = Builder.CreateAlignedLoad(
8107  IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8108  Load->setVolatile(true);
8109  return Load;
8110  }
8111  case X86::BI__readgsbyte:
8112  case X86::BI__readgsword:
8113  case X86::BI__readgsdword:
8114  case X86::BI__readgsqword: {
8115  llvm::Type *IntTy = ConvertType(E->getType());
8116  Value *Ptr = Builder.CreateIntToPtr(EmitScalarExpr(E->getArg(0)),
8117  llvm::PointerType::get(IntTy, 256));
8118  LoadInst *Load = Builder.CreateAlignedLoad(
8119  IntTy, Ptr, getContext().getTypeAlignInChars(E->getType()));
8120  Load->setVolatile(true);
8121  return Load;
8122  }
8123  }
8124 }
8125 
8126 
8127 Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID,
8128  const CallExpr *E) {
 8129  SmallVector<Value*, 4> Ops;
 8130 
8131  for (unsigned i = 0, e = E->getNumArgs(); i != e; i++)
8132  Ops.push_back(EmitScalarExpr(E->getArg(i)));
8133 
8134  Intrinsic::ID ID = Intrinsic::not_intrinsic;
8135 
8136  switch (BuiltinID) {
8137  default: return nullptr;
8138 
8139  // __builtin_ppc_get_timebase is GCC 4.8+'s PowerPC-specific name for what we
8140  // call __builtin_readcyclecounter.
8141  case PPC::BI__builtin_ppc_get_timebase:
8142  return Builder.CreateCall(CGM.getIntrinsic(Intrinsic::readcyclecounter));
8143 
8144  // vec_ld, vec_xl_be, vec_lvsl, vec_lvsr
8145  case PPC::BI__builtin_altivec_lvx:
8146  case PPC::BI__builtin_altivec_lvxl:
8147  case PPC::BI__builtin_altivec_lvebx:
8148  case PPC::BI__builtin_altivec_lvehx:
8149  case PPC::BI__builtin_altivec_lvewx:
8150  case PPC::BI__builtin_altivec_lvsl:
8151  case PPC::BI__builtin_altivec_lvsr:
8152  case PPC::BI__builtin_vsx_lxvd2x:
8153  case PPC::BI__builtin_vsx_lxvw4x:
8154  case PPC::BI__builtin_vsx_lxvd2x_be:
8155  case PPC::BI__builtin_vsx_lxvw4x_be:
8156  case PPC::BI__builtin_vsx_lxvl:
8157  case PPC::BI__builtin_vsx_lxvll:
8158  {
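  // The vec_ld-style builtins take an (offset, pointer) pair, so form the
  // effective address with a GEP and drop the offset; lxvl/lxvll instead take
  // (pointer, length) and only need the pointer cast.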
 8159  if (BuiltinID == PPC::BI__builtin_vsx_lxvl ||
 8160  BuiltinID == PPC::BI__builtin_vsx_lxvll) {
 8161  Ops[0] = Builder.CreateBitCast(Ops[0], Int8PtrTy);
 8162  } else {
8163  Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
8164  Ops[0] = Builder.CreateGEP(Ops[1], Ops[0]);
8165  Ops.pop_back();
8166  }
8167 
8168  switch (BuiltinID) {
8169  default: llvm_unreachable("Unsupported ld/lvsl/lvsr intrinsic!");
8170  case PPC::BI__builtin_altivec_lvx:
8171  ID = Intrinsic::ppc_altivec_lvx;
8172  break;
8173  case PPC::BI__builtin_altivec_lvxl:
8174  ID = Intrinsic::ppc_altivec_lvxl;
8175  break;
8176  case PPC::BI__builtin_altivec_lvebx:
8177  ID = Intrinsic::ppc_altivec_lvebx;
8178  break;
8179  case PPC::BI__builtin_altivec_lvehx:
8180  ID = Intrinsic::ppc_altivec_lvehx;
8181  break;
8182  case PPC::BI__builtin_altivec_lvewx:
8183  ID = Intrinsic::ppc_altivec_lvewx;
8184  break;
8185  case PPC::BI__builtin_altivec_lvsl:
8186  ID = Intrinsic::ppc_altivec_lvsl;
8187  break;
8188  case PPC::BI__builtin_altivec_lvsr:
8189  ID = Intrinsic::ppc_altivec_lvsr;
8190  break;
8191  case PPC::BI__builtin_vsx_lxvd2x:
8192  ID = Intrinsic::ppc_vsx_lxvd2x;
8193  break;
8194  case PPC::BI__builtin_vsx_lxvw4x:
8195  ID = Intrinsic::ppc_vsx_lxvw4x;
8196  break;
8197  case PPC::BI__builtin_vsx_lxvd2x_be:
8198  ID = Intrinsic::ppc_vsx_lxvd2x_be;
8199  break;
8200  case PPC::BI__builtin_vsx_lxvw4x_be:
8201  ID = Intrinsic::ppc_vsx_lxvw4x_be;
8202  break;
8203  case PPC::BI__builtin_vsx_lxvl:
8204  ID = Intrinsic::ppc_vsx_lxvl;
8205  break;
8206  case PPC::BI__builtin_vsx_lxvll:
8207  ID = Intrinsic::ppc_vsx_lxvll;
8208  break;
8209  }
8210  llvm::Function *F = CGM.getIntrinsic(ID);
8211  return Builder.CreateCall(F, Ops, "");
8212  }
8213 
8214  // vec_st, vec_xst_be
8215  case PPC::BI__builtin_altivec_stvx:
8216  case PPC::BI__builtin_altivec_stvxl:
8217  case PPC::BI__builtin_altivec_stvebx:
8218  case PPC::BI__builtin_altivec_stvehx:
8219  case PPC::BI__builtin_altivec_stvewx:
8220  case PPC::BI__builtin_vsx_stxvd2x:
8221  case PPC::BI__builtin_vsx_stxvw4x:
8222  case PPC::BI__builtin_vsx_stxvd2x_be:
8223  case PPC::BI__builtin_vsx_stxvw4x_be:
8224  case PPC::BI__builtin_vsx_stxvl:
8225  case PPC::BI__builtin_vsx_stxvll:
8226  {
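  // The vec_st-style builtins take (value, offset, pointer), so form the
  // effective address with a GEP and drop the trailing pointer; stxvl/stxvll
  // take (value, pointer, length) and only need the pointer cast.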
 8227  if (BuiltinID == PPC::BI__builtin_vsx_stxvl ||
 8228  BuiltinID == PPC::BI__builtin_vsx_stxvll) {
 8229  Ops[1] = Builder.CreateBitCast(Ops[1], Int8PtrTy);
 8230  } else {
8231  Ops[2] = Builder.CreateBitCast(Ops[2], Int8PtrTy);
8232  Ops[1] = Builder.CreateGEP(Ops[2], Ops[1]);
8233  Ops.pop_back();
8234  }
8235 
8236  switch (BuiltinID) {
8237  default: llvm_unreachable("Unsupported st intrinsic!");
8238  case PPC::BI__builtin_altivec_stvx:
8239  ID = Intrinsic::ppc_altivec_stvx;
8240  break;
8241  case PPC::BI__builtin_altivec_stvxl:
8242  ID = Intrinsic::ppc_altivec_stvxl;
8243  break;
8244  case PPC::BI__builtin_altivec_stvebx:
8245  ID = Intrinsic::ppc_altivec_stvebx;
8246  break;
8247  case PPC::BI__builtin_altivec_stvehx:
8248  ID = Intrinsic::ppc_altivec_stvehx;
8249  break;
8250  case PPC::BI__builtin_altivec_stvewx:
8251  ID = Intrinsic::ppc_altivec_stvewx;
8252  break;
8253  case PPC::BI__builtin_vsx_stxvd2x:
8254  ID = Intrinsic::ppc_vsx_stxvd2x;
8255  break;
8256  case PPC::BI__builtin_vsx_stxvw4x:
8257  ID = Intrinsic::ppc_vsx_stxvw4x;
8258  break;
8259  case PPC::BI__builtin_vsx_stxvd2x_be:
8260  ID = Intrinsic::ppc_vsx_stxvd2x_be;
8261  break;
8262  case PPC::BI__builtin_vsx_stxvw4x_be:
8263  ID = Intrinsic::ppc_vsx_stxvw4x_be;
8264  break;
8265  case PPC::BI__builtin_vsx_stxvl:
8266  ID = Intrinsic::ppc_vsx_stxvl;
8267  break;
8268  case PPC::BI__builtin_vsx_stxvll:
8269  ID = Intrinsic::ppc_vsx_stxvll;
8270  break;
8271  }
8272  llvm::Function *F = CGM.getIntrinsic(ID);
8273  return Builder.CreateCall(F, Ops, "");
8274  }
8275  // Square root
8276  case PPC::BI__builtin_vsx_xvsqrtsp:
8277  case PPC::BI__builtin_vsx_xvsqrtdp: {
8278  llvm::Type *ResultType = ConvertType(E->getType());
8279  Value *X = EmitScalarExpr(E->getArg(0));
8280  ID = Intrinsic::sqrt;
8281  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8282  return Builder.CreateCall(F, X);
8283  }
8284  // Count leading zeros
8285  case PPC::BI__builtin_altivec_vclzb:
8286  case PPC::BI__builtin_altivec_vclzh:
8287  case PPC::BI__builtin_altivec_vclzw:
8288  case PPC::BI__builtin_altivec_vclzd: {
8289  llvm::Type *ResultType = ConvertType(E->getType());
8290  Value *X = EmitScalarExpr(E->getArg(0));
8291  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8292  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8293  return Builder.CreateCall(F, {X, Undef});
8294  }
8295  case PPC::BI__builtin_altivec_vctzb:
8296  case PPC::BI__builtin_altivec_vctzh:
8297  case PPC::BI__builtin_altivec_vctzw:
8298  case PPC::BI__builtin_altivec_vctzd: {
8299  llvm::Type *ResultType = ConvertType(E->getType());
8300  Value *X = EmitScalarExpr(E->getArg(0));
8301  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8302  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8303  return Builder.CreateCall(F, {X, Undef});
8304  }
8305  case PPC::BI__builtin_altivec_vpopcntb:
8306  case PPC::BI__builtin_altivec_vpopcnth:
8307  case PPC::BI__builtin_altivec_vpopcntw:
8308  case PPC::BI__builtin_altivec_vpopcntd: {
8309  llvm::Type *ResultType = ConvertType(E->getType());
8310  Value *X = EmitScalarExpr(E->getArg(0));
8311  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8312  return Builder.CreateCall(F, X);
8313  }
8314  // Copy sign
8315  case PPC::BI__builtin_vsx_xvcpsgnsp:
8316  case PPC::BI__builtin_vsx_xvcpsgndp: {
8317  llvm::Type *ResultType = ConvertType(E->getType());
8318  Value *X = EmitScalarExpr(E->getArg(0));
8319  Value *Y = EmitScalarExpr(E->getArg(1));
8320  ID = Intrinsic::copysign;
8321  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8322  return Builder.CreateCall(F, {X, Y});
8323  }
8324  // Rounding/truncation
8325  case PPC::BI__builtin_vsx_xvrspip:
8326  case PPC::BI__builtin_vsx_xvrdpip:
8327  case PPC::BI__builtin_vsx_xvrdpim:
8328  case PPC::BI__builtin_vsx_xvrspim:
8329  case PPC::BI__builtin_vsx_xvrdpi:
8330  case PPC::BI__builtin_vsx_xvrspi:
8331  case PPC::BI__builtin_vsx_xvrdpic:
8332  case PPC::BI__builtin_vsx_xvrspic:
8333  case PPC::BI__builtin_vsx_xvrdpiz:
8334  case PPC::BI__builtin_vsx_xvrspiz: {
8335  llvm::Type *ResultType = ConvertType(E->getType());
8336  Value *X = EmitScalarExpr(E->getArg(0));
8337  if (BuiltinID == PPC::BI__builtin_vsx_xvrdpim ||
8338  BuiltinID == PPC::BI__builtin_vsx_xvrspim)
8339  ID = Intrinsic::floor;
8340  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpi ||
8341  BuiltinID == PPC::BI__builtin_vsx_xvrspi)
8342  ID = Intrinsic::round;
8343  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpic ||
8344  BuiltinID == PPC::BI__builtin_vsx_xvrspic)
8345  ID = Intrinsic::nearbyint;
8346  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpip ||
8347  BuiltinID == PPC::BI__builtin_vsx_xvrspip)
8348  ID = Intrinsic::ceil;
8349  else if (BuiltinID == PPC::BI__builtin_vsx_xvrdpiz ||
8350  BuiltinID == PPC::BI__builtin_vsx_xvrspiz)
8351  ID = Intrinsic::trunc;
8352  llvm::Function *F = CGM.getIntrinsic(ID, ResultType);
8353  return Builder.CreateCall(F, X);
8354  }
8355 
8356  // Absolute value
8357  case PPC::BI__builtin_vsx_xvabsdp:
8358  case PPC::BI__builtin_vsx_xvabssp: {
8359  llvm::Type *ResultType = ConvertType(E->getType());
8360  Value *X = EmitScalarExpr(E->getArg(0));
8361  llvm::Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8362  return Builder.CreateCall(F, X);
8363  }
8364 
8365  // FMA variations
8366  case PPC::BI__builtin_vsx_xvmaddadp:
8367  case PPC::BI__builtin_vsx_xvmaddasp:
8368  case PPC::BI__builtin_vsx_xvnmaddadp:
8369  case PPC::BI__builtin_vsx_xvnmaddasp:
8370  case PPC::BI__builtin_vsx_xvmsubadp:
8371  case PPC::BI__builtin_vsx_xvmsubasp:
8372  case PPC::BI__builtin_vsx_xvnmsubadp:
8373  case PPC::BI__builtin_vsx_xvnmsubasp: {
8374  llvm::Type *ResultType = ConvertType(E->getType());
8375  Value *X = EmitScalarExpr(E->getArg(0));
8376  Value *Y = EmitScalarExpr(E->getArg(1));
8377  Value *Z = EmitScalarExpr(E->getArg(2));
8378  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8379  llvm::Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8380  switch (BuiltinID) {
8381  case PPC::BI__builtin_vsx_xvmaddadp:
8382  case PPC::BI__builtin_vsx_xvmaddasp:
8383  return Builder.CreateCall(F, {X, Y, Z});
8384  case PPC::BI__builtin_vsx_xvnmaddadp:
8385  case PPC::BI__builtin_vsx_xvnmaddasp:
8386  return Builder.CreateFSub(Zero,
8387  Builder.CreateCall(F, {X, Y, Z}), "sub");
8388  case PPC::BI__builtin_vsx_xvmsubadp:
8389  case PPC::BI__builtin_vsx_xvmsubasp:
8390  return Builder.CreateCall(F,
8391  {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8392  case PPC::BI__builtin_vsx_xvnmsubadp:
8393  case PPC::BI__builtin_vsx_xvnmsubasp:
8394  Value *FsubRes =
8395  Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8396  return Builder.CreateFSub(Zero, FsubRes, "sub");
8397  }
8398  llvm_unreachable("Unknown FMA operation");
8399  return nullptr; // Suppress no-return warning
8400  }
8401 
8402  case PPC::BI__builtin_vsx_insertword: {
8403  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxinsertw);
8404 
 8405  // The third argument is a compile-time constant int. It must be clamped
 8406  // to the range [0, 12].
8407  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8408  assert(ArgCI &&
8409  "Third arg to xxinsertw intrinsic must be constant integer");
8410  const int64_t MaxIndex = 12;
8411  int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8412 
 8413  // The builtin semantics don't exactly match the xxinsertw instruction's
 8414  // semantics (which ppc_vsx_xxinsertw follows). The builtin extracts the
8415  // word from the first argument, and inserts it in the second argument. The
8416  // instruction extracts the word from its second input register and inserts
8417  // it into its first input register, so swap the first and second arguments.
8418  std::swap(Ops[0], Ops[1]);
8419 
8420  // Need to cast the second argument from a vector of unsigned int to a
8421  // vector of long long.
8422  Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8423 
8424  if (getTarget().isLittleEndian()) {
8425  // Create a shuffle mask of (1, 0)
8426  Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8427  ConstantInt::get(Int32Ty, 0)
8428  };
8429  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8430 
8431  // Reverse the double words in the vector we will extract from.
8432  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8433  Ops[0] = Builder.CreateShuffleVector(Ops[0], Ops[0], ShuffleMask);
8434 
8435  // Reverse the index.
8436  Index = MaxIndex - Index;
8437  }
8438 
8439  // Intrinsic expects the first arg to be a vector of int.
8440  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8441  Ops[2] = ConstantInt::getSigned(Int32Ty, Index);
8442  return Builder.CreateCall(F, Ops);
8443  }
8444 
8445  case PPC::BI__builtin_vsx_extractuword: {
8446  llvm::Function *F = CGM.getIntrinsic(Intrinsic::ppc_vsx_xxextractuw);
8447 
8448  // Intrinsic expects the first argument to be a vector of doublewords.
8449  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8450 
 8451  // The second argument is a compile-time constant int that needs to
8452  // be clamped to the range [0, 12].
8453  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[1]);
8454  assert(ArgCI &&
8455  "Second Arg to xxextractuw intrinsic must be a constant integer!");
8456  const int64_t MaxIndex = 12;
8457  int64_t Index = clamp(ArgCI->getSExtValue(), 0, MaxIndex);
8458 
8459  if (getTarget().isLittleEndian()) {
8460  // Reverse the index.
8461  Index = MaxIndex - Index;
8462  Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8463 
8464  // Emit the call, then reverse the double words of the results vector.
8465  Value *Call = Builder.CreateCall(F, Ops);
8466 
8467  // Create a shuffle mask of (1, 0)
8468  Constant *ShuffleElts[2] = { ConstantInt::get(Int32Ty, 1),
8469  ConstantInt::get(Int32Ty, 0)
8470  };
8471  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8472 
8473  Value *ShuffleCall = Builder.CreateShuffleVector(Call, Call, ShuffleMask);
8474  return ShuffleCall;
8475  } else {
8476  Ops[1] = ConstantInt::getSigned(Int32Ty, Index);
8477  return Builder.CreateCall(F, Ops);
8478  }
8479  }
8480 
8481  case PPC::BI__builtin_vsx_xxpermdi: {
8482  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8483  assert(ArgCI && "Third arg must be constant integer!");
8484 
8485  unsigned Index = ArgCI->getZExtValue();
8486  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int64Ty, 2));
8487  Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int64Ty, 2));
8488 
8489  // Element zero comes from the first input vector and element one comes from
8490  // the second. The element indices within each vector are numbered in big
8491  // endian order so the shuffle mask must be adjusted for this on little
8492  // endian platforms (i.e. index is complemented and source vector reversed).
8493  unsigned ElemIdx0;
8494  unsigned ElemIdx1;
8495  if (getTarget().isLittleEndian()) {
8496  ElemIdx0 = (~Index & 1) + 2;
8497  ElemIdx1 = (~Index & 2) >> 1;
8498  } else { // BigEndian
8499  ElemIdx0 = (Index & 2) >> 1;
8500  ElemIdx1 = 2 + (Index & 1);
8501  }
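  // For example, Index == 1 selects elements {0, 3} on big-endian targets and
  // {2, 1} on little-endian targets.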
8502 
8503  Constant *ShuffleElts[2] = {ConstantInt::get(Int32Ty, ElemIdx0),
8504  ConstantInt::get(Int32Ty, ElemIdx1)};
8505  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8506 
8507  Value *ShuffleCall =
8508  Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8509  QualType BIRetType = E->getType();
8510  auto RetTy = ConvertType(BIRetType);
8511  return Builder.CreateBitCast(ShuffleCall, RetTy);
8512  }
8513 
8514  case PPC::BI__builtin_vsx_xxsldwi: {
8515  ConstantInt *ArgCI = dyn_cast<ConstantInt>(Ops[2]);
8516  assert(ArgCI && "Third argument must be a compile time constant");
8517  unsigned Index = ArgCI->getZExtValue() & 0x3;
8518  Ops[0] = Builder.CreateBitCast(Ops[0], llvm::VectorType::get(Int32Ty, 4));
8519  Ops[1] = Builder.CreateBitCast(Ops[1], llvm::VectorType::get(Int32Ty, 4));
8520 
8521  // Create a shuffle mask
8522  unsigned ElemIdx0;
8523  unsigned ElemIdx1;
8524  unsigned ElemIdx2;
8525  unsigned ElemIdx3;
8526  if (getTarget().isLittleEndian()) {
8527  // Little endian element N comes from element 8+N-Index of the
8528  // concatenated wide vector (of course, using modulo arithmetic on
8529  // the total number of elements).
8530  ElemIdx0 = (8 - Index) % 8;
8531  ElemIdx1 = (9 - Index) % 8;
8532  ElemIdx2 = (10 - Index) % 8;
8533  ElemIdx3 = (11 - Index) % 8;
8534  } else {
8535  // Big endian ElemIdx<N> = Index + N
8536  ElemIdx0 = Index;
8537  ElemIdx1 = Index + 1;
8538  ElemIdx2 = Index + 2;
8539  ElemIdx3 = Index + 3;
8540  }
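  // For example, Index == 1 yields the mask {1, 2, 3, 4} on big-endian
  // targets and {7, 0, 1, 2} on little-endian targets.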
8541 
8542  Constant *ShuffleElts[4] = {ConstantInt::get(Int32Ty, ElemIdx0),
8543  ConstantInt::get(Int32Ty, ElemIdx1),
8544  ConstantInt::get(Int32Ty, ElemIdx2),
8545  ConstantInt::get(Int32Ty, ElemIdx3)};
8546 
8547  Constant *ShuffleMask = llvm::ConstantVector::get(ShuffleElts);
8548  Value *ShuffleCall =
8549  Builder.CreateShuffleVector(Ops[0], Ops[1], ShuffleMask);
8550  QualType BIRetType = E->getType();
8551  auto RetTy = ConvertType(BIRetType);
8552  return Builder.CreateBitCast(ShuffleCall, RetTy);
8553  }
8554  }
8555 }
8556 
8557 Value *CodeGenFunction::EmitAMDGPUBuiltinExpr(unsigned BuiltinID,
8558  const CallExpr *E) {
8559  switch (BuiltinID) {
8560  case AMDGPU::BI__builtin_amdgcn_div_scale:
8561  case AMDGPU::BI__builtin_amdgcn_div_scalef: {
 8562  // Translate from the intrinsic's struct return to the builtin's out
 8563  // argument.
8564 
8565  Address FlagOutPtr = EmitPointerWithAlignment(E->getArg(3));
8566 
8567  llvm::Value *X = EmitScalarExpr(E->getArg(0));
8568  llvm::Value *Y = EmitScalarExpr(E->getArg(1));
8569  llvm::Value *Z = EmitScalarExpr(E->getArg(2));
8570 
8571  llvm::Value *Callee = CGM.getIntrinsic(Intrinsic::amdgcn_div_scale,
8572  X->getType());
8573 
8574  llvm::Value *Tmp = Builder.CreateCall(Callee, {X, Y, Z});
8575 
8576  llvm::Value *Result = Builder.CreateExtractValue(Tmp, 0);
8577  llvm::Value *Flag = Builder.CreateExtractValue(Tmp, 1);
8578 
8579  llvm::Type *RealFlagType
8580  = FlagOutPtr.getPointer()->getType()->getPointerElementType();
8581 
8582  llvm::Value *FlagExt = Builder.CreateZExt(Flag, RealFlagType);
8583  Builder.CreateStore(FlagExt, FlagOutPtr);
8584  return Result;
8585  }
8586  case AMDGPU::BI__builtin_amdgcn_div_fmas:
8587  case AMDGPU::BI__builtin_amdgcn_div_fmasf: {
8588  llvm::Value *Src0 = EmitScalarExpr(E->getArg(0));
8589  llvm::Value *Src1 = EmitScalarExpr(E->getArg(1));
8590  llvm::Value *Src2 = EmitScalarExpr(E->getArg(2));
8591  llvm::Value *Src3 = EmitScalarExpr(E->getArg(3));
8592 
8593  llvm::Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_div_fmas,
8594  Src0->getType());
8595  llvm::Value *Src3ToBool = Builder.CreateIsNotNull(Src3);
8596  return Builder.CreateCall(F, {Src0, Src1, Src2, Src3ToBool});
8597  }
8598 
8599  case AMDGPU::BI__builtin_amdgcn_ds_swizzle:
8600  return emitBinaryBuiltin(*this, E, Intrinsic::amdgcn_ds_swizzle);
8601  case AMDGPU::BI__builtin_amdgcn_mov_dpp: {
 8602  llvm::SmallVector<llvm::Value *, 6> Args;
 8603  for (unsigned I = 0; I != 5; ++I)
8604  Args.push_back(EmitScalarExpr(E->getArg(I)));
8605  Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_mov_dpp,
8606  Args[0]->getType());
8607  return Builder.CreateCall(F, Args);
8608  }
8609  case AMDGPU::BI__builtin_amdgcn_div_fixup:
8610  case AMDGPU::BI__builtin_amdgcn_div_fixupf:
8611  case AMDGPU::BI__builtin_amdgcn_div_fixuph:
8612  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_div_fixup);
8613  case AMDGPU::BI__builtin_amdgcn_trig_preop:
8614  case AMDGPU::BI__builtin_amdgcn_trig_preopf:
8615  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_trig_preop);
8616  case AMDGPU::BI__builtin_amdgcn_rcp:
8617  case AMDGPU::BI__builtin_amdgcn_rcpf:
8618  case AMDGPU::BI__builtin_amdgcn_rcph:
8619  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rcp);
8620  case AMDGPU::BI__builtin_amdgcn_rsq:
8621  case AMDGPU::BI__builtin_amdgcn_rsqf:
8622  case AMDGPU::BI__builtin_amdgcn_rsqh:
8623  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq);
8624  case AMDGPU::BI__builtin_amdgcn_rsq_clamp:
8625  case AMDGPU::BI__builtin_amdgcn_rsq_clampf:
8626  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_rsq_clamp);
8627  case AMDGPU::BI__builtin_amdgcn_sinf:
8628  case AMDGPU::BI__builtin_amdgcn_sinh:
8629  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_sin);
8630  case AMDGPU::BI__builtin_amdgcn_cosf:
8631  case AMDGPU::BI__builtin_amdgcn_cosh:
8632  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_cos);
8633  case AMDGPU::BI__builtin_amdgcn_log_clampf:
8634  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_log_clamp);
8635  case AMDGPU::BI__builtin_amdgcn_ldexp:
8636  case AMDGPU::BI__builtin_amdgcn_ldexpf:
8637  case AMDGPU::BI__builtin_amdgcn_ldexph:
8638  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_ldexp);
8639  case AMDGPU::BI__builtin_amdgcn_frexp_mant:
8640  case AMDGPU::BI__builtin_amdgcn_frexp_mantf:
8641  case AMDGPU::BI__builtin_amdgcn_frexp_manth:
8642  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_frexp_mant);
8643  case AMDGPU::BI__builtin_amdgcn_frexp_exp:
8644  case AMDGPU::BI__builtin_amdgcn_frexp_expf: {
8645  Value *Src0 = EmitScalarExpr(E->getArg(0));
8646  Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8647  { Builder.getInt32Ty(), Src0->getType() });
8648  return Builder.CreateCall(F, Src0);
8649  }
8650  case AMDGPU::BI__builtin_amdgcn_frexp_exph: {
8651  Value *Src0 = EmitScalarExpr(E->getArg(0));
8652  Value *F = CGM.getIntrinsic(Intrinsic::amdgcn_frexp_exp,
8653  { Builder.getInt16Ty(), Src0->getType() });
8654  return Builder.CreateCall(F, Src0);
8655  }
8656  case AMDGPU::BI__builtin_amdgcn_fract:
8657  case AMDGPU::BI__builtin_amdgcn_fractf:
8658  case AMDGPU::BI__builtin_amdgcn_fracth:
8659  return emitUnaryBuiltin(*this, E, Intrinsic::amdgcn_fract);
8660  case AMDGPU::BI__builtin_amdgcn_lerp:
8661  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_lerp);
8662  case AMDGPU::BI__builtin_amdgcn_uicmp:
8663  case AMDGPU::BI__builtin_amdgcn_uicmpl:
8664  case AMDGPU::BI__builtin_amdgcn_sicmp:
8665  case AMDGPU::BI__builtin_amdgcn_sicmpl:
8666  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_icmp);
8667  case AMDGPU::BI__builtin_amdgcn_fcmp:
8668  case AMDGPU::BI__builtin_amdgcn_fcmpf:
8669  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fcmp);
8670  case AMDGPU::BI__builtin_amdgcn_class:
8671  case AMDGPU::BI__builtin_amdgcn_classf:
8672  case AMDGPU::BI__builtin_amdgcn_classh:
8673  return emitFPIntBuiltin(*this, E, Intrinsic::amdgcn_class);
8674  case AMDGPU::BI__builtin_amdgcn_fmed3f:
8675  case AMDGPU::BI__builtin_amdgcn_fmed3h:
8676  return emitTernaryBuiltin(*this, E, Intrinsic::amdgcn_fmed3);
8677  case AMDGPU::BI__builtin_amdgcn_read_exec: {
8678  CallInst *CI = cast<CallInst>(
8679  EmitSpecialRegisterBuiltin(*this, E, Int64Ty, Int64Ty, true, "exec"));
8680  CI->setConvergent();
8681  return CI;
8682  }
8683 
8684  // amdgcn workitem
8685  case AMDGPU::BI__builtin_amdgcn_workitem_id_x:
8686  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_x, 0, 1024);
8687  case AMDGPU::BI__builtin_amdgcn_workitem_id_y:
8688  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_y, 0, 1024);
8689  case AMDGPU::BI__builtin_amdgcn_workitem_id_z:
8690  return emitRangedBuiltin(*this, Intrinsic::amdgcn_workitem_id_z, 0, 1024);
8691 
8692  // r600 intrinsics
8693  case AMDGPU::BI__builtin_r600_recipsqrt_ieee:
8694  case AMDGPU::BI__builtin_r600_recipsqrt_ieeef:
8695  return emitUnaryBuiltin(*this, E, Intrinsic::r600_recipsqrt_ieee);
8696  case AMDGPU::BI__builtin_r600_read_tidig_x:
8697  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_x, 0, 1024);
8698  case AMDGPU::BI__builtin_r600_read_tidig_y:
8699  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_y, 0, 1024);
8700  case AMDGPU::BI__builtin_r600_read_tidig_z:
8701  return emitRangedBuiltin(*this, Intrinsic::r600_read_tidig_z, 0, 1024);
8702  default:
8703  return nullptr;
8704  }
8705 }
8706 
8707 /// Handle a SystemZ function in which the final argument is a pointer
8708 /// to an int that receives the post-instruction CC value. At the LLVM level
8709 /// this is represented as a function that returns a {result, cc} pair.
8710 static Value *EmitSystemZIntrinsicWithCC(CodeGenFunction &CGF,
8711  unsigned IntrinsicID,
8712  const CallExpr *E) {
8713  unsigned NumArgs = E->getNumArgs() - 1;
8714  SmallVector<Value *, 8> Args(NumArgs);
8715  for (unsigned I = 0; I < NumArgs; ++I)
8716  Args[I] = CGF.EmitScalarExpr(E->getArg(I));
8717  Address CCPtr = CGF.EmitPointerWithAlignment(E->getArg(NumArgs));
8718  Value *F = CGF.CGM.getIntrinsic(IntrinsicID);
8719  Value *Call = CGF.Builder.CreateCall(F, Args);
8720  Value *CC = CGF.Builder.CreateExtractValue(Call, 1);
8721  CGF.Builder.CreateStore(CC, CCPtr);
8722  return CGF.Builder.CreateExtractValue(Call, 0);
8723 }
8724 
8725 Value *CodeGenFunction::EmitSystemZBuiltinExpr(unsigned BuiltinID,
8726  const CallExpr *E) {
8727  switch (BuiltinID) {
8728  case SystemZ::BI__builtin_tbegin: {
8729  Value *TDB = EmitScalarExpr(E->getArg(0));
8730  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8731  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin);
8732  return Builder.CreateCall(F, {TDB, Control});
8733  }
8734  case SystemZ::BI__builtin_tbegin_nofloat: {
8735  Value *TDB = EmitScalarExpr(E->getArg(0));
8736  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff0c);
8737  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbegin_nofloat);
8738  return Builder.CreateCall(F, {TDB, Control});
8739  }
8740  case SystemZ::BI__builtin_tbeginc: {
8741  Value *TDB = llvm::ConstantPointerNull::get(Int8PtrTy);
8742  Value *Control = llvm::ConstantInt::get(Int32Ty, 0xff08);
8743  Value *F = CGM.getIntrinsic(Intrinsic::s390_tbeginc);
8744  return Builder.CreateCall(F, {TDB, Control});
8745  }
8746  case SystemZ::BI__builtin_tabort: {
8747  Value *Data = EmitScalarExpr(E->getArg(0));
8748  Value *F = CGM.getIntrinsic(Intrinsic::s390_tabort);
8749  return Builder.CreateCall(F, Builder.CreateSExt(Data, Int64Ty, "tabort"));
8750  }
8751  case SystemZ::BI__builtin_non_tx_store: {
8752  Value *Address = EmitScalarExpr(E->getArg(0));
8753  Value *Data = EmitScalarExpr(E->getArg(1));
8754  Value *F = CGM.getIntrinsic(Intrinsic::s390_ntstg);
8755  return Builder.CreateCall(F, {Data, Address});
8756  }
8757 
8758  // Vector builtins. Note that most vector builtins are mapped automatically
8759  // to target-specific LLVM intrinsics. The ones handled specially here can
8760  // be represented via standard LLVM IR, which is preferable to enable common
8761  // LLVM optimizations.
8762 
8763  case SystemZ::BI__builtin_s390_vpopctb:
8764  case SystemZ::BI__builtin_s390_vpopcth:
8765  case SystemZ::BI__builtin_s390_vpopctf:
8766  case SystemZ::BI__builtin_s390_vpopctg: {
8767  llvm::Type *ResultType = ConvertType(E->getType());
8768  Value *X = EmitScalarExpr(E->getArg(0));
8769  Function *F = CGM.getIntrinsic(Intrinsic::ctpop, ResultType);
8770  return Builder.CreateCall(F, X);
8771  }
8772 
8773  case SystemZ::BI__builtin_s390_vclzb:
8774  case SystemZ::BI__builtin_s390_vclzh:
8775  case SystemZ::BI__builtin_s390_vclzf:
8776  case SystemZ::BI__builtin_s390_vclzg: {
8777  llvm::Type *ResultType = ConvertType(E->getType());
8778  Value *X = EmitScalarExpr(E->getArg(0));
8779  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8780  Function *F = CGM.getIntrinsic(Intrinsic::ctlz, ResultType);
8781  return Builder.CreateCall(F, {X, Undef});
8782  }
8783 
8784  case SystemZ::BI__builtin_s390_vctzb:
8785  case SystemZ::BI__builtin_s390_vctzh:
8786  case SystemZ::BI__builtin_s390_vctzf:
8787  case SystemZ::BI__builtin_s390_vctzg: {
8788  llvm::Type *ResultType = ConvertType(E->getType());
8789  Value *X = EmitScalarExpr(E->getArg(0));
8790  Value *Undef = ConstantInt::get(Builder.getInt1Ty(), false);
8791  Function *F = CGM.getIntrinsic(Intrinsic::cttz, ResultType);
8792  return Builder.CreateCall(F, {X, Undef});
8793  }
8794 
8795  case SystemZ::BI__builtin_s390_vfsqsb:
8796  case SystemZ::BI__builtin_s390_vfsqdb: {
8797  llvm::Type *ResultType = ConvertType(E->getType());
8798  Value *X = EmitScalarExpr(E->getArg(0));
8799  Function *F = CGM.getIntrinsic(Intrinsic::sqrt, ResultType);
8800  return Builder.CreateCall(F, X);
8801  }
8802  case SystemZ::BI__builtin_s390_vfmasb:
8803  case SystemZ::BI__builtin_s390_vfmadb: {
8804  llvm::Type *ResultType = ConvertType(E->getType());
8805  Value *X = EmitScalarExpr(E->getArg(0));
8806  Value *Y = EmitScalarExpr(E->getArg(1));
8807  Value *Z = EmitScalarExpr(E->getArg(2));
8808  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8809  return Builder.CreateCall(F, {X, Y, Z});
8810  }
8811  case SystemZ::BI__builtin_s390_vfmssb:
8812  case SystemZ::BI__builtin_s390_vfmsdb: {
8813  llvm::Type *ResultType = ConvertType(E->getType());
8814  Value *X = EmitScalarExpr(E->getArg(0));
8815  Value *Y = EmitScalarExpr(E->getArg(1));
8816  Value *Z = EmitScalarExpr(E->getArg(2));
8817  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8818  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8819  return Builder.CreateCall(F, {X, Y, Builder.CreateFSub(Zero, Z, "sub")});
8820  }
8821  case SystemZ::BI__builtin_s390_vfnmasb:
8822  case SystemZ::BI__builtin_s390_vfnmadb: {
8823  llvm::Type *ResultType = ConvertType(E->getType());
8824  Value *X = EmitScalarExpr(E->getArg(0));
8825  Value *Y = EmitScalarExpr(E->getArg(1));
8826  Value *Z = EmitScalarExpr(E->getArg(2));
8827  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8828  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8829  return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, Z}), "sub");
8830  }
8831  case SystemZ::BI__builtin_s390_vfnmssb:
8832  case SystemZ::BI__builtin_s390_vfnmsdb: {
8833  llvm::Type *ResultType = ConvertType(E->getType());
8834  Value *X = EmitScalarExpr(E->getArg(0));
8835  Value *Y = EmitScalarExpr(E->getArg(1));
8836  Value *Z = EmitScalarExpr(E->getArg(2));
8837  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8838  Function *F = CGM.getIntrinsic(Intrinsic::fma, ResultType);
8839  Value *NegZ = Builder.CreateFSub(Zero, Z, "sub");
8840  return Builder.CreateFSub(Zero, Builder.CreateCall(F, {X, Y, NegZ}));
8841  }
8842  case SystemZ::BI__builtin_s390_vflpsb:
8843  case SystemZ::BI__builtin_s390_vflpdb: {
8844  llvm::Type *ResultType = ConvertType(E->getType());
8845  Value *X = EmitScalarExpr(E->getArg(0));
8846  Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8847  return Builder.CreateCall(F, X);
8848  }
8849  case SystemZ::BI__builtin_s390_vflnsb:
8850  case SystemZ::BI__builtin_s390_vflndb: {
8851  llvm::Type *ResultType = ConvertType(E->getType());
8852  Value *X = EmitScalarExpr(E->getArg(0));
8853  Value *Zero = llvm::ConstantFP::getZeroValueForNegation(ResultType);
8854  Function *F = CGM.getIntrinsic(Intrinsic::fabs, ResultType);
8855  return Builder.CreateFSub(Zero, Builder.CreateCall(F, X), "sub");
8856  }
8857  case SystemZ::BI__builtin_s390_vfisb:
8858  case SystemZ::BI__builtin_s390_vfidb: {
8859  llvm::Type *ResultType = ConvertType(E->getType());
8860  Value *X = EmitScalarExpr(E->getArg(0));
8861  // Constant-fold the M4 and M5 mask arguments.
8862  llvm::APSInt M4, M5;
8863  bool IsConstM4 = E->getArg(1)->isIntegerConstantExpr(M4, getContext());
8864  bool IsConstM5 = E->getArg(2)->isIntegerConstantExpr(M5, getContext());
8865  assert(IsConstM4 && IsConstM5 && "Constant arg isn't actually constant?");
8866  (void)IsConstM4; (void)IsConstM5;
 8867  // Check whether this instance can be represented via an LLVM standard
8868  // intrinsic. We only support some combinations of M4 and M5.
8869  Intrinsic::ID ID = Intrinsic::not_intrinsic;
8870  switch (M4.getZExtValue()) {
8871  default: break;
8872  case 0: // IEEE-inexact exception allowed
8873  switch (M5.getZExtValue()) {
8874  default: break;
8875  case 0: ID = Intrinsic::rint; break;
8876  }
8877  break;
8878  case 4: // IEEE-inexact exception suppressed
8879  switch (M5.getZExtValue()) {
8880  default: break;
8881  case 0: ID = Intrinsic::nearbyint; break;
8882  case 1: ID = Intrinsic::round; break;
8883  case 5: ID = Intrinsic::trunc; break;
8884  case 6: ID = Intrinsic::ceil; break;
8885  case 7: ID = Intrinsic::floor; break;
8886  }
8887  break;
8888  }
8889  if (ID != Intrinsic::not_intrinsic) {
8890  Function *F = CGM.getIntrinsic(ID, ResultType);
8891  return Builder.CreateCall(F, X);
8892  }
8893  switch (BuiltinID) {
8894  case SystemZ::BI__builtin_s390_vfisb: ID = Intrinsic::s390_vfisb; break;
8895  case SystemZ::BI__builtin_s390_vfidb: ID = Intrinsic::s390_vfidb; break;
8896  default: llvm_unreachable("Unknown BuiltinID");
8897  }
8898  Function *F = CGM.getIntrinsic(ID);
8899  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
8900  Value *M5Value = llvm::ConstantInt::get(getLLVMContext(), M5);
8901  return Builder.CreateCall(F, {X, M4Value, M5Value});
8902  }
8903  case SystemZ::BI__builtin_s390_vfmaxsb:
8904  case SystemZ::BI__builtin_s390_vfmaxdb: {
8905  llvm::Type *ResultType = ConvertType(E->getType());
8906  Value *X = EmitScalarExpr(E->getArg(0));
8907  Value *Y = EmitScalarExpr(E->getArg(1));
8908  // Constant-fold the M4 mask argument.
8909  llvm::APSInt M4;
8910  bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
8911  assert(IsConstM4 && "Constant arg isn't actually constant?");
8912  (void)IsConstM4;
 8913  // Check whether this instance can be represented via an LLVM standard
8914  // intrinsic. We only support some values of M4.
8915  Intrinsic::ID ID = Intrinsic::not_intrinsic;
8916  switch (M4.getZExtValue()) {
8917  default: break;
8918  case 4: ID = Intrinsic::maxnum; break;
8919  }
8920  if (ID != Intrinsic::not_intrinsic) {
8921  Function *F = CGM.getIntrinsic(ID, ResultType);
8922  return Builder.CreateCall(F, {X, Y});
8923  }
8924  switch (BuiltinID) {
8925  case SystemZ::BI__builtin_s390_vfmaxsb: ID = Intrinsic::s390_vfmaxsb; break;
8926  case SystemZ::BI__builtin_s390_vfmaxdb: ID = Intrinsic::s390_vfmaxdb; break;
8927  default: llvm_unreachable("Unknown BuiltinID");
8928  }
8929  Function *F = CGM.getIntrinsic(ID);
8930  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
8931  return Builder.CreateCall(F, {X, Y, M4Value});
8932  }
8933  case SystemZ::BI__builtin_s390_vfminsb:
8934  case SystemZ::BI__builtin_s390_vfmindb: {
8935  llvm::Type *ResultType = ConvertType(E->getType());
8936  Value *X = EmitScalarExpr(E->getArg(0));
8937  Value *Y = EmitScalarExpr(E->getArg(1));
8938  // Constant-fold the M4 mask argument.
8939  llvm::APSInt M4;
8940  bool IsConstM4 = E->getArg(2)->isIntegerConstantExpr(M4, getContext());
8941  assert(IsConstM4 && "Constant arg isn't actually constant?");
8942  (void)IsConstM4;
 8943  // Check whether this instance can be represented via an LLVM standard
8944  // intrinsic. We only support some values of M4.
8945  Intrinsic::ID ID = Intrinsic::not_intrinsic;
8946  switch (M4.getZExtValue()) {
8947  default: break;
8948  case 4: ID = Intrinsic::minnum; break;
8949  }
8950  if (ID != Intrinsic::not_intrinsic) {
8951  Function *F = CGM.getIntrinsic(ID, ResultType);
8952  return Builder.CreateCall(F, {X, Y});
8953  }
8954  switch (BuiltinID) {
8955  case SystemZ::BI__builtin_s390_vfminsb: ID = Intrinsic::s390_vfminsb; break;
8956  case SystemZ::BI__builtin_s390_vfmindb: ID = Intrinsic::s390_vfmindb; break;
8957  default: llvm_unreachable("Unknown BuiltinID");
8958  }
8959  Function *F = CGM.getIntrinsic(ID);
8960  Value *M4Value = llvm::ConstantInt::get(getLLVMContext(), M4);
8961  return Builder.CreateCall(F, {X, Y, M4Value});
8962  }
8963 
 8964  // Vector intrinsics that output the post-instruction CC value.
8965 
8966 #define INTRINSIC_WITH_CC(NAME) \
8967  case SystemZ::BI__builtin_##NAME: \
8968  return EmitSystemZIntrinsicWithCC(*this, Intrinsic::NAME, E)
8969 
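  // For example, INTRINSIC_WITH_CC(s390_vpkshs) expands to
  //   case SystemZ::BI__builtin_s390_vpkshs:
  //     return EmitSystemZIntrinsicWithCC(*this, Intrinsic::s390_vpkshs, E);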
8970  INTRINSIC_WITH_CC(s390_vpkshs);
8971  INTRINSIC_WITH_CC(s390_vpksfs);
8972  INTRINSIC_WITH_CC(s390_vpksgs);
8973 
8974  INTRINSIC_WITH_CC(s390_vpklshs);
8975  INTRINSIC_WITH_CC(s390_vpklsfs);
8976  INTRINSIC_WITH_CC(s390_vpklsgs);
8977 
8978  INTRINSIC_WITH_CC(s390_vceqbs);
8979  INTRINSIC_WITH_CC(s390_vceqhs);
8980  INTRINSIC_WITH_CC(s390_vceqfs);
8981  INTRINSIC_WITH_CC(s390_vceqgs);
8982 
8983  INTRINSIC_WITH_CC(s390_vchbs);
8984  INTRINSIC_WITH_CC(s390_vchhs);
8985  INTRINSIC_WITH_CC(s390_vchfs);
8986  INTRINSIC_WITH_CC(s390_vchgs);
8987 
8988  INTRINSIC_WITH_CC(s390_vchlbs);
8989  INTRINSIC_WITH_CC(s390_vchlhs);
8990  INTRINSIC_WITH_CC(s390_vchlfs);
8991  INTRINSIC_WITH_CC(s390_vchlgs);
8992 
8993  INTRINSIC_WITH_CC(s390_vfaebs);
8994  INTRINSIC_WITH_CC(s390_vfaehs);
8995  INTRINSIC_WITH_CC(s390_vfaefs);
8996 
8997  INTRINSIC_WITH_CC(s390_vfaezbs);
8998  INTRINSIC_WITH_CC(s390_vfaezhs);
8999  INTRINSIC_WITH_CC(s390_vfaezfs);
9000 
9001  INTRINSIC_WITH_CC(s390_vfeebs);
9002  INTRINSIC_WITH_CC(s390_vfeehs);
9003  INTRINSIC_WITH_CC(s390_vfeefs);
9004 
9005  INTRINSIC_WITH_CC(s390_vfeezbs);
9006  INTRINSIC_WITH_CC(s390_vfeezhs);
9007  INTRINSIC_WITH_CC(s390_vfeezfs);
9008 
9009  INTRINSIC_WITH_CC(s390_vfenebs);
9010  INTRINSIC_WITH_CC(s390_vfenehs);
9011  INTRINSIC_WITH_CC(s390_vfenefs);
9012 
9013  INTRINSIC_WITH_CC(s390_vfenezbs);
9014  INTRINSIC_WITH_CC(s390_vfenezhs);
9015  INTRINSIC_WITH_CC(s390_vfenezfs);
9016 
9017  INTRINSIC_WITH_CC(s390_vistrbs);
9018  INTRINSIC_WITH_CC(s390_vistrhs);
9019  INTRINSIC_WITH_CC(s390_vistrfs);
9020 
9021  INTRINSIC_WITH_CC(s390_vstrcbs);
9022  INTRINSIC_WITH_CC(s390_vstrchs);
9023  INTRINSIC_WITH_CC(s390_vstrcfs);
9024 
9025  INTRINSIC_WITH_CC(s390_vstrczbs);
9026  INTRINSIC_WITH_CC(s390_vstrczhs);
9027  INTRINSIC_WITH_CC(s390_vstrczfs);
9028 
9029  INTRINSIC_WITH_CC(s390_vfcesbs);
9030  INTRINSIC_WITH_CC(s390_vfcedbs);
9031  INTRINSIC_WITH_CC(s390_vfchsbs);
9032  INTRINSIC_WITH_CC(s390_vfchdbs);
9033  INTRINSIC_WITH_CC(s390_vfchesbs);
9034  INTRINSIC_WITH_CC(s390_vfchedbs);
9035 
9036  INTRINSIC_WITH_CC(s390_vftcisb);
9037  INTRINSIC_WITH_CC(s390_vftcidb);
9038 
9039 #undef INTRINSIC_WITH_CC
9040 
9041  default:
9042  return nullptr;
9043  }
9044 }
9045 
9046 Value *CodeGenFunction::EmitNVPTXBuiltinExpr(unsigned BuiltinID,
9047  const CallExpr *E) {
9048  auto MakeLdg = [&](unsigned IntrinsicID) {
9049  Value *Ptr = EmitScalarExpr(E->getArg(0));
9050  clang::CharUnits Align =
 9051  getNaturalPointeeTypeAlignment(E->getArg(0)->getType());
 9052  return Builder.CreateCall(
9053  CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9054  Ptr->getType()}),
9055  {Ptr, ConstantInt::get(Builder.getInt32Ty(), Align.getQuantity())});
9056  };
9057  auto MakeScopedAtomic = [&](unsigned IntrinsicID) {
9058  Value *Ptr = EmitScalarExpr(E->getArg(0));
9059  return Builder.CreateCall(
9060  CGM.getIntrinsic(IntrinsicID, {Ptr->getType()->getPointerElementType(),
9061  Ptr->getType()}),
9062  {Ptr, EmitScalarExpr(E->getArg(1))});
9063  };
9064  switch (BuiltinID) {
9065  case NVPTX::BI__nvvm_atom_add_gen_i:
9066  case NVPTX::BI__nvvm_atom_add_gen_l:
9067  case NVPTX::BI__nvvm_atom_add_gen_ll:
9068  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Add, E);
9069 
9070  case NVPTX::BI__nvvm_atom_sub_gen_i:
9071  case NVPTX::BI__nvvm_atom_sub_gen_l:
9072  case NVPTX::BI__nvvm_atom_sub_gen_ll:
9073  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Sub, E);
9074 
9075  case NVPTX::BI__nvvm_atom_and_gen_i:
9076  case NVPTX::BI__nvvm_atom_and_gen_l:
9077  case NVPTX::BI__nvvm_atom_and_gen_ll:
 9078  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::And, E);
 9079 
9080  case NVPTX::BI__nvvm_atom_or_gen_i:
9081  case NVPTX::BI__nvvm_atom_or_gen_l:
9082  case NVPTX::BI__nvvm_atom_or_gen_ll:
9083  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Or, E);
9084 
9085  case NVPTX::BI__nvvm_atom_xor_gen_i:
9086  case NVPTX::BI__nvvm_atom_xor_gen_l:
9087  case NVPTX::BI__nvvm_atom_xor_gen_ll:
9088  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xor, E);
9089 
9090  case NVPTX::BI__nvvm_atom_xchg_gen_i:
9091  case NVPTX::BI__nvvm_atom_xchg_gen_l:
9092  case NVPTX::BI__nvvm_atom_xchg_gen_ll:
9093  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Xchg, E);
9094 
9095  case NVPTX::BI__nvvm_atom_max_gen_i:
9096  case NVPTX::BI__nvvm_atom_max_gen_l:
9097  case NVPTX::BI__nvvm_atom_max_gen_ll:
9098  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Max, E);
9099 
9100  case NVPTX::BI__nvvm_atom_max_gen_ui:
9101  case NVPTX::BI__nvvm_atom_max_gen_ul:
9102  case NVPTX::BI__nvvm_atom_max_gen_ull:
9103  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMax, E);
9104 
9105  case NVPTX::BI__nvvm_atom_min_gen_i:
9106  case NVPTX::BI__nvvm_atom_min_gen_l:
9107  case NVPTX::BI__nvvm_atom_min_gen_ll:
9108  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::Min, E);
9109 
9110  case NVPTX::BI__nvvm_atom_min_gen_ui:
9111  case NVPTX::BI__nvvm_atom_min_gen_ul:
9112  case NVPTX::BI__nvvm_atom_min_gen_ull:
9113  return MakeBinaryAtomicValue(*this, llvm::AtomicRMWInst::UMin, E);
9114 
9115  case NVPTX::BI__nvvm_atom_cas_gen_i:
9116  case NVPTX::BI__nvvm_atom_cas_gen_l:
9117  case NVPTX::BI__nvvm_atom_cas_gen_ll:
9118  // __nvvm_atom_cas_gen_* should return the old value rather than the
9119  // success flag.
9120  return MakeAtomicCmpXchgValue(*this, E, /*ReturnBool=*/false);
9121 
9122  case NVPTX::BI__nvvm_atom_add_gen_f: {
9123  Value *Ptr = EmitScalarExpr(E->getArg(0));
9124  Value *Val = EmitScalarExpr(E->getArg(1));
9125  // atomicrmw only deals with integer arguments so we need to use
9126  // LLVM's nvvm_atomic_load_add_f32 intrinsic for that.
9127  Value *FnALAF32 =
9128  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_add_f32, Ptr->getType());
9129  return Builder.CreateCall(FnALAF32, {Ptr, Val});
9130  }
9131 
9132  case NVPTX::BI__nvvm_atom_inc_gen_ui: {
9133  Value *Ptr = EmitScalarExpr(E->getArg(0));
9134  Value *Val = EmitScalarExpr(E->getArg(1));
9135  Value *FnALI32 =
9136  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_inc_32, Ptr->getType());
9137  return Builder.CreateCall(FnALI32, {Ptr, Val});
9138  }
9139 
9140  case NVPTX::BI__nvvm_atom_dec_gen_ui: {
9141  Value *Ptr = EmitScalarExpr(E->getArg(0));
9142  Value *Val = EmitScalarExpr(E->getArg(1));
9143  Value *FnALD32 =
9144  CGM.getIntrinsic(Intrinsic::nvvm_atomic_load_dec_32, Ptr->getType());
9145  return Builder.CreateCall(FnALD32, {Ptr, Val});
9146  }
9147 
9148  case NVPTX::BI__nvvm_ldg_c:
9149  case NVPTX::BI__nvvm_ldg_c2:
9150  case NVPTX::BI__nvvm_ldg_c4:
9151  case NVPTX::BI__nvvm_ldg_s:
9152  case NVPTX::BI__nvvm_ldg_s2:
9153  case NVPTX::BI__nvvm_ldg_s4:
9154  case NVPTX::BI__nvvm_ldg_i:
9155  case NVPTX::BI__nvvm_ldg_i2:
9156  case NVPTX::BI__nvvm_ldg_i4:
9157  case NVPTX::BI__nvvm_ldg_l:
9158  case NVPTX::BI__nvvm_ldg_ll:
9159  case NVPTX::BI__nvvm_ldg_ll2:
9160  case NVPTX::BI__nvvm_ldg_uc:
9161  case NVPTX::BI__nvvm_ldg_uc2:
9162  case NVPTX::BI__nvvm_ldg_uc4:
9163  case NVPTX::BI__nvvm_ldg_us:
9164  case NVPTX::BI__nvvm_ldg_us2:
9165  case NVPTX::BI__nvvm_ldg_us4:
9166  case NVPTX::BI__nvvm_ldg_ui:
9167  case NVPTX::BI__nvvm_ldg_ui2:
9168  case NVPTX::BI__nvvm_ldg_ui4:
9169  case NVPTX::BI__nvvm_ldg_ul:
9170  case NVPTX::BI__nvvm_ldg_ull:
9171  case NVPTX::BI__nvvm_ldg_ull2:
9172  // PTX Interoperability section 2.2: "For a vector with an even number of
9173  // elements, its alignment is set to number of elements times the alignment
9174  // of its member: n*alignof(t)."
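  // All of the integer variants funnel into one overloaded intrinsic; the
  // element type and natural alignment are taken from the pointer operand in
  // MakeLdg above.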
9175  return MakeLdg(Intrinsic::nvvm_ldg_global_i);
9176  case NVPTX::BI__nvvm_ldg_f:
9177  case NVPTX::BI__nvvm_ldg_f2:
9178  case NVPTX::BI__nvvm_ldg_f4:
9179  case NVPTX::BI__nvvm_ldg_d:
9180  case NVPTX::BI__nvvm_ldg_d2:
9181  return MakeLdg(Intrinsic::nvvm_ldg_global_f);
9182 
9183  case NVPTX::BI__nvvm_atom_cta_add_gen_i:
9184  case NVPTX::BI__nvvm_atom_cta_add_gen_l:
9185  case NVPTX::BI__nvvm_atom_cta_add_gen_ll:
9186  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_cta);
9187  case NVPTX::BI__nvvm_atom_sys_add_gen_i:
9188  case NVPTX::BI__nvvm_atom_sys_add_gen_l:
9189  case NVPTX::BI__nvvm_atom_sys_add_gen_ll:
9190  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_i_sys);
9191  case NVPTX::BI__nvvm_atom_cta_add_gen_f:
9192  case NVPTX::BI__nvvm_atom_cta_add_gen_d:
9193  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_cta);
9194  case NVPTX::BI__nvvm_atom_sys_add_gen_f:
9195  case NVPTX::BI__nvvm_atom_sys_add_gen_d:
9196  return MakeScopedAtomic(Intrinsic::nvvm_atomic_add_gen_f_sys);
9197  case NVPTX::BI__nvvm_atom_cta_xchg_gen_i:
9198  case NVPTX::BI__nvvm_atom_cta_xchg_gen_l:
9199  case NVPTX::BI__nvvm_atom_cta_xchg_gen_ll:
9200  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_cta);
9201  case NVPTX::BI__nvvm_atom_sys_xchg_gen_i:
9202  case NVPTX::BI__nvvm_atom_sys_xchg_gen_l:
9203  case NVPTX::BI__nvvm_atom_sys_xchg_gen_ll:
9204  return MakeScopedAtomic(Intrinsic::nvvm_atomic_exch_gen_i_sys);
9205  case NVPTX::BI__nvvm_atom_cta_max_gen_i:
9206  case NVPTX::BI__nvvm_atom_cta_max_gen_ui:
9207  case NVPTX::BI__nvvm_atom_cta_max_gen_l:
9208  case NVPTX::BI__nvvm_atom_cta_max_gen_ul:
9209  case NVPTX::BI__nvvm_atom_cta_max_gen_ll:
9210  case NVPTX::BI__nvvm_atom_cta_max_gen_ull:
9211  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_cta);
9212  case NVPTX::BI__nvvm_atom_sys_max_gen_i:
9213  case NVPTX::BI__nvvm_atom_sys_max_gen_ui:
9214  case NVPTX::BI__nvvm_atom_sys_max_gen_l:
9215  case NVPTX::BI__nvvm_atom_sys_max_gen_ul:
9216  case NVPTX::BI__nvvm_atom_sys_max_gen_ll:
9217  case NVPTX::BI__nvvm_atom_sys_max_gen_ull:
9218  return MakeScopedAtomic(Intrinsic::nvvm_atomic_max_gen_i_sys);
9219  case NVPTX::BI__nvvm_atom_cta_min_gen_i:
9220  case NVPTX::BI__nvvm_atom_cta_min_gen_ui:
9221  case NVPTX::BI__nvvm_atom_cta_min_gen_l:
9222  case NVPTX::BI__nvvm_atom_cta_min_gen_ul:
9223  case NVPTX::BI__nvvm_atom_cta_min_gen_ll:
9224  case NVPTX::BI__nvvm_atom_cta_min_gen_ull:
9225  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_cta);
9226  case NVPTX::BI__nvvm_atom_sys_min_gen_i:
9227  case NVPTX::BI__nvvm_atom_sys_min_gen_ui:
9228  case NVPTX::BI__nvvm_atom_sys_min_gen_l:
9229  case NVPTX::BI__nvvm_atom_sys_min_gen_ul:
9230  case NVPTX::BI__nvvm_atom_sys_min_gen_ll:
9231  case NVPTX::BI__nvvm_atom_sys_min_gen_ull:
9232  return MakeScopedAtomic(Intrinsic::nvvm_atomic_min_gen_i_sys);
9233  case NVPTX::BI__nvvm_atom_cta_inc_gen_ui:
9234  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_cta);
9235  case NVPTX::BI__nvvm_atom_cta_dec_gen_ui:
9236  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_cta);
9237  case NVPTX::BI__nvvm_atom_sys_inc_gen_ui:
9238  return MakeScopedAtomic(Intrinsic::nvvm_atomic_inc_gen_i_sys);
9239  case NVPTX::BI__nvvm_atom_sys_dec_gen_ui:
9240  return MakeScopedAtomic(Intrinsic::nvvm_atomic_dec_gen_i_sys);
9241  case NVPTX::BI__nvvm_atom_cta_and_gen_i:
9242  case NVPTX::BI__nvvm_atom_cta_and_gen_l:
9243  case NVPTX::BI__nvvm_atom_cta_and_gen_ll:
9244  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_cta);
9245  case NVPTX::BI__nvvm_atom_sys_and_gen_i:
9246  case NVPTX::BI__nvvm_atom_sys_and_gen_l:
9247  case NVPTX::BI__nvvm_atom_sys_and_gen_ll:
9248  return MakeScopedAtomic(Intrinsic::nvvm_atomic_and_gen_i_sys);
9249  case NVPTX::BI__nvvm_atom_cta_or_gen_i:
9250  case NVPTX::BI__nvvm_atom_cta_or_gen_l:
9251  case NVPTX::BI__nvvm_atom_cta_or_gen_ll:
9252  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_cta);
9253  case NVPTX::BI__nvvm_atom_sys_or_gen_i:
9254  case NVPTX::BI__nvvm_atom_sys_or_gen_l:
9255  case NVPTX::BI__nvvm_atom_sys_or_gen_ll:
9256  return MakeScopedAtomic(Intrinsic::nvvm_atomic_or_gen_i_sys);
9257  case NVPTX::BI__nvvm_atom_cta_xor_gen_i:
9258  case NVPTX::BI__nvvm_atom_cta_xor_gen_l:
9259  case NVPTX::BI__nvvm_atom_cta_xor_gen_ll:
9260  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_cta);
9261  case NVPTX::BI__nvvm_atom_sys_xor_gen_i:
9262  case NVPTX::BI__nvvm_atom_sys_xor_gen_l:
9263  case NVPTX::BI__nvvm_atom_sys_xor_gen_ll:
9264  return MakeScopedAtomic(Intrinsic::nvvm_atomic_xor_gen_i_sys);
9265  case NVPTX::BI__nvvm_atom_cta_cas_gen_i:
9266  case NVPTX::BI__nvvm_atom_cta_cas_gen_l:
9267  case NVPTX::BI__nvvm_atom_cta_cas_gen_ll: {
9268  Value *Ptr = EmitScalarExpr(E->getArg(0));
9269  return Builder.CreateCall(
9270  CGM.getIntrinsic(
9271  Intrinsic::nvvm_atomic_cas_gen_i_cta,
9272  {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9273  {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9274  }
9275  case NVPTX::BI__nvvm_atom_sys_cas_gen_i:
9276  case NVPTX::BI__nvvm_atom_sys_cas_gen_l:
9277  case NVPTX::BI__nvvm_atom_sys_cas_gen_ll: {
9278  Value *Ptr = EmitScalarExpr(E->getArg(0));
9279  return Builder.CreateCall(
9280  CGM.getIntrinsic(
9281  Intrinsic::nvvm_atomic_cas_gen_i_sys,
9282  {Ptr->getType()->getPointerElementType(), Ptr->getType()}),
9283  {Ptr, EmitScalarExpr(E->getArg(1)), EmitScalarExpr(E->getArg(2))});
9284  }
9285  default:
9286  return nullptr;
9287  }
9288 }
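The scoped NVPTX atomics above all funnel through the MakeScopedAtomic helper, which selects an llvm.nvvm.atomic.* intrinsic carrying the memory scope in its name (cta or sys); the compare-and-swap builtins are emitted by hand, presumably because they carry two value operands, and their inline emission shows the intrinsic being overloaded on the pointee and pointer types explicitly. As a minimal sketch (not part of this file), the following C++ shows how one of these builtins might be used in device code compiled by clang for an NVPTX target; the builtin's availability on sm_60+ hardware is an assumption about the front end, not something stated in this listing.

// Illustrative sketch, not part of CGBuiltin.cpp. Assumes device code compiled
// by clang for an NVPTX target with scoped-atomic support (sm_60 or newer).
int add_block_scoped(int *counter, int value) {
  // Expected to be lowered by EmitNVPTXBuiltinExpr above into a call of the
  // llvm.nvvm.atomic.add.gen.i.cta intrinsic, with CTA (thread-block) scope.
  return __nvvm_atom_cta_add_gen_i(counter, value);
}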
9289 
9290 Value *CodeGenFunction::EmitWebAssemblyBuiltinExpr(unsigned BuiltinID,
9291  const CallExpr *E) {
9292  switch (BuiltinID) {
9293  case WebAssembly::BI__builtin_wasm_current_memory: {
9294  llvm::Type *ResultType = ConvertType(E->getType());
9295  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_current_memory, ResultType);
9296  return Builder.CreateCall(Callee);
9297  }
9298  case WebAssembly::BI__builtin_wasm_grow_memory: {
9299  Value *X = EmitScalarExpr(E->getArg(0));
9300  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_grow_memory, X->getType());
9301  return Builder.CreateCall(Callee, X);
9302  }
9303  case WebAssembly::BI__builtin_wasm_throw: {
9304  Value *Tag = EmitScalarExpr(E->getArg(0));
9305  Value *Obj = EmitScalarExpr(E->getArg(1));
9306  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_throw);
9307  return Builder.CreateCall(Callee, {Tag, Obj});
9308  }
9309  case WebAssembly::BI__builtin_wasm_rethrow: {
9310  Value *Callee = CGM.getIntrinsic(Intrinsic::wasm_rethrow);
9311  return Builder.CreateCall(Callee);
9312  }
9313 
9314  default:
9315  return nullptr;
9316  }
9317 }
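EmitWebAssemblyBuiltinExpr is deliberately small: the two memory builtins map onto the llvm.wasm.current.memory and llvm.wasm.grow.memory intrinsics (overloaded on the size type), and the exception-handling builtins map onto llvm.wasm.throw and llvm.wasm.rethrow. Below is a minimal usage sketch in C++ (not part of this file), assuming code compiled with clang for a wasm32 target; the 64 KiB page size and the -1 failure value of grow_memory come from the WebAssembly semantics rather than from this listing.

// Illustrative sketch, not part of CGBuiltin.cpp. Assumes a wasm32 target.
#include <cstddef>

std::size_t linear_memory_bytes() {
  // Lowered above to a call of llvm.wasm.current.memory; the result is the
  // current linear-memory size in 64 KiB pages.
  return __builtin_wasm_current_memory() * 65536;
}

bool grow_linear_memory(std::size_t pages) {
  // Lowered above to llvm.wasm.grow.memory; returns the previous size in
  // pages, or size_t(-1) if the memory could not be grown.
  return __builtin_wasm_grow_memory(pages) != static_cast<std::size_t>(-1);
}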