//===-- SelectionDAGBuilder.cpp - Selection-DAG building ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This implements routines for translating from LLVM IR into SelectionDAG IR.
//
//===----------------------------------------------------------------------===//

#include "SelectionDAGBuilder.h"
#include "SDNodeDbgValue.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/CodeGen/Analysis.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/StackMaps.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/ConstantRange.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Support/Debug.h"
#include <algorithm>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "isel"
/// LimitFloatPrecision - Generate low-precision inline sequences for
/// some float libcalls (6, 8 or 12 bits).
static unsigned LimitFloatPrecision;

static cl::opt<unsigned, true>
LimitFPPrecision("limit-float-precision",
                 cl::desc("Generate low-precision inline sequences "
                          "for some float libcalls"),
                 cl::location(LimitFloatPrecision),
                 cl::init(0));
// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
//   %buffer = alloca [4096 x i8]
//   %data = load [4096 x i8]* %argPtr
//   store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;

// True if the Value passed requires ABI mangling as it is a parameter to a
// function or a return value from a function which is not an intrinsic.
static bool isABIRegCopy(const Value *V) {
  const bool IsRetInst = V && isa<ReturnInst>(V);
  const bool IsCallInst = V && isa<CallInst>(V);
  const bool IsInlineAsm =
      IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm();
  const bool IsIndirectFunctionCall =
      IsCallInst && !IsInlineAsm &&
      !static_cast<const CallInst *>(V)->getCalledFunction();
  // It is possible that the call instruction is an inline asm statement or an
  // indirect function call in which case the return value of
  // getCalledFunction() would be nullptr.
  const bool IsIntrinsicCall =
      IsCallInst && !IsInlineAsm && !IsIndirectFunctionCall &&
      static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() !=
          Intrinsic::not_intrinsic;

  return IsRetInst || (IsCallInst && (!IsInlineAsm && !IsIntrinsicCall));
}

static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      bool IsABIRegCopy);

/// getCopyFromParts - Create a value that contains the specified legal parts
/// combined into the value they represent. If the parts combine to a type
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
                                const SDValue *Parts, unsigned NumParts,
                                MVT PartVT, EVT ValueVT, const Value *V,
                                Optional<ISD::NodeType> AssertOp = None,
                                bool IsABIRegCopy = false) {
  if (ValueVT.isVector())
    return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
                                  PartVT, ValueVT, V, IsABIRegCopy);

  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  if (NumParts > 1) {
    // Assemble the value from multiple parts.
    if (ValueVT.isInteger()) {
      unsigned PartBits = PartVT.getSizeInBits();
      unsigned ValueBits = ValueVT.getSizeInBits();

      // Assemble the power of 2 part.
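      // (NumParts & (NumParts - 1)) is nonzero exactly when NumParts is not a
      // power of 2; in that case round down to the largest power of 2 and
      // assemble the remaining parts as the odd tail below.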
      unsigned RoundParts = NumParts & (NumParts - 1) ?
        1 << Log2_32(NumParts) : NumParts;
      unsigned RoundBits = PartBits * RoundParts;
      EVT RoundVT = RoundBits == ValueBits ?
        ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
      SDValue Lo, Hi;

      EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);

      if (RoundParts > 2) {
        Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
                              PartVT, HalfVT, V);
        Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
                              RoundParts / 2, PartVT, HalfVT, V);
      } else {
        Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
        Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
      }

      if (DAG.getDataLayout().isBigEndian())
        std::swap(Lo, Hi);

      Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);

      if (RoundParts < NumParts) {
        // Assemble the trailing non-power-of-2 part.
        unsigned OddParts = NumParts - RoundParts;
        EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
        Hi = getCopyFromParts(DAG, DL,
                              Parts + RoundParts, OddParts, PartVT, OddVT, V);

        // Combine the round and odd parts.
        Lo = Val;
        if (DAG.getDataLayout().isBigEndian())
          std::swap(Lo, Hi);
        EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
        Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
        Hi =
            DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
                        DAG.getConstant(Lo.getValueSizeInBits(), DL,
                                        TLI.getPointerTy(DAG.getDataLayout())));
        Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
        Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
      }
    } else if (PartVT.isFloatingPoint()) {
      // FP split into multiple FP parts (for ppcf128)
      assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
             "Unexpected split");
      SDValue Lo, Hi;
      Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
      Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
      if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
        std::swap(Lo, Hi);
      Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
    } else {
      // FP split into integer parts (soft fp)
      assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
             !PartVT.isVector() && "Unexpected split");
      EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
      Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
    }
  }

  // There is now one part, held in Val. Correct it to match ValueVT.
  // PartEVT is the type of the register class that holds the value.
  // ValueVT is the type of the inline asm operation.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
      ValueVT.bitsLT(PartEVT)) {
    // For an FP value in an integer part, we need to truncate to the right
    // width first.
    PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
    Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
  }

  // Handle types that have the same size.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  // Handle types with different sizes.
  if (PartEVT.isInteger() && ValueVT.isInteger()) {
    if (ValueVT.bitsLT(PartEVT)) {
      // For a truncate, see if we have any information to
      // indicate whether the truncated bits will always be
      // zero or sign-extension.
      if (AssertOp.hasValue())
        Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
                          DAG.getValueType(ValueVT));
      return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    }
    return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
  }

  if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
    // FP_ROUND's are always exact here.
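    // The trailing TargetConstant 1 on the FP_ROUND below asserts that the
    // truncation loses no information, which later folds may exploit.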
    if (ValueVT.bitsLT(Val.getValueType()))
      return DAG.getNode(
          ISD::FP_ROUND, DL, ValueVT, Val,
          DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));

    return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
  }

  llvm_unreachable("Unknown mismatch!");
}

static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
                                              const Twine &ErrMsg) {
  const Instruction *I = dyn_cast_or_null<Instruction>(V);
  if (!V)
    return Ctx.emitError(ErrMsg);

  const char *AsmError = ", possible invalid constraint for vector type";
  if (const CallInst *CI = dyn_cast<CallInst>(I))
    if (isa<InlineAsm>(CI->getCalledValue()))
      return Ctx.emitError(I, ErrMsg + AsmError);

  return Ctx.emitError(I, ErrMsg);
}

/// getCopyFromPartsVector - Create a value that contains the specified legal
/// parts combined into the value they represent. If the parts combine to a
/// type larger than ValueVT then AssertOp can be used to specify whether the
/// extra bits are known to be zero (ISD::AssertZext) or sign extended from
/// ValueVT (ISD::AssertSext).
static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                      const SDValue *Parts, unsigned NumParts,
                                      MVT PartVT, EVT ValueVT, const Value *V,
                                      bool IsABIRegCopy) {
  assert(ValueVT.isVector() && "Not a vector value");
  assert(NumParts > 0 && "No parts to assemble!");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDValue Val = Parts[0];

  // Handle a multi-element vector.
  if (NumParts > 1) {
    EVT IntermediateVT;
    MVT RegisterVT;
    unsigned NumIntermediates;
    unsigned NumRegs;

    if (IsABIRegCopy) {
      NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
          *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
          RegisterVT);
    } else {
      NumRegs =
          TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                     NumIntermediates, RegisterVT);
    }
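    // The breakdown describes how ValueVT is decomposed: NumRegs registers of
    // RegisterVT carry NumIntermediates intermediate values of IntermediateVT.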

    assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
    NumParts = NumRegs; // Silence a compiler warning.
    assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
    assert(RegisterVT.getSizeInBits() ==
           Parts[0].getSimpleValueType().getSizeInBits() &&
           "Part type sizes don't match!");

    // Assemble the parts into intermediate operands.
    SmallVector<SDValue, 8> Ops(NumIntermediates);
    if (NumIntermediates == NumParts) {
      // If the register was not expanded, truncate or copy the value,
      // as appropriate.
      for (unsigned i = 0; i != NumParts; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
                                  PartVT, IntermediateVT, V);
    } else if (NumParts > 0) {
      // If the intermediate type was expanded, build the intermediate
      // operands from the parts.
      assert(NumParts % NumIntermediates == 0 &&
             "Must expand into a divisible number of parts!");
      unsigned Factor = NumParts / NumIntermediates;
      for (unsigned i = 0; i != NumIntermediates; ++i)
        Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
                                  PartVT, IntermediateVT, V);
    }

    // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
    // intermediate operands.
    EVT BuiltVectorTy =
        EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
                         (IntermediateVT.isVector()
                              ? IntermediateVT.getVectorNumElements() * NumParts
                              : NumIntermediates));
    Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
                                                : ISD::BUILD_VECTOR,
                      DL, BuiltVectorTy, Ops);
  }

  // There is now one part, held in Val. Correct it to match ValueVT.
  EVT PartEVT = Val.getValueType();

  if (PartEVT == ValueVT)
    return Val;

  if (PartEVT.isVector()) {
    // If the element type of the source/dest vectors are the same, but the
    // parts vector has more elements than the value vector, then we have a
    // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
    // elements we want.
    if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
      assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
             "Cannot narrow, it would be a lossy transformation");
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
    }

    // Vector/Vector bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

    assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
           "Cannot handle this kind of promotion");
    // Promoted vector extract
    return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
  }

  // Trivial bitcast if the types are the same size and the destination
  // vector type is legal.
  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
      TLI.isTypeLegal(ValueVT))
    return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);

  if (ValueVT.getVectorNumElements() != 1) {
    // Certain ABIs require that vectors are passed as integers. If the
    // vectors are the same size, this is an obvious bitcast.
    if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
      return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
    } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
      // Bitcast Val back to the original type and extract the corresponding
      // vector we want.
      unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
      EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
                                          ValueVT.getVectorElementType(), Elts);
      Val = DAG.getBitcast(WiderVecType, Val);
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
          DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
    }

    diagnosePossiblyInvalidConstraint(
        *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
    return DAG.getUNDEF(ValueVT);
  }

  // Handle cases such as i8 -> <1 x i1>
  EVT ValueSVT = ValueVT.getVectorElementType();
  if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
    Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
                                    : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);

  return DAG.getBuildVector(ValueVT, DL, Val);
}

static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V, bool IsABIRegCopy);

/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
                           SDValue *Parts, unsigned NumParts, MVT PartVT,
                           const Value *V,
                           ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
                           bool IsABIRegCopy = false) {
  EVT ValueVT = Val.getValueType();

  // Handle the vector case separately.
  if (ValueVT.isVector())
    return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
                                IsABIRegCopy);

  unsigned PartBits = PartVT.getSizeInBits();
  unsigned OrigNumParts = NumParts;
  assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
         "Copying to an illegal type!");

  if (NumParts == 0)
    return;

  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
  EVT PartEVT = PartVT;
  if (PartEVT == ValueVT) {
    assert(NumParts == 1 && "No-op copy with multiple parts!");
    Parts[0] = Val;
    return;
  }

  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
    // If the parts cover more bits than the value has, promote the value.
    if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
      assert(NumParts == 1 && "Do not know what to promote to!");
      Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
    } else {
      if (ValueVT.isFloatingPoint()) {
        // FP values need to be bitcast, then extended if they are being put
        // into a larger container.
        ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
        Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
      }
      assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
             ValueVT.isInteger() &&
             "Unknown mismatch!");
      ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
      Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
      if (PartVT == MVT::x86mmx)
        Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }
  } else if (PartBits == ValueVT.getSizeInBits()) {
    // Different types of the same size.
    assert(NumParts == 1 && PartEVT != ValueVT);
    Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
    // If the parts cover fewer bits than the value has, truncate the value.
    assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
           ValueVT.isInteger() &&
           "Unknown mismatch!");
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
    if (PartVT == MVT::x86mmx)
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
  }

  // The value may have changed - recompute ValueVT.
  ValueVT = Val.getValueType();
  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
         "Failed to tile the value with PartVT!");

  if (NumParts == 1) {
    if (PartEVT != ValueVT) {
      diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
                                        "scalar-to-vector conversion failed");
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    }

    Parts[0] = Val;
    return;
  }

  // Expand the value into multiple parts.
  if (NumParts & (NumParts - 1)) {
    // The number of parts is not a power of 2. Split off and copy the tail.
    assert(PartVT.isInteger() && ValueVT.isInteger() &&
           "Do not know what to expand to!");
    unsigned RoundParts = 1 << Log2_32(NumParts);
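    // Log2_32 rounds down, so RoundParts is the largest power of 2 that is
    // no larger than NumParts.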
    unsigned RoundBits = RoundParts * PartBits;
    unsigned OddParts = NumParts - RoundParts;
    SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
                                 DAG.getIntPtrConstant(RoundBits, DL));
    getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);

    if (DAG.getDataLayout().isBigEndian())
      // The odd parts were reversed by getCopyToParts - unreverse them.
      std::reverse(Parts + RoundParts, Parts + NumParts);

    NumParts = RoundParts;
    ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
    Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
  }

  // The number of parts is a power of 2. Repeatedly bisect the value using
  // EXTRACT_ELEMENT.
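  // Each pass of the loop below halves StepSize: the 2*ThisBits-wide value in
  // Parts[i] is split by EXTRACT_ELEMENT into its low half (element 0), which
  // stays in Parts[i], and its high half (element 1), which moves to
  // Parts[i + StepSize/2].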
  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
                         EVT::getIntegerVT(*DAG.getContext(),
                                           ValueVT.getSizeInBits()),
                         Val);

  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
    for (unsigned i = 0; i < NumParts; i += StepSize) {
      unsigned ThisBits = StepSize * PartBits / 2;
      EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
      SDValue &Part0 = Parts[i];
      SDValue &Part1 = Parts[i+StepSize/2];

      Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
      Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
                          ThisVT, Part0, DAG.getIntPtrConstant(0, DL));

      if (ThisBits == PartBits && ThisVT != PartVT) {
        Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
        Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
      }
    }
  }

  if (DAG.getDataLayout().isBigEndian())
    std::reverse(Parts, Parts + OrigNumParts);
}


/// getCopyToPartsVector - Create a series of nodes that contain the specified
/// value split into legal parts.
static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
                                 SDValue Val, SDValue *Parts, unsigned NumParts,
                                 MVT PartVT, const Value *V,
                                 bool IsABIRegCopy) {
  EVT ValueVT = Val.getValueType();
  assert(ValueVT.isVector() && "Not a vector");
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (NumParts == 1) {
    EVT PartEVT = PartVT;
    if (PartEVT == ValueVT) {
      // Nothing to do.
    } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
      // Bitconvert vector->vector case.
      Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
    } else if (PartVT.isVector() &&
               PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
               PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
      EVT ElementVT = PartVT.getVectorElementType();
      // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
      // undef elements.
      SmallVector<SDValue, 16> Ops;
      for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
        Ops.push_back(DAG.getNode(
            ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val,
            DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));

      for (unsigned i = ValueVT.getVectorNumElements(),
                    e = PartVT.getVectorNumElements(); i != e; ++i)
        Ops.push_back(DAG.getUNDEF(ElementVT));

      Val = DAG.getBuildVector(PartVT, DL, Ops);

      // FIXME: Use CONCAT for 2x -> 4x.

      //SDValue UndefElts = DAG.getUNDEF(VectorTy);
      //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
    } else if (PartVT.isVector() &&
               PartEVT.getVectorElementType().bitsGE(
                   ValueVT.getVectorElementType()) &&
               PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {

      // Promoted vector extract
      Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
    } else {
      if (ValueVT.getVectorNumElements() == 1) {
        Val = DAG.getNode(
            ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
            DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
      } else {
        assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
               "lossy conversion of vector to scalar type");
        EVT IntermediateType =
            EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
        Val = DAG.getBitcast(IntermediateType, Val);
        Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
      }
    }

    assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
    Parts[0] = Val;
    return;
  }

  // Handle a multi-element vector.
  EVT IntermediateVT;
  MVT RegisterVT;
  unsigned NumIntermediates;
  unsigned NumRegs;
  if (IsABIRegCopy) {
    NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
        *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
        RegisterVT);
  } else {
    NumRegs =
        TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
                                   NumIntermediates, RegisterVT);
  }
  unsigned NumElements = ValueVT.getVectorNumElements();

  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
  NumParts = NumRegs; // Silence a compiler warning.
  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");

  // Convert the vector to the appropriate type if necessary.
  unsigned DestVectorNoElts =
      NumIntermediates *
      (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
  EVT BuiltVectorTy = EVT::getVectorVT(
      *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
  if (Val.getValueType() != BuiltVectorTy)
    Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);

  // Split the vector into intermediate operands.
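  // Each intermediate covers NumElements / NumIntermediates consecutive
  // elements, so intermediate i starts at that multiple of i.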
  SmallVector<SDValue, 8> Ops(NumIntermediates);
  for (unsigned i = 0; i != NumIntermediates; ++i) {
    if (IntermediateVT.isVector())
      Ops[i] =
          DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
                      DAG.getConstant(i * (NumElements / NumIntermediates), DL,
                                      TLI.getVectorIdxTy(DAG.getDataLayout())));
    else
      Ops[i] = DAG.getNode(
          ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
          DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
  }

  // Split the intermediate operands into legal parts.
  if (NumParts == NumIntermediates) {
    // If the register was not expanded, promote or copy the value,
    // as appropriate.
    for (unsigned i = 0; i != NumParts; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
  } else if (NumParts > 0) {
    // If the intermediate type was expanded, split each value into
    // legal parts.
    assert(NumIntermediates != 0 && "division by zero");
    assert(NumParts % NumIntermediates == 0 &&
           "Must expand into a divisible number of parts!");
    unsigned Factor = NumParts / NumIntermediates;
    for (unsigned i = 0; i != NumIntermediates; ++i)
      getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
  }
}

RegsForValue::RegsForValue() = default;

RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
                           EVT valuevt, bool IsABIMangledValue)
    : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
      RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}

RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
                           const DataLayout &DL, unsigned Reg, Type *Ty,
                           bool IsABIMangledValue) {
  ComputeValueVTs(TLI, DL, Ty, ValueVTs);

  IsABIMangled = IsABIMangledValue;

  for (EVT ValueVT : ValueVTs) {
    unsigned NumRegs = IsABIMangledValue
                           ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
                           : TLI.getNumRegisters(Context, ValueVT);
    MVT RegisterVT = IsABIMangledValue
                         ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
                         : TLI.getRegisterType(Context, ValueVT);
    for (unsigned i = 0; i != NumRegs; ++i)
      Regs.push_back(Reg + i);
    RegVTs.push_back(RegisterVT);
    RegCount.push_back(NumRegs);
    Reg += NumRegs;
  }
}

SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
                                      FunctionLoweringInfo &FuncInfo,
                                      const SDLoc &dl, SDValue &Chain,
                                      SDValue *Flag, const Value *V) const {
  // A Value with type {} or [0 x %t] needs no registers.
  if (ValueVTs.empty())
    return SDValue();

  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // Assemble the legal parts into the final values.
  SmallVector<SDValue, 4> Values(ValueVTs.size());
  SmallVector<SDValue, 8> Parts;
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    // Copy the legal parts from the registers.
    EVT ValueVT = ValueVTs[Value];
    unsigned NumRegs = RegCount[Value];
    MVT RegisterVT = IsABIMangled
                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
                         : RegVTs[Value];

    Parts.resize(NumRegs);
    for (unsigned i = 0; i != NumRegs; ++i) {
      SDValue P;
      if (!Flag) {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
      } else {
        P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
        *Flag = P.getValue(2);
      }

      Chain = P.getValue(1);
      Parts[i] = P;

      // If the source register was virtual and if we know something about it,
      // add an assert node.
      if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
          !RegisterVT.isInteger() || RegisterVT.isVector())
        continue;

      const FunctionLoweringInfo::LiveOutInfo *LOI =
        FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
      if (!LOI)
        continue;

      unsigned RegSize = RegisterVT.getSizeInBits();
      unsigned NumSignBits = LOI->NumSignBits;
      unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();

      if (NumZeroBits == RegSize) {
        // The current value is a zero.
        // Explicitly express that as it would be easier for
        // optimizations to kick in.
        Parts[i] = DAG.getConstant(0, dl, RegisterVT);
        continue;
      }

      // FIXME: We capture more information than the dag can represent. For
      // now, just use the tightest assertzext/assertsext possible.
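      // A value sign-extended from iN into the register leaves RegSize - N + 1
      // copies of the sign bit, hence the "NumSignBits > RegSize - N" tests;
      // a value zero-extended from iN leaves RegSize - N known zero bits.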
      bool isSExt = true;
      EVT FromVT(MVT::Other);
      if (NumSignBits == RegSize) {
        isSExt = true;   // ASSERT SEXT 1
        FromVT = MVT::i1;
      } else if (NumZeroBits >= RegSize - 1) {
        isSExt = false;  // ASSERT ZEXT 1
        FromVT = MVT::i1;
      } else if (NumSignBits > RegSize - 8) {
        isSExt = true;   // ASSERT SEXT 8
        FromVT = MVT::i8;
      } else if (NumZeroBits >= RegSize - 8) {
        isSExt = false;  // ASSERT ZEXT 8
        FromVT = MVT::i8;
      } else if (NumSignBits > RegSize - 16) {
        isSExt = true;   // ASSERT SEXT 16
        FromVT = MVT::i16;
      } else if (NumZeroBits >= RegSize - 16) {
        isSExt = false;  // ASSERT ZEXT 16
        FromVT = MVT::i16;
      } else if (NumSignBits > RegSize - 32) {
        isSExt = true;   // ASSERT SEXT 32
        FromVT = MVT::i32;
      } else if (NumZeroBits >= RegSize - 32) {
        isSExt = false;  // ASSERT ZEXT 32
        FromVT = MVT::i32;
      } else {
        continue;
      }
      // Add an assertion node.
      assert(FromVT != MVT::Other);
      Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
                             RegisterVT, P, DAG.getValueType(FromVT));
    }

    Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
                                     NumRegs, RegisterVT, ValueVT, V);
    Part += NumRegs;
    Parts.clear();
  }

  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
}

void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
                                 const SDLoc &dl, SDValue &Chain, SDValue *Flag,
                                 const Value *V,
                                 ISD::NodeType PreferredExtendType) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  ISD::NodeType ExtendKind = PreferredExtendType;

  // Get the list of the value's legal parts.
  unsigned NumRegs = Regs.size();
  SmallVector<SDValue, 8> Parts(NumRegs);
  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumParts = RegCount[Value];

    MVT RegisterVT = IsABIMangled
                         ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
                         : RegVTs[Value];

    if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
      ExtendKind = ISD::ZERO_EXTEND;

    getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
                   &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
    Part += NumParts;
  }

  // Copy the parts into the registers.
  SmallVector<SDValue, 8> Chains(NumRegs);
  for (unsigned i = 0; i != NumRegs; ++i) {
    SDValue Part;
    if (!Flag) {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
    } else {
      Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
      *Flag = Part.getValue(1);
    }

    Chains[i] = Part.getValue(0);
  }

  if (NumRegs == 1 || Flag)
    // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
    // flagged to it. That is, the CopyToReg nodes and the user are considered
    // a single scheduling unit. If we create a TokenFactor and return it as
    // chain, then the TokenFactor is both a predecessor (operand) of the
    // user as well as a successor (the TF operands are flagged to the user).
    // c1, f1 = CopyToReg
    // c2, f2 = CopyToReg
    // c3 = TokenFactor c1, c2
    // ...
    //   = op c3, ..., f2
    Chain = Chains[NumRegs-1];
  else
    Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
}

void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
                                        unsigned MatchingIdx, const SDLoc &dl,
                                        SelectionDAG &DAG,
                                        std::vector<SDValue> &Ops) const {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
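  // The flag word encodes the operand kind and the register count; for a tied
  // operand, getFlagWordForMatchingOp also records which output it matches.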
  if (HasMatching)
    Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
  else if (!Regs.empty() &&
           TargetRegisterInfo::isVirtualRegister(Regs.front())) {
    // Put the register class of the virtual registers in the flag word. That
    // way, later passes can recompute register class constraints for inline
    // assembly as well as normal instructions.
    // Don't do this for tied operands that can use the regclass information
    // from the def.
    const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
    const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
    Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
  }

  SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
  Ops.push_back(Res);

  unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
    unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
    MVT RegisterVT = RegVTs[Value];
    for (unsigned i = 0; i != NumRegs; ++i) {
      assert(Reg < Regs.size() && "Mismatch in # registers expected");
      unsigned TheReg = Regs[Reg++];
      Ops.push_back(DAG.getRegister(TheReg, RegisterVT));

      if (TheReg == SP && Code == InlineAsm::Kind_Clobber) {
        // If we clobbered the stack pointer, MFI should know about it.
        assert(DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment());
      }
    }
  }
}

void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
                               const TargetLibraryInfo *li) {
  AA = aa;
  GFI = gfi;
  LibInfo = li;
  DL = &DAG.getDataLayout();
  Context = DAG.getContext();
  LPadToCallSiteMap.clear();
}

void SelectionDAGBuilder::clear() {
  NodeMap.clear();
  UnusedArgNodeMap.clear();
  PendingLoads.clear();
  PendingExports.clear();
  CurInst = nullptr;
  HasTailCall = false;
  SDNodeOrder = LowestSDNodeOrder;
  StatepointLowering.clear();
}

void SelectionDAGBuilder::clearDanglingDebugInfo() {
  DanglingDebugInfoMap.clear();
}

SDValue SelectionDAGBuilder::getRoot() {
  if (PendingLoads.empty())
    return DAG.getRoot();

  if (PendingLoads.size() == 1) {
    SDValue Root = PendingLoads[0];
    DAG.setRoot(Root);
    PendingLoads.clear();
    return Root;
  }

  // Otherwise, we have to make a token factor node.
  SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                             PendingLoads);
  PendingLoads.clear();
  DAG.setRoot(Root);
  return Root;
}
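
// getControlRoot is similar to getRoot, but instead of flushing the
// PendingLoads list it flushes PendingExports, the CopyToReg nodes produced
// for values that are used in successor blocks, so that those copies are
// ordered before the terminator.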
SDValue SelectionDAGBuilder::getControlRoot() {
  SDValue Root = DAG.getRoot();

  if (PendingExports.empty())
    return Root;

  // Turn all of the CopyToReg chains into one factored node.
  if (Root.getOpcode() != ISD::EntryToken) {
    unsigned i = 0, e = PendingExports.size();
    for (; i != e; ++i) {
      assert(PendingExports[i].getNode()->getNumOperands() > 1);
      if (PendingExports[i].getNode()->getOperand(0) == Root)
        break;  // Don't add the root if we already indirectly depend on it.
    }

    if (i == e)
      PendingExports.push_back(Root);
  }

  Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
                     PendingExports);
  PendingExports.clear();
  DAG.setRoot(Root);
  return Root;
}

void SelectionDAGBuilder::visit(const Instruction &I) {
  // Set up outgoing PHI node register values before emitting the terminator.
  if (isa<TerminatorInst>(&I)) {
    HandlePHINodesInSuccessorBlocks(I.getParent());
  }

  // Increase the SDNodeOrder if dealing with a non-debug instruction.
  if (!isa<DbgInfoIntrinsic>(I))
    ++SDNodeOrder;
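  // SDNodeOrder gives nodes a deterministic ordering that is used for
  // scheduling and debug-value placement; debug intrinsics do not bump it, so
  // the presence of debug info cannot change the generated code.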

  CurInst = &I;

  visit(I.getOpcode(), I);

  if (!isa<TerminatorInst>(&I) && !HasTailCall &&
      !isStatepoint(&I)) // statepoints handle their exports internally
    CopyToExportRegsIfNeeded(&I);

  CurInst = nullptr;
}

void SelectionDAGBuilder::visitPHI(const PHINode &) {
  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
}

void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
  // Note: this doesn't use InstVisitor, because it has to work with
  // ConstantExpr's in addition to instructions.
  switch (Opcode) {
  default: llvm_unreachable("Unknown instruction type encountered!");
    // Build the switch statement using the Instruction.def file.
#define HANDLE_INST(NUM, OPCODE, CLASS) \
    case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
#include "llvm/IR/Instruction.def"
  }
}

// resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
// generate the debug data structures now that we've seen its definition.
void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
                                                   SDValue Val) {
  DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
  if (DDI.getDI()) {
    const DbgValueInst *DI = DDI.getDI();
    DebugLoc dl = DDI.getdl();
    unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
    DILocalVariable *Variable = DI->getVariable();
    DIExpression *Expr = DI->getExpression();
    assert(Variable->isValidLocationForIntrinsic(dl) &&
           "Expected inlined-at fields to agree");
    SDDbgValue *SDV;
    if (Val.getNode()) {
      if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
        SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder);
        DAG.AddDbgValue(SDV, Val.getNode(), false);
      }
    } else
      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
    DanglingDebugInfoMap[V] = DanglingDebugInfo();
  }
}

/// getCopyFromRegs - If there was a virtual register allocated for the value V,
/// emit CopyFromReg of the specified type Ty. Return an empty SDValue() otherwise.
SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
  DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
  SDValue Result;

  if (It != FuncInfo.ValueMap.end()) {
    unsigned InReg = It->second;

    RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
                     DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V));
    SDValue Chain = DAG.getEntryNode();
    Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
                                 V);
    resolveDanglingDebugInfo(V, Result);
  }

  return Result;
}

/// getValue - Return an SDValue for the given Value.
SDValue SelectionDAGBuilder::getValue(const Value *V) {
  // If we already have an SDValue for this value, use it. It's important
  // to do this first, so that we don't create a CopyFromReg if we already
  // have a regular SDValue.
  SDValue &N = NodeMap[V];
  if (N.getNode()) return N;

  // If there's a virtual register allocated and initialized for this
  // value, use it.
  if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
    return copyFromReg;

  // Otherwise create a new SDValue and remember it.
  SDValue Val = getValueImpl(V);
  NodeMap[V] = Val;
  resolveDanglingDebugInfo(V, Val);
  return Val;
}

// Return true if an SDValue exists for the given Value.
bool SelectionDAGBuilder::findValue(const Value *V) const {
  return (NodeMap.find(V) != NodeMap.end()) ||
         (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
}

/// getNonRegisterValue - Return an SDValue for the given Value, but
/// don't look in FuncInfo.ValueMap for a virtual register.
SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
  // If we already have an SDValue for this value, use it.
  SDValue &N = NodeMap[V];
  if (N.getNode()) {
    if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
      // Remove the debug location from the node as the node is about to be used
      // in a location which may differ from the original debug location. This
      // is relevant to Constant and ConstantFP nodes because they can appear
      // as constant expressions inside PHI nodes.
      N->setDebugLoc(DebugLoc());
    }
    return N;
  }

  // Otherwise create a new SDValue and remember it.
  SDValue Val = getValueImpl(V);
  NodeMap[V] = Val;
  resolveDanglingDebugInfo(V, Val);
  return Val;
}

/// getValueImpl - Helper function for getValue and getNonRegisterValue.
/// Create an SDValue for the given value.
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  if (const Constant *C = dyn_cast<Constant>(V)) {
    EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);

    if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
      return DAG.getConstant(*CI, getCurSDLoc(), VT);

    if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
      return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);

    if (isa<ConstantPointerNull>(C)) {
      unsigned AS = V->getType()->getPointerAddressSpace();
      return DAG.getConstant(0, getCurSDLoc(),
                             TLI.getPointerTy(DAG.getDataLayout(), AS));
    }

    if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
      return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);

    if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
      return DAG.getUNDEF(VT);

    if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
      visit(CE->getOpcode(), *CE);
      SDValue N1 = NodeMap[V];
      assert(N1.getNode() && "visit didn't populate the NodeMap!");
      return N1;
    }

    if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
      SmallVector<SDValue, 4> Constants;
      for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
           OI != OE; ++OI) {
        SDNode *Val = getValue(*OI).getNode();
        // If the operand is an empty aggregate, there are no values.
        if (!Val) continue;
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Constants.push_back(SDValue(Val, i));
      }

      return DAG.getMergeValues(Constants, getCurSDLoc());
    }

    if (const ConstantDataSequential *CDS =
          dyn_cast<ConstantDataSequential>(C)) {
      SmallVector<SDValue, 4> Ops;
      for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
        SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
        // Add each leaf value from the operand to the Constants list
        // to form a flattened list of all the values.
        for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
          Ops.push_back(SDValue(Val, i));
      }

      if (isa<ArrayType>(CDS->getType()))
        return DAG.getMergeValues(Ops, getCurSDLoc());
      return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
    }

    if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
      assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
             "Unknown struct or array constant!");

      SmallVector<EVT, 4> ValueVTs;
      ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
      unsigned NumElts = ValueVTs.size();
      if (NumElts == 0)
        return SDValue(); // empty struct
      SmallVector<SDValue, 4> Constants(NumElts);
      for (unsigned i = 0; i != NumElts; ++i) {
        EVT EltVT = ValueVTs[i];
        if (isa<UndefValue>(C))
          Constants[i] = DAG.getUNDEF(EltVT);
        else if (EltVT.isFloatingPoint())
          Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
        else
          Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
      }

      return DAG.getMergeValues(Constants, getCurSDLoc());
    }

    if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
      return DAG.getBlockAddress(BA, VT);

    VectorType *VecTy = cast<VectorType>(V->getType());
    unsigned NumElements = VecTy->getNumElements();

    // Now that we know the number and type of the elements, get that number of
    // elements into the Ops array based on what kind of constant it is.
    SmallVector<SDValue, 16> Ops;
    if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
      for (unsigned i = 0; i != NumElements; ++i)
        Ops.push_back(getValue(CV->getOperand(i)));
    } else {
      assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
      EVT EltVT =
          TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());

      SDValue Op;
      if (EltVT.isFloatingPoint())
        Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
      else
        Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
      Ops.assign(NumElements, Op);
    }

    // Create a BUILD_VECTOR node.
    return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
  }

  // If this is a static alloca, generate it as the frameindex instead of
  // computation.
  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
    DenseMap<const AllocaInst*, int>::iterator SI =
      FuncInfo.StaticAllocaMap.find(AI);
    if (SI != FuncInfo.StaticAllocaMap.end())
      return DAG.getFrameIndex(SI->second,
                               TLI.getFrameIndexTy(DAG.getDataLayout()));
  }

  // If this is an instruction which fast-isel has deferred, select it now.
  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
    unsigned InReg = FuncInfo.InitializeRegForValue(Inst);

    RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
                     Inst->getType(), isABIRegCopy(V));
    SDValue Chain = DAG.getEntryNode();
    return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
  }

  llvm_unreachable("Can't get register for value!");
}

void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
  MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
  // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
  if (IsMSVCCXX || IsCoreCLR)
    CatchPadMBB->setIsEHFuncletEntry();

  DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
                          getControlRoot()));
}

void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
  // Update machine-CFG edge.
  MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
  FuncInfo.MBB->addSuccessor(TargetMBB);

  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsSEH = isAsynchronousEHPersonality(Pers);
  if (IsSEH) {
    // If this is not a fall-through branch or optimizations are switched off,
    // emit the branch.
    if (TargetMBB != NextBlock(FuncInfo.MBB) ||
        TM.getOptLevel() == CodeGenOpt::None)
      DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
                              getControlRoot(), DAG.getBasicBlock(TargetMBB)));
    return;
  }

  // Figure out the funclet membership for the catchret's successor.
  // This will be used by the FuncletLayout pass to determine how to order the
  // BB's.
  // A 'catchret' returns to the outer scope's color.
  Value *ParentPad = I.getCatchSwitchParentPad();
  const BasicBlock *SuccessorColor;
  if (isa<ConstantTokenNone>(ParentPad))
    SuccessorColor = &FuncInfo.Fn->getEntryBlock();
  else
    SuccessorColor = cast<Instruction>(ParentPad)->getParent();
  assert(SuccessorColor && "No parent funclet for catchret!");
  MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
  assert(SuccessorColorMBB && "No MBB for SuccessorColor!");

  // Create the terminator node.
  SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
                            getControlRoot(), DAG.getBasicBlock(TargetMBB),
                            DAG.getBasicBlock(SuccessorColorMBB));
  DAG.setRoot(Ret);
}

void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
  // Don't emit any special code for the cleanuppad instruction. It just marks
  // the start of a funclet.
  FuncInfo.MBB->setIsEHFuncletEntry();
  FuncInfo.MBB->setIsCleanupFuncletEntry();
}

/// When an invoke or a cleanupret unwinds to the next EH pad, there are
/// many places it could ultimately go. In the IR, we have a single unwind
/// destination, but in the machine CFG, we enumerate all the possible blocks.
/// This function skips over imaginary basic blocks that hold catchswitch
/// instructions, and finds all the "real" machine
/// basic block destinations. As those destinations may not be successors of
/// EHPadBB, here we also calculate the edge probability to those destinations.
/// The passed-in Prob is the edge probability to EHPadBB.
static void findUnwindDestinations(
    FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
    BranchProbability Prob,
    SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
        &UnwindDests) {
  EHPersonality Personality =
    classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;

  while (EHPadBB) {
    const Instruction *Pad = EHPadBB->getFirstNonPHI();
    BasicBlock *NewEHPadBB = nullptr;
    if (isa<LandingPadInst>(Pad)) {
      // Stop on landingpads. They are not funclets.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      break;
    } else if (isa<CleanupPadInst>(Pad)) {
      // Stop on cleanup pads. Cleanups are always funclet entries for all known
      // personalities.
      UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
      UnwindDests.back().first->setIsEHFuncletEntry();
      break;
    } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
      // Add the catchpad handlers to the possible destinations.
      for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
        UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
        // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
        if (IsMSVCCXX || IsCoreCLR)
          UnwindDests.back().first->setIsEHFuncletEntry();
      }
      NewEHPadBB = CatchSwitch->getUnwindDest();
    } else {
      continue;
    }

    BranchProbabilityInfo *BPI = FuncInfo.BPI;
    if (BPI && NewEHPadBB)
      Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
    EHPadBB = NewEHPadBB;
  }
}

void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
  // Update successor info.
  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
  auto UnwindDest = I.getUnwindDest();
  BranchProbabilityInfo *BPI = FuncInfo.BPI;
  BranchProbability UnwindDestProb =
      (BPI && UnwindDest)
          ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
          : BranchProbability::getZero();
  findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
  for (auto &UnwindDest : UnwindDests) {
    UnwindDest.first->setIsEHPad();
    addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
  }
  FuncInfo.MBB->normalizeSuccProbs();

  // Create the terminator node.
  SDValue Ret =
      DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
  DAG.setRoot(Ret);
}

void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
  report_fatal_error("visitCatchSwitch not yet implemented!");
}

void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  auto &DL = DAG.getDataLayout();
  SDValue Chain = getControlRoot();
  SmallVector<ISD::OutputArg, 8> Outs;
  SmallVector<SDValue, 8> OutVals;

  // Calls to @llvm.experimental.deoptimize don't generate a return value, so
  // lower
  //
  //   %val = call <ty> @llvm.experimental.deoptimize()
  //   ret <ty> %val
  //
  // differently.
  if (I.getParent()->getTerminatingDeoptimizeCall()) {
    LowerDeoptimizingReturn();
    return;
  }

  if (!FuncInfo.CanLowerReturn) {
    unsigned DemoteReg = FuncInfo.DemoteRegister;
    const Function *F = I.getParent()->getParent();

    // Emit a store of the return value through the virtual register.
    // Leave Outs empty so that LowerReturn won't try to load return
    // registers the usual way.
    SmallVector<EVT, 1> PtrValueVTs;
    ComputeValueVTs(TLI, DL, PointerType::getUnqual(F->getReturnType()),
                    PtrValueVTs);

    SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
                                        DemoteReg, PtrValueVTs[0]);
    SDValue RetOp = getValue(I.getOperand(0));

    SmallVector<EVT, 4> ValueVTs;
    SmallVector<uint64_t, 4> Offsets;
    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
    unsigned NumValues = ValueVTs.size();

    // An aggregate return value cannot wrap around the address space, so
    // offsets to its parts don't wrap either.
    SDNodeFlags Flags;
    Flags.setNoUnsignedWrap(true);

    SmallVector<SDValue, 4> Chains(NumValues);
    for (unsigned i = 0; i != NumValues; ++i) {
      SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
                                RetPtr.getValueType(), RetPtr,
                                DAG.getIntPtrConstant(Offsets[i],
                                                      getCurSDLoc()),
                                Flags);
      Chains[i] = DAG.getStore(Chain, getCurSDLoc(),
                               SDValue(RetOp.getNode(), RetOp.getResNo() + i),
                               // FIXME: better loc info would be nice.
                               Add, MachinePointerInfo());
    }

    Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
                        MVT::Other, Chains);
  } else if (I.getNumOperands() != 0) {
    SmallVector<EVT, 4> ValueVTs;
    ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
    unsigned NumValues = ValueVTs.size();
    if (NumValues) {
      SDValue RetOp = getValue(I.getOperand(0));

      const Function *F = I.getParent()->getParent();

      ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
      if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                          Attribute::SExt))
        ExtendKind = ISD::SIGN_EXTEND;
      else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
                                               Attribute::ZExt))
        ExtendKind = ISD::ZERO_EXTEND;

      LLVMContext &Context = F->getContext();
      bool RetInReg = F->getAttributes().hasAttribute(
          AttributeList::ReturnIndex, Attribute::InReg);

      for (unsigned j = 0; j != NumValues; ++j) {
        EVT VT = ValueVTs[j];

        if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
          VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);

        unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
        MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
        SmallVector<SDValue, 4> Parts(NumParts);
        getCopyToParts(DAG, getCurSDLoc(),
                       SDValue(RetOp.getNode(), RetOp.getResNo() + j),
                       &Parts[0], NumParts, PartVT, &I, ExtendKind, true);

        // 'inreg' on function refers to return value
        ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
        if (RetInReg)
          Flags.setInReg();

        // Propagate extension type if any
        if (ExtendKind == ISD::SIGN_EXTEND)
          Flags.setSExt();
        else if (ExtendKind == ISD::ZERO_EXTEND)
          Flags.setZExt();

        for (unsigned i = 0; i < NumParts; ++i) {
          Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
                                        VT, /*isfixed=*/true, 0, 0));
          OutVals.push_back(Parts[i]);
        }
      }
    }
  }

  // Push in swifterror virtual register as the last element of Outs. This makes
  // sure swifterror virtual register will be returned in the swifterror
  // physical register.
  const Function *F = I.getParent()->getParent();
  if (TLI.supportSwiftError() &&
      F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
    assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
    ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
    Flags.setSwiftError();
    Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
                                  EVT(TLI.getPointerTy(DL)) /*argvt*/,
                                  true /*isfixed*/, 1 /*origidx*/,
                                  0 /*partOffs*/));
    // Create SDNode for the swifterror virtual register.
    OutVals.push_back(
        DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
                            &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
                        EVT(TLI.getPointerTy(DL))));
  }

  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
  CallingConv::ID CallConv =
    DAG.getMachineFunction().getFunction()->getCallingConv();
  Chain = DAG.getTargetLoweringInfo().LowerReturn(
      Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);

  // Verify that the target's LowerReturn behaved as expected.
  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
         "LowerReturn didn't return a valid chain!");

  // Update the DAG with the new chain value resulting from return lowering.
  DAG.setRoot(Chain);
}
1530 
1531 /// CopyToExportRegsIfNeeded - If the given value has virtual registers
1532 /// created for it, emit nodes to copy the value into the virtual
1533 /// registers.
1535  // Skip empty types
1536  if (V->getType()->isEmptyTy())
1537  return;
1538 
1539  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
1540  if (VMI != FuncInfo.ValueMap.end()) {
1541  assert(!V->use_empty() && "Unused value assigned virtual registers!");
1542  CopyValueToVirtualRegister(V, VMI->second);
1543  }
1544 }
1545 
1546 /// ExportFromCurrentBlock - If this condition isn't known to be exported from
1547 /// the current basic block, add it to ValueMap now so that we'll get a
1548 /// CopyTo/FromReg.
1549 void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
1550  // No need to export constants.
1551  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
1552 
1553  // Already exported?
1554  if (FuncInfo.isExportedInst(V)) return;
1555 
1556  unsigned Reg = FuncInfo.InitializeRegForValue(V);
1557  CopyValueToVirtualRegister(V, Reg);
1558 }
1559 
1560 bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
1561  const BasicBlock *FromBB) {
1562  // The operands of the setcc have to be in this block. We don't know
1563  // how to export them from some other block.
1564  if (const Instruction *VI = dyn_cast<Instruction>(V)) {
1565  // Can export from current BB.
1566  if (VI->getParent() == FromBB)
1567  return true;
1568 
1569  // Is already exported, noop.
1570  return FuncInfo.isExportedInst(V);
1571  }
1572 
1573  // If this is an argument, we can export it if the BB is the entry block or
1574  // if it is already exported.
1575  if (isa<Argument>(V)) {
1576  if (FromBB == &FromBB->getParent()->getEntryBlock())
1577  return true;
1578 
1579  // Otherwise, can only export this if it is already exported.
1580  return FuncInfo.isExportedInst(V);
1581  }
1582 
1583  // Otherwise, constants can always be exported.
1584  return true;
1585 }
1586 
1587 /// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
1588 BranchProbability
1589 SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
1590  const MachineBasicBlock *Dst) const {
1591  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1592  const BasicBlock *SrcBB = Src->getBasicBlock();
1593  const BasicBlock *DstBB = Dst->getBasicBlock();
1594  if (!BPI) {
1595  // If BPI is not available, set the default probability as 1 / N, where N is
1596  // the number of successors.
1597  auto SuccSize = std::max<uint32_t>(
1598  std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1);
1599  return BranchProbability(1, SuccSize);
1600  }
1601  return BPI->getEdgeProbability(SrcBB, DstBB);
1602 }
1603 
1604 void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
1605  MachineBasicBlock *Dst,
1606  BranchProbability Prob) {
1607  if (!FuncInfo.BPI)
1608  Src->addSuccessorWithoutProb(Dst);
1609  else {
1610  if (Prob.isUnknown())
1611  Prob = getEdgeProbability(Src, Dst);
1612  Src->addSuccessor(Dst, Prob);
1613  }
1614 }
1615 
1616 static bool InBlock(const Value *V, const BasicBlock *BB) {
1617  if (const Instruction *I = dyn_cast<Instruction>(V))
1618  return I->getParent() == BB;
1619  return true;
1620 }
1621 
1622 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
1623 /// This function emits a branch and is used at the leaves of an OR or an
1624 /// AND operator tree.
1625 ///
1626 void
1627 SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
1628  MachineBasicBlock *TBB,
1629  MachineBasicBlock *FBB,
1630  MachineBasicBlock *CurBB,
1631  MachineBasicBlock *SwitchBB,
1632  BranchProbability TProb,
1633  BranchProbability FProb,
1634  bool InvertCond) {
1635  const BasicBlock *BB = CurBB->getBasicBlock();
1636 
1637  // If the leaf of the tree is a comparison, merge the condition into
1638  // the caseblock.
1639  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
1640  // The operands of the cmp have to be in this block. We don't know
1641  // how to export them from some other block. If this is the first block
1642  // of the sequence, no exporting is needed.
1643  if (CurBB == SwitchBB ||
1644  (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
1645  isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
1646  ISD::CondCode Condition;
1647  if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
1648  ICmpInst::Predicate Pred =
1649  InvertCond ? IC->getInversePredicate() : IC->getPredicate();
1650  Condition = getICmpCondCode(Pred);
1651  } else {
1652  const FCmpInst *FC = cast<FCmpInst>(Cond);
1653  FCmpInst::Predicate Pred =
1654  InvertCond ? FC->getInversePredicate() : FC->getPredicate();
1655  Condition = getFCmpCondCode(Pred);
1656  if (TM.Options.NoNaNsFPMath)
1657  Condition = getFCmpCodeWithoutNaN(Condition);
1658  }
1659 
1660  CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
1661  TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1662  SwitchCases.push_back(CB);
1663  return;
1664  }
1665  }
1666 
1667  // Create a CaseBlock record representing this branch.
1668  ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
1669  CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
1670  nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1671  SwitchCases.push_back(CB);
1672 }
1673 
1674 /// FindMergedConditions - If Cond is an expression like (X & Y) or (X | Y), walk the operator tree and lower it as a sequence of conditional branches, one CaseBlock per leaf comparison.
1675 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
1676  MachineBasicBlock *TBB,
1677  MachineBasicBlock *FBB,
1678  MachineBasicBlock *CurBB,
1679  MachineBasicBlock *SwitchBB,
1680  Instruction::BinaryOps Opc,
1681  BranchProbability TProb,
1682  BranchProbability FProb,
1683  bool InvertCond) {
1684  // Look through a 'not' that is not itself part of the tree, and remember to
1685  // invert the op and operands at the next level.
1686  if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
1687  const Value *CondOp = BinaryOperator::getNotArgument(Cond);
1688  if (InBlock(CondOp, CurBB->getBasicBlock())) {
1689  FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
1690  !InvertCond);
1691  return;
1692  }
1693  }
1694 
1695  const Instruction *BOp = dyn_cast<Instruction>(Cond);
1696  // Compute the effective opcode for Cond, taking into account whether it needs
1697  // to be inverted, e.g.
1698  // and (not (or A, B)), C
1699  // gets lowered as
1700  // and (and (not A, not B), C)
1701  unsigned BOpc = 0;
1702  if (BOp) {
1703  BOpc = BOp->getOpcode();
1704  if (InvertCond) {
1705  if (BOpc == Instruction::And)
1706  BOpc = Instruction::Or;
1707  else if (BOpc == Instruction::Or)
1708  BOpc = Instruction::And;
1709  }
1710  }
1711 
1712  // If this node is not part of the or/and tree, emit it as a branch.
1713  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
1714  BOpc != Opc || !BOp->hasOneUse() ||
1715  BOp->getParent() != CurBB->getBasicBlock() ||
1716  !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
1717  !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
1718  EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
1719  TProb, FProb, InvertCond);
1720  return;
1721  }
1722 
1723  // Create TmpBB after CurBB.
1724  MachineFunction::iterator BBI(CurBB);
1725  MachineFunction &MF = DAG.getMachineFunction();
1726  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
1727  CurBB->getParent()->insert(++BBI, TmpBB);
1728 
1729  if (Opc == Instruction::Or) {
1730  // Codegen X | Y as:
1731  // BB1:
1732  // jmp_if_X TBB
1733  // jmp TmpBB
1734  // TmpBB:
1735  // jmp_if_Y TBB
1736  // jmp FBB
1737  //
1738 
1739  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1740  // The requirement is that
1741  // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
1742  // = TrueProb for original BB.
1743  // Assuming the original probabilities are A and B, one choice is to set
1744  // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
1745  // A/(1+B) and 2B/(1+B). This choice assumes that
1746  // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
1747  // Another choice is to assume TrueProb for BB1 equals to TrueProb for
1748  // TmpBB, but the math is more complicated.
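  // As a worked example (probabilities chosen only for illustration): with
  // TProb = 3/10 and FProb = 7/10, BB1 is given 15/100 and 85/100, and
  // normalizing {3/20, 7/10} gives TmpBB 3/17 and 14/17. Indeed
  //   15/100 + 85/100 * 3/17 == 3/10,
  // which is the original TrueProb, as required.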
1749 
1750  auto NewTrueProb = TProb / 2;
1751  auto NewFalseProb = TProb / 2 + FProb;
1752  // Emit the LHS condition.
1753  FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
1754  NewTrueProb, NewFalseProb, InvertCond);
1755 
1756  // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
1757  SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
1758  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1759  // Emit the RHS condition into TmpBB.
1760  FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1761  Probs[0], Probs[1], InvertCond);
1762  } else {
1763  assert(Opc == Instruction::And && "Unknown merge op!");
1764  // Codegen X & Y as:
1765  // BB1:
1766  // jmp_if_X TmpBB
1767  // jmp FBB
1768  // TmpBB:
1769  // jmp_if_Y TBB
1770  // jmp FBB
1771  //
1772  // This requires creation of TmpBB after CurBB.
1773 
1774  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1775  // The requirement is that
1776  // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
1777  // = FalseProb for original BB.
1778  // Assuming the original probabilities are A and B, one choice is to set
1779  // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
1780  // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
1781  // TrueProb for BB1 * FalseProb for TmpBB.
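  // As a worked example (probabilities chosen only for illustration): with
  // TProb = 3/10 and FProb = 7/10, BB1 is given 65/100 and 35/100, and
  // normalizing {3/10, 7/20} gives TmpBB 6/13 and 7/13. Indeed
  //   35/100 + 65/100 * 7/13 == 7/10,
  // which is the original FalseProb, as required.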
1782 
1783  auto NewTrueProb = TProb + FProb / 2;
1784  auto NewFalseProb = FProb / 2;
1785  // Emit the LHS condition.
1786  FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
1787  NewTrueProb, NewFalseProb, InvertCond);
1788 
1789  // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
1790  SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
1791  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1792  // Emit the RHS condition into TmpBB.
1793  FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1794  Probs[0], Probs[1], InvertCond);
1795  }
1796 }
1797 
1798 /// If the set of cases should be emitted as a series of branches, return true.
1799 /// If we should emit this as a bunch of and/or'd together conditions, return
1800 /// false.
1801 bool
1802 SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
1803  if (Cases.size() != 2) return true;
1804 
1805  // If this is two comparisons of the same values or'd or and'd together, they
1806  // will get folded into a single comparison, so don't emit two blocks.
1807  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
1808  Cases[0].CmpRHS == Cases[1].CmpRHS) ||
1809  (Cases[0].CmpRHS == Cases[1].CmpLHS &&
1810  Cases[0].CmpLHS == Cases[1].CmpRHS)) {
1811  return false;
1812  }
1813 
1814  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
1815  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
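  // For instance, IR of the shape
  //   %a = icmp ne i8* %X, null
  //   %b = icmp ne i8* %Y, null
  //   %c = or i1 %a, %b
  // can be folded to a single compare of (X | Y) against zero, so returning
  // false here keeps the two cases together for that later fold.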
1816  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
1817  Cases[0].CC == Cases[1].CC &&
1818  isa<Constant>(Cases[0].CmpRHS) &&
1819  cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
1820  if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
1821  return false;
1822  if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
1823  return false;
1824  }
1825 
1826  return true;
1827 }
1828 
1829 void SelectionDAGBuilder::visitBr(const BranchInst &I) {
1830  MachineBasicBlock *BrMBB = FuncInfo.MBB;
1831 
1832  // Update machine-CFG edges.
1833  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1834 
1835  if (I.isUnconditional()) {
1836  // Update machine-CFG edges.
1837  BrMBB->addSuccessor(Succ0MBB);
1838 
1839  // If this is not a fall-through branch or optimizations are switched off,
1840  // emit the branch.
1841  if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
1842  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
1843  MVT::Other, getControlRoot(),
1844  DAG.getBasicBlock(Succ0MBB)));
1845 
1846  return;
1847  }
1848 
1849  // If this condition is one of the special cases we handle, do special stuff
1850  // now.
1851  const Value *CondVal = I.getCondition();
1852  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
1853 
1854  // If this is a series of conditions that are or'd or and'd together, emit
1855  // this as a sequence of branches instead of setcc's with and/or operations.
1856  // As long as jumps are not expensive, this should improve performance.
1857  // For example, instead of something like:
1858  // cmp A, B
1859  // C = seteq
1860  // cmp D, E
1861  // F = setle
1862  // or C, F
1863  // jnz foo
1864  // Emit:
1865  // cmp A, B
1866  // je foo
1867  // cmp D, E
1868  // jle foo
1869  //
1870  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
1871  Instruction::BinaryOps Opcode = BOp->getOpcode();
1872  if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
1873  !I.getMetadata(LLVMContext::MD_unpredictable) &&
1874  (Opcode == Instruction::And || Opcode == Instruction::Or)) {
1875  FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
1876  Opcode,
1877  getEdgeProbability(BrMBB, Succ0MBB),
1878  getEdgeProbability(BrMBB, Succ1MBB),
1879  /*InvertCond=*/false);
1880  // If the compares in later blocks need to use values not currently
1881  // exported from this block, export them now. This block should always
1882  // be the first entry.
1883  assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
1884 
1885  // Allow some cases to be rejected.
1886  if (ShouldEmitAsBranches(SwitchCases)) {
1887  for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
1888  ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
1889  ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
1890  }
1891 
1892  // Emit the branch for this block.
1893  visitSwitchCase(SwitchCases[0], BrMBB);
1894  SwitchCases.erase(SwitchCases.begin());
1895  return;
1896  }
1897 
1898  // Okay, we decided not to do this, remove any inserted MBB's and clear
1899  // SwitchCases.
1900  for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
1901  FuncInfo.MF->erase(SwitchCases[i].ThisBB);
1902 
1903  SwitchCases.clear();
1904  }
1905  }
1906 
1907  // Create a CaseBlock record representing this branch.
1908  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
1909  nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
1910 
1911  // Use visitSwitchCase to actually insert the fast branch sequence for this
1912  // cond branch.
1913  visitSwitchCase(CB, BrMBB);
1914 }
1915 
1916 /// visitSwitchCase - Emits the necessary code to represent a single node in
1917 /// the binary search tree resulting from lowering a switch instruction.
1918 void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
1919  MachineBasicBlock *SwitchBB) {
1920  SDValue Cond;
1921  SDValue CondLHS = getValue(CB.CmpLHS);
1922  SDLoc dl = CB.DL;
1923 
1924  // Build the setcc now.
1925  if (!CB.CmpMHS) {
1926  // Fold "(X == true)" to X and "(X == false)" to !X to
1927  // handle common cases produced by branch lowering.
1928  if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
1929  CB.CC == ISD::SETEQ)
1930  Cond = CondLHS;
1931  else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
1932  CB.CC == ISD::SETEQ) {
1933  SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
1934  Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
1935  } else
1936  Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
1937  } else {
1938  assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
1939 
1940  const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
1941  const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
1942 
1943  SDValue CmpOp = getValue(CB.CmpMHS);
1944  EVT VT = CmpOp.getValueType();
1945 
1946  if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
1947  Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
1948  ISD::SETLE);
1949  } else {
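      // A sketch of the transform for a case range [Low, High] such as
      // [10, 20]:
      //   %sub = sub i32 %x, 10
      //   %cmp = icmp ule i32 %sub, 10      ; High - Low
      // which holds exactly when 10 <= %x <= 20, because any %x below Low
      // wraps around to a large unsigned value.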
1950  SDValue SUB = DAG.getNode(ISD::SUB, dl,
1951  VT, CmpOp, DAG.getConstant(Low, dl, VT));
1952  Cond = DAG.getSetCC(dl, MVT::i1, SUB,
1953  DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
1954  }
1955  }
1956 
1957  // Update successor info
1958  addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
1959  // TrueBB and FalseBB are always different unless the incoming IR is
1960  // degenerate. This only happens when running llc on weird IR.
1961  if (CB.TrueBB != CB.FalseBB)
1962  addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
1963  SwitchBB->normalizeSuccProbs();
1964 
1965  // If the lhs block is the next block, invert the condition so that we can
1966  // fall through to the lhs instead of the rhs block.
1967  if (CB.TrueBB == NextBlock(SwitchBB)) {
1968  std::swap(CB.TrueBB, CB.FalseBB);
1969  SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
1970  Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
1971  }
1972 
1973  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
1974  MVT::Other, getControlRoot(), Cond,
1975  DAG.getBasicBlock(CB.TrueBB));
1976 
1977  // Insert the false branch. Do this even if it's a fall through branch,
1978  // this makes it easier to do DAG optimizations which require inverting
1979  // the branch condition.
1980  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
1981  DAG.getBasicBlock(CB.FalseBB));
1982 
1983  DAG.setRoot(BrCond);
1984 }
1985 
1986 /// visitJumpTable - Emit JumpTable node in the current MBB
1987 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
1988  // Emit the code for the jump table
1989  assert(JT.Reg != -1U && "Should lower JT Header first!");
1990  EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
1991  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
1992  JT.Reg, PTy);
1993  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
1994  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
1995  MVT::Other, Index.getValue(1),
1996  Table, Index);
1997  DAG.setRoot(BrJumpTable);
1998 }
1999 
2000 /// visitJumpTableHeader - This function emits the necessary code to produce an
2001 /// index into the JumpTable from the switch case value.
2002 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
2003  JumpTableHeader &JTH,
2004  MachineBasicBlock *SwitchBB) {
2005  SDLoc dl = getCurSDLoc();
2006 
2007  // Subtract the lowest switch case value from the value being switched on and
2008  // conditional branch to default mbb if the result is greater than the
2009  // difference between smallest and largest cases.
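  // For example (an illustrative sketch), a switch over cases 100..115 emits
  //   %idx = sub i32 %x, 100
  //   brcond (icmp ugt i32 %idx, 15), label %default
  // and %idx then selects one of the 16 jump table entries.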
2010  SDValue SwitchOp = getValue(JTH.SValue);
2011  EVT VT = SwitchOp.getValueType();
2012  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2013  DAG.getConstant(JTH.First, dl, VT));
2014 
2015  // The SDNode we just created, which holds the value being switched on minus
2016  // the smallest case value, needs to be copied to a virtual register so it
2017  // can be used as an index into the jump table in a subsequent basic block.
2018  // This value may be smaller or larger than the target's pointer type, and
2019  // therefore may require extension or truncation.
2020  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2021  SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
2022 
2023  unsigned JumpTableReg =
2024  FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
2025  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
2026  JumpTableReg, SwitchOp);
2027  JT.Reg = JumpTableReg;
2028 
2029  // Emit the range check for the jump table, and branch to the default block
2030  // for the switch statement if the value being switched on exceeds the largest
2031  // case in the switch.
2032  SDValue CMP = DAG.getSetCC(
2033  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2034  Sub.getValueType()),
2035  Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
2036 
2037  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2038  MVT::Other, CopyTo, CMP,
2039  DAG.getBasicBlock(JT.Default));
2040 
2041  // Avoid emitting unnecessary branches to the next block.
2042  if (JT.MBB != NextBlock(SwitchBB))
2043  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2044  DAG.getBasicBlock(JT.MBB));
2045 
2046  DAG.setRoot(BrCond);
2047 }
2048 
2049 /// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
2050 /// variable if there exists one.
2051 static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
2052  SDValue &Chain) {
2053  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2054  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2055  MachineFunction &MF = DAG.getMachineFunction();
2056  Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent());
2057  MachineSDNode *Node =
2058  DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
2059  if (Global) {
2060  MachinePointerInfo MPInfo(Global);
2061  MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
2062  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
2063  MachineMemOperand::MODereferenceable;
2064  *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
2065  DAG.getEVTAlignment(PtrTy));
2066  Node->setMemRefs(MemRefs, MemRefs + 1);
2067  }
2068  return SDValue(Node, 0);
2069 }
2070 
2071 /// Codegen a new tail for a stack protector check ParentMBB which has had its
2072 /// tail spliced into a stack protector check success bb.
2073 ///
2074 /// For a high level explanation of how this fits into the stack protector
2075 /// generation see the comment on the declaration of class
2076 /// StackProtectorDescriptor.
2077 void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
2078  MachineBasicBlock *ParentBB) {
2079 
2080  // First create the loads to the guard/stack slot for the comparison.
2081  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2082  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2083 
2084  MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
2085  int FI = MFI.getStackProtectorIndex();
2086 
2087  SDValue Guard;
2088  SDLoc dl = getCurSDLoc();
2089  SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
2090  const Module &M = *ParentBB->getParent()->getFunction()->getParent();
2091  unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
2092 
2093  // Generate code to load the content of the guard slot.
2094  SDValue StackSlot = DAG.getLoad(
2095  PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
2096  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
2097  MachineMemOperand::MOVolatile);
2098 
2099  // Retrieve guard check function, nullptr if instrumentation is inlined.
2100  if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
2101  // The target provides a guard check function to validate the guard value.
2102  // Generate a call to that function with the content of the guard slot as
2103  // argument.
2104  auto *Fn = cast<Function>(GuardCheck);
2105  FunctionType *FnTy = Fn->getFunctionType();
2106  assert(FnTy->getNumParams() == 1 && "Invalid function signature");
2107 
2108  TargetLowering::ArgListTy Args;
2109  TargetLowering::ArgListEntry Entry;
2110  Entry.Node = StackSlot;
2111  Entry.Ty = FnTy->getParamType(0);
2112  if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
2113  Entry.IsInReg = true;
2114  Args.push_back(Entry);
2115 
2116  TargetLowering::CallLoweringInfo CLI(DAG);
2117  CLI.setDebugLoc(getCurSDLoc())
2118  .setChain(DAG.getEntryNode())
2119  .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
2120  getValue(GuardCheck), std::move(Args));
2121 
2122  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
2123  DAG.setRoot(Result.second);
2124  return;
2125  }
2126 
2127  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
2128  // Otherwise, emit a volatile load to retrieve the stack guard value.
2129  SDValue Chain = DAG.getEntryNode();
2130  if (TLI.useLoadStackGuardNode()) {
2131  Guard = getLoadStackGuard(DAG, dl, Chain);
2132  } else {
2133  const Value *IRGuard = TLI.getSDagStackGuard(M);
2134  SDValue GuardPtr = getValue(IRGuard);
2135 
2136  Guard =
2137  DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
2138  Align, MachineMemOperand::MOVolatile);
2139  }
2140 
2141  // Perform the comparison via a subtract/getsetcc.
2142  EVT VT = Guard.getValueType();
2143  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot);
2144 
2145  SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
2146  *DAG.getContext(),
2147  Sub.getValueType()),
2148  Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
2149 
2150  // If the sub is not 0, then we know the guard and stack slot values are not equal, so
2151  // branch to failure MBB.
2152  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2153  MVT::Other, StackSlot.getOperand(0),
2154  Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
2155  // Otherwise branch to success MBB.
2156  SDValue Br = DAG.getNode(ISD::BR, dl,
2157  MVT::Other, BrCond,
2158  DAG.getBasicBlock(SPD.getSuccessMBB()));
2159 
2160  DAG.setRoot(Br);
2161 }
2162 
2163 /// Codegen the failure basic block for a stack protector check.
2164 ///
2165 /// A failure stack protector machine basic block consists simply of a call to
2166 /// __stack_chk_fail().
2167 ///
2168 /// For a high level explanation of how this fits into the stack protector
2169 /// generation see the comment on the declaration of class
2170 /// StackProtectorDescriptor.
2171 void
2172 SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
2173  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2174  SDValue Chain =
2175  TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
2176  None, false, getCurSDLoc(), false, false).second;
2177  DAG.setRoot(Chain);
2178 }
2179 
2180 /// visitBitTestHeader - This function emits the necessary code to produce a
2181 /// value suitable for "bit tests".
2182 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
2183  MachineBasicBlock *SwitchBB) {
2184  SDLoc dl = getCurSDLoc();
2185 
2186  // Subtract the minimum value
2187  SDValue SwitchOp = getValue(B.SValue);
2188  EVT VT = SwitchOp.getValueType();
2189  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2190  DAG.getConstant(B.First, dl, VT));
2191 
2192  // Check range
2193  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2194  SDValue RangeCmp = DAG.getSetCC(
2195  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2196  Sub.getValueType()),
2197  Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
2198 
2199  // Determine the type of the test operands.
2200  bool UsePtrType = false;
2201  if (!TLI.isTypeLegal(VT))
2202  UsePtrType = true;
2203  else {
2204  for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
2205  if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
2206  // Switch table case ranges are encoded into a series of masks.
2207  // Just use pointer type, it's guaranteed to fit.
2208  UsePtrType = true;
2209  break;
2210  }
2211  }
2212  if (UsePtrType) {
2213  VT = TLI.getPointerTy(DAG.getDataLayout());
2214  Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
2215  }
2216 
2217  B.RegVT = VT.getSimpleVT();
2218  B.Reg = FuncInfo.CreateReg(B.RegVT);
2219  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
2220 
2221  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
2222 
2223  addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
2224  addSuccessorWithProb(SwitchBB, MBB, B.Prob);
2225  SwitchBB->normalizeSuccProbs();
2226 
2227  SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
2228  MVT::Other, CopyTo, RangeCmp,
2229  DAG.getBasicBlock(B.Default));
2230 
2231  // Avoid emitting unnecessary branches to the next block.
2232  if (MBB != NextBlock(SwitchBB))
2233  BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
2234  DAG.getBasicBlock(MBB));
2235 
2236  DAG.setRoot(BrRange);
2237 }
2238 
2239 /// visitBitTestCase - This function produces one "bit test".
2240 void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
2241  MachineBasicBlock* NextMBB,
2242  BranchProbability BranchProbToNext,
2243  unsigned Reg,
2244  BitTestCase &B,
2245  MachineBasicBlock *SwitchBB) {
2246  SDLoc dl = getCurSDLoc();
2247  MVT VT = BB.RegVT;
2248  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
2249  SDValue Cmp;
2250  unsigned PopCount = countPopulation(B.Mask);
2251  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2252  if (PopCount == 1) {
2253  // Testing for a single bit; just compare the shift count with what it
2254  // would need to be to shift a 1 bit in that position.
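    // For example, with B.Mask == 0x10 the generic test ((1 << X) & 0x10) != 0
    // reduces to the cheaper X == 4, since countTrailingZeros(0x10) == 4.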
2255  Cmp = DAG.getSetCC(
2256  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2257  ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
2258  ISD::SETEQ);
2259  } else if (PopCount == BB.Range) {
2260  // There is only one zero bit in the range, test for it directly.
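    // For example (a sketch), if B.Mask == 0b11011 the single uncovered value
    // is 2 == countTrailingOnes(0b11011), so it suffices to test X != 2.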
2261  Cmp = DAG.getSetCC(
2262  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2263  ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
2264  ISD::SETNE);
2265  } else {
2266  // Make desired shift
2267  SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
2268  DAG.getConstant(1, dl, VT), ShiftOp);
2269 
2270  // Emit bit tests and jumps
2271  SDValue AndOp = DAG.getNode(ISD::AND, dl,
2272  VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
2273  Cmp = DAG.getSetCC(
2274  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2275  AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
2276  }
2277 
2278  // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
2279  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
2280  // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
2281  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
2282  // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
2283  // one as they are relative probabilities (and thus work more like weights),
2284  // and hence we need to normalize them so that they sum to one.
2285  SwitchBB->normalizeSuccProbs();
2286 
2287  SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
2288  MVT::Other, getControlRoot(),
2289  Cmp, DAG.getBasicBlock(B.TargetBB));
2290 
2291  // Avoid emitting unnecessary branches to the next block.
2292  if (NextMBB != NextBlock(SwitchBB))
2293  BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
2294  DAG.getBasicBlock(NextMBB));
2295 
2296  DAG.setRoot(BrAnd);
2297 }
2298 
2299 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
2300  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
2301 
2302  // Retrieve successors. Look through artificial IR level blocks like
2303  // catchswitch for successors.
2304  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
2305  const BasicBlock *EHPadBB = I.getSuccessor(1);
2306 
2307  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
2308  // have to do anything here to lower funclet bundles.
2309  assert(!I.hasOperandBundlesOtherThan(
2310  {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
2311  "Cannot lower invokes with arbitrary operand bundles yet!");
2312 
2313  const Value *Callee(I.getCalledValue());
2314  const Function *Fn = dyn_cast<Function>(Callee);
2315  if (isa<InlineAsm>(Callee))
2316  visitInlineAsm(&I);
2317  else if (Fn && Fn->isIntrinsic()) {
2318  switch (Fn->getIntrinsicID()) {
2319  default:
2320  llvm_unreachable("Cannot invoke this intrinsic");
2321  case Intrinsic::donothing:
2322  // Ignore invokes to @llvm.donothing: jump directly to the next BB.
2323  break;
2324  case Intrinsic::experimental_patchpoint_void:
2325  case Intrinsic::experimental_patchpoint_i64:
2326  visitPatchpoint(&I, EHPadBB);
2327  break;
2328  case Intrinsic::experimental_gc_statepoint:
2329  LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
2330  break;
2331  }
2332  } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
2333  // Currently we do not lower any intrinsic calls with deopt operand bundles.
2334  // Eventually we will support lowering the @llvm.experimental.deoptimize
2335  // intrinsic, and right now there are no plans to support other intrinsics
2336  // with deopt state.
2337  LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
2338  } else {
2339  LowerCallTo(&I, getValue(Callee), false, EHPadBB);
2340  }
2341 
2342  // If the value of the invoke is used outside of its defining block, make it
2343  // available as a virtual register.
2344  // We already took care of the exported value for the statepoint instruction
2345  // during call to the LowerStatepoint.
2346  if (!isStatepoint(I)) {
2347  CopyToExportRegsIfNeeded(&I);
2348  }
2349 
2350  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
2351  BranchProbabilityInfo *BPI = FuncInfo.BPI;
2352  BranchProbability EHPadBBProb =
2353  BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
2354  : BranchProbability::getZero();
2355  findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
2356 
2357  // Update successor info.
2358  addSuccessorWithProb(InvokeMBB, Return);
2359  for (auto &UnwindDest : UnwindDests) {
2360  UnwindDest.first->setIsEHPad();
2361  addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
2362  }
2363  InvokeMBB->normalizeSuccProbs();
2364 
2365  // Drop into normal successor.
2366  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
2367  MVT::Other, getControlRoot(),
2368  DAG.getBasicBlock(Return)));
2369 }
2370 
2371 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
2372  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
2373 }
2374 
2375 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
2376  assert(FuncInfo.MBB->isEHPad() &&
2377  "Call to landingpad not in landing pad!");
2378 
2379  MachineBasicBlock *MBB = FuncInfo.MBB;
2380  addLandingPadInfo(LP, *MBB);
2381 
2382  // If there aren't registers to copy the values into (e.g., during SjLj
2383  // exceptions), then don't bother to create these DAG nodes.
2384  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2385  const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
2386  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
2387  TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
2388  return;
2389 
2390  // If landingpad's return type is token type, we don't create DAG nodes
2391  // for its exception pointer and selector value. The extraction of exception
2392  // pointer or selector value from token type landingpads is not currently
2393  // supported.
2394  if (LP.getType()->isTokenTy())
2395  return;
2396 
2397  SmallVector<EVT, 2> ValueVTs;
2398  SDLoc dl = getCurSDLoc();
2399  ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
2400  assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
2401 
2402  // Get the two live-in registers as SDValues. The physregs have already been
2403  // copied into virtual registers.
2404  SDValue Ops[2];
2405  if (FuncInfo.ExceptionPointerVirtReg) {
2406  Ops[0] = DAG.getZExtOrTrunc(
2407  DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2408  FuncInfo.ExceptionPointerVirtReg,
2409  TLI.getPointerTy(DAG.getDataLayout())),
2410  dl, ValueVTs[0]);
2411  } else {
2412  Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
2413  }
2414  Ops[1] = DAG.getZExtOrTrunc(
2415  DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2416  FuncInfo.ExceptionSelectorVirtReg,
2417  TLI.getPointerTy(DAG.getDataLayout())),
2418  dl, ValueVTs[1]);
2419 
2420  // Merge into one.
2421  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
2422  DAG.getVTList(ValueVTs), Ops);
2423  setValue(&LP, Res);
2424 }
2425 
2426 void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
2427 #ifndef NDEBUG
2428  for (const CaseCluster &CC : Clusters)
2429  assert(CC.Low == CC.High && "Input clusters must be single-case");
2430 #endif
2431 
2432  std::sort(Clusters.begin(), Clusters.end(),
2433  [](const CaseCluster &a, const CaseCluster &b) {
2434  return a.Low->getValue().slt(b.Low->getValue());
2435  });
2436 
2437  // Merge adjacent clusters with the same destination.
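  // For example, the single-case clusters 1 -> BB0, 2 -> BB0 and 3 -> BB1
  // become the range clusters [1, 2] -> BB0 and [3, 3] -> BB1.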
2438  const unsigned N = Clusters.size();
2439  unsigned DstIndex = 0;
2440  for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
2441  CaseCluster &CC = Clusters[SrcIndex];
2442  const ConstantInt *CaseVal = CC.Low;
2443  MachineBasicBlock *Succ = CC.MBB;
2444 
2445  if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
2446  (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
2447  // If this case has the same successor and is a neighbour, merge it into
2448  // the previous cluster.
2449  Clusters[DstIndex - 1].High = CaseVal;
2450  Clusters[DstIndex - 1].Prob += CC.Prob;
2451  } else {
2452  std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
2453  sizeof(Clusters[SrcIndex]));
2454  }
2455  }
2456  Clusters.resize(DstIndex);
2457 }
2458 
2459 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
2460  MachineBasicBlock *Last) {
2461  // Update JTCases.
2462  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
2463  if (JTCases[i].first.HeaderBB == First)
2464  JTCases[i].first.HeaderBB = Last;
2465 
2466  // Update BitTestCases.
2467  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
2468  if (BitTestCases[i].Parent == First)
2469  BitTestCases[i].Parent = Last;
2470 }
2471 
2472 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
2473  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
2474 
2475  // Update machine-CFG edges with unique successors.
2476  SmallSet<BasicBlock*, 32> Done;
2477  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
2478  BasicBlock *BB = I.getSuccessor(i);
2479  bool Inserted = Done.insert(BB).second;
2480  if (!Inserted)
2481  continue;
2482 
2483  MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
2484  addSuccessorWithProb(IndirectBrMBB, Succ);
2485  }
2486  IndirectBrMBB->normalizeSuccProbs();
2487 
2488  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
2489  MVT::Other, getControlRoot(),
2490  getValue(I.getAddress())));
2491 }
2492 
2493 void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
2494  if (DAG.getTarget().Options.TrapUnreachable)
2495  DAG.setRoot(
2496  DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
2497 }
2498 
2499 void SelectionDAGBuilder::visitFSub(const User &I) {
2500  // -0.0 - X --> fneg
2501  Type *Ty = I.getType();
2502  if (isa<Constant>(I.getOperand(0)) &&
2503  I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
2504  SDValue Op2 = getValue(I.getOperand(1));
2505  setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
2506  Op2.getValueType(), Op2));
2507  return;
2508  }
2509 
2510  visitBinary(I, ISD::FSUB);
2511 }
2512 
2513 /// Checks if the given instruction performs a vector reduction, in which case
2514 /// we have the freedom to alter the elements in the result as long as the
2515 /// reduction of them stays unchanged.
2516 static bool isVectorReductionOp(const User *I) {
2517  const Instruction *Inst = dyn_cast<Instruction>(I);
2518  if (!Inst || !Inst->getType()->isVectorTy())
2519  return false;
2520 
2521  auto OpCode = Inst->getOpcode();
2522  switch (OpCode) {
2523  case Instruction::Add:
2524  case Instruction::Mul:
2525  case Instruction::And:
2526  case Instruction::Or:
2527  case Instruction::Xor:
2528  break;
2529  case Instruction::FAdd:
2530  case Instruction::FMul:
2531  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2532  if (FPOp->getFastMathFlags().unsafeAlgebra())
2533  break;
2534  LLVM_FALLTHROUGH;
2535  default:
2536  return false;
2537  }
2538 
2539  unsigned ElemNum = Inst->getType()->getVectorNumElements();
2540  unsigned ElemNumToReduce = ElemNum;
2541 
2542  // Do DFS search on the def-use chain from the given instruction. We only
2543  // allow four kinds of operations during the search until we reach the
2544  // instruction that extracts the first element from the vector:
2545  //
2546  // 1. The reduction operation of the same opcode as the given instruction.
2547  //
2548  // 2. PHI node.
2549  //
2550  // 3. ShuffleVector instruction together with a reduction operation that
2551  // does a partial reduction.
2552  //
2553  // 4. ExtractElement that extracts the first element from the vector, and we
2554  // stop searching the def-use chain here.
2555  //
2556  // 3 & 4 above perform a reduction on all elements of the vector. We push defs
2557  // from 1-3 to the stack to continue the DFS. The given instruction is not
2558 // a reduction operation if we meet any instructions other than those
2559  // listed above.
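// For example, a four-lane add reduction typically reaches here in the
// following IR shape (a sketch; value names are illustrative only):
//   %s1 = shufflevector <4 x i32> %v, <4 x i32> undef,
//                       <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
//   %r1 = add <4 x i32> %v, %s1
//   %s2 = shufflevector <4 x i32> %r1, <4 x i32> undef,
//                       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
//   %r2 = add <4 x i32> %r1, %s2
//   %res = extractelement <4 x i32> %r2, i32 0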
2560 
2561  SmallVector<const User *, 16> UsersToVisit{Inst};
2562  SmallPtrSet<const User *, 16> Visited;
2563  bool ReduxExtracted = false;
2564 
2565  while (!UsersToVisit.empty()) {
2566  auto User = UsersToVisit.back();
2567  UsersToVisit.pop_back();
2568  if (!Visited.insert(User).second)
2569  continue;
2570 
2571  for (const auto &U : User->users()) {
2572  auto Inst = dyn_cast<Instruction>(U);
2573  if (!Inst)
2574  return false;
2575 
2576  if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
2577  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2578  if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().unsafeAlgebra())
2579  return false;
2580  UsersToVisit.push_back(U);
2581  } else if (const ShuffleVectorInst *ShufInst =
2582  dyn_cast<ShuffleVectorInst>(U)) {
2583  // Detect the following pattern: A ShuffleVector instruction together
2584  // with a reduction that does a partial reduction on the first and second
2585  // ElemNumToReduce / 2 elements, and store the result in
2586  // ElemNumToReduce / 2 elements in another vector.
2587 
2588  unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
2589  if (ResultElements < ElemNum)
2590  return false;
2591 
2592  if (ElemNumToReduce == 1)
2593  return false;
2594  if (!isa<UndefValue>(U->getOperand(1)))
2595  return false;
2596  for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
2597  if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
2598  return false;
2599  for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
2600  if (ShufInst->getMaskValue(i) != -1)
2601  return false;
2602 
2603  // There is only one user of this ShuffleVector instruction, which
2604  // must be a reduction operation.
2605  if (!U->hasOneUse())
2606  return false;
2607 
2608  auto U2 = dyn_cast<Instruction>(*U->user_begin());
2609  if (!U2 || U2->getOpcode() != OpCode)
2610  return false;
2611 
2612  // Check operands of the reduction operation.
2613  if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
2614  (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
2615  UsersToVisit.push_back(U2);
2616  ElemNumToReduce /= 2;
2617  } else
2618  return false;
2619  } else if (isa<ExtractElementInst>(U)) {
2620  // At this moment we should have reduced all elements in the vector.
2621  if (ElemNumToReduce != 1)
2622  return false;
2623 
2624  const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
2625  if (!Val || Val->getZExtValue() != 0)
2626  return false;
2627 
2628  ReduxExtracted = true;
2629  } else
2630  return false;
2631  }
2632  }
2633  return ReduxExtracted;
2634 }
2635 
2636 void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
2637  SDValue Op1 = getValue(I.getOperand(0));
2638  SDValue Op2 = getValue(I.getOperand(1));
2639 
2640  bool nuw = false;
2641  bool nsw = false;
2642  bool exact = false;
2643  bool vec_redux = false;
2644  FastMathFlags FMF;
2645 
2646  if (const OverflowingBinaryOperator *OFBinOp =
2647  dyn_cast<const OverflowingBinaryOperator>(&I)) {
2648  nuw = OFBinOp->hasNoUnsignedWrap();
2649  nsw = OFBinOp->hasNoSignedWrap();
2650  }
2651  if (const PossiblyExactOperator *ExactOp =
2652  dyn_cast<const PossiblyExactOperator>(&I))
2653  exact = ExactOp->isExact();
2654  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I))
2655  FMF = FPOp->getFastMathFlags();
2656 
2657  if (isVectorReductionOp(&I)) {
2658  vec_redux = true;
2659  DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
2660  }
2661 
2662  SDNodeFlags Flags;
2663  Flags.setExact(exact);
2664  Flags.setNoSignedWrap(nsw);
2665  Flags.setNoUnsignedWrap(nuw);
2666  Flags.setVectorReduction(vec_redux);
2667  Flags.setAllowReciprocal(FMF.allowReciprocal());
2668  Flags.setAllowContract(FMF.allowContract());
2669  Flags.setNoInfs(FMF.noInfs());
2670  Flags.setNoNaNs(FMF.noNaNs());
2671  Flags.setNoSignedZeros(FMF.noSignedZeros());
2672  Flags.setUnsafeAlgebra(FMF.unsafeAlgebra());
2673 
2674  SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
2675  Op1, Op2, Flags);
2676  setValue(&I, BinNodeValue);
2677 }
2678 
2679 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
2680  SDValue Op1 = getValue(I.getOperand(0));
2681  SDValue Op2 = getValue(I.getOperand(1));
2682 
2683  EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
2684  Op2.getValueType(), DAG.getDataLayout());
2685 
2686  // Coerce the shift amount to the right type if we can.
2687  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
2688  unsigned ShiftSize = ShiftTy.getSizeInBits();
2689  unsigned Op2Size = Op2.getValueSizeInBits();
2690  SDLoc DL = getCurSDLoc();
2691 
2692  // If the operand is smaller than the shift count type, promote it.
2693  if (ShiftSize > Op2Size)
2694  Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
2695 
2696  // If the operand is larger than the shift count type but the shift
2697  // count type has enough bits to represent any shift value, truncate
2698  // it now. This is a common case and it exposes the truncate to
2699  // optimization early.
2700  else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
2701  Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
2702  // Otherwise we'll need to temporarily settle for some other convenient
2703  // type. Type legalization will make adjustments once the shiftee is split.
2704  else
2705  Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
2706  }
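  // For instance (an illustrative case), if the target's shift-amount type is
  // i32, an i16 IR shift amount is zero-extended to i32; if the target's type
  // is i8, an i64 IR amount is truncated to i8 up front, since 8 bits already
  // encode every in-range shift of the value being shifted.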
2707 
2708  bool nuw = false;
2709  bool nsw = false;
2710  bool exact = false;
2711 
2712  if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
2713 
2714  if (const OverflowingBinaryOperator *OFBinOp =
2715  dyn_cast<const OverflowingBinaryOperator>(&I)) {
2716  nuw = OFBinOp->hasNoUnsignedWrap();
2717  nsw = OFBinOp->hasNoSignedWrap();
2718  }
2719  if (const PossiblyExactOperator *ExactOp =
2720  dyn_cast<const PossiblyExactOperator>(&I))
2721  exact = ExactOp->isExact();
2722  }
2723  SDNodeFlags Flags;
2724  Flags.setExact(exact);
2725  Flags.setNoSignedWrap(nsw);
2726  Flags.setNoUnsignedWrap(nuw);
2727  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
2728  Flags);
2729  setValue(&I, Res);
2730 }
2731 
2732 void SelectionDAGBuilder::visitSDiv(const User &I) {
2733  SDValue Op1 = getValue(I.getOperand(0));
2734  SDValue Op2 = getValue(I.getOperand(1));
2735 
2736  SDNodeFlags Flags;
2737  Flags.setExact(isa<PossiblyExactOperator>(&I) &&
2738  cast<PossiblyExactOperator>(&I)->isExact());
2739  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
2740  Op2, Flags));
2741 }
2742 
2743 void SelectionDAGBuilder::visitICmp(const User &I) {
2744  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
2745  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2746  predicate = IC->getPredicate();
2747  else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
2748  predicate = ICmpInst::Predicate(IC->getPredicate());
2749  SDValue Op1 = getValue(I.getOperand(0));
2750  SDValue Op2 = getValue(I.getOperand(1));
2751  ISD::CondCode Opcode = getICmpCondCode(predicate);
2752 
2753  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2754  I.getType());
2755  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
2756 }
2757 
2758 void SelectionDAGBuilder::visitFCmp(const User &I) {
2759  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
2760  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
2761  predicate = FC->getPredicate();
2762  else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
2763  predicate = FCmpInst::Predicate(FC->getPredicate());
2764  SDValue Op1 = getValue(I.getOperand(0));
2765  SDValue Op2 = getValue(I.getOperand(1));
2766  ISD::CondCode Condition = getFCmpCondCode(predicate);
2767 
2768  // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
2769  // FIXME: We should propagate the fast-math-flags to the DAG node itself for
2770  // further optimization, but currently FMF is only applicable to binary nodes.
2771  if (TM.Options.NoNaNsFPMath)
2772  Condition = getFCmpCodeWithoutNaN(Condition);
2773  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2774  I.getType());
2775  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
2776 }
2777 
2778 // Check that the condition of the select is used only by select instructions,
2779 // so folding the compare into a min/max node leaves no other users behind.
2780 static bool hasOnlySelectUsers(const Value *Cond) {
2781  return all_of(Cond->users(), [](const Value *V) {
2782  return isa<SelectInst>(V);
2783  });
2784 }
2785 
2786 void SelectionDAGBuilder::visitSelect(const User &I) {
2787  SmallVector<EVT, 4> ValueVTs;
2788  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
2789  ValueVTs);
2790  unsigned NumValues = ValueVTs.size();
2791  if (NumValues == 0) return;
2792 
2793  SmallVector<SDValue, 4> Values(NumValues);
2794  SDValue Cond = getValue(I.getOperand(0));
2795  SDValue LHSVal = getValue(I.getOperand(1));
2796  SDValue RHSVal = getValue(I.getOperand(2));
2797  auto BaseOps = {Cond};
2798  ISD::NodeType OpCode = Cond.getValueType().isVector() ?
2799  ISD::VSELECT : ISD::SELECT;
2800 
2801  // Min/max matching is only viable if all output VTs are the same.
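  // For example, IR of the shape
  //   %c = icmp slt i32 %a, %b
  //   %m = select i1 %c, i32 %a, i32 %b
  // is matched below to a single ISD::SMIN node when the target supports it.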
2802  if (std::equal(ValueVTs.begin() + 1, ValueVTs.end(), ValueVTs.begin())) {
2803  EVT VT = ValueVTs[0];
2804  LLVMContext &Ctx = *DAG.getContext();
2805  auto &TLI = DAG.getTargetLoweringInfo();
2806 
2807  // We care about the legality of the operation after it has been type
2808  // legalized.
2809  while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
2810  VT != TLI.getTypeToTransformTo(Ctx, VT))
2811  VT = TLI.getTypeToTransformTo(Ctx, VT);
2812 
2813  // If the vselect is legal, assume we want to leave this as a vector setcc +
2814  // vselect. Otherwise, if this is going to be scalarized, we want to see if
2815  // min/max is legal on the scalar type.
2816  bool UseScalarMinMax = VT.isVector() &&
2817  !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
2818 
2819  Value *LHS, *RHS;
2820  auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
2821  ISD::NodeType Opc = ISD::DELETED_NODE;
2822  switch (SPR.Flavor) {
2823  case SPF_UMAX: Opc = ISD::UMAX; break;
2824  case SPF_UMIN: Opc = ISD::UMIN; break;
2825  case SPF_SMAX: Opc = ISD::SMAX; break;
2826  case SPF_SMIN: Opc = ISD::SMIN; break;
2827  case SPF_FMINNUM:
2828  switch (SPR.NaNBehavior) {
2829  case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2830  case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
2831  case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
2832  case SPNB_RETURNS_ANY: {
2833  if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
2834  Opc = ISD::FMINNUM;
2835  else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
2836  Opc = ISD::FMINNAN;
2837  else if (UseScalarMinMax)
2838  Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
2839  ISD::FMINNUM : ISD::FMINNAN;
2840  break;
2841  }
2842  }
2843  break;
2844  case SPF_FMAXNUM:
2845  switch (SPR.NaNBehavior) {
2846  case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2847  case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
2848  case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
2849  case SPNB_RETURNS_ANY:
2850 
2851  if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
2852  Opc = ISD::FMAXNUM;
2853  else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
2854  Opc = ISD::FMAXNAN;
2855  else if (UseScalarMinMax)
2856  Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
2857  ISD::FMAXNUM : ISD::FMAXNAN;
2858  break;
2859  }
2860  break;
2861  default: break;
2862  }
2863 
2864  if (Opc != ISD::DELETED_NODE &&
2865  (TLI.isOperationLegalOrCustom(Opc, VT) ||
2866  (UseScalarMinMax &&
2867  TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
2868  // If the underlying comparison instruction is used by any other
2869  // instruction, the consumed instructions won't be destroyed, so it is
2870  // not profitable to convert to a min/max.
2871  hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
2872  OpCode = Opc;
2873  LHSVal = getValue(LHS);
2874  RHSVal = getValue(RHS);
2875  BaseOps = {};
2876  }
2877  }
2878 
2879  for (unsigned i = 0; i != NumValues; ++i) {
2880  SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
2881  Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
2882  Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
2883  Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
2884  LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
2885  Ops);
2886  }
2887 
2888  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
2889  DAG.getVTList(ValueVTs), Values));
2890 }
2891 
2892 void SelectionDAGBuilder::visitTrunc(const User &I) {
2893  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
2894  SDValue N = getValue(I.getOperand(0));
2895  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2896  I.getType());
2897  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
2898 }
2899 
2900 void SelectionDAGBuilder::visitZExt(const User &I) {
2901  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2902  // ZExt also can't be a cast to bool for the same reason, so there is nothing more to do.
2903  SDValue N = getValue(I.getOperand(0));
2904  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2905  I.getType());
2906  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
2907 }
2908 
2909 void SelectionDAGBuilder::visitSExt(const User &I) {
2910  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2911  // SExt also can't be a cast to bool for the same reason, so there is nothing more to do.
2912  SDValue N = getValue(I.getOperand(0));
2913  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2914  I.getType());
2915  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
2916 }
2917 
2918 void SelectionDAGBuilder::visitFPTrunc(const User &I) {
2919  // FPTrunc is never a no-op cast, no need to check
2920  SDValue N = getValue(I.getOperand(0));
2921  SDLoc dl = getCurSDLoc();
2922  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2923  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
2924  setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
2925  DAG.getTargetConstant(
2926  0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
2927 }
2928 
2929 void SelectionDAGBuilder::visitFPExt(const User &I) {
2930  // FPExt is never a no-op cast, no need to check
2931  SDValue N = getValue(I.getOperand(0));
2932  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2933  I.getType());
2934  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
2935 }
2936 
2937 void SelectionDAGBuilder::visitFPToUI(const User &I) {
2938  // FPToUI is never a no-op cast, no need to check
2939  SDValue N = getValue(I.getOperand(0));
2940  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2941  I.getType());
2942  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
2943 }
2944 
2945 void SelectionDAGBuilder::visitFPToSI(const User &I) {
2946  // FPToSI is never a no-op cast, no need to check
2947  SDValue N = getValue(I.getOperand(0));
2948  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2949  I.getType());
2950  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
2951 }
2952 
2953 void SelectionDAGBuilder::visitUIToFP(const User &I) {
2954  // UIToFP is never a no-op cast, no need to check
2955  SDValue N = getValue(I.getOperand(0));
2956  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2957  I.getType());
2958  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
2959 }
2960 
2961 void SelectionDAGBuilder::visitSIToFP(const User &I) {
2962  // SIToFP is never a no-op cast, no need to check
2963  SDValue N = getValue(I.getOperand(0));
2964  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2965  I.getType());
2966  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
2967 }
2968 
2969 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
2970  // What to do depends on the size of the integer and the size of the pointer.
2971  // We can either truncate, zero extend, or no-op, accordingly.
2972  SDValue N = getValue(I.getOperand(0));
2973  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2974  I.getType());
2975  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
2976 }
2977 
2978 void SelectionDAGBuilder::visitIntToPtr(const User &I) {
2979  // What to do depends on the size of the integer and the size of the pointer.
2980  // We can either truncate, zero extend, or no-op, accordingly.
2981  SDValue N = getValue(I.getOperand(0));
2982  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2983  I.getType());
2984  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
2985 }
2986 
2987 void SelectionDAGBuilder::visitBitCast(const User &I) {
2988  SDValue N = getValue(I.getOperand(0));
2989  SDLoc dl = getCurSDLoc();
2990  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2991  I.getType());
2992 
2993  // BitCast assures us that source and destination are the same size so this is
2994  // either a BITCAST or a no-op.
2995  if (DestVT != N.getValueType())
2996  setValue(&I, DAG.getNode(ISD::BITCAST, dl,
2997  DestVT, N)); // convert types.
2998  // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
2999  // might fold any kind of constant expression to an integer constant and that
3000  // is not what we are looking for. Only recognize a bitcast of a genuine
3001  // constant integer as an opaque constant.
3002  else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
3003  setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
3004  /*isOpaque*/true));
3005  else
3006  setValue(&I, N); // noop cast.
3007 }
3008 
3009 void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
3010  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3011  const Value *SV = I.getOperand(0);
3012  SDValue N = getValue(SV);
3013  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3014 
3015  unsigned SrcAS = SV->getType()->getPointerAddressSpace();
3016  unsigned DestAS = I.getType()->getPointerAddressSpace();
3017 
3018  if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3019  N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
3020 
3021  setValue(&I, N);
3022 }
3023 
3024 void SelectionDAGBuilder::visitInsertElement(const User &I) {
3025  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3026  SDValue InVec = getValue(I.getOperand(0));
3027  SDValue InVal = getValue(I.getOperand(1));
3028  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
3029  TLI.getVectorIdxTy(DAG.getDataLayout()));
3030  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
3031  TLI.getValueType(DAG.getDataLayout(), I.getType()),
3032  InVec, InVal, InIdx));
3033 }
3034 
3035 void SelectionDAGBuilder::visitExtractElement(const User &I) {
3036  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3037  SDValue InVec = getValue(I.getOperand(0));
3038  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
3039  TLI.getVectorIdxTy(DAG.getDataLayout()));
3040  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
3041  TLI.getValueType(DAG.getDataLayout(), I.getType()),
3042  InVec, InIdx));
3043 }
3044 
3045 void SelectionDAGBuilder::visitShuffleVector(const User &I) {
3046  SDValue Src1 = getValue(I.getOperand(0));
3047  SDValue Src2 = getValue(I.getOperand(1));
3048  SDLoc DL = getCurSDLoc();
3049 
3050  SmallVector<int, 8> Mask;
3051  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
3052  unsigned MaskNumElts = Mask.size();
3053 
3054  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3055  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3056  EVT SrcVT = Src1.getValueType();
3057  unsigned SrcNumElts = SrcVT.getVectorNumElements();
3058 
3059  if (SrcNumElts == MaskNumElts) {
3060  setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
3061  return;
3062  }
3063 
3064  // Normalize the shuffle vector since mask and vector length don't match.
3065  if (SrcNumElts < MaskNumElts) {
3066  // Mask is longer than the source vectors. We can use CONCAT_VECTORS to
3067  // make the mask and vector lengths match.
3068 
3069  if (MaskNumElts % SrcNumElts == 0) {
3070  // Mask length is a multiple of the source vector length.
3071  // Check if the shuffle is some kind of concatenation of the input
3072  // vectors.
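// For example (illustrative): with two <4 x i32> sources, the mask
// <0,1,2,3,4,5,6,7> passes this check with ConcatSrcs = {0, 1} and becomes
// CONCAT_VECTORS(Src1, Src2), while <4,5,6,7,4,5,6,7> gives ConcatSrcs =
// {1, 1}, i.e. Src2 concatenated with itself.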
3073  unsigned NumConcat = MaskNumElts / SrcNumElts;
3074  bool IsConcat = true;
3075  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
3076  for (unsigned i = 0; i != MaskNumElts; ++i) {
3077  int Idx = Mask[i];
3078  if (Idx < 0)
3079  continue;
3080  // Ensure the indices in each SrcVT sized piece are sequential and that
3081  // the same source is used for the whole piece.
3082  if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
3083  (ConcatSrcs[i / SrcNumElts] >= 0 &&
3084  ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
3085  IsConcat = false;
3086  break;
3087  }
3088  // Remember which source this index came from.
3089  ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
3090  }
3091 
3092  // The shuffle is concatenating multiple vectors together. Just emit
3093  // a CONCAT_VECTORS operation.
3094  if (IsConcat) {
3095  SmallVector<SDValue, 8> ConcatOps;
3096  for (auto Src : ConcatSrcs) {
3097  if (Src < 0)
3098  ConcatOps.push_back(DAG.getUNDEF(SrcVT));
3099  else if (Src == 0)
3100  ConcatOps.push_back(Src1);
3101  else
3102  ConcatOps.push_back(Src2);
3103  }
3104  setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
3105  return;
3106  }
3107  }
3108 
3109  unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
3110  unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
3111  EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
3112  PaddedMaskNumElts);
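// For example (illustrative): <4 x i32> sources with a 6-element mask give
// PaddedMaskNumElts = alignTo(6, 4) = 8; the shuffle is then done in
// <8 x i32>, and the EXTRACT_SUBVECTOR below recovers the <6 x i32> result.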
3113 
3114  // Pad both vectors with undefs to make them the same length as the mask.
3115  SDValue UndefVal = DAG.getUNDEF(SrcVT);
3116 
3117  SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
3118  SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
3119  MOps1[0] = Src1;
3120  MOps2[0] = Src2;
3121 
3122  Src1 = Src1.isUndef()
3123  ? DAG.getUNDEF(PaddedVT)
3124  : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
3125  Src2 = Src2.isUndef()
3126  ? DAG.getUNDEF(PaddedVT)
3127  : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
3128 
3129  // Readjust mask for new input vector length.
3130  SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
3131  for (unsigned i = 0; i != MaskNumElts; ++i) {
3132  int Idx = Mask[i];
3133  if (Idx >= (int)SrcNumElts)
3134  Idx -= SrcNumElts - PaddedMaskNumElts;
3135  MappedOps[i] = Idx;
3136  }
3137 
3138  SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
3139 
3140  // If the concatenated vector was padded, extract a subvector with the
3141  // correct number of elements.
3142  if (MaskNumElts != PaddedMaskNumElts)
3143  Result = DAG.getNode(
3144  ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
3145  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
3146 
3147  setValue(&I, Result);
3148  return;
3149  }
3150 
3151  if (SrcNumElts > MaskNumElts) {
3152  // Analyze the access pattern of the vector to see if we can extract
3153  // two subvectors and do the shuffle.
3154  int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
3155  bool CanExtract = true;
3156  for (int Idx : Mask) {
3157  unsigned Input = 0;
3158  if (Idx < 0)
3159  continue;
3160 
3161  if (Idx >= (int)SrcNumElts) {
3162  Input = 1;
3163  Idx -= SrcNumElts;
3164  }
3165 
3166  // If all the indices come from the same MaskNumElts sized portion of
3167  // the sources we can use extract. Also make sure the extract wouldn't
3168  // extract past the end of the source.
3169  int NewStartIdx = alignDown(Idx, MaskNumElts);
3170  if (NewStartIdx + MaskNumElts > SrcNumElts ||
3171  (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
3172  CanExtract = false;
3173  // Make sure we always update StartIdx as we use it to track if all
3174  // elements are undef.
3175  StartIdx[Input] = NewStartIdx;
3176  }
3177 
3178  if (StartIdx[0] < 0 && StartIdx[1] < 0) {
3179  setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
3180  return;
3181  }
3182  if (CanExtract) {
3183  // Extract appropriate subvector and generate a vector shuffle
3184  for (unsigned Input = 0; Input < 2; ++Input) {
3185  SDValue &Src = Input == 0 ? Src1 : Src2;
3186  if (StartIdx[Input] < 0)
3187  Src = DAG.getUNDEF(VT);
3188  else {
3189  Src = DAG.getNode(
3190  ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
3191  DAG.getConstant(StartIdx[Input], DL,
3192  TLI.getVectorIdxTy(DAG.getDataLayout())));
3193  }
3194  }
3195 
3196  // Calculate new mask.
3197  SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
3198  for (int &Idx : MappedOps) {
3199  if (Idx >= (int)SrcNumElts)
3200  Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
3201  else if (Idx >= 0)
3202  Idx -= StartIdx[0];
3203  }
3204 
3205  setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
3206  return;
3207  }
3208  }
3209 
3210  // We can't use either concat vectors or extract subvectors so fall back to
3211  // replacing the shuffle with per-element extracts and a build vector.
3213  EVT EltVT = VT.getVectorElementType();
3214  EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
3215  SmallVector<SDValue,8> Ops;
3216  for (int Idx : Mask) {
3217  SDValue Res;
3218 
3219  if (Idx < 0) {
3220  Res = DAG.getUNDEF(EltVT);
3221  } else {
3222  SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
3223  if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
3224 
3225  Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3226  EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
3227  }
3228 
3229  Ops.push_back(Res);
3230  }
3231 
3232  setValue(&I, DAG.getBuildVector(VT, DL, Ops));
3233 }
3234 
3235 void SelectionDAGBuilder::visitInsertValue(const User &I) {
3236  ArrayRef<unsigned> Indices;
3237  if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
3238  Indices = IV->getIndices();
3239  else
3240  Indices = cast<ConstantExpr>(&I)->getIndices();
3241 
3242  const Value *Op0 = I.getOperand(0);
3243  const Value *Op1 = I.getOperand(1);
3244  Type *AggTy = I.getType();
3245  Type *ValTy = Op1->getType();
3246  bool IntoUndef = isa<UndefValue>(Op0);
3247  bool FromUndef = isa<UndefValue>(Op1);
3248 
3249  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
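// For example (illustrative): for AggTy = {i32, {float, double}} and
// Indices = {1, 1}, the aggregate flattens to (i32, float, double), so
// LinearIndex is 2.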
3250 
3251  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3252  SmallVector<EVT, 4> AggValueVTs;
3253  ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
3254  SmallVector<EVT, 4> ValValueVTs;
3255  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3256 
3257  unsigned NumAggValues = AggValueVTs.size();
3258  unsigned NumValValues = ValValueVTs.size();
3259  SmallVector<SDValue, 4> Values(NumAggValues);
3260 
3261  // Ignore an insertvalue that produces an empty object
3262  if (!NumAggValues) {
3263  setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3264  return;
3265  }
3266 
3267  SDValue Agg = getValue(Op0);
3268  unsigned i = 0;
3269  // Copy the beginning value(s) from the original aggregate.
3270  for (; i != LinearIndex; ++i)
3271  Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3272  SDValue(Agg.getNode(), Agg.getResNo() + i);
3273  // Copy values from the inserted value(s).
3274  if (NumValValues) {
3275  SDValue Val = getValue(Op1);
3276  for (; i != LinearIndex + NumValValues; ++i)
3277  Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3278  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
3279  }
3280  // Copy remaining value(s) from the original aggregate.
3281  for (; i != NumAggValues; ++i)
3282  Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3283  SDValue(Agg.getNode(), Agg.getResNo() + i);
3284 
3285  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3286  DAG.getVTList(AggValueVTs), Values));
3287 }
3288 
3289 void SelectionDAGBuilder::visitExtractValue(const User &I) {
3290  ArrayRef<unsigned> Indices;
3291  if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
3292  Indices = EV->getIndices();
3293  else
3294  Indices = cast<ConstantExpr>(&I)->getIndices();
3295 
3296  const Value *Op0 = I.getOperand(0);
3297  Type *AggTy = Op0->getType();
3298  Type *ValTy = I.getType();
3299  bool OutOfUndef = isa<UndefValue>(Op0);
3300 
3301  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3302 
3303  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3304  SmallVector<EVT, 4> ValValueVTs;
3305  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3306 
3307  unsigned NumValValues = ValValueVTs.size();
3308 
3309  // Ignore an extractvalue that produces an empty object
3310  if (!NumValValues) {
3311  setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3312  return;
3313  }
3314 
3315  SmallVector<SDValue, 4> Values(NumValValues);
3316 
3317  SDValue Agg = getValue(Op0);
3318  // Copy out the selected value(s).
3319  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
3320  Values[i - LinearIndex] =
3321  OutOfUndef ?
3322  DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
3323  SDValue(Agg.getNode(), Agg.getResNo() + i);
3324 
3325  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3326  DAG.getVTList(ValValueVTs), Values));
3327 }
3328 
3329 void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
3330  Value *Op0 = I.getOperand(0);
3331  // Note that the pointer operand may be a vector of pointers. Take the scalar
3332  // element which holds a pointer.
3333  unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
3334  SDValue N = getValue(Op0);
3335  SDLoc dl = getCurSDLoc();
3336 
3337  // Normalize Vector GEP - all scalar operands should be converted to
3338  // splat vectors.
3339  unsigned VectorWidth = I.getType()->isVectorTy() ?
3340  cast<VectorType>(I.getType())->getVectorNumElements() : 0;
3341 
3342  if (VectorWidth && !N.getValueType().isVector()) {
3343  LLVMContext &Context = *DAG.getContext();
3344  EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
3345  N = DAG.getSplatBuildVector(VT, dl, N);
3346  }
3347 
3348  for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
3349  GTI != E; ++GTI) {
3350  const Value *Idx = GTI.getOperand();
3351  if (StructType *StTy = GTI.getStructTypeOrNull()) {
3352  unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
3353  if (Field) {
3354  // N = N + Offset
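// For example (illustrative): in 'getelementptr {i32, i64}, {i32, i64}* %s,
// i32 0, i32 1' the struct index selects field 1, whose StructLayout offset
// (8 bytes with natural alignment) is added to N below.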
3355  uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
3356 
3357  // In an inbounds GEP with an offset that is nonnegative even when
3358  // interpreted as signed, assume there is no unsigned overflow.
3359  SDNodeFlags Flags;
3360  if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
3361  Flags.setNoUnsignedWrap(true);
3362 
3363  N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
3364  DAG.getConstant(Offset, dl, N.getValueType()), Flags);
3365  }
3366  } else {
3367  MVT PtrTy =
3368  DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout(), AS);
3369  unsigned PtrSize = PtrTy.getSizeInBits();
3370  APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType()));
3371 
3372  // If this is a scalar constant or a splat vector of constants,
3373  // handle it quickly.
3374  const auto *CI = dyn_cast<ConstantInt>(Idx);
3375  if (!CI && isa<ConstantDataVector>(Idx) &&
3376  cast<ConstantDataVector>(Idx)->getSplatValue())
3377  CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
3378 
3379  if (CI) {
3380  if (CI->isZero())
3381  continue;
3382  APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
3383  LLVMContext &Context = *DAG.getContext();
3384  SDValue OffsVal = VectorWidth ?
3385  DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) :
3386  DAG.getConstant(Offs, dl, PtrTy);
3387 
3388  // In an inbounds GEP with an offset that is nonnegative even when
3389  // interpreted as signed, assume there is no unsigned overflow.
3390  SDNodeFlags Flags;
3391  if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
3392  Flags.setNoUnsignedWrap(true);
3393 
3394  N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
3395  continue;
3396  }
3397 
3398  // N = N + Idx * ElementSize;
3399  SDValue IdxN = getValue(Idx);
3400 
3401  if (!IdxN.getValueType().isVector() && VectorWidth) {
3402  EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
3403  IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
3404  }
3405 
3406  // If the index is smaller or larger than intptr_t, truncate or extend
3407  // it.
3408  IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
3409 
3410  // If this is a multiply by a power of two, turn it into a shl
3411  // immediately. This is a very common case.
3412  if (ElementSize != 1) {
3413  if (ElementSize.isPowerOf2()) {
3414  unsigned Amt = ElementSize.logBase2();
3415  IdxN = DAG.getNode(ISD::SHL, dl,
3416  N.getValueType(), IdxN,
3417  DAG.getConstant(Amt, dl, IdxN.getValueType()));
3418  } else {
3419  SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
3420  IdxN = DAG.getNode(ISD::MUL, dl,
3421  N.getValueType(), IdxN, Scale);
3422  }
3423  }
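// For example (illustrative): indexing an i32 array has ElementSize = 4, so
// the power-of-two path above emits IdxN << 2 instead of a multiply.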
3424 
3425  N = DAG.getNode(ISD::ADD, dl,
3426  N.getValueType(), N, IdxN);
3427  }
3428  }
3429 
3430  setValue(&I, N);
3431 }
3432 
3433 void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
3434  // If this is a fixed sized alloca in the entry block of the function,
3435  // allocate it statically on the stack.
3436  if (FuncInfo.StaticAllocaMap.count(&I))
3437  return; // getValue will auto-populate this.
3438 
3439  SDLoc dl = getCurSDLoc();
3440  Type *Ty = I.getAllocatedType();
3441  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3442  auto &DL = DAG.getDataLayout();
3443  uint64_t TySize = DL.getTypeAllocSize(Ty);
3444  unsigned Align =
3445  std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
3446 
3447  SDValue AllocSize = getValue(I.getArraySize());
3448 
3449  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout());
3450  if (AllocSize.getValueType() != IntPtr)
3451  AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
3452 
3453  AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
3454  AllocSize,
3455  DAG.getConstant(TySize, dl, IntPtr));
3456 
3457  // Handle alignment. If the requested alignment is less than or equal to
3458  // the stack alignment, ignore it. If the requested alignment is greater
3459  // than the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
3460  unsigned StackAlign =
3461  DAG.getSubtarget().getFrameLowering()->getStackAlignment();
3462  if (Align <= StackAlign)
3463  Align = 0;
3464 
3465  // Round the size of the allocation up to the stack alignment size
3466  // by adding SA-1 to the size. This doesn't overflow because we're computing
3467  // an address inside an alloca.
3468  SDNodeFlags Flags;
3469  Flags.setNoUnsignedWrap(true);
3470  AllocSize = DAG.getNode(ISD::ADD, dl,
3471  AllocSize.getValueType(), AllocSize,
3472  DAG.getIntPtrConstant(StackAlign - 1, dl), Flags);
3473 
3474  // Mask out the low bits for alignment purposes.
3475  AllocSize = DAG.getNode(ISD::AND, dl,
3476  AllocSize.getValueType(), AllocSize,
3477  DAG.getIntPtrConstant(~(uint64_t)(StackAlign - 1),
3478  dl));
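// For example (illustrative): with StackAlign = 16, a 40-byte dynamic
// allocation becomes (40 + 15) & ~15 = 48, the next multiple of the stack
// alignment.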
3479 
3480  SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align, dl) };
3481  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
3482  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
3483  setValue(&I, DSA);
3484  DAG.setRoot(DSA.getValue(1));
3485 
3486  assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
3487 }
3488 
3489 void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
3490  if (I.isAtomic())
3491  return visitAtomicLoad(I);
3492 
3493  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3494  const Value *SV = I.getOperand(0);
3495  if (TLI.supportSwiftError()) {
3496  // Swifterror values can come from either a function parameter with
3497  // swifterror attribute or an alloca with swifterror attribute.
3498  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
3499  if (Arg->hasSwiftErrorAttr())
3500  return visitLoadFromSwiftError(I);
3501  }
3502 
3503  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
3504  if (Alloca->isSwiftError())
3505  return visitLoadFromSwiftError(I);
3506  }
3507  }
3508 
3509  SDValue Ptr = getValue(SV);
3510 
3511  Type *Ty = I.getType();
3512 
3513  bool isVolatile = I.isVolatile();
3514  bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
3515  bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
3516  bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
3517  unsigned Alignment = I.getAlignment();
3518 
3519  AAMDNodes AAInfo;
3520  I.getAAMetadata(AAInfo);
3521  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3522 
3523  SmallVector<EVT, 4> ValueVTs;
3524  SmallVector<uint64_t, 4> Offsets;
3525  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
3526  unsigned NumValues = ValueVTs.size();
3527  if (NumValues == 0)
3528  return;
3529 
3530  SDValue Root;
3531  bool ConstantMemory = false;
3532  if (isVolatile || NumValues > MaxParallelChains)
3533  // Serialize volatile loads with other side effects.
3534  Root = getRoot();
3535  else if (AA && AA->pointsToConstantMemory(MemoryLocation(
3536  SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
3537  // Do not serialize (non-volatile) loads of constant memory with anything.
3538  Root = DAG.getEntryNode();
3539  ConstantMemory = true;
3540  } else {
3541  // Do not serialize non-volatile loads against each other.
3542  Root = DAG.getRoot();
3543  }
3544 
3545  SDLoc dl = getCurSDLoc();
3546 
3547  if (isVolatile)
3548  Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
3549 
3550  // An aggregate load cannot wrap around the address space, so offsets to its
3551  // parts don't wrap either.
3552  SDNodeFlags Flags;
3553  Flags.setNoUnsignedWrap(true);
3554 
3555  SmallVector<SDValue, 4> Values(NumValues);
3556  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3557  EVT PtrVT = Ptr.getValueType();
3558  unsigned ChainI = 0;
3559  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3560  // Serializing loads here may result in excessive register pressure, and
3561  // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
3562  // could recover a bit by hoisting nodes upward in the chain by recognizing
3563  // they are side-effect free or do not alias. The optimizer should really
3564  // avoid this case by converting large object/array copies to llvm.memcpy
3565  // (MaxParallelChains should always remain as failsafe).
3566  if (ChainI == MaxParallelChains) {
3567  assert(PendingLoads.empty() && "PendingLoads must be serialized first");
3568  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3569  makeArrayRef(Chains.data(), ChainI));
3570  Root = Chain;
3571  ChainI = 0;
3572  }
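// For example (illustrative): a load that splits into 100 parts emits one
// TokenFactor over the first 64 chains (MaxParallelChains) and threads the
// remaining 36 loads off that TokenFactor.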
3573  SDValue A = DAG.getNode(ISD::ADD, dl,
3574  PtrVT, Ptr,
3575  DAG.getConstant(Offsets[i], dl, PtrVT),
3576  Flags);
3577  auto MMOFlags = MachineMemOperand::MONone;
3578  if (isVolatile)
3579  MMOFlags |= MachineMemOperand::MOVolatile;
3580  if (isNonTemporal)
3581  MMOFlags |= MachineMemOperand::MONonTemporal;
3582  if (isInvariant)
3583  MMOFlags |= MachineMemOperand::MOInvariant;
3584  if (isDereferenceable)
3585  MMOFlags |= MachineMemOperand::MODereferenceable;
3586  MMOFlags |= TLI.getMMOFlags(I);
3587 
3588  SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
3589  MachinePointerInfo(SV, Offsets[i]), Alignment,
3590  MMOFlags, AAInfo, Ranges);
3591 
3592  Values[i] = L;
3593  Chains[ChainI] = L.getValue(1);
3594  }
3595 
3596  if (!ConstantMemory) {
3597  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3598  makeArrayRef(Chains.data(), ChainI));
3599  if (isVolatile)
3600  DAG.setRoot(Chain);
3601  else
3602  PendingLoads.push_back(Chain);
3603  }
3604 
3605  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
3606  DAG.getVTList(ValueVTs), Values));
3607 }
3608 
3609 void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
3611  "call visitStoreToSwiftError when backend supports swifterror");
3612 
3613  SmallVector<EVT, 4> ValueVTs;
3614  SmallVector<uint64_t, 4> Offsets;
3615  const Value *SrcV = I.getOperand(0);
3616  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3617  SrcV->getType(), ValueVTs, &Offsets);
3618  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3619  "expect a single EVT for swifterror");
3620 
3621  SDValue Src = getValue(SrcV);
3622  // Create a virtual register, then update the virtual register.
3623  unsigned VReg; bool CreatedVReg;
3624  std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
3625  // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
3626  // Chain can be getRoot or getControlRoot.
3627  SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
3628  SDValue(Src.getNode(), Src.getResNo()));
3629  DAG.setRoot(CopyNode);
3630  if (CreatedVReg)
3631  FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
3632 }
3633 
3634 void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
3636  "call visitLoadFromSwiftError when backend supports swifterror");
3637 
3638  assert(!I.isVolatile() &&
3639  I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
3640  I.getMetadata(LLVMContext::MD_invariant_load) == nullptr &&
3641  "Support volatile, non temporal, invariant for load_from_swift_error");
3642 
3643  const Value *SV = I.getOperand(0);
3644  Type *Ty = I.getType();
3645  AAMDNodes AAInfo;
3646  I.getAAMetadata(AAInfo);
3647  assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
3648  SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
3649  "load_from_swift_error should not be constant memory");
3650 
3651  SmallVector<EVT, 4> ValueVTs;
3652  SmallVector<uint64_t, 4> Offsets;
3653  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
3654  ValueVTs, &Offsets);
3655  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3656  "expect a single EVT for swifterror");
3657 
3658  // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
3659  SDValue L = DAG.getCopyFromReg(
3660  getRoot(), getCurSDLoc(),
3661  FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
3662  ValueVTs[0]);
3663 
3664  setValue(&I, L);
3665 }
3666 
3667 void SelectionDAGBuilder::visitStore(const StoreInst &I) {
3668  if (I.isAtomic())
3669  return visitAtomicStore(I);
3670 
3671  const Value *SrcV = I.getOperand(0);
3672  const Value *PtrV = I.getOperand(1);
3673 
3674  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3675  if (TLI.supportSwiftError()) {
3676  // Swifterror values can come from either a function parameter with
3677  // swifterror attribute or an alloca with swifterror attribute.
3678  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
3679  if (Arg->hasSwiftErrorAttr())
3680  return visitStoreToSwiftError(I);
3681  }
3682 
3683  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
3684  if (Alloca->isSwiftError())
3685  return visitStoreToSwiftError(I);
3686  }
3687  }
3688 
3689  SmallVector<EVT, 4> ValueVTs;
3690  SmallVector<uint64_t, 4> Offsets;
3691  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3692  SrcV->getType(), ValueVTs, &Offsets);
3693  unsigned NumValues = ValueVTs.size();
3694  if (NumValues == 0)
3695  return;
3696 
3697  // Get the lowered operands. Note that we do this after
3698  // checking if NumValues is zero, because with zero values
3699  // the operands won't have values in the map.
3700  SDValue Src = getValue(SrcV);
3701  SDValue Ptr = getValue(PtrV);
3702 
3703  SDValue Root = getRoot();
3704  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3705  SDLoc dl = getCurSDLoc();
3706  EVT PtrVT = Ptr.getValueType();
3707  unsigned Alignment = I.getAlignment();
3708  AAMDNodes AAInfo;
3709  I.getAAMetadata(AAInfo);
3710 
3711  auto MMOFlags = MachineMemOperand::MONone;
3712  if (I.isVolatile())
3713  MMOFlags |= MachineMemOperand::MOVolatile;
3714  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
3715  MMOFlags |= MachineMemOperand::MONonTemporal;
3716  MMOFlags |= TLI.getMMOFlags(I);
3717 
3718  // An aggregate store cannot wrap around the address space, so offsets to
3719  // its parts don't wrap either.
3720  SDNodeFlags Flags;
3721  Flags.setNoUnsignedWrap(true);
3722 
3723  unsigned ChainI = 0;
3724  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3725  // See visitLoad comments.
3726  if (ChainI == MaxParallelChains) {
3727  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3728  makeArrayRef(Chains.data(), ChainI));
3729  Root = Chain;
3730  ChainI = 0;
3731  }
3732  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
3733  DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
3734  SDValue St = DAG.getStore(
3735  Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
3736  MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
3737  Chains[ChainI] = St;
3738  }
3739 
3740  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3741  makeArrayRef(Chains.data(), ChainI));
3742  DAG.setRoot(StoreNode);
3743 }
3744 
3745 void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
3746  bool IsCompressing) {
3747  SDLoc sdl = getCurSDLoc();
3748 
3749  auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3750  unsigned& Alignment) {
3751  // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
3752  Src0 = I.getArgOperand(0);
3753  Ptr = I.getArgOperand(1);
3754  Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3755  Mask = I.getArgOperand(3);
3756  };
3757  auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3758  unsigned& Alignment) {
3759  // llvm.masked.compressstore.*(Src0, Ptr, Mask)
3760  Src0 = I.getArgOperand(0);
3761  Ptr = I.getArgOperand(1);
3762  Mask = I.getArgOperand(2);
3763  Alignment = 0;
3764  };
3765 
3766  Value *PtrOperand, *MaskOperand, *Src0Operand;
3767  unsigned Alignment;
3768  if (IsCompressing)
3769  getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3770  else
3771  getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3772 
3773  SDValue Ptr = getValue(PtrOperand);
3774  SDValue Src0 = getValue(Src0Operand);
3775  SDValue Mask = getValue(MaskOperand);
3776 
3777  EVT VT = Src0.getValueType();
3778  if (!Alignment)
3779  Alignment = DAG.getEVTAlignment(VT);
3780 
3781  AAMDNodes AAInfo;
3782  I.getAAMetadata(AAInfo);
3783 
3784  MachineMemOperand *MMO =
3785  DAG.getMachineFunction().
3786  getMachineMemOperand(MachinePointerInfo(PtrOperand),
3787  MachineMemOperand::MOStore, VT.getStoreSize(),
3788  Alignment, AAInfo);
3789  SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
3790  MMO, false /* Truncating */,
3791  IsCompressing);
3792  DAG.setRoot(StoreNode);
3793  setValue(&I, StoreNode);
3794 }
3795 
3796 // Get a uniform base for the Gather/Scatter intrinsic.
3797 // The first argument of the Gather/Scatter intrinsic is a vector of pointers.
3798 // We try to represent it as a base pointer + vector of indices.
3799 // Usually, the vector of pointers comes from a 'getelementptr' instruction.
3800 // The first operand of the GEP may be a single pointer or a vector of pointers.
3801 // Example:
3802 // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
3803 // or
3804 // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
3805 // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
3806 //
3807 // When the first GEP operand is a single pointer - it is the uniform base we
3808 // are looking for. If the first operand of the GEP is a splat vector - we
3809 // extract the splat value and use it as a uniform base.
3810 // In all other cases the function returns 'false'.
3811 //
3812 static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
3813  SelectionDAGBuilder* SDB) {
3814 
3815  SelectionDAG& DAG = SDB->DAG;
3816  LLVMContext &Context = *DAG.getContext();
3817 
3818  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
3819  const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
3820  if (!GEP || GEP->getNumOperands() > 2)
3821  return false;
3822 
3823  const Value *GEPPtr = GEP->getPointerOperand();
3824  if (!GEPPtr->getType()->isVectorTy())
3825  Ptr = GEPPtr;
3826  else if (!(Ptr = getSplatValue(GEPPtr)))
3827  return false;
3828 
3829  Value *IndexVal = GEP->getOperand(1);
3830 
3831  // The operands of the GEP may be defined in another basic block.
3832  // In that case we won't find nodes for the operands.
3833  if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
3834  return false;
3835 
3836  Base = SDB->getValue(Ptr);
3837  Index = SDB->getValue(IndexVal);
3838 
3839  // Suppress sign extension.
3840  if (SExtInst* Sext = dyn_cast<SExtInst>(IndexVal)) {
3841  if (SDB->findValue(Sext->getOperand(0))) {
3842  IndexVal = Sext->getOperand(0);
3843  Index = SDB->getValue(IndexVal);
3844  }
3845  }
3846  if (!Index.getValueType().isVector()) {
3847  unsigned GEPWidth = GEP->getType()->getVectorNumElements();
3848  EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
3849  Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
3850  }
3851  return true;
3852 }
3853 
3854 void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
3855  SDLoc sdl = getCurSDLoc();
3856 
3857  // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
3858  const Value *Ptr = I.getArgOperand(1);
3859  SDValue Src0 = getValue(I.getArgOperand(0));
3860  SDValue Mask = getValue(I.getArgOperand(3));
3861  EVT VT = Src0.getValueType();
3862  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
3863  if (!Alignment)
3864  Alignment = DAG.getEVTAlignment(VT);
3865  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3866 
3867  AAMDNodes AAInfo;
3868  I.getAAMetadata(AAInfo);
3869 
3870  SDValue Base;
3871  SDValue Index;
3872  const Value *BasePtr = Ptr;
3873  bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
3874 
3875  const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
3876  MachineMemOperand *MMO = DAG.getMachineFunction().
3877  getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
3878  MachineMemOperand::MOStore, VT.getStoreSize(),
3879  Alignment, AAInfo);
3880  if (!UniformBase) {
3881  Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
3882  Index = getValue(Ptr);
3883  }
3884  SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index };
3885  SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
3886  Ops, MMO);
3887  DAG.setRoot(Scatter);
3888  setValue(&I, Scatter);
3889 }
3890 
3891 void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
3892  SDLoc sdl = getCurSDLoc();
3893 
3894  auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3895  unsigned& Alignment) {
3896  // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
3897  Ptr = I.getArgOperand(0);
3898  Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3899  Mask = I.getArgOperand(2);
3900  Src0 = I.getArgOperand(3);
3901  };
3902  auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3903  unsigned& Alignment) {
3904  // @llvm.masked.expandload.*(Ptr, Mask, Src0)
3905  Ptr = I.getArgOperand(0);
3906  Alignment = 0;
3907  Mask = I.getArgOperand(1);
3908  Src0 = I.getArgOperand(2);
3909  };
3910 
3911  Value *PtrOperand, *MaskOperand, *Src0Operand;
3912  unsigned Alignment;
3913  if (IsExpanding)
3914  getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3915  else
3916  getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3917 
3918  SDValue Ptr = getValue(PtrOperand);
3919  SDValue Src0 = getValue(Src0Operand);
3920  SDValue Mask = getValue(MaskOperand);
3921 
3922  EVT VT = Src0.getValueType();
3923  if (!Alignment)
3924  Alignment = DAG.getEVTAlignment(VT);
3925 
3926  AAMDNodes AAInfo;
3927  I.getAAMetadata(AAInfo);
3928  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3929 
3930  // Do not serialize masked loads of constant memory with anything.
3931  bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
3932  PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
3933  SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
3934 
3935  MachineMemOperand *MMO =
3936  DAG.getMachineFunction().
3937  getMachineMemOperand(MachinePointerInfo(PtrOperand),
3938  MachineMemOperand::MOLoad, VT.getStoreSize(),
3939  Alignment, AAInfo, Ranges);
3940 
3941  SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
3942  ISD::NON_EXTLOAD, IsExpanding);
3943  if (AddToChain) {
3944  SDValue OutChain = Load.getValue(1);
3945  DAG.setRoot(OutChain);
3946  }
3947  setValue(&I, Load);
3948 }
3949 
3950 void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
3951  SDLoc sdl = getCurSDLoc();
3952 
3953  // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
3954  const Value *Ptr = I.getArgOperand(0);
3955  SDValue Src0 = getValue(I.getArgOperand(3));
3956  SDValue Mask = getValue(I.getArgOperand(2));
3957 
3958  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3959  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3960  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
3961  if (!Alignment)
3962  Alignment = DAG.getEVTAlignment(VT);
3963 
3964  AAMDNodes AAInfo;
3965  I.getAAMetadata(AAInfo);
3966  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3967 
3968  SDValue Root = DAG.getRoot();
3969  SDValue Base;
3970  SDValue Index;
3971  const Value *BasePtr = Ptr;
3972  bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
3973  bool ConstantMemory = false;
3974  if (UniformBase &&
3975  AA && AA->pointsToConstantMemory(MemoryLocation(
3976  BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
3977  AAInfo))) {
3978  // Do not serialize (non-volatile) loads of constant memory with anything.
3979  Root = DAG.getEntryNode();
3980  ConstantMemory = true;
3981  }
3982 
3983  MachineMemOperand *MMO =
3984  DAG.getMachineFunction().
3985  getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
3986  MachineMemOperand::MOLoad, VT.getStoreSize(),
3987  Alignment, AAInfo, Ranges);
3988 
3989  if (!UniformBase) {
3990  Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
3991  Index = getValue(Ptr);
3992  }
3993  SDValue Ops[] = { Root, Src0, Mask, Base, Index };
3994  SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
3995  Ops, MMO);
3996 
3997  SDValue OutChain = Gather.getValue(1);
3998  if (!ConstantMemory)
3999  PendingLoads.push_back(OutChain);
4000  setValue(&I, Gather);
4001 }
4002 
4003 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
4004  SDLoc dl = getCurSDLoc();
4005  AtomicOrdering SuccessOrder = I.getSuccessOrdering();
4006  AtomicOrdering FailureOrder = I.getFailureOrdering();
4007  SyncScope::ID SSID = I.getSyncScopeID();
4008 
4009  SDValue InChain = getRoot();
4010 
4011  MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
4012  SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
4013  SDValue L = DAG.getAtomicCmpSwap(
4014  ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
4015  getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
4016  getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
4017  /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
4018 
4019  SDValue OutChain = L.getValue(2);
4020 
4021  setValue(&I, L);
4022  DAG.setRoot(OutChain);
4023 }
4024 
4025 void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
4026  SDLoc dl = getCurSDLoc();
4027  ISD::NodeType NT;
4028  switch (I.getOperation()) {
4029  default: llvm_unreachable("Unknown atomicrmw operation");
4030  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
4031  case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
4032  case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
4033  case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
4034  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
4035  case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
4036  case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
4037  case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
4038  case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
4039  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
4040  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
4041  }
4042  AtomicOrdering Order = I.getOrdering();
4043  SyncScope::ID SSID = I.getSyncScopeID();
4044 
4045  SDValue InChain = getRoot();
4046 
4047  SDValue L =
4048  DAG.getAtomic(NT, dl,
4049  getValue(I.getValOperand()).getSimpleValueType(),
4050  InChain,
4051  getValue(I.getPointerOperand()),
4052  getValue(I.getValOperand()),
4053  I.getPointerOperand(),
4054  /* Alignment=*/ 0, Order, SSID);
4055 
4056  SDValue OutChain = L.getValue(1);
4057 
4058  setValue(&I, L);
4059  DAG.setRoot(OutChain);
4060 }
4061 
4062 void SelectionDAGBuilder::visitFence(const FenceInst &I) {
4063  SDLoc dl = getCurSDLoc();
4064  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4065  SDValue Ops[3];
4066  Ops[0] = getRoot();
4067  Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
4068  TLI.getFenceOperandTy(DAG.getDataLayout()));
4069  Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
4070  TLI.getFenceOperandTy(DAG.getDataLayout()));
4071  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
4072 }
4073 
4074 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
4075  SDLoc dl = getCurSDLoc();
4076  AtomicOrdering Order = I.getOrdering();
4077  SyncScope::ID SSID = I.getSyncScopeID();
4078 
4079  SDValue InChain = getRoot();
4080 
4081  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4082  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4083 
4084  if (I.getAlignment() < VT.getSizeInBits() / 8)
4085  report_fatal_error("Cannot generate unaligned atomic load");
4086 
4087  MachineMemOperand *MMO =
4088  DAG.getMachineFunction().
4089  getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
4090  MachineMemOperand::MOVolatile |
4091  MachineMemOperand::MOLoad,
4092  VT.getStoreSize(),
4093  I.getAlignment() ? I.getAlignment() :
4094  DAG.getEVTAlignment(VT),
4095  AAMDNodes(), nullptr, SSID, Order);
4096 
4097  InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
4098  SDValue L =
4099  DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
4100  getValue(I.getPointerOperand()), MMO);
4101 
4102  SDValue OutChain = L.getValue(1);
4103 
4104  setValue(&I, L);
4105  DAG.setRoot(OutChain);
4106 }
4107 
4108 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
4109  SDLoc dl = getCurSDLoc();
4110 
4111  AtomicOrdering Order = I.getOrdering();
4112  SyncScope::ID SSID = I.getSyncScopeID();
4113 
4114  SDValue InChain = getRoot();
4115 
4116  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4117  EVT VT =
4118  TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
4119 
4120  if (I.getAlignment() < VT.getSizeInBits() / 8)
4121  report_fatal_error("Cannot generate unaligned atomic store");
4122 
4123  SDValue OutChain =
4124  DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
4125  InChain,
4126  getValue(I.getPointerOperand()),
4127  getValue(I.getValueOperand()),
4128  I.getPointerOperand(), I.getAlignment(),
4129  Order, SSID);
4130 
4131  DAG.setRoot(OutChain);
4132 }
4133 
4134 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
4135 /// node.
4136 void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
4137  unsigned Intrinsic) {
4138  // Ignore the callsite's attributes. A specific call site may be marked with
4139  // readnone, but the lowering code will expect the chain based on the
4140  // definition.
4141  const Function *F = I.getCalledFunction();
4142  bool HasChain = !F->doesNotAccessMemory();
4143  bool OnlyLoad = HasChain && F->onlyReadsMemory();
4144 
4145  // Build the operand list.
4146  SmallVector<SDValue, 8> Ops;
4147  if (HasChain) { // If this intrinsic has side-effects, chainify it.
4148  if (OnlyLoad) {
4149  // We don't need to serialize loads against other loads.
4150  Ops.push_back(DAG.getRoot());
4151  } else {
4152  Ops.push_back(getRoot());
4153  }
4154  }
4155 
4156  // Info is set by getTgtMemIntrinsic.
4157  TargetLowering::IntrinsicInfo Info;
4158  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4159  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
4160 
4161  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
4162  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
4163  Info.opc == ISD::INTRINSIC_W_CHAIN)
4164  Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
4165  TLI.getPointerTy(DAG.getDataLayout())));
4166 
4167  // Add all operands of the call to the operand list.
4168  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
4169  SDValue Op = getValue(I.getArgOperand(i));
4170  Ops.push_back(Op);
4171  }
4172 
4173  SmallVector<EVT, 4> ValueVTs;
4174  ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
4175 
4176  if (HasChain)
4177  ValueVTs.push_back(MVT::Other);
4178 
4179  SDVTList VTs = DAG.getVTList(ValueVTs);
4180 
4181  // Create the node.
4182  SDValue Result;
4183  if (IsTgtIntrinsic) {
4184  // This is a target intrinsic that touches memory.
4185  Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(),
4186  VTs, Ops, Info.memVT,
4187  MachinePointerInfo(Info.ptrVal, Info.offset),
4188  Info.align, Info.vol,
4189  Info.readMem, Info.writeMem, Info.size);
4190  } else if (!HasChain) {
4191  Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
4192  } else if (!I.getType()->isVoidTy()) {
4193  Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
4194  } else {
4195  Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
4196  }
4197 
4198  if (HasChain) {
4199  SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
4200  if (OnlyLoad)
4201  PendingLoads.push_back(Chain);
4202  else
4203  DAG.setRoot(Chain);
4204  }
4205 
4206  if (!I.getType()->isVoidTy()) {
4207  if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
4208  EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
4209  Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
4210  } else
4211  Result = lowerRangeToAssertZExt(DAG, I, Result);
4212 
4213  setValue(&I, Result);
4214  }
4215 }
4216 
4217 /// GetSignificand - Get the significand and build it into a floating-point
4218 /// number with exponent of 1:
4219 ///
4220 /// Op = (Op & 0x007fffff) | 0x3f800000;
4221 ///
4222 /// where Op is the i32 bit pattern of the floating-point value.
4223 static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
4224  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4225  DAG.getConstant(0x007fffff, dl, MVT::i32));
4226  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
4227  DAG.getConstant(0x3f800000, dl, MVT::i32));
4228  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
4229 }
4230 
4231 /// GetExponent - Get the exponent:
4232 ///
4233 /// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
4234 ///
4235 /// where Op is the i32 bit pattern of the floating-point value.
4236 static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
4237  const TargetLowering &TLI, const SDLoc &dl) {
4238  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4239  DAG.getConstant(0x7f800000, dl, MVT::i32));
4240  SDValue t1 = DAG.getNode(
4241  ISD::SRL, dl, MVT::i32, t0,
4242  DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
4243  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
4244  DAG.getConstant(127, dl, MVT::i32));
4245  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
4246 }
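// Worked example (illustrative): for Op = 0x40C00000 (6.0f = 1.5 * 2^2),
// GetSignificand yields 0x00400000 | 0x3f800000 = 0x3FC00000 = 1.5f, and
// GetExponent yields float((0x40800000 >> 23) - 127) = float(129 - 127) = 2.0f.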
4247 
4248 /// getF32Constant - Get 32-bit floating point constant.
4249 static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
4250  const SDLoc &dl) {
4251  return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
4252  MVT::f32);
4253 }
4254 
4255 static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
4256  SelectionDAG &DAG) {
4257  // TODO: What fast-math-flags should be set on the floating-point nodes?
4258 
4259  // IntegerPartOfX = (int32_t)t0;
4260  SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
4261 
4262  // FractionalPartOfX = t0 - (float)IntegerPartOfX;
4263  SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4264  SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
4265 
4266  // IntegerPartOfX <<= 23;
4267  IntegerPartOfX = DAG.getNode(
4268  ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4269  DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
4270  DAG.getDataLayout())));
4271 
4272  SDValue TwoToFractionalPartOfX;
4273  if (LimitFloatPrecision <= 6) {
4274  // For floating-point precision of 6:
4275  //
4276  // TwoToFractionalPartOfX =
4277  // 0.997535578f +
4278  // (0.735607626f + 0.252464424f * x) * x;
4279  //
4280  // error 0.0144103317, which is 6 bits
4281  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4282  getF32Constant(DAG, 0x3e814304, dl));
4283  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4284  getF32Constant(DAG, 0x3f3c50c8, dl));
4285  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4286  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4287  getF32Constant(DAG, 0x3f7f5e7e, dl));
4288  } else if (LimitFloatPrecision <= 12) {
4289  // For floating-point precision of 12:
4290  //
4291  // TwoToFractionalPartOfX =
4292  // 0.999892986f +
4293  // (0.696457318f +
4294  // (0.224338339f + 0.792043434e-1f * x) * x) * x;
4295  //
4296  // error 0.000107046256, which is 13 to 14 bits
4297  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4298  getF32Constant(DAG, 0x3da235e3, dl));
4299  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4300  getF32Constant(DAG, 0x3e65b8f3, dl));
4301  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4302  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4303  getF32Constant(DAG, 0x3f324b07, dl));
4304  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4305  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4306  getF32Constant(DAG, 0x3f7ff8fd, dl));
4307  } else { // LimitFloatPrecision <= 18
4308  // For floating-point precision of 18:
4309  //
4310  // TwoToFractionalPartOfX =
4311  // 0.999999982f +
4312  // (0.693148872f +
4313  // (0.240227044f +
4314  // (0.554906021e-1f +
4315  // (0.961591928e-2f +
4316  // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4317  // error 2.47208000*10^(-7), which is better than 18 bits
4318  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4319  getF32Constant(DAG, 0x3924b03e, dl));
4320  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4321  getF32Constant(DAG, 0x3ab24b87, dl));
4322  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4323  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4324  getF32Constant(DAG, 0x3c1d8c17, dl));
4325  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4326  SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4327  getF32Constant(DAG, 0x3d634a1d, dl));
4328  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4329  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4330  getF32Constant(DAG, 0x3e75fe14, dl));
4331  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4332  SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4333  getF32Constant(DAG, 0x3f317234, dl));
4334  SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4335  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4336  getF32Constant(DAG, 0x3f800000, dl));
4337  }
4338 
4339  // Add the exponent into the result in integer domain.
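// (Since IntegerPartOfX was shifted left by 23 above, this integer add bumps
// the biased exponent field, i.e. it multiplies the f32 by 2^IntegerPartOfX.)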
4340  SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
4341  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4342  DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
4343 }
4344 
4345 /// expandExp - Lower an exp intrinsic. Handles the special sequences for
4346 /// limited-precision mode.
4347 static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4348  const TargetLowering &TLI) {
4349  if (Op.getValueType() == MVT::f32 &&
4350  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4351 
4352  // Put the exponent in the right bit position for later addition to the
4353  // final result:
4354  //
4355  // #define LOG2OFe 1.4426950f
4356  // t0 = Op * LOG2OFe
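// (This relies on the identity exp(x) = 2^(x * log2(e));
// getLimitedPrecisionExp2 then splits t0 into integer and fractional parts
// and approximates 2^fraction with a polynomial.)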
4357 
4358  // TODO: What fast-math-flags should be set here?
4359  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
4360  getF32Constant(DAG, 0x3fb8aa3b, dl));
4361  return getLimitedPrecisionExp2(t0, dl, DAG);
4362  }
4363 
4364  // No special expansion.
4365  return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
4366 }
4367 
4368 /// expandLog - Lower a log intrinsic. Handles the special sequences for
4369 /// limited-precision mode.
4370 static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4371  const TargetLowering &TLI) {
4372 
4373  // TODO: What fast-math-flags should be set on the floating-point nodes?
4374 
4375  if (Op.getValueType() == MVT::f32 &&
4376  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4377  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4378 
4379  // Scale the exponent by log(2) [0.69314718f].
4380  SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4381  SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4382  getF32Constant(DAG, 0x3f317218, dl));
4383 
4384  // Get the significand and build it into a floating-point number with
4385  // exponent of 1.
4386  SDValue X = GetSignificand(DAG, Op1, dl);
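// (This relies on log(2^e * m) = e*log(2) + log(m) with m in [1,2); the
// minimax polynomials below approximate log(m) at the requested precision.)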
4387 
4388  SDValue LogOfMantissa;
4389  if (LimitFloatPrecision <= 6) {
4390  // For floating-point precision of 6:
4391  //
4392  // LogOfMantissa =
4393  // -1.1609546f +
4394  // (1.4034025f - 0.23903021f * x) * x;
4395  //
4396  // error 0.0034276066, which is better than 8 bits
4397  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4398  getF32Constant(DAG, 0xbe74c456, dl));
4399  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4400  getF32Constant(DAG, 0x3fb3a2b1, dl));
4401  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4402  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4403  getF32Constant(DAG, 0x3f949a29, dl));
4404  } else if (LimitFloatPrecision <= 12) {
4405  // For floating-point precision of 12:
4406  //
4407  // LogOfMantissa =
4408  // -1.7417939f +
4409  // (2.8212026f +
4410  // (-1.4699568f +
4411  // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
4412  //
4413  // error 0.000061011436, which is 14 bits
4414  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4415  getF32Constant(DAG, 0xbd67b6d6, dl));
4416  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4417  getF32Constant(DAG, 0x3ee4f4b8, dl));
4418  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4419  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4420  getF32Constant(DAG, 0x3fbc278b, dl));
4421  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4422  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4423  getF32Constant(DAG, 0x40348e95, dl));
4424  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4425  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4426  getF32Constant(DAG, 0x3fdef31a, dl));
4427  } else { // LimitFloatPrecision <= 18
4428  // For floating-point precision of 18:
4429  //
4430  // LogOfMantissa =
4431  // -2.1072184f +
4432  // (4.2372794f +
4433  // (-3.7029485f +
4434  // (2.2781945f +
4435  // (-0.87823314f +
4436  // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
4437  //
4438  // error 0.0000023660568, which is better than 18 bits
4439  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4440  getF32Constant(DAG, 0xbc91e5ac, dl));
4441  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4442  getF32Constant(DAG, 0x3e4350aa, dl));
4443  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4444  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4445  getF32Constant(DAG, 0x3f60d3e3, dl));
4446  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4447  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4448  getF32Constant(DAG, 0x4011cdf0, dl));
4449  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4450  SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4451  getF32Constant(DAG, 0x406cfd1c, dl));
4452  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4453  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4454  getF32Constant(DAG, 0x408797cb, dl));
4455  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4456  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4457  getF32Constant(DAG, 0x4006dcab, dl));
4458  }
4459 
4460  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
4461  }
4462 
4463  // No special expansion.
4464  return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
4465 }
4466 
4467 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
4468 /// limited-precision mode.
4469 static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4470  const TargetLowering &TLI) {
4471 
4472  // TODO: What fast-math-flags should be set on the floating-point nodes?
4473 
4474  if (Op.getValueType() == MVT::f32 &&
4475  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4476  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4477 
4478  // Get the exponent.
4479  SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
4480 
4481  // Get the significand and build it into a floating-point number with
4482  // exponent of 1.
4483  SDValue X = GetSignificand(DAG, Op1, dl);
4484 
4485  // Different possible minimax approximations of significand in
4486  // floating-point for various degrees of accuracy over [1,2].
4487  SDValue Log2ofMantissa;
4488  if (LimitFloatPrecision <= 6) {
4489  // For floating-point precision of 6:
4490  //
4491  // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
4492  //
4493  // error 0.0049451742, which is more than 7 bits
4494  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4495  getF32Constant(DAG, 0xbeb08fe0, dl));
4496  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4497  getF32Constant(DAG, 0x40019463, dl));
4498  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4499  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4500  getF32Constant(DAG, 0x3fd6633d, dl));
4501  } else if (LimitFloatPrecision <= 12) {
4502  // For floating-point precision of 12:
4503  //
4504  // Log2ofMantissa =
4505  // -2.51285454f +
4506  // (4.07009056f +
4507  // (-2.12067489f +
4508  // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
4509  //
4510  // error 0.0000876136000, which is better than 13 bits
4511  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4512  getF32Constant(DAG, 0xbda7262e, dl));
4513  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4514  getF32Constant(DAG, 0x3f25280b, dl));
4515  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4516  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4517  getF32Constant(DAG, 0x4007b923, dl));
4518  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4519  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4520  getF32Constant(DAG, 0x40823e2f, dl));
4521  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4522  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4523  getF32Constant(DAG, 0x4020d29c, dl));
4524  } else { // LimitFloatPrecision <= 18
4525  // For floating-point precision of 18:
4526  //
4527  // Log2ofMantissa =
4528  // -3.0400495f +
4529  // (6.1129976f +
4530  // (-5.3420409f +
4531  // (3.2865683f +
4532  // (-1.2669343f +
4533  // (0.27515199f -
4534  // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
4535  //
4536  // error 0.0000018516, which is better than 18 bits
4537  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4538  getF32Constant(DAG, 0xbcd2769e, dl));
4539  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4540  getF32Constant(DAG, 0x3e8ce0b9, dl));
4541  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4542  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4543  getF32Constant(DAG, 0x3fa22ae7, dl));
4544  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4545  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4546  getF32Constant(DAG, 0x40525723, dl));
4547  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4548  SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4549  getF32Constant(DAG, 0x40aaf200, dl));
4550  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4551  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4552  getF32Constant(DAG, 0x40c39dad, dl));
4553  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4554  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4555  getF32Constant(DAG, 0x4042902c, dl));
4556  }
4557 
4558  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
4559  }
4560 
4561  // No special expansion.
4562  return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
4563 }
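Each getF32Constant argument is the raw IEEE-754 encoding of one polynomial coefficient. A quick standalone check (not part of this file) that a hex pattern matches the value quoted in the comments, using the 6-bit Log2ofMantissa coefficients above as an example:

#include <cstdint>
#include <cstdio>
#include <cstring>

static float f32FromBits(uint32_t Bits) {
  float F;
  std::memcpy(&F, &Bits, sizeof(F)); // reinterpret the bits as an f32
  return F;
}

int main() {
  std::printf("%.7f\n", f32FromBits(0xbeb08fe0)); // ~ -0.3448, the x coefficient
  std::printf("%.7f\n", f32FromBits(0x40019463)); // ~  2.0247
  std::printf("%.7f\n", f32FromBits(0x3fd6633d)); // ~  1.6749
}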
4564 
4565 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
4566 /// limited-precision mode.
4567 static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4568  const TargetLowering &TLI) {
4569 
4570  // TODO: What fast-math-flags should be set on the floating-point nodes?
4571 
4572  if (Op.getValueType() == MVT::f32 &&
4573  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4574  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4575 
4576  // Scale the exponent by log10(2) [0.30102999f].
4577  SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4578  SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4579  getF32Constant(DAG, 0x3e9a209a, dl));
4580 
4581  // Get the significand and build it into a floating-point number with
4582  // exponent of 1.
4583  SDValue X = GetSignificand(DAG, Op1, dl);
4584 
4585  SDValue Log10ofMantissa;
4586  if (LimitFloatPrecision <= 6) {
4587  // For floating-point precision of 6:
4588  //
4589  // Log10ofMantissa =
4590  // -0.50419619f +
4591  // (0.60948995f - 0.10380950f * x) * x;
4592  //
4593  // error 0.0014886165, which is 6 bits
4594  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4595  getF32Constant(DAG, 0xbdd49a13, dl));
4596  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4597  getF32Constant(DAG, 0x3f1c0789, dl));
4598  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4599  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4600  getF32Constant(DAG, 0x3f011300, dl));
4601  } else if (LimitFloatPrecision <= 12) {
4602  // For floating-point precision of 12:
4603  //
4604  // Log10ofMantissa =
4605  // -0.64831180f +
4606  // (0.91751397f +
4607  // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
4608  //
4609  // error 0.00019228036, which is better than 12 bits
4610  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4611  getF32Constant(DAG, 0x3d431f31, dl));
4612  SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4613  getF32Constant(DAG, 0x3ea21fb2, dl));
4614  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4615  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4616  getF32Constant(DAG, 0x3f6ae232, dl));
4617  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4618  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4619  getF32Constant(DAG, 0x3f25f7c3, dl));
4620  } else { // LimitFloatPrecision <= 18
4621  // For floating-point precision of 18:
4622  //
4623  // Log10ofMantissa =
4624  // -0.84299375f +
4625  // (1.5327582f +
4626  // (-1.0688956f +
4627  // (0.49102474f +
4628  // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
4629  //
4630  // error 0.0000037995730, which is better than 18 bits
4631  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4632  getF32Constant(DAG, 0x3c5d51ce, dl));
4633  SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4634  getF32Constant(DAG, 0x3e00685a, dl));
4635  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4636  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4637  getF32Constant(DAG, 0x3efb6798, dl));
4638  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4639  SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4640  getF32Constant(DAG, 0x3f88d192, dl));
4641  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4642  SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4643  getF32Constant(DAG, 0x3fc4316c, dl));
4644  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4645  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
4646  getF32Constant(DAG, 0x3f57ce70, dl));
4647  }
4648 
4649  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
4650  }
4651 
4652  // No special expansion.
4653  return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
4654 }
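expandLog10 relies on the identity log10(x) = e * log10(2) + log10(m) for x = m * 2^e with m in [1,2); the constant 0x3e9a209a fed to getF32Constant above encodes log10(2) ~ 0.30102999f. A standalone numeric check of the decomposition (illustrative only, not part of this file):

#include <cmath>
#include <cstdio>

int main() {
  float X = 123.456f;
  int E;
  float M = std::frexp(X, &E); // M in [0.5,1), X == M * 2^E
  M *= 2.0f;                   // rescale the significand into [1,2)
  E -= 1;                      // ...and compensate in the exponent
  float Via = (float)E * 0.30102999f + std::log10(M);
  std::printf("%f vs %f\n", std::log10(X), Via); // both ~2.0915
}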
4655 
4656 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
4657 /// limited-precision mode.
4658 static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4659  const TargetLowering &TLI) {
4660  if (Op.getValueType() == MVT::f32 &&
4661  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
4662  return getLimitedPrecisionExp2(Op, dl, DAG);
4663 
4664  // No special expansion.
4665  return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
4666 }
4667 
4668 /// expandPow - Lower a pow intrinsic. Handles the special sequences for
4669 /// limited-precision mode with x == 10.0f.
4670 static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
4671  SelectionDAG &DAG, const TargetLowering &TLI) {
4672  bool IsExp10 = false;
4673  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
4674  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4675  if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
4676  APFloat Ten(10.0f);
4677  IsExp10 = LHSC->isExactlyValue(Ten);
4678  }
4679  }
4680 
4681  // TODO: What fast-math-flags should be set on the FMUL node?
4682  if (IsExp10) {
4683  // Put the exponent in the right bit position for later addition to the
4684  // final result:
4685  //
4686  // #define LOG2OF10 3.3219281f
4687  // t0 = Op * LOG2OF10;
4688  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
4689  getF32Constant(DAG, 0x40549a78, dl));
4690  return getLimitedPrecisionExp2(t0, dl, DAG);
4691  }
4692 
4693  // No special expansion.
4694  return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
4695 }
4696 
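The special case above folds pow(10, x) into the exp2 expansion via the identity 10^x = 2^(x * log2(10)); the bit pattern 0x40549a78 encodes log2(10) ~ 3.3219281f, the LOG2OF10 in the comment. A standalone check of the identity (illustrative only):

#include <cmath>
#include <cstdio>

int main() {
  float X = 2.5f;
  std::printf("%f vs %f\n", std::pow(10.0f, X),
              std::exp2(X * 3.3219281f)); // both ~316.2278
}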
4697 
4698 /// ExpandPowI - Expand a llvm.powi intrinsic.
4699 static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
4700  SelectionDAG &DAG) {
4701  // If RHS is a constant, we can expand this out to a multiplication tree;
4702  // otherwise we end up lowering to a call to __powidf2 (for example). When
4703  // optimizing for size, we only do this if the expansion produces a small
4704  // number of multiplies; otherwise we fall back to the libcall.
4705  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4706  // Get the exponent as a positive value.
4707  unsigned Val = RHSC->getSExtValue();
4708  if ((int)Val < 0) Val = -Val;
4709 
4710  // powi(x, 0) -> 1.0
4711  if (Val == 0)
4712  return DAG.getConstantFP(1.0, DL, LHS.getValueType());
4713 
4714  const Function *F = DAG.getMachineFunction().getFunction();
4715  if (!F->optForSize() ||
4716  // If optimizing for size, don't insert too many multiplies.
4717  // This inserts up to 5 multiplies.
4718  countPopulation(Val) + Log2_32(Val) < 7) {
4719  // We use the simple binary decomposition method to generate the multiply
4720  // sequence. There are more optimal ways to do this (for example,
4721  // powi(x,15) generates one more multiply than it should), but this has
4722  // the benefit of being both really simple and much better than a libcall.
4723  SDValue Res; // Logically starts equal to 1.0
4724  SDValue CurSquare = LHS;
4725  // TODO: Intrinsics should have fast-math-flags that propagate to these
4726  // nodes.
4727  while (Val) {
4728  if (Val & 1) {
4729  if (Res.getNode())
4730  Res = DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
4731  else
4732  Res = CurSquare; // 1.0*CurSquare.
4733  }
4734 
4735  CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
4736  CurSquare, CurSquare);
4737  Val >>= 1;
4738  }
4739 
4740  // If the original was negative, invert the result, producing 1/(x*x*x).
4741  if (RHSC->getSExtValue() < 0)
4742  Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
4743  DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
4744  return Res;
4745  }
4746  }
4747 
4748  // Otherwise, expand to a libcall.
4749  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
4750 }
4751 
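For constant exponents, ExpandPowI builds a square-and-multiply tree: with p set bits in Val and k = floor(log2(Val)), that is p - 1 combining multiplies plus k squarings, so the countPopulation(Val) + Log2_32(Val) < 7 guard above caps the size-optimized expansion at five useful multiplies. A scalar reference version of the same binary decomposition (a sketch on double; the helper name is illustrative, not part of this file):

#include <cstdio>

static double powiRef(double X, int N) {
  unsigned Val = N < 0 ? 0u - (unsigned)N : (unsigned)N;
  double Res = 1.0;         // logically starts at 1.0, as in the DAG version
  double CurSquare = X;
  while (Val) {
    if (Val & 1)
      Res *= CurSquare;     // fold in this power-of-two factor
    CurSquare *= CurSquare; // x, x^2, x^4, x^8, ...
    Val >>= 1;
  }
  return N < 0 ? 1.0 / Res : Res; // invert for negative exponents
}

int main() {
  std::printf("%f %f\n", powiRef(2.0, 10), powiRef(2.0, -3)); // 1024.0 0.125
}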
4752 // getUnderlyingArgReg - Find the underlying register used for a truncated
4753 // or bitcasted argument.
4754 static unsigned getUnderlyingArgReg(const SDValue &N) {
4755  switch (N.getOpcode()) {
4756  case ISD::CopyFromReg:
4757  return cast<RegisterSDNode>(N.getOperand(1))->getReg();
4758  case ISD::BITCAST:
4759  case ISD::AssertZext:
4760  case ISD::AssertSext:
4761  case ISD::TRUNCATE:
4762  return getUnderlyingArgReg(N.getOperand(0));
4763  default:
4764  return 0;
4765  }
4766 }
4767 
4768 /// If the DbgValueInst is a dbg_value of a function argument, create the
4769 /// corresponding DBG_VALUE machine instruction for it now. At the end of
4770 /// instruction selection, they will be inserted into the entry BB.
4771 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
4772  const Value *V, DILocalVariable *Variable, DIExpression *Expr,
4773  DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
4774  const Argument *Arg = dyn_cast<Argument>(V);
4775  if (!Arg)
4776  return false;
4777 
4778  MachineFunction &MF = DAG.getMachineFunction();
4779  const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
4780 
4781  // Ignore inlined function arguments here.
4782  //
4783  // FIXME: Should we be checking DL->inlinedAt() to determine this?
4784  if (!Variable->getScope()->getSubprogram()->describes(MF.getFunction()))
4785  return false;
4786 
4787  bool IsIndirect = false;
4788  Optional<MachineOperand> Op;
4789  // Some arguments' frame index is recorded during argument lowering.
4790  int FI = FuncInfo.getArgumentFrameIndex(Arg);
4791  if (FI != INT_MAX)
4792  Op = MachineOperand::CreateFI(FI);
4793 
4794  if (!Op && N.getNode()) {
4795  unsigned Reg = getUnderlyingArgReg(N);
4796  if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
4797  MachineRegisterInfo &RegInfo = MF.getRegInfo();
4798  unsigned PR = RegInfo.getLiveInPhysReg(Reg);
4799  if (PR)
4800  Reg = PR;
4801  }
4802  if (Reg) {
4803  Op = MachineOperand::CreateReg(Reg, false);
4804  IsIndirect = IsDbgDeclare;
4805  }
4806  }
4807 
4808  if (!Op) {
4809  // Check if ValueMap has reg number.
4810  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
4811  if (VMI != FuncInfo.ValueMap.end()) {
4812  const auto &TLI = DAG.getTargetLoweringInfo();
4813  RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
4814  V->getType(), isABIRegCopy(V));
4815  unsigned NumRegs =
4816  std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0);
4817  if (NumRegs > 1) {
4818  unsigned I = 0;
4819  unsigned Offset = 0;
4820  auto RegisterVT = RFV.RegVTs.begin();
4821  for (auto RegCount : RFV.RegCount) {
4822  unsigned RegisterSize = (RegisterVT++)->getSizeInBits();
4823  for (unsigned E = I + RegCount; I != E; ++I) {
4824  // The vregs are guaranteed to be allocated in sequence.
4825  Op = MachineOperand::CreateReg(VMI->second + I, false);
4826  auto *FragmentExpr = DIExpression::createFragmentExpression(
4827  Expr, Offset, RegisterSize);
4828  FuncInfo.ArgDbgValues.push_back(
4829  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
4830  Op->getReg(), Variable, FragmentExpr));
4831  Offset += RegisterSize;
4832  }
4833  }
4834  return true;
4835  }
4836  Op = MachineOperand::CreateReg(VMI->second, false);
4837  IsIndirect = IsDbgDeclare;
4838  }
4839  }
4840 
4841  if (!Op && N.getNode())
4842  // Check if frame index is available.
4843  if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
4844  if (FrameIndexSDNode *FINode =
4845  dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
4846  Op = MachineOperand::CreateFI(FINode->getIndex());
4847 
4848  if (!Op)
4849  return false;
4850 
4851  assert(Variable->isValidLocationForIntrinsic(DL) &&
4852  "Expected inlined-at fields to agree");
4853  if (Op->isReg())
4854  FuncInfo.ArgDbgValues.push_back(
4855  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
4856  Op->getReg(), Variable, Expr));
4857  else
4858  FuncInfo.ArgDbgValues.push_back(
4859  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
4860  .add(*Op)
4861  .addImm(0)
4862  .addMetadata(Variable)
4863  .addMetadata(Expr));
4864 
4865  return true;
4866 }
4867 
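In the multi-register branch above, a value spread over several registers gets one DBG_VALUE per register, each tagged via DIExpression::createFragmentExpression with the bit range it covers. A sketch of the offset bookkeeping with made-up inputs (e.g. an i64 argument split into two 32-bit vregs; illustrative only):

#include <cstdio>

int main() {
  unsigned RegCount = 2, RegisterSize = 32; // hypothetical 2 x i32 split
  unsigned Offset = 0;
  for (unsigned I = 0; I != RegCount; ++I) {
    std::printf("vreg #%u -> fragment(offset=%u, size=%u)\n",
                I, Offset, RegisterSize);
    Offset += RegisterSize; // the next register describes the next 32 bits
  }
  // Prints fragment (0,32) then (32,32), matching the loop above.
}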
4868 /// Return the appropriate SDDbgValue based on N.
4869 SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
4870  DILocalVariable *Variable,
4871  DIExpression *Expr,
4872  const DebugLoc &dl,
4873  unsigned DbgSDNodeOrder) {
4874  if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
4875  // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
4876  // stack slot locations as such instead of as indirectly addressed
4877  // locations.
4878  return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl,
4879  DbgSDNodeOrder);
4880  }
4881  return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl,
4882  DbgSDNodeOrder);
4883 }
4884 
4885 // Visual Studio defines setjmp as _setjmp
4886 #if defined(_MSC_VER) && defined(setjmp) && \
4887  !defined(setjmp_undefined_for_msvc)
4888 # pragma push_macro("setjmp")
4889 # undef setjmp
4890 # define setjmp_undefined_for_msvc
4891 #endif
4892 
4893 /// Lower the call to the specified intrinsic function. If we want to emit this
4894 /// as a call to a named external function, return the name. Otherwise, lower it
4895 /// and return null.
4896 const char *
4897 SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
4898  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4899  SDLoc sdl = getCurSDLoc();
4900  DebugLoc dl = getCurDebugLoc();
4901  SDValue Res;
4902 
4903  switch (Intrinsic) {
4904  default:
4905  // By default, turn this into a target intrinsic node.
4906  visitTargetIntrinsic(I, Intrinsic);
4907  return nullptr;
4908  case Intrinsic::vastart: visitVAStart(I); return nullptr;
4909  case Intrinsic::vaend: visitVAEnd(I); return nullptr;
4910  case Intrinsic::vacopy: visitVACopy(I); return nullptr;
4911  case Intrinsic::returnaddress:
4912  setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
4913  TLI.getPointerTy(DAG.getDataLayout()),
4914  getValue(I.getArgOperand(0))));
4915  return nullptr;
4916  case Intrinsic::addressofreturnaddress:
4917  setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
4918  TLI.getPointerTy(DAG.getDataLayout())));
4919  return nullptr;
4920  case Intrinsic::frameaddress:
4921  setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
4922  TLI.getPointerTy(DAG.getDataLayout()),
4923  getValue(I.getArgOperand(0))));
4924  return nullptr;
4925  case Intrinsic::read_register: {
4926  Value *Reg = I.getArgOperand(0);
4927  SDValue Chain = getRoot();
4928  SDValue RegName =
4929  DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
4930  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4931  Res = DAG.getNode(ISD::READ_REGISTER, sdl,
4932  DAG.getVTList(VT, MVT::Other), Chain, RegName);
4933  setValue(&I, Res);
4934  DAG.setRoot(Res.getValue(1));
4935  return nullptr;
4936  }
4937  case Intrinsic::write_register: {
4938  Value *Reg = I.getArgOperand(0);
4939  Value *RegValue = I.getArgOperand(1);
4940  SDValue Chain = getRoot();
4941  SDValue RegName =
4942  DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
4943  DAG.setRoot(DAG.getNode(ISD::WRITE_REGISTER, sdl, MVT::Other, Chain,
4944  RegName, getValue(RegValue)));
4945  return nullptr;
4946  }
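 // The string-indexing trick below yields "_setjmp" when the target uses the
 // underscored name (index 0) and "setjmp" otherwise (index 1 skips the
 // leading '_'); likewise for "_longjmp".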
4947  case Intrinsic::setjmp:
4948  return &"_setjmp"[!TLI.usesUnderscoreSetJmp()];
4949  case Intrinsic::longjmp:
4950  return &"_longjmp"[!TLI.usesUnderscoreLongJmp()];
4951  case Intrinsic::memcpy: {
4952  SDValue Op1 = getValue(I.getArgOperand(0));
4953  SDValue Op2 = getValue(I.getArgOperand(1));
4954  SDValue Op3 = getValue(I.getArgOperand(2));
4955  unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
4956  if (!Align)
4957  Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment.
4958  bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
4959  bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
4960  SDValue MC = DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
4961  false, isTC,
4962  MachinePointerInfo(I.getArgOperand(0)),
4963  MachinePointerInfo(I.getArgOperand(1)));
4964  updateDAGForMaybeTailCall(MC);
4965  return nullptr;
4966  }
4967  case Intrinsic::memset: {
4968  SDValue Op1 = getValue(I.getArgOperand(0));
4969  SDValue Op2 = getValue(I.getArgOperand(1));
4970  SDValue Op3 = getValue(I.getArgOperand(2));
4971  unsigned Align = cast<ConstantInt>(I.getArgOperand(3))->getZExtValue();
4972  if (!Align)
4973  Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment.
4974  bool isVol = cast<ConstantInt>(I.getArgOperand(4))->getZExtValue();
4975  bool isTC = I.isTailCall() && isInTailCallPosition(&I, DAG.getTarget());
4976  SDValue MS = DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol,
4977  isTC, MachinePointerInfo(I.getArgOperand(0)));
4978  updateDAGForMaybeTailCall(MS);
4979  return nullptr;
4980  }
4981  case Intrinsic::memmove: {
4982  SDValue Op1 = getValue(I.getArgOperand(0));
4983  SDValue Op2 = getValue(I.getArgOperand(1));