LLVM 7.0.0svn
SelectionDAGBuilder.cpp
1 //===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements routines for translating from LLVM IR into SelectionDAG IR.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SelectionDAGBuilder.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/BitVector.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/None.h"
21 #include "llvm/ADT/Optional.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/ADT/Triple.h"
28 #include "llvm/ADT/Twine.h"
33 #include "llvm/Analysis/Loads.h"
38 #include "llvm/CodeGen/Analysis.h"
57 #include "llvm/CodeGen/StackMaps.h"
66 #include "llvm/IR/Argument.h"
67 #include "llvm/IR/Attributes.h"
68 #include "llvm/IR/BasicBlock.h"
69 #include "llvm/IR/CFG.h"
70 #include "llvm/IR/CallSite.h"
71 #include "llvm/IR/CallingConv.h"
72 #include "llvm/IR/Constant.h"
73 #include "llvm/IR/ConstantRange.h"
74 #include "llvm/IR/Constants.h"
75 #include "llvm/IR/DataLayout.h"
77 #include "llvm/IR/DebugLoc.h"
78 #include "llvm/IR/DerivedTypes.h"
79 #include "llvm/IR/Function.h"
81 #include "llvm/IR/InlineAsm.h"
82 #include "llvm/IR/InstrTypes.h"
83 #include "llvm/IR/Instruction.h"
84 #include "llvm/IR/Instructions.h"
85 #include "llvm/IR/IntrinsicInst.h"
86 #include "llvm/IR/Intrinsics.h"
87 #include "llvm/IR/LLVMContext.h"
88 #include "llvm/IR/Metadata.h"
89 #include "llvm/IR/Module.h"
90 #include "llvm/IR/Operator.h"
91 #include "llvm/IR/Statepoint.h"
92 #include "llvm/IR/Type.h"
93 #include "llvm/IR/User.h"
94 #include "llvm/IR/Value.h"
95 #include "llvm/MC/MCContext.h"
96 #include "llvm/MC/MCSymbol.h"
99 #include "llvm/Support/Casting.h"
100 #include "llvm/Support/CodeGen.h"
102 #include "llvm/Support/Compiler.h"
103 #include "llvm/Support/Debug.h"
105 #include "llvm/Support/MathExtras.h"
110 #include <algorithm>
111 #include <cassert>
112 #include <cstddef>
113 #include <cstdint>
114 #include <cstring>
115 #include <iterator>
116 #include <limits>
117 #include <numeric>
118 #include <tuple>
119 #include <utility>
120 #include <vector>
121 
122 using namespace llvm;
123 
124 #define DEBUG_TYPE "isel"
125 
126 /// LimitFloatPrecision - Generate low-precision inline sequences for
127 /// some float libcalls (6, 8 or 12 bits).
128 static unsigned LimitFloatPrecision;
129 
130 static cl::opt<unsigned, true>
131  LimitFPPrecision("limit-float-precision",
132  cl::desc("Generate low-precision inline sequences "
133  "for some float libcalls"),
134  cl::location(LimitFloatPrecision), cl::Hidden,
135  cl::init(0));
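// Editorial note (a sketch, not from the original source): since this is a
// hidden cl::opt, it can be passed to llc, e.g.
//   llc -limit-float-precision=8 input.ll
// to permit roughly 8-bit-accurate inline expansions of some float libcalls.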
136 
137 static cl::opt<unsigned> SwitchPeelThreshold(
138  "switch-peel-threshold", cl::Hidden, cl::init(66),
139  cl::desc("Set the case probability threshold for peeling the case from a "
140  "switch statement. A value greater than 100 will void this "
141  "optimization"));
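// Editorial note (a sketch, not from the original source): as a hidden flag
// this can likewise be tuned, e.g.
//   llc -switch-peel-threshold=75 input.ll
// so a switch case is peeled only when its probability exceeds 75%.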
142 
143 // Limit the width of DAG chains. This is important in general to prevent
144 // DAG-based analysis from blowing up. For example, alias analysis and
145 // load clustering may not complete in reasonable time. It is difficult to
146 // recognize and avoid this situation within each individual analysis, and
147 // future analyses are likely to have the same behavior. Limiting DAG width is
148 // the safe approach and will be especially important with global DAGs.
149 //
150 // MaxParallelChains default is arbitrarily high to avoid affecting
151 // optimization, but could be lowered to improve compile time. Any ld-ld-st-st
152 // sequence over this should have been converted to llvm.memcpy by the
153 // frontend. It is easy to induce this behavior with .ll code such as:
154 // %buffer = alloca [4096 x i8]
155 // %data = load [4096 x i8]* %argPtr
156 // store [4096 x i8] %data, [4096 x i8]* %buffer
157 static const unsigned MaxParallelChains = 64;
158 
159 // Return true if the Value passed requires ABI mangling: it is a parameter
160 // to, or a return value from, a function that is not an intrinsic.
161 static bool isABIRegCopy(const Value *V) {
162  const bool IsRetInst = V && isa<ReturnInst>(V);
163  const bool IsCallInst = V && isa<CallInst>(V);
164  const bool IsInLineAsm =
165  IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm();
166  const bool IsIndirectFunctionCall =
167  IsCallInst && !IsInLineAsm &&
168  !static_cast<const CallInst *>(V)->getCalledFunction();
169  // It is possible that the call instruction is an inline asm statement or an
170  // indirect function call in which case the return value of
171  // getCalledFunction() would be nullptr.
172  const bool IsIntrinsicCall =
173  IsCallInst && !IsInLineAsm && !IsIndirectFunctionCall &&
174  static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() !=
175  Intrinsic::not_intrinsic;
176 
177  return IsRetInst || (IsCallInst && (!IsInLineAsm && !IsIntrinsicCall));
178 }
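// Editorial illustration (assumed IR, not from the source): given
//   %a = call i32 @llvm.ctlz.i32(i32 %x, i1 true)  ; intrinsic call -> false
//   %b = call i32 %fptr(i32 %x)                    ; indirect call  -> true
//   ret i32 %b                                     ; return         -> true
// the intrinsic result needs no ABI mangling, while the indirect-call result
// and the returned value do.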
179 
180 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
181  const SDValue *Parts, unsigned NumParts,
182  MVT PartVT, EVT ValueVT, const Value *V,
183  bool IsABIRegCopy);
184 
185 /// getCopyFromParts - Create a value that contains the specified legal parts
186 /// combined into the value they represent. If the parts combine to a type
187 /// larger than ValueVT then AssertOp can be used to specify whether the extra
188 /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
189 /// (ISD::AssertSext).
190 static SDValue getCopyFromParts(SelectionDAG &DAG, const SDLoc &DL,
191  const SDValue *Parts, unsigned NumParts,
192  MVT PartVT, EVT ValueVT, const Value *V,
193  Optional<ISD::NodeType> AssertOp = None,
194  bool IsABIRegCopy = false) {
195  if (ValueVT.isVector())
196  return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
197  PartVT, ValueVT, V, IsABIRegCopy);
198 
199  assert(NumParts > 0 && "No parts to assemble!");
200  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
201  SDValue Val = Parts[0];
202 
203  if (NumParts > 1) {
204  // Assemble the value from multiple parts.
205  if (ValueVT.isInteger()) {
206  unsigned PartBits = PartVT.getSizeInBits();
207  unsigned ValueBits = ValueVT.getSizeInBits();
208 
209  // Assemble the power of 2 part.
210  unsigned RoundParts = NumParts & (NumParts - 1) ?
211  1 << Log2_32(NumParts) : NumParts;
212  unsigned RoundBits = PartBits * RoundParts;
213  EVT RoundVT = RoundBits == ValueBits ?
214  ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
215  SDValue Lo, Hi;
216 
217  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
218 
219  if (RoundParts > 2) {
220  Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
221  PartVT, HalfVT, V);
222  Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
223  RoundParts / 2, PartVT, HalfVT, V);
224  } else {
225  Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
226  Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
227  }
228 
229  if (DAG.getDataLayout().isBigEndian())
230  std::swap(Lo, Hi);
231 
232  Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
233 
234  if (RoundParts < NumParts) {
235  // Assemble the trailing non-power-of-2 part.
236  unsigned OddParts = NumParts - RoundParts;
237  EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
238  Hi = getCopyFromParts(DAG, DL,
239  Parts + RoundParts, OddParts, PartVT, OddVT, V);
240 
241  // Combine the round and odd parts.
242  Lo = Val;
243  if (DAG.getDataLayout().isBigEndian())
244  std::swap(Lo, Hi);
245  EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
246  Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
247  Hi =
248  DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
249  DAG.getConstant(Lo.getValueSizeInBits(), DL,
250  TLI.getPointerTy(DAG.getDataLayout())));
251  Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
252  Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
253  }
254  } else if (PartVT.isFloatingPoint()) {
255  // FP split into multiple FP parts (for ppcf128)
256  assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
257  "Unexpected split");
258  SDValue Lo, Hi;
259  Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
260  Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
261  if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
262  std::swap(Lo, Hi);
263  Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
264  } else {
265  // FP split into integer parts (soft fp)
266  assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
267  !PartVT.isVector() && "Unexpected split");
268  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
269  Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
270  }
271  }
272 
273  // There is now one part, held in Val. Correct it to match ValueVT.
274  // PartEVT is the type of the register class that holds the value.
275  // ValueVT is the type of the inline asm operation.
276  EVT PartEVT = Val.getValueType();
277 
278  if (PartEVT == ValueVT)
279  return Val;
280 
281  if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
282  ValueVT.bitsLT(PartEVT)) {
283  // For an FP value in an integer part, we need to truncate to the right
284  // width first.
285  PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
286  Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
287  }
288 
289  // Handle types that have the same size.
290  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
291  return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
292 
293  // Handle types with different sizes.
294  if (PartEVT.isInteger() && ValueVT.isInteger()) {
295  if (ValueVT.bitsLT(PartEVT)) {
296  // For a truncate, see if we have any information to
297  // indicate whether the truncated bits will always be
298  // zero or sign-extension.
299  if (AssertOp.hasValue())
300  Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
301  DAG.getValueType(ValueVT));
302  return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
303  }
304  return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
305  }
306 
307  if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
308  // FP_ROUND's are always exact here.
309  if (ValueVT.bitsLT(Val.getValueType()))
310  return DAG.getNode(
311  ISD::FP_ROUND, DL, ValueVT, Val,
312  DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
313 
314  return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
315  }
316 
317  llvm_unreachable("Unknown mismatch!");
318 }
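// Editorial walk-through (sketch): assembling ValueVT = i64 from NumParts = 2
// parts of PartVT = i32 takes the integer path above: RoundParts = 2, the two
// i32 parts become Lo/Hi (swapped on big-endian targets), BUILD_PAIR forms
// the i64, and since PartEVT == ValueVT it is returned unchanged.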
319 
320 static void diagnosePossiblyInvalidConstraint(LLVMContext &Ctx, const Value *V,
321  const Twine &ErrMsg) {
322  const Instruction *I = dyn_cast_or_null<Instruction>(V);
323  if (!V)
324  return Ctx.emitError(ErrMsg);
325 
326  const char *AsmError = ", possible invalid constraint for vector type";
327  if (const CallInst *CI = dyn_cast<CallInst>(I))
328  if (isa<InlineAsm>(CI->getCalledValue()))
329  return Ctx.emitError(I, ErrMsg + AsmError);
330 
331  return Ctx.emitError(I, ErrMsg);
332 }
333 
334 /// getCopyFromPartsVector - Create a value that contains the specified legal
335 /// parts combined into the value they represent. If the parts combine to a
336 /// type larger than ValueVT then AssertOp can be used to specify whether the
337 /// extra bits are known to be zero (ISD::AssertZext) or sign extended from
338 /// ValueVT (ISD::AssertSext).
339 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
340  const SDValue *Parts, unsigned NumParts,
341  MVT PartVT, EVT ValueVT, const Value *V,
342  bool IsABIRegCopy) {
343  assert(ValueVT.isVector() && "Not a vector value");
344  assert(NumParts > 0 && "No parts to assemble!");
345  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
346  SDValue Val = Parts[0];
347 
348  // Handle a multi-element vector.
349  if (NumParts > 1) {
350  EVT IntermediateVT;
351  MVT RegisterVT;
352  unsigned NumIntermediates;
353  unsigned NumRegs;
354 
355  if (IsABIRegCopy) {
356  NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
357  *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
358  RegisterVT);
359  } else {
360  NumRegs =
361  TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
362  NumIntermediates, RegisterVT);
363  }
364 
365  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
366  NumParts = NumRegs; // Silence a compiler warning.
367  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
368  assert(RegisterVT.getSizeInBits() ==
369  Parts[0].getSimpleValueType().getSizeInBits() &&
370  "Part type sizes don't match!");
371 
372  // Assemble the parts into intermediate operands.
373  SmallVector<SDValue, 8> Ops(NumIntermediates);
374  if (NumIntermediates == NumParts) {
375  // If the register was not expanded, truncate or copy the value,
376  // as appropriate.
377  for (unsigned i = 0; i != NumParts; ++i)
378  Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
379  PartVT, IntermediateVT, V);
380  } else if (NumParts > 0) {
381  // If the intermediate type was expanded, build the intermediate
382  // operands from the parts.
383  assert(NumParts % NumIntermediates == 0 &&
384  "Must expand into a divisible number of parts!");
385  unsigned Factor = NumParts / NumIntermediates;
386  for (unsigned i = 0; i != NumIntermediates; ++i)
387  Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
388  PartVT, IntermediateVT, V);
389  }
390 
391  // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
392  // intermediate operands.
393  EVT BuiltVectorTy =
394  EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
395  (IntermediateVT.isVector()
396  ? IntermediateVT.getVectorNumElements() * NumParts
397  : NumIntermediates));
398  Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
399  : ISD::BUILD_VECTOR,
400  DL, BuiltVectorTy, Ops);
401  }
402 
403  // There is now one part, held in Val. Correct it to match ValueVT.
404  EVT PartEVT = Val.getValueType();
405 
406  if (PartEVT == ValueVT)
407  return Val;
408 
409  if (PartEVT.isVector()) {
410  // If the element type of the source/dest vectors are the same, but the
411  // parts vector has more elements than the value vector, then we have a
412  // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
413  // elements we want.
414  if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
415  assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
416  "Cannot narrow, it would be a lossy transformation");
417  return DAG.getNode(
418  ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
419  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
420  }
421 
422  // Vector/Vector bitcast.
423  if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
424  return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
425 
426  assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
427  "Cannot handle this kind of promotion");
428  // Promoted vector extract
429  return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
430 
431  }
432 
433  // Trivial bitcast if the types are the same size and the destination
434  // vector type is legal.
435  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
436  TLI.isTypeLegal(ValueVT))
437  return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
438 
439  if (ValueVT.getVectorNumElements() != 1) {
440  // Certain ABIs require that vectors are passed as integers. If the
441  // vector and the part are the same size, this is an obvious bitcast.
442  if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
443  return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
444  } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
445  // Bitcast Val back the original type and extract the corresponding
446  // vector we want.
447  unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
448  EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
449  ValueVT.getVectorElementType(), Elts);
450  Val = DAG.getBitcast(WiderVecType, Val);
451  return DAG.getNode(
452  ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
453  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
454  }
455 
456  diagnosePossiblyInvalidConstraint(
457  *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
458  return DAG.getUNDEF(ValueVT);
459  }
460 
461  // Handle cases such as i8 -> <1 x i1>
462  EVT ValueSVT = ValueVT.getVectorElementType();
463  if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
464  Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
465  : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
466 
467  return DAG.getBuildVector(ValueVT, DL, Val);
468 }
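// Editorial walk-through (sketch): if a <2 x float> value was legalized into
// a single widened <4 x float> part, the element types match but the part has
// more elements, so the PartEVT.isVector() branch above emits
// EXTRACT_SUBVECTOR(<4 x float> part, index 0) to recover the <2 x float>.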
469 
470 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
471  SDValue Val, SDValue *Parts, unsigned NumParts,
472  MVT PartVT, const Value *V, bool IsABIRegCopy);
473 
474 /// getCopyToParts - Create a series of nodes that contain the specified value
475 /// split into legal parts. If the parts contain more bits than Val, then, for
476 /// integers, ExtendKind can be used to specify how to generate the extra bits.
477 static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
478  SDValue *Parts, unsigned NumParts, MVT PartVT,
479  const Value *V,
480  ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
481  bool IsABIRegCopy = false) {
482  EVT ValueVT = Val.getValueType();
483 
484  // Handle the vector case separately.
485  if (ValueVT.isVector())
486  return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
487  IsABIRegCopy);
488 
489  unsigned PartBits = PartVT.getSizeInBits();
490  unsigned OrigNumParts = NumParts;
491  assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
492  "Copying to an illegal type!");
493 
494  if (NumParts == 0)
495  return;
496 
497  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
498  EVT PartEVT = PartVT;
499  if (PartEVT == ValueVT) {
500  assert(NumParts == 1 && "No-op copy with multiple parts!");
501  Parts[0] = Val;
502  return;
503  }
504 
505  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
506  // If the parts cover more bits than the value has, promote the value.
507  if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
508  assert(NumParts == 1 && "Do not know what to promote to!");
509  Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
510  } else {
511  if (ValueVT.isFloatingPoint()) {
512  // FP values need to be bitcast, then extended if they are being put
513  // into a larger container.
514  ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
515  Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
516  }
517  assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
518  ValueVT.isInteger() &&
519  "Unknown mismatch!");
520  ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
521  Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
522  if (PartVT == MVT::x86mmx)
523  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
524  }
525  } else if (PartBits == ValueVT.getSizeInBits()) {
526  // Different types of the same size.
527  assert(NumParts == 1 && PartEVT != ValueVT);
528  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
529  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
530  // If the parts cover fewer bits than the value has, truncate the value.
531  assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
532  ValueVT.isInteger() &&
533  "Unknown mismatch!");
534  ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
535  Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
536  if (PartVT == MVT::x86mmx)
537  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
538  }
539 
540  // The value may have changed - recompute ValueVT.
541  ValueVT = Val.getValueType();
542  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
543  "Failed to tile the value with PartVT!");
544 
545  if (NumParts == 1) {
546  if (PartEVT != ValueVT) {
547  diagnosePossiblyInvalidConstraint(*DAG.getContext(), V,
548  "scalar-to-vector conversion failed");
549  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
550  }
551 
552  Parts[0] = Val;
553  return;
554  }
555 
556  // Expand the value into multiple parts.
557  if (NumParts & (NumParts - 1)) {
558  // The number of parts is not a power of 2. Split off and copy the tail.
559  assert(PartVT.isInteger() && ValueVT.isInteger() &&
560  "Do not know what to expand to!");
561  unsigned RoundParts = 1 << Log2_32(NumParts);
562  unsigned RoundBits = RoundParts * PartBits;
563  unsigned OddParts = NumParts - RoundParts;
564  SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
565  DAG.getIntPtrConstant(RoundBits, DL));
566  getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
567 
568  if (DAG.getDataLayout().isBigEndian())
569  // The odd parts were reversed by getCopyToParts - unreverse them.
570  std::reverse(Parts + RoundParts, Parts + NumParts);
571 
572  NumParts = RoundParts;
573  ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
574  Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
575  }
576 
577  // The number of parts is a power of 2. Repeatedly bisect the value using
578  // EXTRACT_ELEMENT.
579  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
580  EVT::getIntegerVT(*DAG.getContext(),
581  ValueVT.getSizeInBits()),
582  Val);
583 
584  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
585  for (unsigned i = 0; i < NumParts; i += StepSize) {
586  unsigned ThisBits = StepSize * PartBits / 2;
587  EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
588  SDValue &Part0 = Parts[i];
589  SDValue &Part1 = Parts[i+StepSize/2];
590 
591  Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
592  ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
593  Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
594  ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
595 
596  if (ThisBits == PartBits && ThisVT != PartVT) {
597  Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
598  Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
599  }
600  }
601  }
602 
603  if (DAG.getDataLayout().isBigEndian())
604  std::reverse(Parts, Parts + OrigNumParts);
605 }
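// Editorial walk-through (sketch): splitting an i96 value into NumParts = 3
// i32 parts first peels the odd third part with SRL by RoundBits = 64 and
// truncates Val to i64; the remaining power-of-2 half is then bisected into
// two i32 parts via EXTRACT_ELEMENT (and reversed at the end on big-endian).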
606 
607 
608 /// getCopyToPartsVector - Create a series of nodes that contain the specified
609 /// value split into legal parts.
610 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
611  SDValue Val, SDValue *Parts, unsigned NumParts,
612  MVT PartVT, const Value *V,
613  bool IsABIRegCopy) {
614  EVT ValueVT = Val.getValueType();
615  assert(ValueVT.isVector() && "Not a vector");
616  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
617 
618  if (NumParts == 1) {
619  EVT PartEVT = PartVT;
620  if (PartEVT == ValueVT) {
621  // Nothing to do.
622  } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
623  // Bitconvert vector->vector case.
624  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
625  } else if (PartVT.isVector() &&
626  PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
627  PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
628  EVT ElementVT = PartVT.getVectorElementType();
629  // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
630  // undef elements.
631  SmallVector<SDValue, 16> Ops;
632  for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
633  Ops.push_back(DAG.getNode(
634  ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val,
635  DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
636 
637  for (unsigned i = ValueVT.getVectorNumElements(),
638  e = PartVT.getVectorNumElements(); i != e; ++i)
639  Ops.push_back(DAG.getUNDEF(ElementVT));
640 
641  Val = DAG.getBuildVector(PartVT, DL, Ops);
642 
643  // FIXME: Use CONCAT for 2x -> 4x.
644 
645  //SDValue UndefElts = DAG.getUNDEF(VectorTy);
646  //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
647  } else if (PartVT.isVector() &&
648  PartEVT.getVectorElementType().bitsGE(
649  ValueVT.getVectorElementType()) &&
650  PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
651 
652  // Promoted vector extract
653  Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
654  } else {
655  if (ValueVT.getVectorNumElements() == 1) {
656  Val = DAG.getNode(
657  ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
658  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
659  } else {
660  assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
661  "lossy conversion of vector to scalar type");
662  EVT IntermediateType =
663  EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
664  Val = DAG.getBitcast(IntermediateType, Val);
665  Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
666  }
667  }
668 
669  assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
670  Parts[0] = Val;
671  return;
672  }
673 
674  // Handle a multi-element vector.
675  EVT IntermediateVT;
676  MVT RegisterVT;
677  unsigned NumIntermediates;
678  unsigned NumRegs;
679  if (IsABIRegCopy) {
680  NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
681  *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
682  RegisterVT);
683  } else {
684  NumRegs =
685  TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
686  NumIntermediates, RegisterVT);
687  }
688  unsigned NumElements = ValueVT.getVectorNumElements();
689 
690  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
691  NumParts = NumRegs; // Silence a compiler warning.
692  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
693 
694  // Convert the vector to the appropriate type if necessary.
695  unsigned DestVectorNoElts =
696  NumIntermediates *
697  (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
698  EVT BuiltVectorTy = EVT::getVectorVT(
699  *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
700  if (Val.getValueType() != BuiltVectorTy)
701  Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
702 
703  // Split the vector into intermediate operands.
704  SmallVector<SDValue, 8> Ops(NumIntermediates);
705  for (unsigned i = 0; i != NumIntermediates; ++i) {
706  if (IntermediateVT.isVector())
707  Ops[i] =
708  DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
709  DAG.getConstant(i * (NumElements / NumIntermediates), DL,
710  TLI.getVectorIdxTy(DAG.getDataLayout())));
711  else
712  Ops[i] = DAG.getNode(
713  ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
714  DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
715  }
716 
717  // Split the intermediate operands into legal parts.
718  if (NumParts == NumIntermediates) {
719  // If the register was not expanded, promote or copy the value,
720  // as appropriate.
721  for (unsigned i = 0; i != NumParts; ++i)
722  getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
723  } else if (NumParts > 0) {
724  // If the intermediate type was expanded, split each value into
725  // legal parts.
726  assert(NumIntermediates != 0 && "division by zero");
727  assert(NumParts % NumIntermediates == 0 &&
728  "Must expand into a divisible number of parts!");
729  unsigned Factor = NumParts / NumIntermediates;
730  for (unsigned i = 0; i != NumIntermediates; ++i)
731  getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
732  }
733 }
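// Editorial walk-through (sketch, assuming a target that breaks v4i32 into
// two v2i32 registers): NumIntermediates = 2, so the loop above extracts
// subvectors at element indices 0 and 2, and each v2i32 intermediate is
// already a legal part for getCopyToParts.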
734 
735 RegsForValue::RegsForValue(const SmallVector<unsigned, 4> &regs, MVT regvt,
736  EVT valuevt, bool IsABIMangledValue)
737  : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
738  RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}
739 
740 RegsForValue::RegsForValue(LLVMContext &Context, const TargetLowering &TLI,
741  const DataLayout &DL, unsigned Reg, Type *Ty,
742  bool IsABIMangledValue) {
743  ComputeValueVTs(TLI, DL, Ty, ValueVTs);
744 
745  IsABIMangled = IsABIMangledValue;
746 
747  for (EVT ValueVT : ValueVTs) {
748  unsigned NumRegs = IsABIMangledValue
749  ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
750  : TLI.getNumRegisters(Context, ValueVT);
751  MVT RegisterVT = IsABIMangledValue
752  ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
753  : TLI.getRegisterType(Context, ValueVT);
754  for (unsigned i = 0; i != NumRegs; ++i)
755  Regs.push_back(Reg + i);
756  RegVTs.push_back(RegisterVT);
757  RegCount.push_back(NumRegs);
758  Reg += NumRegs;
759  }
760 }
761 
762 SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG,
763  FunctionLoweringInfo &FuncInfo,
764  const SDLoc &dl, SDValue &Chain,
765  SDValue *Flag, const Value *V) const {
766  // A Value with type {} or [0 x %t] needs no registers.
767  if (ValueVTs.empty())
768  return SDValue();
769 
770  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
771 
772  // Assemble the legal parts into the final values.
773  SmallVector<SDValue, 4> Values(ValueVTs.size());
774  SmallVector<SDValue, 8> Parts;
775  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
776  // Copy the legal parts from the registers.
777  EVT ValueVT = ValueVTs[Value];
778  unsigned NumRegs = RegCount[Value];
779  MVT RegisterVT = IsABIMangled
780  ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
781  : RegVTs[Value];
782 
783  Parts.resize(NumRegs);
784  for (unsigned i = 0; i != NumRegs; ++i) {
785  SDValue P;
786  if (!Flag) {
787  P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
788  } else {
789  P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
790  *Flag = P.getValue(2);
791  }
792 
793  Chain = P.getValue(1);
794  Parts[i] = P;
795 
796  // If the source register was virtual and if we know something about it,
797  // add an assert node.
798  if (!TargetRegisterInfo::isVirtualRegister(Regs[Part+i]) ||
799  !RegisterVT.isInteger() || RegisterVT.isVector())
800  continue;
801 
802  const FunctionLoweringInfo::LiveOutInfo *LOI =
803  FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
804  if (!LOI)
805  continue;
806 
807  unsigned RegSize = RegisterVT.getSizeInBits();
808  unsigned NumSignBits = LOI->NumSignBits;
809  unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
810 
811  if (NumZeroBits == RegSize) {
812  // The current value is a zero.
813  // Explicitly express that as it would be easier for
814  // optimizations to kick in.
815  Parts[i] = DAG.getConstant(0, dl, RegisterVT);
816  continue;
817  }
818 
819  // FIXME: We capture more information than the dag can represent. For
820  // now, just use the tightest assertzext/assertsext possible.
821  bool isSExt = true;
822  EVT FromVT(MVT::Other);
823  if (NumSignBits == RegSize) {
824  isSExt = true; // ASSERT SEXT 1
825  FromVT = MVT::i1;
826  } else if (NumZeroBits >= RegSize - 1) {
827  isSExt = false; // ASSERT ZEXT 1
828  FromVT = MVT::i1;
829  } else if (NumSignBits > RegSize - 8) {
830  isSExt = true; // ASSERT SEXT 8
831  FromVT = MVT::i8;
832  } else if (NumZeroBits >= RegSize - 8) {
833  isSExt = false; // ASSERT ZEXT 8
834  FromVT = MVT::i8;
835  } else if (NumSignBits > RegSize - 16) {
836  isSExt = true; // ASSERT SEXT 16
837  FromVT = MVT::i16;
838  } else if (NumZeroBits >= RegSize - 16) {
839  isSExt = false; // ASSERT ZEXT 16
840  FromVT = MVT::i16;
841  } else if (NumSignBits > RegSize - 32) {
842  isSExt = true; // ASSERT SEXT 32
843  FromVT = MVT::i32;
844  } else if (NumZeroBits >= RegSize - 32) {
845  isSExt = false; // ASSERT ZEXT 32
846  FromVT = MVT::i32;
847  } else {
848  continue;
849  }
850  // Add an assertion node.
851  assert(FromVT != MVT::Other);
852  Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
853  RegisterVT, P, DAG.getValueType(FromVT));
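// Editorial example (sketch): on a 64-bit register, NumSignBits = 57
// satisfies NumSignBits > RegSize - 8, so the copy is tagged AssertSext
// from i8; a known-zero count of 56 leading bits would instead yield
// AssertZext from i8.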
854  }
855 
856  Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
857  NumRegs, RegisterVT, ValueVT, V);
858  Part += NumRegs;
859  Parts.clear();
860  }
861 
862  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
863 }
864 
865 void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG,
866  const SDLoc &dl, SDValue &Chain, SDValue *Flag,
867  const Value *V,
868  ISD::NodeType PreferredExtendType) const {
869  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
870  ISD::NodeType ExtendKind = PreferredExtendType;
871 
872  // Get the list of the value's legal parts.
873  unsigned NumRegs = Regs.size();
874  SmallVector<SDValue, 8> Parts(NumRegs);
875  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
876  unsigned NumParts = RegCount[Value];
877 
878  MVT RegisterVT = IsABIMangled
879  ? TLI.getRegisterTypeForCallingConv(RegVTs[Value])
880  : RegVTs[Value];
881 
882  if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
883  ExtendKind = ISD::ZERO_EXTEND;
884 
885  getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
886  &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
887  Part += NumParts;
888  }
889 
890  // Copy the parts into the registers.
891  SmallVector<SDValue, 8> Chains(NumRegs);
892  for (unsigned i = 0; i != NumRegs; ++i) {
893  SDValue Part;
894  if (!Flag) {
895  Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
896  } else {
897  Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
898  *Flag = Part.getValue(1);
899  }
900 
901  Chains[i] = Part.getValue(0);
902  }
903 
904  if (NumRegs == 1 || Flag)
905  // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
906  // flagged to it. That is, the CopyToReg nodes and the user are considered
907  // a single scheduling unit. If we create a TokenFactor and return it as
908  // chain, then the TokenFactor is both a predecessor (operand) of the
909  // user as well as a successor (the TF operands are flagged to the user).
910  // c1, f1 = CopyToReg
911  // c2, f2 = CopyToReg
912  // c3 = TokenFactor c1, c2
913  // ...
914  // = op c3, ..., f2
915  Chain = Chains[NumRegs-1];
916  else
917  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
918 }
919 
920 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
921  unsigned MatchingIdx, const SDLoc &dl,
922  SelectionDAG &DAG,
923  std::vector<SDValue> &Ops) const {
924  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
925 
926  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
927  if (HasMatching)
928  Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
929  else if (!Regs.empty() &&
930  TargetRegisterInfo::isVirtualRegister(Regs.front())) {
931  // Put the register class of the virtual registers in the flag word. That
932  // way, later passes can recompute register class constraints for inline
933  // assembly as well as normal instructions.
934  // Don't do this for tied operands that can use the regclass information
935  // from the def.
936  const MachineRegisterInfo &MRI = DAG.getMachineFunction().getRegInfo();
937  const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
938  Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
939  }
940 
941  SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
942  Ops.push_back(Res);
943 
944  if (Code == InlineAsm::Kind_Clobber) {
945  // Clobbers should always have a 1:1 mapping with registers, and may
946  // reference registers that have illegal (e.g. vector) types. Hence, we
947  // shouldn't try to apply any sort of splitting logic to them.
948  assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
949  "No 1:1 mapping from clobbers to regs?");
950  unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
951  (void)SP;
952  for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
953  Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
954  assert(
955  (Regs[I] != SP ||
956  DAG.getMachineFunction().getFrameInfo().hasOpaqueSPAdjustment()) &&
957  "If we clobbered the stack pointer, MFI should know about it.");
958  }
959  return;
960  }
961 
962  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
963  unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
964  MVT RegisterVT = RegVTs[Value];
965  for (unsigned i = 0; i != NumRegs; ++i) {
966  assert(Reg < Regs.size() && "Mismatch in # registers expected");
967  unsigned TheReg = Regs[Reg++];
968  Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
969  }
970  }
971 }
972 
973 void SelectionDAGBuilder::init(GCFunctionInfo *gfi, AliasAnalysis *aa,
974  const TargetLibraryInfo *li) {
975  AA = aa;
976  GFI = gfi;
977  LibInfo = li;
978  DL = &DAG.getDataLayout();
979  Context = DAG.getContext();
980  LPadToCallSiteMap.clear();
981 }
982 
983 void SelectionDAGBuilder::clear() {
984  NodeMap.clear();
985  UnusedArgNodeMap.clear();
986  PendingLoads.clear();
987  PendingExports.clear();
988  CurInst = nullptr;
989  HasTailCall = false;
990  SDNodeOrder = LowestSDNodeOrder;
991  StatepointLowering.clear();
992 }
993 
994 void SelectionDAGBuilder::clearDanglingDebugInfo() {
995  DanglingDebugInfoMap.clear();
996 }
997 
998 SDValue SelectionDAGBuilder::getRoot() {
999  if (PendingLoads.empty())
1000  return DAG.getRoot();
1001 
1002  if (PendingLoads.size() == 1) {
1003  SDValue Root = PendingLoads[0];
1004  DAG.setRoot(Root);
1005  PendingLoads.clear();
1006  return Root;
1007  }
1008 
1009  // Otherwise, we have to make a token factor node.
1010  SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
1011  PendingLoads);
1012  PendingLoads.clear();
1013  DAG.setRoot(Root);
1014  return Root;
1015 }
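// Editorial note (sketch): if PendingLoads holds the chains of two loads l1
// and l2, the new root is TokenFactor(l1.chain, l2.chain), which orders both
// loads before later chained nodes without ordering them against each other.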
1016 
1017 SDValue SelectionDAGBuilder::getControlRoot() {
1018  SDValue Root = DAG.getRoot();
1019 
1020  if (PendingExports.empty())
1021  return Root;
1022 
1023  // Turn all of the CopyToReg chains into one factored node.
1024  if (Root.getOpcode() != ISD::EntryToken) {
1025  unsigned i = 0, e = PendingExports.size();
1026  for (; i != e; ++i) {
1027  assert(PendingExports[i].getNode()->getNumOperands() > 1);
1028  if (PendingExports[i].getNode()->getOperand(0) == Root)
1029  break; // Don't add the root if we already indirectly depend on it.
1030  }
1031 
1032  if (i == e)
1033  PendingExports.push_back(Root);
1034  }
1035 
1036  Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
1037  PendingExports);
1038  PendingExports.clear();
1039  DAG.setRoot(Root);
1040  return Root;
1041 }
1042 
1043 void SelectionDAGBuilder::visit(const Instruction &I) {
1044  // Set up outgoing PHI node register values before emitting the terminator.
1045  if (isa<TerminatorInst>(&I)) {
1046  HandlePHINodesInSuccessorBlocks(I.getParent());
1047  }
1048 
1049  // Increase the SDNodeOrder if dealing with a non-debug instruction.
1050  if (!isa<DbgInfoIntrinsic>(I))
1051  ++SDNodeOrder;
1052 
1053  CurInst = &I;
1054 
1055  visit(I.getOpcode(), I);
1056 
1057  if (!isa<TerminatorInst>(&I) && !HasTailCall &&
1058  !isStatepoint(&I)) // statepoints handle their exports internally
1059  CopyToExportRegsIfNeeded(&I);
1060 
1061  CurInst = nullptr;
1062 }
1063 
1064 void SelectionDAGBuilder::visitPHI(const PHINode &) {
1065  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
1066 }
1067 
1068 void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
1069  // Note: this doesn't use InstVisitor, because it has to work with
1070  // ConstantExpr's in addition to instructions.
1071  switch (Opcode) {
1072  default: llvm_unreachable("Unknown instruction type encountered!");
1073  // Build the switch statement using the Instruction.def file.
1074 #define HANDLE_INST(NUM, OPCODE, CLASS) \
1075  case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
1076 #include "llvm/IR/Instruction.def"
1077  }
1078 }
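// Editorial expansion (sketch): each entry in Instruction.def stamps out one
// case; for example the Load entry expands roughly to
//   case Instruction::Load: visitLoad((const LoadInst&)I); break;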
1079 
1080 // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
1081 // generate the debug data structures now that we've seen its definition.
1082 void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V,
1083  SDValue Val) {
1084  DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
1085  if (DDI.getDI()) {
1086  const DbgValueInst *DI = DDI.getDI();
1087  DebugLoc dl = DDI.getdl();
1088  unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
1089  DILocalVariable *Variable = DI->getVariable();
1090  DIExpression *Expr = DI->getExpression();
1091  assert(Variable->isValidLocationForIntrinsic(dl) &&
1092  "Expected inlined-at fields to agree");
1093  SDDbgValue *SDV;
1094  if (Val.getNode()) {
1095  if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
1096  SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder);
1097  DAG.AddDbgValue(SDV, Val.getNode(), false);
1098  }
1099  } else
1100  DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
1101  DanglingDebugInfoMap[V] = DanglingDebugInfo();
1102  }
1103 }
1104 
1105 /// getCopyFromRegs - If there was a virtual register allocated for the value V
1106 /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
1107 SDValue SelectionDAGBuilder::getCopyFromRegs(const Value *V, Type *Ty) {
1108  DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
1109  SDValue Result;
1110 
1111  if (It != FuncInfo.ValueMap.end()) {
1112  unsigned InReg = It->second;
1113 
1114  RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
1115  DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V));
1116  SDValue Chain = DAG.getEntryNode();
1117  Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
1118  V);
1119  resolveDanglingDebugInfo(V, Result);
1120  }
1121 
1122  return Result;
1123 }
1124 
1125 /// getValue - Return an SDValue for the given Value.
1126 SDValue SelectionDAGBuilder::getValue(const Value *V) {
1127  // If we already have an SDValue for this value, use it. It's important
1128  // to do this first, so that we don't create a CopyFromReg if we already
1129  // have a regular SDValue.
1130  SDValue &N = NodeMap[V];
1131  if (N.getNode()) return N;
1132 
1133  // If there's a virtual register allocated and initialized for this
1134  // value, use it.
1135  if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
1136  return copyFromReg;
1137 
1138  // Otherwise create a new SDValue and remember it.
1139  SDValue Val = getValueImpl(V);
1140  NodeMap[V] = Val;
1141  resolveDanglingDebugInfo(V, Val);
1142  return Val;
1143 }
1144 
1145 // Return true if SDValue exists for the given Value
1146 bool SelectionDAGBuilder::findValue(const Value *V) const {
1147  return (NodeMap.find(V) != NodeMap.end()) ||
1148  (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
1149 }
1150 
1151 /// getNonRegisterValue - Return an SDValue for the given Value, but
1152 /// don't look in FuncInfo.ValueMap for a virtual register.
1153 SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) {
1154  // If we already have an SDValue for this value, use it.
1155  SDValue &N = NodeMap[V];
1156  if (N.getNode()) {
1157  if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
1158  // Remove the debug location from the node as the node is about to be used
1159  // in a location which may differ from the original debug location. This
1160  // is relevant to Constant and ConstantFP nodes because they can appear
1161  // as constant expressions inside PHI nodes.
1162  N->setDebugLoc(DebugLoc());
1163  }
1164  return N;
1165  }
1166 
1167  // Otherwise create a new SDValue and remember it.
1168  SDValue Val = getValueImpl(V);
1169  NodeMap[V] = Val;
1170  resolveDanglingDebugInfo(V, Val);
1171  return Val;
1172 }
1173 
1174 /// getValueImpl - Helper function for getValue and getNonRegisterValue.
1175 /// Create an SDValue for the given value.
1176 SDValue SelectionDAGBuilder::getValueImpl(const Value *V) {
1177  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1178 
1179  if (const Constant *C = dyn_cast<Constant>(V)) {
1180  EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
1181 
1182  if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
1183  return DAG.getConstant(*CI, getCurSDLoc(), VT);
1184 
1185  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
1186  return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
1187 
1188  if (isa<ConstantPointerNull>(C)) {
1189  unsigned AS = V->getType()->getPointerAddressSpace();
1190  return DAG.getConstant(0, getCurSDLoc(),
1191  TLI.getPointerTy(DAG.getDataLayout(), AS));
1192  }
1193 
1194  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
1195  return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
1196 
1197  if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
1198  return DAG.getUNDEF(VT);
1199 
1200  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
1201  visit(CE->getOpcode(), *CE);
1202  SDValue N1 = NodeMap[V];
1203  assert(N1.getNode() && "visit didn't populate the NodeMap!");
1204  return N1;
1205  }
1206 
1207  if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
1208  SmallVector<SDValue, 4> Constants;
1209  for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
1210  OI != OE; ++OI) {
1211  SDNode *Val = getValue(*OI).getNode();
1212  // If the operand is an empty aggregate, there are no values.
1213  if (!Val) continue;
1214  // Add each leaf value from the operand to the Constants list
1215  // to form a flattened list of all the values.
1216  for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1217  Constants.push_back(SDValue(Val, i));
1218  }
1219 
1220  return DAG.getMergeValues(Constants, getCurSDLoc());
1221  }
1222 
1223  if (const ConstantDataSequential *CDS =
1224  dyn_cast<ConstantDataSequential>(C)) {
1225  SmallVector<SDValue, 4> Ops;
1226  for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
1227  SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
1228  // Add each leaf value from the operand to the Constants list
1229  // to form a flattened list of all the values.
1230  for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1231  Ops.push_back(SDValue(Val, i));
1232  }
1233 
1234  if (isa<ArrayType>(CDS->getType()))
1235  return DAG.getMergeValues(Ops, getCurSDLoc());
1236  return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
1237  }
1238 
1239  if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
1240  assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
1241  "Unknown struct or array constant!");
1242 
1243  SmallVector<EVT, 4> ValueVTs;
1244  ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
1245  unsigned NumElts = ValueVTs.size();
1246  if (NumElts == 0)
1247  return SDValue(); // empty struct
1248  SmallVector<SDValue, 4> Constants(NumElts);
1249  for (unsigned i = 0; i != NumElts; ++i) {
1250  EVT EltVT = ValueVTs[i];
1251  if (isa<UndefValue>(C))
1252  Constants[i] = DAG.getUNDEF(EltVT);
1253  else if (EltVT.isFloatingPoint())
1254  Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
1255  else
1256  Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
1257  }
1258 
1259  return DAG.getMergeValues(Constants, getCurSDLoc());
1260  }
1261 
1262  if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
1263  return DAG.getBlockAddress(BA, VT);
1264 
1265  VectorType *VecTy = cast<VectorType>(V->getType());
1266  unsigned NumElements = VecTy->getNumElements();
1267 
1268  // Now that we know the number and type of the elements, get that number of
1269  // elements into the Ops array based on what kind of constant it is.
1270  SmallVector<SDValue, 16> Ops;
1271  if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
1272  for (unsigned i = 0; i != NumElements; ++i)
1273  Ops.push_back(getValue(CV->getOperand(i)));
1274  } else {
1275  assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
1276  EVT EltVT =
1277  TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
1278 
1279  SDValue Op;
1280  if (EltVT.isFloatingPoint())
1281  Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
1282  else
1283  Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
1284  Ops.assign(NumElements, Op);
1285  }
1286 
1287  // Create a BUILD_VECTOR node.
1288  return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
1289  }
1290 
1291  // If this is a static alloca, generate it as the frameindex instead of
1292  // computation.
1293  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
1294  DenseMap<const AllocaInst*, int>::iterator SI =
1295  FuncInfo.StaticAllocaMap.find(AI);
1296  if (SI != FuncInfo.StaticAllocaMap.end())
1297  return DAG.getFrameIndex(SI->second,
1298  TLI.getFrameIndexTy(DAG.getDataLayout()));
1299  }
1300 
1301  // If this is an instruction which fast-isel has deferred, select it now.
1302  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
1303  unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
1304 
1305  RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
1306  Inst->getType(), isABIRegCopy(V));
1307  SDValue Chain = DAG.getEntryNode();
1308  return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
1309  }
1310 
1311  llvm_unreachable("Can't get register for value!");
1312 }
1313 
1314 void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
1315  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1316  bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
1317  bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
1318  MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
1319  // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
1320  if (IsMSVCCXX || IsCoreCLR)
1321  CatchPadMBB->setIsEHFuncletEntry();
1322 
1323  DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot()));
1324 }
1325 
1326 void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
1327  // Update machine-CFG edge.
1328  MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
1329  FuncInfo.MBB->addSuccessor(TargetMBB);
1330 
1331  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1332  bool IsSEH = isAsynchronousEHPersonality(Pers);
1333  if (IsSEH) {
1334  // If this is not a fall-through branch or optimizations are switched off,
1335  // emit the branch.
1336  if (TargetMBB != NextBlock(FuncInfo.MBB) ||
1337  TM.getOptLevel() == CodeGenOpt::None)
1338  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
1339  getControlRoot(), DAG.getBasicBlock(TargetMBB)));
1340  return;
1341  }
1342 
1343  // Figure out the funclet membership for the catchret's successor.
1344  // This will be used by the FuncletLayout pass to determine how to order the
1345  // BB's.
1346  // A 'catchret' returns to the outer scope's color.
1347  Value *ParentPad = I.getCatchSwitchParentPad();
1348  const BasicBlock *SuccessorColor;
1349  if (isa<ConstantTokenNone>(ParentPad))
1350  SuccessorColor = &FuncInfo.Fn->getEntryBlock();
1351  else
1352  SuccessorColor = cast<Instruction>(ParentPad)->getParent();
1353  assert(SuccessorColor && "No parent funclet for catchret!");
1354  MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
1355  assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
1356 
1357  // Create the terminator node.
1358  SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
1359  getControlRoot(), DAG.getBasicBlock(TargetMBB),
1360  DAG.getBasicBlock(SuccessorColorMBB));
1361  DAG.setRoot(Ret);
1362 }
1363 
1364 void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
1365  // Don't emit any special code for the cleanuppad instruction. It just marks
1366  // the start of a funclet.
1367  FuncInfo.MBB->setIsEHFuncletEntry();
1368  FuncInfo.MBB->setIsCleanupFuncletEntry();
1369 }
1370 
1371 /// When an invoke or a cleanupret unwinds to the next EH pad, there are
1372 /// many places it could ultimately go. In the IR, we have a single unwind
1373 /// destination, but in the machine CFG, we enumerate all the possible blocks.
1374 /// This function skips over imaginary basic blocks that hold catchswitch
1375 /// instructions, and finds all the "real" machine
1376 /// basic block destinations. As those destinations may not be successors of
1377 /// EHPadBB, here we also calculate the edge probability to those destinations.
1378 /// The passed-in Prob is the edge probability to EHPadBB.
1379 static void findUnwindDestinations(
1380  FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
1381  BranchProbability Prob,
1382  SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
1383  &UnwindDests) {
1384  EHPersonality Personality =
1385  classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1386  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
1387  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
1388 
1389  while (EHPadBB) {
1390  const Instruction *Pad = EHPadBB->getFirstNonPHI();
1391  BasicBlock *NewEHPadBB = nullptr;
1392  if (isa<LandingPadInst>(Pad)) {
1393  // Stop on landingpads. They are not funclets.
1394  UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
1395  break;
1396  } else if (isa<CleanupPadInst>(Pad)) {
1397  // Stop on cleanup pads. Cleanups are always funclet entries for all known
1398  // personalities.
1399  UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
1400  UnwindDests.back().first->setIsEHFuncletEntry();
1401  break;
1402  } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
1403  // Add the catchpad handlers to the possible destinations.
1404  for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
1405  UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
1406  // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
1407  if (IsMSVCCXX || IsCoreCLR)
1408  UnwindDests.back().first->setIsEHFuncletEntry();
1409  }
1410  NewEHPadBB = CatchSwitch->getUnwindDest();
1411  } else {
1412  continue;
1413  }
1414 
1415  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1416  if (BPI && NewEHPadBB)
1417  Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
1418  EHPadBB = NewEHPadBB;
1419  }
1420 }
1421 
1422 void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
1423  // Update successor info.
1424  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
1425  auto UnwindDest = I.getUnwindDest();
1426  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1427  BranchProbability UnwindDestProb =
1428  (BPI && UnwindDest)
1429  ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
1430  : BranchProbability::getZero();
1431  findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
1432  for (auto &UnwindDest : UnwindDests) {
1433  UnwindDest.first->setIsEHPad();
1434  addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
1435  }
1436  FuncInfo.MBB->normalizeSuccProbs();
1437 
1438  // Create the terminator node.
1439  SDValue Ret =
1440  DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
1441  DAG.setRoot(Ret);
1442 }
1443 
1444 void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
1445  report_fatal_error("visitCatchSwitch not yet implemented!");
1446 }
1447 
1448 void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
1449  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1450  auto &DL = DAG.getDataLayout();
1451  SDValue Chain = getControlRoot();
1452  SmallVector<ISD::OutputArg, 8> Outs;
1453  SmallVector<SDValue, 8> OutVals;
1454 
1455  // Calls to @llvm.experimental.deoptimize don't generate a return value, so
1456  // lower
1457  //
1458  // %val = call <ty> @llvm.experimental.deoptimize()
1459  // ret <ty> %val
1460  //
1461  // differently.
1462  if (I.getParent()->getTerminatingDeoptimizeCall()) {
1463  LowerDeoptimizingReturn();
1464  return;
1465  }
1466 
1467  if (!FuncInfo.CanLowerReturn) {
1468  unsigned DemoteReg = FuncInfo.DemoteRegister;
1469  const Function *F = I.getParent()->getParent();
1470 
1471  // Emit a store of the return value through the virtual register.
1472  // Leave Outs empty so that LowerReturn won't try to load return
1473  // registers the usual way.
1474  SmallVector<EVT, 1> PtrValueVTs;
1475  ComputeValueVTs(TLI, DL,
1476  F->getReturnType()->getPointerTo(
1477  DAG.getDataLayout().getAllocaAddrSpace()),
1478  PtrValueVTs);
1479 
1480  SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
1481  DemoteReg, PtrValueVTs[0]);
1482  SDValue RetOp = getValue(I.getOperand(0));
1483 
1484  SmallVector<EVT, 4> ValueVTs;
1485  SmallVector<uint64_t, 4> Offsets;
1486  ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
1487  unsigned NumValues = ValueVTs.size();
1488 
1489  SmallVector<SDValue, 4> Chains(NumValues);
1490  for (unsigned i = 0; i != NumValues; ++i) {
1491  // An aggregate return value cannot wrap around the address space, so
1492  // offsets to its parts don't wrap either.
1493  SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
1494  Chains[i] = DAG.getStore(
1495  Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i),
1496  // FIXME: better loc info would be nice.
1497  Ptr, MachinePointerInfo::getUnknownStack(DAG.getMachineFunction()));
1498  }
1499 
1500  Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
1501  MVT::Other, Chains);
1502  } else if (I.getNumOperands() != 0) {
1503  SmallVector<EVT, 4> ValueVTs;
1504  ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
1505  unsigned NumValues = ValueVTs.size();
1506  if (NumValues) {
1507  SDValue RetOp = getValue(I.getOperand(0));
1508 
1509  const Function *F = I.getParent()->getParent();
1510 
1511  ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
1512  if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1513  Attribute::SExt))
1514  ExtendKind = ISD::SIGN_EXTEND;
1515  else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1516  Attribute::ZExt))
1517  ExtendKind = ISD::ZERO_EXTEND;
1518 
1519  LLVMContext &Context = F->getContext();
1520  bool RetInReg = F->getAttributes().hasAttribute(
1521  AttributeList::ReturnIndex, Attribute::InReg);
1522 
1523  for (unsigned j = 0; j != NumValues; ++j) {
1524  EVT VT = ValueVTs[j];
1525 
1526  if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
1527  VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
1528 
1529  unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
1530  MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
1531  SmallVector<SDValue, 4> Parts(NumParts);
1532  getCopyToParts(DAG, getCurSDLoc(),
1533  SDValue(RetOp.getNode(), RetOp.getResNo() + j),
1534  &Parts[0], NumParts, PartVT, &I, ExtendKind, true);
1535 
1536  // 'inreg' on function refers to return value
1537  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1538  if (RetInReg)
1539  Flags.setInReg();
1540 
1541  // Propagate extension type if any
1542  if (ExtendKind == ISD::SIGN_EXTEND)
1543  Flags.setSExt();
1544  else if (ExtendKind == ISD::ZERO_EXTEND)
1545  Flags.setZExt();
1546 
1547  for (unsigned i = 0; i < NumParts; ++i) {
1548  Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
1549  VT, /*isfixed=*/true, 0, 0));
1550  OutVals.push_back(Parts[i]);
1551  }
1552  }
1553  }
1554  }
1555 
1556  // Push in swifterror virtual register as the last element of Outs. This makes
1557  // sure swifterror virtual register will be returned in the swifterror
1558  // physical register.
1559  const Function *F = I.getParent()->getParent();
1560  if (TLI.supportSwiftError() &&
1561  F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
1562  assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
1563  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1564  Flags.setSwiftError();
1565  Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
1566  EVT(TLI.getPointerTy(DL)) /*argvt*/,
1567  true /*isfixed*/, 1 /*origidx*/,
1568  0 /*partOffs*/));
1569  // Create SDNode for the swifterror virtual register.
1570  OutVals.push_back(
1571  DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
1572  &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
1573  EVT(TLI.getPointerTy(DL))));
1574  }
1575 
1576  bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
1577  CallingConv::ID CallConv =
1578  DAG.getMachineFunction().getFunction().getCallingConv();
1579  Chain = DAG.getTargetLoweringInfo().LowerReturn(
1580  Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
1581 
1582  // Verify that the target's LowerReturn behaved as expected.
1583  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
1584  "LowerReturn didn't return a valid chain!");
1585 
1586  // Update the DAG with the new chain value resulting from return lowering.
1587  DAG.setRoot(Chain);
1588 }
1589 
1590 /// CopyToExportRegsIfNeeded - If the given value has virtual registers
1591 /// created for it, emit nodes to copy the value into the virtual
1592 /// registers.
1593 void SelectionDAGBuilder::CopyToExportRegsIfNeeded(const Value *V) {
1594  // Skip empty types
1595  if (V->getType()->isEmptyTy())
1596  return;
1597 
1598  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
1599  if (VMI != FuncInfo.ValueMap.end()) {
1600  assert(!V->use_empty() && "Unused value assigned virtual registers!");
1601  CopyValueToVirtualRegister(V, VMI->second);
1602  }
1603 }
1604 
1605 /// ExportFromCurrentBlock - If this condition isn't known to be exported from
1606 /// the current basic block, add it to ValueMap now so that we'll get a
1607 /// CopyTo/FromReg.
1608 void SelectionDAGBuilder::ExportFromCurrentBlock(const Value *V) {
1609  // No need to export constants.
1610  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
1611 
1612  // Already exported?
1613  if (FuncInfo.isExportedInst(V)) return;
1614 
1615  unsigned Reg = FuncInfo.InitializeRegForValue(V);
1616  CopyValueToVirtualRegister(V, Reg);
1617 }
1618 
1619 bool SelectionDAGBuilder::isExportableFromCurrentBlock(const Value *V,
1620  const BasicBlock *FromBB) {
1621  // The operands of the setcc have to be in this block. We don't know
1622  // how to export them from some other block.
1623  if (const Instruction *VI = dyn_cast<Instruction>(V)) {
1624  // Can export from current BB.
1625  if (VI->getParent() == FromBB)
1626  return true;
1627 
1628  // Is already exported, noop.
1629  return FuncInfo.isExportedInst(V);
1630  }
1631 
1632  // If this is an argument, we can export it if the BB is the entry block or
1633  // if it is already exported.
1634  if (isa<Argument>(V)) {
1635  if (FromBB == &FromBB->getParent()->getEntryBlock())
1636  return true;
1637 
1638  // Otherwise, can only export this if it is already exported.
1639  return FuncInfo.isExportedInst(V);
1640  }
1641 
1642  // Otherwise, constants can always be exported.
1643  return true;
1644 }
1645 
1646 /// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
1647 BranchProbability
1648 SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
1649  const MachineBasicBlock *Dst) const {
1650  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1651  const BasicBlock *SrcBB = Src->getBasicBlock();
1652  const BasicBlock *DstBB = Dst->getBasicBlock();
1653  if (!BPI) {
1654  // If BPI is not available, set the default probability as 1 / N, where N is
1655  // the number of successors.
1656  auto SuccSize = std::max<uint32_t>(
1657  std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1);
1658  return BranchProbability(1, SuccSize);
1659  }
1660  return BPI->getEdgeProbability(SrcBB, DstBB);
1661 }
1662 
1663 void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
1664  MachineBasicBlock *Dst,
1665  BranchProbability Prob) {
1666  if (!FuncInfo.BPI)
1667  Src->addSuccessorWithoutProb(Dst);
1668  else {
1669  if (Prob.isUnknown())
1670  Prob = getEdgeProbability(Src, Dst);
1671  Src->addSuccessor(Dst, Prob);
1672  }
1673 }
1674 
1675 static bool InBlock(const Value *V, const BasicBlock *BB) {
1676  if (const Instruction *I = dyn_cast<Instruction>(V))
1677  return I->getParent() == BB;
1678  return true;
1679 }
1680 
1681 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
1682 /// This function emits a branch and is used at the leaves of an OR or an
1683 /// AND operator tree.
1684 void
1685 SelectionDAGBuilder::EmitBranchForMergedCondition(const Value *Cond,
1686  MachineBasicBlock *TBB,
1687  MachineBasicBlock *FBB,
1688  MachineBasicBlock *CurBB,
1689  MachineBasicBlock *SwitchBB,
1690  BranchProbability TProb,
1691  BranchProbability FProb,
1692  bool InvertCond) {
1693  const BasicBlock *BB = CurBB->getBasicBlock();
1694 
1695  // If the leaf of the tree is a comparison, merge the condition into
1696  // the caseblock.
1697  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
1698  // The operands of the cmp have to be in this block. We don't know
1699  // how to export them from some other block. If this is the first block
1700  // of the sequence, no exporting is needed.
1701  if (CurBB == SwitchBB ||
1702  (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
1703  isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
1704  ISD::CondCode Condition;
1705  if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
1706  ICmpInst::Predicate Pred =
1707  InvertCond ? IC->getInversePredicate() : IC->getPredicate();
1708  Condition = getICmpCondCode(Pred);
1709  } else {
1710  const FCmpInst *FC = cast<FCmpInst>(Cond);
1711  FCmpInst::Predicate Pred =
1712  InvertCond ? FC->getInversePredicate() : FC->getPredicate();
1713  Condition = getFCmpCondCode(Pred);
1714  if (TM.Options.NoNaNsFPMath)
1715  Condition = getFCmpCodeWithoutNaN(Condition);
1716  }
1717 
1718  CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
1719  TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1720  SwitchCases.push_back(CB);
1721  return;
1722  }
1723  }
1724 
1725  // Create a CaseBlock record representing this branch.
1726  ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
1727  CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
1728  nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1729  SwitchCases.push_back(CB);
1730 }
1731 
1732 /// FindMergedConditions - If Cond is an expression like
1733 void SelectionDAGBuilder::FindMergedConditions(const Value *Cond,
1734  MachineBasicBlock *TBB,
1735  MachineBasicBlock *FBB,
1736  MachineBasicBlock *CurBB,
1737  MachineBasicBlock *SwitchBB,
1738  Instruction::BinaryOps Opc,
1739  BranchProbability TProb,
1740  BranchProbability FProb,
1741  bool InvertCond) {
1742  // Skip over a NOT operator that is part of the tree and remember to invert
1743  // the op and operands at the next level.
1744  if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
1745  const Value *CondOp = BinaryOperator::getNotArgument(Cond);
1746  if (InBlock(CondOp, CurBB->getBasicBlock())) {
1747  FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
1748  !InvertCond);
1749  return;
1750  }
1751  }
1752 
1753  const Instruction *BOp = dyn_cast<Instruction>(Cond);
1754  // Compute the effective opcode for Cond, taking into account whether it needs
1755  // to be inverted, e.g.
1756  // and (not (or A, B)), C
1757  // gets lowered as
1758  // and (and (not A, not B), C)
1759  unsigned BOpc = 0;
1760  if (BOp) {
1761  BOpc = BOp->getOpcode();
1762  if (InvertCond) {
1763  if (BOpc == Instruction::And)
1764  BOpc = Instruction::Or;
1765  else if (BOpc == Instruction::Or)
1766  BOpc = Instruction::And;
1767  }
1768  }
1769 
1770  // If this node is not part of the or/and tree, emit it as a branch.
1771  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
1772  BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
1773  BOp->getParent() != CurBB->getBasicBlock() ||
1774  !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
1775  !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
1776  EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
1777  TProb, FProb, InvertCond);
1778  return;
1779  }
1780 
1781  // Create TmpBB after CurBB.
1782  MachineFunction::iterator BBI(CurBB);
1783  MachineFunction &MF = DAG.getMachineFunction();
1784  MachineBasicBlock *TmpBB = MF.CreateMachineBasicBlock(CurBB->getBasicBlock());
1785  CurBB->getParent()->insert(++BBI, TmpBB);
1786 
1787  if (Opc == Instruction::Or) {
1788  // Codegen X | Y as:
1789  // BB1:
1790  // jmp_if_X TBB
1791  // jmp TmpBB
1792  // TmpBB:
1793  // jmp_if_Y TBB
1794  // jmp FBB
1795  //
1796 
1797  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1798  // The requirement is that
1799  // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
1800  // = TrueProb for original BB.
1801  // Assuming the original probabilities are A and B, one choice is to set
1802  // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
1803  // A/(1+B) and 2B/(1+B). This choice assumes that
1804  // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
1805  // Another choice is to assume TrueProb for BB1 equals to TrueProb for
1806  // TmpBB, but the math is more complicated.
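  // Worked example (editor's illustration): with original probabilities
  // A = 3/4 (true) and B = 1/4 (false), BB1 gets 3/8 (true) and 5/8 (false),
  // and normalizing {A/2, B} = {3/8, 1/4} gives TmpBB A/(1+B) = 3/5 and
  // 2B/(1+B) = 2/5. Check: 3/8 + 5/8 * 3/5 = 3/4, the original TrueProb.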
1807 
1808  auto NewTrueProb = TProb / 2;
1809  auto NewFalseProb = TProb / 2 + FProb;
1810  // Emit the LHS condition.
1811  FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
1812  NewTrueProb, NewFalseProb, InvertCond);
1813 
1814  // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
1815  SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
1816  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1817  // Emit the RHS condition into TmpBB.
1818  FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1819  Probs[0], Probs[1], InvertCond);
1820  } else {
1821  assert(Opc == Instruction::And && "Unknown merge op!");
1822  // Codegen X & Y as:
1823  // BB1:
1824  // jmp_if_X TmpBB
1825  // jmp FBB
1826  // TmpBB:
1827  // jmp_if_Y TBB
1828  // jmp FBB
1829  //
1830  // This requires creation of TmpBB after CurBB.
1831 
1832  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1833  // The requirement is that
1834  // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
1835  // = FalseProb for original BB.
1836  // Assuming the original probabilities are A and B, one choice is to set
1837  // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
1838  // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
1839  // TrueProb for BB1 * FalseProb for TmpBB.
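  // Worked example (editor's illustration): with A = B = 1/2, BB1 gets
  // 3/4 (true) and 1/4 (false); normalizing {A, B/2} = {1/2, 1/4} gives TmpBB
  // 2A/(1+A) = 2/3 and B/(1+A) = 1/3. Check: 1/4 + 3/4 * 1/3 = 1/2 = B.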
1840 
1841  auto NewTrueProb = TProb + FProb / 2;
1842  auto NewFalseProb = FProb / 2;
1843  // Emit the LHS condition.
1844  FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
1845  NewTrueProb, NewFalseProb, InvertCond);
1846 
1847  // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
1848  SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
1849  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1850  // Emit the RHS condition into TmpBB.
1851  FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1852  Probs[0], Probs[1], InvertCond);
1853  }
1854 }
1855 
1856 /// If the set of cases should be emitted as a series of branches, return true.
1857 /// If we should emit this as a bunch of and/or'd together conditions, return
1858 /// false.
1859 bool
1860 SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
1861  if (Cases.size() != 2) return true;
1862 
1863  // If this is two comparisons of the same values or'd or and'd together, they
1864  // will get folded into a single comparison, so don't emit two blocks.
1865  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
1866  Cases[0].CmpRHS == Cases[1].CmpRHS) ||
1867  (Cases[0].CmpRHS == Cases[1].CmpLHS &&
1868  Cases[0].CmpLHS == Cases[1].CmpRHS)) {
1869  return false;
1870  }
1871 
1872  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
1873  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
1874  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
1875  Cases[0].CC == Cases[1].CC &&
1876  isa<Constant>(Cases[0].CmpRHS) &&
1877  cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
1878  if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
1879  return false;
1880  if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
1881  return false;
1882  }
1883 
1884  return true;
1885 }
1886 
1887 void SelectionDAGBuilder::visitBr(const BranchInst &I) {
1888  MachineBasicBlock *BrMBB = FuncInfo.MBB;
1889 
1890  // Update machine-CFG edges.
1891  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1892 
1893  if (I.isUnconditional()) {
1894  // Update machine-CFG edges.
1895  BrMBB->addSuccessor(Succ0MBB);
1896 
1897  // If this is not a fall-through branch or optimizations are switched off,
1898  // emit the branch.
1899  if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
1900  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
1901  MVT::Other, getControlRoot(),
1902  DAG.getBasicBlock(Succ0MBB)));
1903 
1904  return;
1905  }
1906 
1907  // If this condition is one of the special cases we handle, do special stuff
1908  // now.
1909  const Value *CondVal = I.getCondition();
1910  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
1911 
1912  // If this is a series of conditions that are or'd or and'd together, emit
1913  // this as a sequence of branches instead of setcc's with and/or operations.
1914  // As long as jumps are not expensive, this should improve performance.
1915  // For example, instead of something like:
1916  // cmp A, B
1917  // C = seteq
1918  // cmp D, E
1919  // F = setle
1920  // or C, F
1921  // jnz foo
1922  // Emit:
1923  // cmp A, B
1924  // je foo
1925  // cmp D, E
1926  // jle foo
1927  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
1928  Instruction::BinaryOps Opcode = BOp->getOpcode();
1929  if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
1930  !I.getMetadata(LLVMContext::MD_unpredictable) &&
1931  (Opcode == Instruction::And || Opcode == Instruction::Or)) {
1932  FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
1933  Opcode,
1934  getEdgeProbability(BrMBB, Succ0MBB),
1935  getEdgeProbability(BrMBB, Succ1MBB),
1936  /*InvertCond=*/false);
1937  // If the compares in later blocks need to use values not currently
1938  // exported from this block, export them now. This block should always
1939  // be the first entry.
1940  assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
1941 
1942  // Allow some cases to be rejected.
1943  if (ShouldEmitAsBranches(SwitchCases)) {
1944  for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
1945  ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
1946  ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
1947  }
1948 
1949  // Emit the branch for this block.
1950  visitSwitchCase(SwitchCases[0], BrMBB);
1951  SwitchCases.erase(SwitchCases.begin());
1952  return;
1953  }
1954 
1955  // Okay, we decided not to do this, remove any inserted MBB's and clear
1956  // SwitchCases.
1957  for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
1958  FuncInfo.MF->erase(SwitchCases[i].ThisBB);
1959 
1960  SwitchCases.clear();
1961  }
1962  }
1963 
1964  // Create a CaseBlock record representing this branch.
1965  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
1966  nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
1967 
1968  // Use visitSwitchCase to actually insert the fast branch sequence for this
1969  // cond branch.
1970  visitSwitchCase(CB, BrMBB);
1971 }
1972 
1973 /// visitSwitchCase - Emits the necessary code to represent a single node in
1974 /// the binary search tree resulting from lowering a switch instruction.
1975 void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
1976  MachineBasicBlock *SwitchBB) {
1977  SDValue Cond;
1978  SDValue CondLHS = getValue(CB.CmpLHS);
1979  SDLoc dl = CB.DL;
1980 
1981  // Build the setcc now.
1982  if (!CB.CmpMHS) {
1983  // Fold "(X == true)" to X and "(X == false)" to !X to
1984  // handle common cases produced by branch lowering.
1985  if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
1986  CB.CC == ISD::SETEQ)
1987  Cond = CondLHS;
1988  else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
1989  CB.CC == ISD::SETEQ) {
1990  SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
1991  Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
1992  } else
1993  Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
1994  } else {
1995  assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
1996 
1997  const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
1998  const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
1999 
2000  SDValue CmpOp = getValue(CB.CmpMHS);
2001  EVT VT = CmpOp.getValueType();
2002 
2003  if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
2004  Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
2005  ISD::SETLE);
2006  } else {
2007  SDValue SUB = DAG.getNode(ISD::SUB, dl,
2008  VT, CmpOp, DAG.getConstant(Low, dl, VT));
2009  Cond = DAG.getSetCC(dl, MVT::i1, SUB,
2010  DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
2011  }
2012  }
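  // Editor's note (illustrative): for a case range [10, 14] this emits
  // Sub = X - 10 and the single unsigned compare Sub <=u 4, which holds
  // exactly for X in 10..14; when Low is the minimum signed value the
  // subtraction is skipped and a plain X <=s High test suffices.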
2013 
2014  // Update successor info
2015  addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
2016  // TrueBB and FalseBB are always different unless the incoming IR is
2017  // degenerate. This only happens when running llc on weird IR.
2018  if (CB.TrueBB != CB.FalseBB)
2019  addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
2020  SwitchBB->normalizeSuccProbs();
2021 
2022  // If the lhs block is the next block, invert the condition so that we can
2023  // fall through to the lhs instead of the rhs block.
2024  if (CB.TrueBB == NextBlock(SwitchBB)) {
2025  std::swap(CB.TrueBB, CB.FalseBB);
2026  SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
2027  Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
2028  }
2029 
2030  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2031  MVT::Other, getControlRoot(), Cond,
2032  DAG.getBasicBlock(CB.TrueBB));
2033 
2034  // Insert the false branch. Do this even if it's a fall through branch,
2035  // this makes it easier to do DAG optimizations which require inverting
2036  // the branch condition.
2037  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2038  DAG.getBasicBlock(CB.FalseBB));
2039 
2040  DAG.setRoot(BrCond);
2041 }
2042 
2043 /// visitJumpTable - Emit JumpTable node in the current MBB
2044 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
2045  // Emit the code for the jump table
2046  assert(JT.Reg != -1U && "Should lower JT Header first!");
2047  EVT PTy = DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout());
2048  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
2049  JT.Reg, PTy);
2050  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
2051  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
2052  MVT::Other, Index.getValue(1),
2053  Table, Index);
2054  DAG.setRoot(BrJumpTable);
2055 }
2056 
2057 /// visitJumpTableHeader - This function emits the code needed to compute the
2058 /// jump-table index from the switch case value.
2059 void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
2060  JumpTableHeader &JTH,
2060  JumpTableHeader &JTH,
2061  MachineBasicBlock *SwitchBB) {
2062  SDLoc dl = getCurSDLoc();
2063 
2064  // Subtract the lowest switch case value from the value being switched on and
2065  // conditional branch to default mbb if the result is greater than the
2066  // difference between smallest and largest cases.
2067  SDValue SwitchOp = getValue(JTH.SValue);
2068  EVT VT = SwitchOp.getValueType();
2069  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2070  DAG.getConstant(JTH.First, dl, VT));
2071 
2072  // The SDNode we just created, which holds the value being switched on minus
2073  // the smallest case value, needs to be copied to a virtual register so it
2074  // can be used as an index into the jump table in a subsequent basic block.
2075  // This value may be smaller or larger than the target's pointer type, and
2076  // may therefore require extension or truncation.
2077  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2078  SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
2079 
2080  unsigned JumpTableReg =
2081  FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
2082  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
2083  JumpTableReg, SwitchOp);
2084  JT.Reg = JumpTableReg;
2085 
2086  // Emit the range check for the jump table, and branch to the default block
2087  // for the switch statement if the value being switched on exceeds the largest
2088  // case in the switch.
2089  SDValue CMP = DAG.getSetCC(
2090  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2091  Sub.getValueType()),
2092  Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
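  // Editor's note (illustrative): with cases spanning [JTH.First, JTH.Last] =
  // [100, 107], a switch value of 103 gives Sub = 3; the range check
  // 3 >u 7 fails, so control falls through to the jump table indexed by 3.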
2093 
2094  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2095  MVT::Other, CopyTo, CMP,
2096  DAG.getBasicBlock(JT.Default));
2097 
2098  // Avoid emitting unnecessary branches to the next block.
2099  if (JT.MBB != NextBlock(SwitchBB))
2100  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2101  DAG.getBasicBlock(JT.MBB));
2102 
2103  DAG.setRoot(BrCond);
2104 }
2105 
2106 /// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
2107 /// variable if there exists one.
2108 static SDValue getLoadStackGuard(SelectionDAG &DAG, const SDLoc &DL,
2109  SDValue &Chain) {
2110  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2111  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2112  MachineFunction &MF = DAG.getMachineFunction();
2113  Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
2114  MachineSDNode *Node =
2115  DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
2116  if (Global) {
2117  MachinePointerInfo MPInfo(Global);
2118  MachineInstr::mmo_iterator MemRefs = MF.allocateMemRefsArray(1);
2119  auto Flags = MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant |
2120  MachineMemOperand::MODereferenceable;
2121  *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
2122  DAG.getEVTAlignment(PtrTy));
2123  Node->setMemRefs(MemRefs, MemRefs + 1);
2124  }
2125  return SDValue(Node, 0);
2126 }
2127 
2128 /// Codegen a new tail for a stack protector check ParentMBB which has had its
2129 /// tail spliced into a stack protector check success bb.
2130 ///
2131 /// For a high level explanation of how this fits into the stack protector
2132 /// generation see the comment on the declaration of class
2133 /// StackProtectorDescriptor.
2134 void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
2135  MachineBasicBlock *ParentBB) {
2136 
2137  // First create the loads to the guard/stack slot for the comparison.
2138  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2139  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2140 
2141  MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
2142  int FI = MFI.getStackProtectorIndex();
2143 
2144  SDValue Guard;
2145  SDLoc dl = getCurSDLoc();
2146  SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
2147  const Module &M = *ParentBB->getParent()->getFunction().getParent();
2148  unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
2149 
2150  // Generate code to load the content of the guard slot.
2151  SDValue GuardVal = DAG.getLoad(
2152  PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
2153  MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI), Align,
2154  MachineMemOperand::MOVolatile);
2155 
2156  if (TLI.useStackGuardXorFP())
2157  GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
2158 
2159  // Retrieve guard check function, nullptr if instrumentation is inlined.
2160  if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
2161  // The target provides a guard check function to validate the guard value.
2162  // Generate a call to that function with the content of the guard slot as
2163  // argument.
2164  auto *Fn = cast<Function>(GuardCheck);
2165  FunctionType *FnTy = Fn->getFunctionType();
2166  assert(FnTy->getNumParams() == 1 && "Invalid function signature");
2167 
2168  TargetLowering::ArgListTy Args;
2169  TargetLowering::ArgListEntry Entry;
2170  Entry.Node = GuardVal;
2171  Entry.Ty = FnTy->getParamType(0);
2172  if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
2173  Entry.IsInReg = true;
2174  Args.push_back(Entry);
2175 
2176  TargetLowering::CallLoweringInfo CLI(DAG);
2177  CLI.setDebugLoc(getCurSDLoc())
2178  .setChain(DAG.getEntryNode())
2179  .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
2180  getValue(GuardCheck), std::move(Args));
2181 
2182  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
2183  DAG.setRoot(Result.second);
2184  return;
2185  }
2186 
2187  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
2188  // Otherwise, emit a volatile load to retrieve the stack guard value.
2189  SDValue Chain = DAG.getEntryNode();
2190  if (TLI.useLoadStackGuardNode()) {
2191  Guard = getLoadStackGuard(DAG, dl, Chain);
2192  } else {
2193  const Value *IRGuard = TLI.getSDagStackGuard(M);
2194  SDValue GuardPtr = getValue(IRGuard);
2195 
2196  Guard =
2197  DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
2198  Align, MachineMemOperand::MOVolatile);
2199  }
2200 
2201  // Perform the comparison via a subtract/getsetcc.
2202  EVT VT = Guard.getValueType();
2203  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);
2204 
2205  SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
2206  *DAG.getContext(),
2207  Sub.getValueType()),
2208  Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
2209 
2210  // If the sub is not 0, then we know the guard and stack slot do not match,
2211  // so branch to the failure MBB.
2212  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2213  MVT::Other, GuardVal.getOperand(0),
2214  Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
2215  // Otherwise branch to success MBB.
2216  SDValue Br = DAG.getNode(ISD::BR, dl,
2217  MVT::Other, BrCond,
2218  DAG.getBasicBlock(SPD.getSuccessMBB()));
2219 
2220  DAG.setRoot(Br);
2221 }
2222 
2223 /// Codegen the failure basic block for a stack protector check.
2224 ///
2225 /// A failure stack protector machine basic block consists simply of a call to
2226 /// __stack_chk_fail().
2227 ///
2228 /// For a high level explanation of how this fits into the stack protector
2229 /// generation see the comment on the declaration of class
2230 /// StackProtectorDescriptor.
2231 void
2232 SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
2233  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2234  SDValue Chain =
2235  TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
2236  None, false, getCurSDLoc(), false, false).second;
2237  DAG.setRoot(Chain);
2238 }
2239 
2240 /// visitBitTestHeader - This function emits the code needed to produce a value
2241 /// suitable for "bit tests".
2242 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
2243  MachineBasicBlock *SwitchBB) {
2244  SDLoc dl = getCurSDLoc();
2245 
2246  // Subtract the minimum value
2247  SDValue SwitchOp = getValue(B.SValue);
2248  EVT VT = SwitchOp.getValueType();
2249  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2250  DAG.getConstant(B.First, dl, VT));
2251 
2252  // Check range
2253  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2254  SDValue RangeCmp = DAG.getSetCC(
2255  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2256  Sub.getValueType()),
2257  Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
2258 
2259  // Determine the type of the test operands.
2260  bool UsePtrType = false;
2261  if (!TLI.isTypeLegal(VT))
2262  UsePtrType = true;
2263  else {
2264  for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
2265  if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
2266  // Switch case ranges are encoded into a series of masks; just use the
2267  // pointer type, which is guaranteed to fit.
2268  UsePtrType = true;
2269  break;
2270  }
2271  }
2272  if (UsePtrType) {
2273  VT = TLI.getPointerTy(DAG.getDataLayout());
2274  Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
2275  }
2276 
2277  B.RegVT = VT.getSimpleVT();
2278  B.Reg = FuncInfo.CreateReg(B.RegVT);
2279  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
2280 
2281  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
2282 
2283  addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
2284  addSuccessorWithProb(SwitchBB, MBB, B.Prob);
2285  SwitchBB->normalizeSuccProbs();
2286 
2287  SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
2288  MVT::Other, CopyTo, RangeCmp,
2289  DAG.getBasicBlock(B.Default));
2290 
2291  // Avoid emitting unnecessary branches to the next block.
2292  if (MBB != NextBlock(SwitchBB))
2293  BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
2294  DAG.getBasicBlock(MBB));
2295 
2296  DAG.setRoot(BrRange);
2297 }
2298 
2299 /// visitBitTestCase - This function produces one "bit test".
2300 void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
2301  MachineBasicBlock* NextMBB,
2302  BranchProbability BranchProbToNext,
2303  unsigned Reg,
2304  BitTestCase &B,
2305  MachineBasicBlock *SwitchBB) {
2306  SDLoc dl = getCurSDLoc();
2307  MVT VT = BB.RegVT;
2308  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
2309  SDValue Cmp;
2310  unsigned PopCount = countPopulation(B.Mask);
2311  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2312  if (PopCount == 1) {
2313  // Testing for a single bit; just compare the shift count with what it
2314  // would need to be to shift a 1 bit in that position.
2315  Cmp = DAG.getSetCC(
2316  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2317  ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
2318  ISD::SETEQ);
2319  } else if (PopCount == BB.Range) {
2320  // There is only one zero bit in the range, test for it directly.
2321  Cmp = DAG.getSetCC(
2322  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2323  ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
2324  ISD::SETNE);
2325  } else {
2326  // Make desired shift
2327  SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
2328  DAG.getConstant(1, dl, VT), ShiftOp);
2329 
2330  // Emit bit tests and jumps
2331  SDValue AndOp = DAG.getNode(ISD::AND, dl,
2332  VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
2333  Cmp = DAG.getSetCC(
2334  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2335  AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
2336  }
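  // Editor's note (illustrative): Mask = 0b00100 has PopCount 1, so the test
  // reduces to the compare ShiftOp == 2; a scattered mask such as 0b10110
  // takes the general path and tests ((1 << ShiftOp) & Mask) != 0.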
2337 
2338  // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
2339  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
2340  // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
2341  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
2342  // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
2343  // one, as they are relative probabilities (and thus work more like weights),
2344  // so we need to normalize them so that they sum to one.
2345  SwitchBB->normalizeSuccProbs();
2346 
2347  SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
2348  MVT::Other, getControlRoot(),
2349  Cmp, DAG.getBasicBlock(B.TargetBB));
2350 
2351  // Avoid emitting unnecessary branches to the next block.
2352  if (NextMBB != NextBlock(SwitchBB))
2353  BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
2354  DAG.getBasicBlock(NextMBB));
2355 
2356  DAG.setRoot(BrAnd);
2357 }
2358 
2359 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
2360  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
2361 
2362  // Retrieve successors. Look through artificial IR level blocks like
2363  // catchswitch for successors.
2364  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
2365  const BasicBlock *EHPadBB = I.getSuccessor(1);
2366 
2367  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
2368  // have to do anything here to lower funclet bundles.
2369  assert(!I.hasOperandBundlesOtherThan(
2370  {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
2371  "Cannot lower invokes with arbitrary operand bundles yet!");
2372 
2373  const Value *Callee(I.getCalledValue());
2374  const Function *Fn = dyn_cast<Function>(Callee);
2375  if (isa<InlineAsm>(Callee))
2376  visitInlineAsm(&I);
2377  else if (Fn && Fn->isIntrinsic()) {
2378  switch (Fn->getIntrinsicID()) {
2379  default:
2380  llvm_unreachable("Cannot invoke this intrinsic");
2381  case Intrinsic::donothing:
2382  // Ignore invokes to @llvm.donothing: jump directly to the next BB.
2383  break;
2384  case Intrinsic::experimental_patchpoint_void:
2385  case Intrinsic::experimental_patchpoint_i64:
2386  visitPatchpoint(&I, EHPadBB);
2387  break;
2388  case Intrinsic::experimental_gc_statepoint:
2389  LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
2390  break;
2391  }
2392  } else if (I.countOperandBundlesOfType(LLVMContext::OB_deopt)) {
2393  // Currently we do not lower any intrinsic calls with deopt operand bundles.
2394  // Eventually we will support lowering the @llvm.experimental.deoptimize
2395  // intrinsic, and right now there are no plans to support other intrinsics
2396  // with deopt state.
2397  LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
2398  } else {
2399  LowerCallTo(&I, getValue(Callee), false, EHPadBB);
2400  }
2401 
2402  // If the value of the invoke is used outside of its defining block, make it
2403  // available as a virtual register.
2404  // We already took care of the exported value for the statepoint instruction
2405  // during call to the LowerStatepoint.
2406  if (!isStatepoint(I)) {
2407  CopyToExportRegsIfNeeded(&I);
2408  }
2409 
2410  SmallVector<std::pair<MachineBasicBlock *, BranchProbability>, 1> UnwindDests;
2411  BranchProbabilityInfo *BPI = FuncInfo.BPI;
2412  BranchProbability EHPadBBProb =
2413  BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
2414  : BranchProbability::getZero();
2415  findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
2416 
2417  // Update successor info.
2418  addSuccessorWithProb(InvokeMBB, Return);
2419  for (auto &UnwindDest : UnwindDests) {
2420  UnwindDest.first->setIsEHPad();
2421  addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
2422  }
2423  InvokeMBB->normalizeSuccProbs();
2424 
2425  // Drop into normal successor.
2426  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
2427  MVT::Other, getControlRoot(),
2428  DAG.getBasicBlock(Return)));
2429 }
2430 
2431 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
2432  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
2433 }
2434 
2435 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
2436  assert(FuncInfo.MBB->isEHPad() &&
2437  "Call to landingpad not in landing pad!");
2438 
2439  MachineBasicBlock *MBB = FuncInfo.MBB;
2440  addLandingPadInfo(LP, *MBB);
2441 
2442  // If there aren't registers to copy the values into (e.g., during SjLj
2443  // exceptions), then don't bother to create these DAG nodes.
2444  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2445  const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
2446  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
2447  TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
2448  return;
2449 
2450  // If landingpad's return type is token type, we don't create DAG nodes
2451  // for its exception pointer and selector value. The extraction of exception
2452  // pointer or selector value from token type landingpads is not currently
2453  // supported.
2454  if (LP.getType()->isTokenTy())
2455  return;
2456 
2457  SmallVector<EVT, 2> ValueVTs;
2458  SDLoc dl = getCurSDLoc();
2459  ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
2460  assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
2461 
2462  // Get the two live-in registers as SDValues. The physregs have already been
2463  // copied into virtual registers.
2464  SDValue Ops[2];
2465  if (FuncInfo.ExceptionPointerVirtReg) {
2466  Ops[0] = DAG.getZExtOrTrunc(
2467  DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2468  FuncInfo.ExceptionPointerVirtReg,
2469  TLI.getPointerTy(DAG.getDataLayout())),
2470  dl, ValueVTs[0]);
2471  } else {
2472  Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
2473  }
2474  Ops[1] = DAG.getZExtOrTrunc(
2475  DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2476  FuncInfo.ExceptionSelectorVirtReg,
2477  TLI.getPointerTy(DAG.getDataLayout())),
2478  dl, ValueVTs[1]);
2479 
2480  // Merge into one.
2481  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
2482  DAG.getVTList(ValueVTs), Ops);
2483  setValue(&LP, Res);
2484 }
2485 
2486 void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
2487 #ifndef NDEBUG
2488  for (const CaseCluster &CC : Clusters)
2489  assert(CC.Low == CC.High && "Input clusters must be single-case");
2490 #endif
2491 
2492  std::sort(Clusters.begin(), Clusters.end(),
2493  [](const CaseCluster &a, const CaseCluster &b) {
2494  return a.Low->getValue().slt(b.Low->getValue());
2495  });
2496 
2497  // Merge adjacent clusters with the same destination.
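  // Editor's note (illustrative): single-case clusters {1->A, 2->A, 3->B, 5->A}
  // become {[1,2]->A, [3,3]->B, [5,5]->A}; 5 is not merged into [1,2] because 4
  // is absent, and the probabilities of merged cases are accumulated.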
2498  const unsigned N = Clusters.size();
2499  unsigned DstIndex = 0;
2500  for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
2501  CaseCluster &CC = Clusters[SrcIndex];
2502  const ConstantInt *CaseVal = CC.Low;
2503  MachineBasicBlock *Succ = CC.MBB;
2504 
2505  if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
2506  (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
2507  // If this case has the same successor and is a neighbour, merge it into
2508  // the previous cluster.
2509  Clusters[DstIndex - 1].High = CaseVal;
2510  Clusters[DstIndex - 1].Prob += CC.Prob;
2511  } else {
2512  std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
2513  sizeof(Clusters[SrcIndex]));
2514  }
2515  }
2516  Clusters.resize(DstIndex);
2517 }
2518 
2519 void SelectionDAGBuilder::UpdateSplitBlock(MachineBasicBlock *First,
2520  MachineBasicBlock *Last) {
2521  // Update JTCases.
2522  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
2523  if (JTCases[i].first.HeaderBB == First)
2524  JTCases[i].first.HeaderBB = Last;
2525 
2526  // Update BitTestCases.
2527  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
2528  if (BitTestCases[i].Parent == First)
2529  BitTestCases[i].Parent = Last;
2530 }
2531 
2532 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
2533  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
2534 
2535  // Update machine-CFG edges with unique successors.
2536  SmallSet<BasicBlock*, 32> Done;
2537  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
2538  BasicBlock *BB = I.getSuccessor(i);
2539  bool Inserted = Done.insert(BB).second;
2540  if (!Inserted)
2541  continue;
2542 
2543  MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
2544  addSuccessorWithProb(IndirectBrMBB, Succ);
2545  }
2546  IndirectBrMBB->normalizeSuccProbs();
2547 
2548  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
2549  MVT::Other, getControlRoot(),
2550  getValue(I.getAddress())));
2551 }
2552 
2553 void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
2554  if (DAG.getTarget().Options.TrapUnreachable)
2555  DAG.setRoot(
2556  DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
2557 }
2558 
2559 void SelectionDAGBuilder::visitFSub(const User &I) {
2560  // -0.0 - X --> fneg
2561  Type *Ty = I.getType();
2562  if (isa<Constant>(I.getOperand(0)) &&
2563  I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) {
2564  SDValue Op2 = getValue(I.getOperand(1));
2565  setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
2566  Op2.getValueType(), Op2));
2567  return;
2568  }
2569 
2570  visitBinary(I, ISD::FSUB);
2571 }
2572 
2573 /// Checks if the given instruction performs a vector reduction, in which case
2574 /// we have the freedom to alter the elements in the result as long as the
2575 /// reduction of them stays unchanged.
2576 static bool isVectorReductionOp(const User *I) {
2577  const Instruction *Inst = dyn_cast<Instruction>(I);
2578  if (!Inst || !Inst->getType()->isVectorTy())
2579  return false;
2580 
2581  auto OpCode = Inst->getOpcode();
2582  switch (OpCode) {
2583  case Instruction::Add:
2584  case Instruction::Mul:
2585  case Instruction::And:
2586  case Instruction::Or:
2587  case Instruction::Xor:
2588  break;
2589  case Instruction::FAdd:
2590  case Instruction::FMul:
2591  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2592  if (FPOp->getFastMathFlags().isFast())
2593  break;
2594  LLVM_FALLTHROUGH;
2595  default:
2596  return false;
2597  }
2598 
2599  unsigned ElemNum = Inst->getType()->getVectorNumElements();
2600  unsigned ElemNumToReduce = ElemNum;
2601 
2602  // Do DFS search on the def-use chain from the given instruction. We only
2603  // allow four kinds of operations during the search until we reach the
2604  // instruction that extracts the first element from the vector:
2605  //
2606  // 1. The reduction operation of the same opcode as the given instruction.
2607  //
2608  // 2. PHI node.
2609  //
2610  // 3. ShuffleVector instruction together with a reduction operation that
2611  // does a partial reduction.
2612  //
2613  // 4. ExtractElement that extracts the first element from the vector, and we
2614  // stop searching the def-use chain here.
2615  //
2616  // 3 & 4 above perform a reduction on all elements of the vector. We push defs
2617  // from 1-3 to the stack to continue the DFS. The given instruction is not
2618  // a reduction operation if we meet any other instructions other than those
2619  // listed above.
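  // Editor's sketch (illustrative IR, not from this file) of a 4 x i32 add
  // reduction that this search accepts:
  //   %s1 = shufflevector <4 x i32> %v, <4 x i32> undef,
  //                       <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  //   %r1 = add <4 x i32> %v, %s1
  //   %s2 = shufflevector <4 x i32> %r1, <4 x i32> undef,
  //                       <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
  //   %r2 = add <4 x i32> %r1, %s2
  //   %res = extractelement <4 x i32> %r2, i32 0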
2620 
2621  SmallVector<const User *, 16> UsersToVisit{Inst};
2622  SmallPtrSet<const User *, 16> Visited;
2623  bool ReduxExtracted = false;
2624 
2625  while (!UsersToVisit.empty()) {
2626  auto User = UsersToVisit.back();
2627  UsersToVisit.pop_back();
2628  if (!Visited.insert(User).second)
2629  continue;
2630 
2631  for (const auto &U : User->users()) {
2632  auto Inst = dyn_cast<Instruction>(U);
2633  if (!Inst)
2634  return false;
2635 
2636  if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
2637  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2638  if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
2639  return false;
2640  UsersToVisit.push_back(U);
2641  } else if (const ShuffleVectorInst *ShufInst =
2642  dyn_cast<ShuffleVectorInst>(U)) {
2643  // Detect the following pattern: A ShuffleVector instruction together
2644  // with a reduction that does a partial reduction on the first and second
2645  // ElemNumToReduce / 2 elements, and stores the result in
2646  // ElemNumToReduce / 2 elements of another vector.
2647 
2648  unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
2649  if (ResultElements < ElemNum)
2650  return false;
2651 
2652  if (ElemNumToReduce == 1)
2653  return false;
2654  if (!isa<UndefValue>(U->getOperand(1)))
2655  return false;
2656  for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
2657  if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
2658  return false;
2659  for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
2660  if (ShufInst->getMaskValue(i) != -1)
2661  return false;
2662 
2663  // There is only one user of this ShuffleVector instruction, which
2664  // must be a reduction operation.
2665  if (!U->hasOneUse())
2666  return false;
2667 
2668  auto U2 = dyn_cast<Instruction>(*U->user_begin());
2669  if (!U2 || U2->getOpcode() != OpCode)
2670  return false;
2671 
2672  // Check operands of the reduction operation.
2673  if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
2674  (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
2675  UsersToVisit.push_back(U2);
2676  ElemNumToReduce /= 2;
2677  } else
2678  return false;
2679  } else if (isa<ExtractElementInst>(U)) {
2680  // At this moment we should have reduced all elements in the vector.
2681  if (ElemNumToReduce != 1)
2682  return false;
2683 
2684  const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
2685  if (!Val || Val->getZExtValue() != 0)
2686  return false;
2687 
2688  ReduxExtracted = true;
2689  } else
2690  return false;
2691  }
2692  }
2693  return ReduxExtracted;
2694 }
2695 
2696 void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
2697  SDValue Op1 = getValue(I.getOperand(0));
2698  SDValue Op2 = getValue(I.getOperand(1));
2699 
2700  bool nuw = false;
2701  bool nsw = false;
2702  bool exact = false;
2703  bool vec_redux = false;
2704  FastMathFlags FMF;
2705 
2706  if (const OverflowingBinaryOperator *OFBinOp =
2707  dyn_cast<const OverflowingBinaryOperator>(&I)) {
2708  nuw = OFBinOp->hasNoUnsignedWrap();
2709  nsw = OFBinOp->hasNoSignedWrap();
2710  }
2711  if (const PossiblyExactOperator *ExactOp =
2712  dyn_cast<const PossiblyExactOperator>(&I))
2713  exact = ExactOp->isExact();
2714  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I))
2715  FMF = FPOp->getFastMathFlags();
2716 
2717  if (isVectorReductionOp(&I)) {
2718  vec_redux = true;
2719  DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
2720  }
2721 
2722  SDNodeFlags Flags;
2723  Flags.setExact(exact);
2724  Flags.setNoSignedWrap(nsw);
2725  Flags.setNoUnsignedWrap(nuw);
2726  Flags.setVectorReduction(vec_redux);
2727  Flags.setAllowReciprocal(FMF.allowReciprocal());
2728  Flags.setAllowContract(FMF.allowContract());
2729  Flags.setNoInfs(FMF.noInfs());
2730  Flags.setNoNaNs(FMF.noNaNs());
2731  Flags.setNoSignedZeros(FMF.noSignedZeros());
2732  Flags.setUnsafeAlgebra(FMF.isFast());
2733 
2734  SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
2735  Op1, Op2, Flags);
2736  setValue(&I, BinNodeValue);
2737 }
2738 
2739 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
2740  SDValue Op1 = getValue(I.getOperand(0));
2741  SDValue Op2 = getValue(I.getOperand(1));
2742 
2743  EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
2744  Op2.getValueType(), DAG.getDataLayout());
2745 
2746  // Coerce the shift amount to the right type if we can.
2747  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
2748  unsigned ShiftSize = ShiftTy.getSizeInBits();
2749  unsigned Op2Size = Op2.getValueSizeInBits();
2750  SDLoc DL = getCurSDLoc();
2751 
2752  // If the operand is smaller than the shift count type, promote it.
2753  if (ShiftSize > Op2Size)
2754  Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
2755 
2756  // If the operand is larger than the shift count type but the shift
2757  // count type has enough bits to represent any shift value, truncate
2758  // it now. This is a common case and it exposes the truncate to
2759  // optimization early.
2760  else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
2761  Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
2762  // Otherwise we'll need to temporarily settle for some other convenient
2763  // type. Type legalization will make adjustments once the shiftee is split.
2764  else
2765  Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
2766  }
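  // Editor's note (illustrative): if the target's shift-amount type is i8 and
  // the IR shift amount is i32, then 8 >= Log2_32_Ceil(32), so the amount is
  // truncated to i8; an i8 amount with an i64 shift-amount type is instead
  // zero-extended.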
2767 
2768  bool nuw = false;
2769  bool nsw = false;
2770  bool exact = false;
2771 
2772  if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
2773 
2774  if (const OverflowingBinaryOperator *OFBinOp =
2775  dyn_cast<const OverflowingBinaryOperator>(&I)) {
2776  nuw = OFBinOp->hasNoUnsignedWrap();
2777  nsw = OFBinOp->hasNoSignedWrap();
2778  }
2779  if (const PossiblyExactOperator *ExactOp =
2780  dyn_cast<const PossiblyExactOperator>(&I))
2781  exact = ExactOp->isExact();
2782  }
2783  SDNodeFlags Flags;
2784  Flags.setExact(exact);
2785  Flags.setNoSignedWrap(nsw);
2786  Flags.setNoUnsignedWrap(nuw);
2787  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
2788  Flags);
2789  setValue(&I, Res);
2790 }
2791 
2792 void SelectionDAGBuilder::visitSDiv(const User &I) {
2793  SDValue Op1 = getValue(I.getOperand(0));
2794  SDValue Op2 = getValue(I.getOperand(1));
2795 
2796  SDNodeFlags Flags;
2797  Flags.setExact(isa<PossiblyExactOperator>(&I) &&
2798  cast<PossiblyExactOperator>(&I)->isExact());
2799  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
2800  Op2, Flags));
2801 }
2802 
2803 void SelectionDAGBuilder::visitICmp(const User &I) {
2804  ICmpInst::Predicate predicate = ICmpInst::BAD_ICMP_PREDICATE;
2805  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2806  predicate = IC->getPredicate();
2807  else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
2808  predicate = ICmpInst::Predicate(IC->getPredicate());
2809  SDValue Op1 = getValue(I.getOperand(0));
2810  SDValue Op2 = getValue(I.getOperand(1));
2811  ISD::CondCode Opcode = getICmpCondCode(predicate);
2812 
2813  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2814  I.getType());
2815  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
2816 }
2817 
2818 void SelectionDAGBuilder::visitFCmp(const User &I) {
2819  FCmpInst::Predicate predicate = FCmpInst::BAD_FCMP_PREDICATE;
2820  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
2821  predicate = FC->getPredicate();
2822  else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
2823  predicate = FCmpInst::Predicate(FC->getPredicate());
2824  SDValue Op1 = getValue(I.getOperand(0));
2825  SDValue Op2 = getValue(I.getOperand(1));
2826  ISD::CondCode Condition = getFCmpCondCode(predicate);
2827 
2828  // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
2829  // FIXME: We should propagate the fast-math-flags to the DAG node itself for
2830  // further optimization, but currently FMF is only applicable to binary nodes.
2831  if (TM.Options.NoNaNsFPMath)
2832  Condition = getFCmpCodeWithoutNaN(Condition);
2833  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2834  I.getType());
2835  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
2836 }
2837 
2838 // Check if every user of the select's condition is itself a select, e.g. the
2839 // condition has one use, or two users that are both selects with the same condition.
2840 static bool hasOnlySelectUsers(const Value *Cond) {
2841  return llvm::all_of(Cond->users(), [](const Value *V) {
2842  return isa<SelectInst>(V);
2843  });
2844 }
2845 
2846 void SelectionDAGBuilder::visitSelect(const User &I) {
2847  SmallVector<EVT, 4> ValueVTs;
2848  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), I.getType(),
2849  ValueVTs);
2850  unsigned NumValues = ValueVTs.size();
2851  if (NumValues == 0) return;
2852 
2853  SmallVector<SDValue, 4> Values(NumValues);
2854  SDValue Cond = getValue(I.getOperand(0));
2855  SDValue LHSVal = getValue(I.getOperand(1));
2856  SDValue RHSVal = getValue(I.getOperand(2));
2857  auto BaseOps = {Cond};
2858  ISD::NodeType OpCode = Cond.getValueType().isVector() ?
2859  ISD::VSELECT : ISD::SELECT;
2860 
2861  // Min/max matching is only viable if all output VTs are the same.
2862  if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
2863  EVT VT = ValueVTs[0];
2864  LLVMContext &Ctx = *DAG.getContext();
2865  auto &TLI = DAG.getTargetLoweringInfo();
2866 
2867  // We care about the legality of the operation after it has been type
2868  // legalized.
2869  while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
2870  VT != TLI.getTypeToTransformTo(Ctx, VT))
2871  VT = TLI.getTypeToTransformTo(Ctx, VT);
2872 
2873  // If the vselect is legal, assume we want to leave this as a vector setcc +
2874  // vselect. Otherwise, if this is going to be scalarized, we want to see if
2875  // min/max is legal on the scalar type.
2876  bool UseScalarMinMax = VT.isVector() &&
2877  !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
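  // Editor's note (illustrative): for "%c = icmp slt i32 %a, %b" followed by
  // "%r = select i1 %c, i32 %a, i32 %b", matchSelectPattern reports SPF_SMIN;
  // if ISD::SMIN is legal or custom for the type (and the compare feeds only
  // selects), the setcc + select pair lowers to a single ISD::SMIN node.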
2878 
2879  Value *LHS, *RHS;
2880  auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
2881  ISD::NodeType Opc = ISD::DELETED_NODE;
2882  switch (SPR.Flavor) {
2883  case SPF_UMAX: Opc = ISD::UMAX; break;
2884  case SPF_UMIN: Opc = ISD::UMIN; break;
2885  case SPF_SMAX: Opc = ISD::SMAX; break;
2886  case SPF_SMIN: Opc = ISD::SMIN; break;
2887  case SPF_FMINNUM:
2888  switch (SPR.NaNBehavior) {
2889  case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2890  case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
2891  case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
2892  case SPNB_RETURNS_ANY: {
2893  if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
2894  Opc = ISD::FMINNUM;
2895  else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
2896  Opc = ISD::FMINNAN;
2897  else if (UseScalarMinMax)
2898  Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
2899  ISD::FMINNUM : ISD::FMINNAN;
2900  break;
2901  }
2902  }
2903  break;
2904  case SPF_FMAXNUM:
2905  switch (SPR.NaNBehavior) {
2906  case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2907  case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
2908  case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
2909  case SPNB_RETURNS_ANY:
2910 
2911  if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
2912  Opc = ISD::FMAXNUM;
2913  else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
2914  Opc = ISD::FMAXNAN;
2915  else if (UseScalarMinMax)
2916  Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
2917  ISD::FMAXNUM : ISD::FMAXNAN;
2918  break;
2919  }
2920  break;
2921  default: break;
2922  }
2923 
2924  if (Opc != ISD::DELETED_NODE &&
2925  (TLI.isOperationLegalOrCustom(Opc, VT) ||
2926  (UseScalarMinMax &&
2927  TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
2928  // If the underlying comparison instruction is used by any other
2929  // instruction, the consumed instructions won't be destroyed, so it is
2930  // not profitable to convert to a min/max.
2931  hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
2932  OpCode = Opc;
2933  LHSVal = getValue(LHS);
2934  RHSVal = getValue(RHS);
2935  BaseOps = {};
2936  }
2937  }
2938 
2939  for (unsigned i = 0; i != NumValues; ++i) {
2940  SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
2941  Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
2942  Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
2943  Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
2944  LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
2945  Ops);
2946  }
2947 
2948  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
2949  DAG.getVTList(ValueVTs), Values));
2950 }
2951 
2952 void SelectionDAGBuilder::visitTrunc(const User &I) {
2953  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
2954  SDValue N = getValue(I.getOperand(0));
2955  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2956  I.getType());
2957  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
2958 }
2959 
2960 void SelectionDAGBuilder::visitZExt(const User &I) {
2961  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2962  // ZExt also can't be a cast to bool for the same reason, so there's nothing much to do.
2963  SDValue N = getValue(I.getOperand(0));
2964  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2965  I.getType());
2966  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
2967 }
2968 
2969 void SelectionDAGBuilder::visitSExt(const User &I) {
2970  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2971  // SExt also can't be a cast to bool for the same reason, so there's nothing much to do.
2972  SDValue N = getValue(I.getOperand(0));
2973  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2974  I.getType());
2975  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
2976 }
2977 
2978 void SelectionDAGBuilder::visitFPTrunc(const User &I) {
2979  // FPTrunc is never a no-op cast, no need to check
2980  SDValue N = getValue(I.getOperand(0));
2981  SDLoc dl = getCurSDLoc();
2982  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2983  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
2984  setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
2985  DAG.getTargetConstant(
2986  0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
2987 }
2988 
2989 void SelectionDAGBuilder::visitFPExt(const User &I) {
2990  // FPExt is never a no-op cast, no need to check
2991  SDValue N = getValue(I.getOperand(0));
2992  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2993  I.getType());
2994  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
2995 }
2996 
2997 void SelectionDAGBuilder::visitFPToUI(const User &I) {
2998  // FPToUI is never a no-op cast, no need to check
2999  SDValue N = getValue(I.getOperand(0));
3000  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3001  I.getType());
3002  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
3003 }
3004 
3005 void SelectionDAGBuilder::visitFPToSI(const User &I) {
3006  // FPToSI is never a no-op cast, no need to check
3007  SDValue N = getValue(I.getOperand(0));
3008  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3009  I.getType());
3010  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
3011 }
3012 
3013 void SelectionDAGBuilder::visitUIToFP(const User &I) {
3014  // UIToFP is never a no-op cast, no need to check
3015  SDValue N = getValue(I.getOperand(0));
3016  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3017  I.getType());
3018  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
3019 }
3020 
3021 void SelectionDAGBuilder::visitSIToFP(const User &I) {
3022  // SIToFP is never a no-op cast, no need to check
3023  SDValue N = getValue(I.getOperand(0));
3024  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3025  I.getType());
3026  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
3027 }
3028 
3029 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
3030  // What to do depends on the size of the integer and the size of the pointer.
3031  // We can either truncate, zero extend, or no-op, accordingly.
3032  SDValue N = getValue(I.getOperand(0));
3033  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3034  I.getType());
3035  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3036 }
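// Illustrative sketch (hypothetical IR) of the three cases, on a target with
// 64-bit pointers; inttoptr below is handled symmetrically:
//   %a = ptrtoint i8* %p to i32   ; lowered as TRUNCATE    (i64 -> i32)
//   %b = ptrtoint i8* %p to i128  ; lowered as ZERO_EXTEND (i64 -> i128)
//   %c = ptrtoint i8* %p to i64   ; no-op, the pointer value is reused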
3037 
3038 void SelectionDAGBuilder::visitIntToPtr(const User &I) {
3039  // What to do depends on the size of the integer and the size of the pointer.
3040  // We can either truncate, zero extend, or no-op, accordingly.
3041  SDValue N = getValue(I.getOperand(0));
3042  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3043  I.getType());
3044  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3045 }
3046 
3047 void SelectionDAGBuilder::visitBitCast(const User &I) {
3048  SDValue N = getValue(I.getOperand(0));
3049  SDLoc dl = getCurSDLoc();
3050  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3051  I.getType());
3052 
3053  // BitCast assures us that source and destination are the same size, so this
3054  // is either a BITCAST or a no-op.
3055  if (DestVT != N.getValueType())
3056  setValue(&I, DAG.getNode(ISD::BITCAST, dl,
3057  DestVT, N)); // convert types.
3058  // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
3059  // might fold any kind of constant expression to an integer constant and that
3060  // is not what we are looking for. Only recognize a bitcast of a genuine
3061  // constant integer as an opaque constant.
3062  else if (ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
3063  setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
3064  /*isOpaque*/true));
3065  else
3066  setValue(&I, N); // noop cast.
3067 }
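// A minimal sketch of the opaque-constant path above: passes such as constant
// hoisting emit a self-bitcast of a literal, e.g. (hypothetical IR)
//   %const = bitcast i32 12345 to i32
// Here DestVT equals the operand's type, the operand is a genuine ConstantInt,
// and so an opaque constant is emitted instead of re-folding the literal.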
3068 
3069 void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
3070  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3071  const Value *SV = I.getOperand(0);
3072  SDValue N = getValue(SV);
3073  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3074 
3075  unsigned SrcAS = SV->getType()->getPointerAddressSpace();
3076  unsigned DestAS = I.getType()->getPointerAddressSpace();
3077 
3078  if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3079  N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
3080 
3081  setValue(&I, N);
3082 }
3083 
3084 void SelectionDAGBuilder::visitInsertElement(const User &I) {
3085  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3086  SDValue InVec = getValue(I.getOperand(0));
3087  SDValue InVal = getValue(I.getOperand(1));
3088  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
3089  TLI.getVectorIdxTy(DAG.getDataLayout()));
3090  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
3091  TLI.getValueType(DAG.getDataLayout(), I.getType()),
3092  InVec, InVal, InIdx));
3093 }
3094 
3095 void SelectionDAGBuilder::visitExtractElement(const User &I) {
3096  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3097  SDValue InVec = getValue(I.getOperand(0));
3098  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
3099  TLI.getVectorIdxTy(DAG.getDataLayout()));
3100  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
3101  TLI.getValueType(DAG.getDataLayout(), I.getType()),
3102  InVec, InIdx));
3103 }
3104 
3105 void SelectionDAGBuilder::visitShuffleVector(const User &I) {
3106  SDValue Src1 = getValue(I.getOperand(0));
3107  SDValue Src2 = getValue(I.getOperand(1));
3108  SDLoc DL = getCurSDLoc();
3109 
3110  SmallVector<int, 8> Mask;
3111  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
3112  unsigned MaskNumElts = Mask.size();
3113 
3114  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3115  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3116  EVT SrcVT = Src1.getValueType();
3117  unsigned SrcNumElts = SrcVT.getVectorNumElements();
3118 
3119  if (SrcNumElts == MaskNumElts) {
3120  setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
3121  return;
3122  }
3123 
3124  // Normalize the shuffle vector since mask and vector lengths don't match.
3125  if (SrcNumElts < MaskNumElts) {
3126  // Mask is longer than the source vectors. We can use CONCAT_VECTORS to
3127  // make the mask and vector lengths match.
3128 
3129  if (MaskNumElts % SrcNumElts == 0) {
3130  // Mask length is a multiple of the source vector length.
3131  // Check if the shuffle is some kind of concatenation of the input
3132  // vectors.
3133  unsigned NumConcat = MaskNumElts / SrcNumElts;
3134  bool IsConcat = true;
3135  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
3136  for (unsigned i = 0; i != MaskNumElts; ++i) {
3137  int Idx = Mask[i];
3138  if (Idx < 0)
3139  continue;
3140  // Ensure the indices in each SrcVT sized piece are sequential and that
3141  // the same source is used for the whole piece.
3142  if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
3143  (ConcatSrcs[i / SrcNumElts] >= 0 &&
3144  ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
3145  IsConcat = false;
3146  break;
3147  }
3148  // Remember which source this index came from.
3149  ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
3150  }
3151 
3152  // The shuffle is concatenating multiple vectors together. Just emit
3153  // a CONCAT_VECTORS operation.
3154  if (IsConcat) {
3155  SmallVector<SDValue, 8> ConcatOps;
3156  for (auto Src : ConcatSrcs) {
3157  if (Src < 0)
3158  ConcatOps.push_back(DAG.getUNDEF(SrcVT));
3159  else if (Src == 0)
3160  ConcatOps.push_back(Src1);
3161  else
3162  ConcatOps.push_back(Src2);
3163  }
3164  setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
3165  return;
3166  }
3167  }
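  // Worked example of the concat check: with <4 x i32> sources, the mask
  // <0,1,2,3,4,5,6,7> fills ConcatSrcs with {0, 1} and becomes
  // CONCAT_VECTORS(Src1, Src2), while <4,5,6,7,4,5,6,7> yields {1, 1} and
  // becomes CONCAT_VECTORS(Src2, Src2).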
3168 
3169  unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
3170  unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
3171  EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
3172  PaddedMaskNumElts);
3173 
3174  // Pad both vectors with undefs to make them the same length as the mask.
3175  SDValue UndefVal = DAG.getUNDEF(SrcVT);
3176 
3177  SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
3178  SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
3179  MOps1[0] = Src1;
3180  MOps2[0] = Src2;
3181 
3182  Src1 = Src1.isUndef()
3183  ? DAG.getUNDEF(PaddedVT)
3184  : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
3185  Src2 = Src2.isUndef()
3186  ? DAG.getUNDEF(PaddedVT)
3187  : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
3188 
3189  // Readjust mask for new input vector length.
3190  SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
3191  for (unsigned i = 0; i != MaskNumElts; ++i) {
3192  int Idx = Mask[i];
3193  if (Idx >= (int)SrcNumElts)
3194  Idx -= SrcNumElts - PaddedMaskNumElts;
3195  MappedOps[i] = Idx;
3196  }
3197 
3198  SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
3199 
3200  // If the concatenated vector was padded, extract a subvector with the
3201  // correct number of elements.
3202  if (MaskNumElts != PaddedMaskNumElts)
3203  Result = DAG.getNode(
3204  ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
3205  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
3206 
3207  setValue(&I, Result);
3208  return;
3209  }
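  // Sketch of the padding path: with <2 x i32> sources and a 3-element mask,
  // PaddedMaskNumElts = 4, so each source is concatenated with an undef
  // <2 x i32> to form a <4 x i32>, the shuffle is performed at that width,
  // and an EXTRACT_SUBVECTOR at index 0 recovers the 3-element result type.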
3210 
3211  if (SrcNumElts > MaskNumElts) {
3212  // Analyze the access pattern of the vector to see if we can extract
3213  // two subvectors and do the shuffle.
3214  int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
3215  bool CanExtract = true;
3216  for (int Idx : Mask) {
3217  unsigned Input = 0;
3218  if (Idx < 0)
3219  continue;
3220 
3221  if (Idx >= (int)SrcNumElts) {
3222  Input = 1;
3223  Idx -= SrcNumElts;
3224  }
3225 
3226  // If all the indices come from the same MaskNumElts sized portion of
3227  // the sources we can use extract. Also make sure the extract wouldn't
3228  // extract past the end of the source.
3229  int NewStartIdx = alignDown(Idx, MaskNumElts);
3230  if (NewStartIdx + MaskNumElts > SrcNumElts ||
3231  (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
3232  CanExtract = false;
3233  // Make sure we always update StartIdx as we use it to track if all
3234  // elements are undef.
3235  StartIdx[Input] = NewStartIdx;
3236  }
3237 
3238  if (StartIdx[0] < 0 && StartIdx[1] < 0) {
3239  setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
3240  return;
3241  }
3242  if (CanExtract) {
3243  // Extract appropriate subvector and generate a vector shuffle
3244  for (unsigned Input = 0; Input < 2; ++Input) {
3245  SDValue &Src = Input == 0 ? Src1 : Src2;
3246  if (StartIdx[Input] < 0)
3247  Src = DAG.getUNDEF(VT);
3248  else {
3249  Src = DAG.getNode(
3250  ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
3251  DAG.getConstant(StartIdx[Input], DL,
3252  TLI.getVectorIdxTy(DAG.getDataLayout())));
3253  }
3254  }
3255 
3256  // Calculate new mask.
3257  SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
3258  for (int &Idx : MappedOps) {
3259  if (Idx >= (int)SrcNumElts)
3260  Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
3261  else if (Idx >= 0)
3262  Idx -= StartIdx[0];
3263  }
3264 
3265  setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
3266  return;
3267  }
3268  }
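  // Worked example of the extract path: with <8 x i16> sources and mask
  // <4,5,6,7>, every index selects from elements 4-7 of Src1, so
  // StartIdx[0] = 4, Src1 becomes EXTRACT_SUBVECTOR(Src1, 4), Src2 is unused
  // (undef), and the remapped mask is <0,1,2,3>.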
3269 
3270  // We can't use either concat vectors or extract subvectors, so fall back
3271  // to replacing the shuffle with extracts and a build vector.
3273  EVT EltVT = VT.getVectorElementType();
3274  EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
3275  SmallVector<SDValue, 8> Ops;
3276  for (int Idx : Mask) {
3277  SDValue Res;
3278 
3279  if (Idx < 0) {
3280  Res = DAG.getUNDEF(EltVT);
3281  } else {
3282  SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
3283  if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
3284 
3285  Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3286  EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
3287  }
3288 
3289  Ops.push_back(Res);
3290  }
3291 
3292  setValue(&I, DAG.getBuildVector(VT, DL, Ops));
3293 }
3294 
3295 void SelectionDAGBuilder::visitInsertValue(const User &I) {
3296  ArrayRef<unsigned> Indices;
3297  if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
3298  Indices = IV->getIndices();
3299  else
3300  Indices = cast<ConstantExpr>(&I)->getIndices();
3301 
3302  const Value *Op0 = I.getOperand(0);
3303  const Value *Op1 = I.getOperand(1);
3304  Type *AggTy = I.getType();
3305  Type *ValTy = Op1->getType();
3306  bool IntoUndef = isa<UndefValue>(Op0);
3307  bool FromUndef = isa<UndefValue>(Op1);
3308 
3309  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3310 
3311  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3312  SmallVector<EVT, 4> AggValueVTs;
3313  ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
3314  SmallVector<EVT, 4> ValValueVTs;
3315  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3316 
3317  unsigned NumAggValues = AggValueVTs.size();
3318  unsigned NumValValues = ValValueVTs.size();
3319  SmallVector<SDValue, 4> Values(NumAggValues);
3320 
3321  // Ignore an insertvalue that produces an empty object
3322  if (!NumAggValues) {
3323  setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3324  return;
3325  }
3326 
3327  SDValue Agg = getValue(Op0);
3328  unsigned i = 0;
3329  // Copy the beginning value(s) from the original aggregate.
3330  for (; i != LinearIndex; ++i)
3331  Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3332  SDValue(Agg.getNode(), Agg.getResNo() + i);
3333  // Copy values from the inserted value(s).
3334  if (NumValValues) {
3335  SDValue Val = getValue(Op1);
3336  for (; i != LinearIndex + NumValValues; ++i)
3337  Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3338  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
3339  }
3340  // Copy remaining value(s) from the original aggregate.
3341  for (; i != NumAggValues; ++i)
3342  Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3343  SDValue(Agg.getNode(), Agg.getResNo() + i);
3344 
3345  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3346  DAG.getVTList(AggValueVTs), Values));
3347 }
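// Worked example (hypothetical IR): for an aggregate of type
// {i32, {float, i64}}, the instruction
//   %r = insertvalue {i32, {float, i64}} %agg, i64 %v, 1, 1
// linearizes the type to [i32, float, i64], so LinearIndex = 2: values 0-1
// are copied from %agg and value 2 is taken from %v.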
3348 
3349 void SelectionDAGBuilder::visitExtractValue(const User &I) {
3350  ArrayRef<unsigned> Indices;
3351  if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
3352  Indices = EV->getIndices();
3353  else
3354  Indices = cast<ConstantExpr>(&I)->getIndices();
3355 
3356  const Value *Op0 = I.getOperand(0);
3357  Type *AggTy = Op0->getType();
3358  Type *ValTy = I.getType();
3359  bool OutOfUndef = isa<UndefValue>(Op0);
3360 
3361  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3362 
3363  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3364  SmallVector<EVT, 4> ValValueVTs;
3365  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3366 
3367  unsigned NumValValues = ValValueVTs.size();
3368 
3369  // Ignore an extractvalue that produces an empty object
3370  if (!NumValValues) {
3371  setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3372  return;
3373  }
3374 
3375  SmallVector<SDValue, 4> Values(NumValValues);
3376 
3377  SDValue Agg = getValue(Op0);
3378  // Copy out the selected value(s).
3379  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
3380  Values[i - LinearIndex] =
3381  OutOfUndef ?
3382  DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
3383  SDValue(Agg.getNode(), Agg.getResNo() + i);
3384 
3385  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3386  DAG.getVTList(ValValueVTs), Values));
3387 }
3388 
3389 void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
3390  Value *Op0 = I.getOperand(0);
3391  // Note that the pointer operand may be a vector of pointers. Take the scalar
3392  // element which holds a pointer.
3393  unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
3394  SDValue N = getValue(Op0);
3395  SDLoc dl = getCurSDLoc();
3396 
3397  // Normalize Vector GEP - all scalar operands should be converted to the
3398  // splat vector.
3399  unsigned VectorWidth = I.getType()->isVectorTy() ?
3400  cast<VectorType>(I.getType())->getVectorNumElements() : 0;
3401 
3402  if (VectorWidth && !N.getValueType().isVector()) {
3403  LLVMContext &Context = *DAG.getContext();
3404  EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
3405  N = DAG.getSplatBuildVector(VT, dl, N);
3406  }
3407 
3408  for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
3409  GTI != E; ++GTI) {
3410  const Value *Idx = GTI.getOperand();
3411  if (StructType *StTy = GTI.getStructTypeOrNull()) {
3412  unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
3413  if (Field) {
3414  // N = N + Offset
3415  uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
3416 
3417  // In an inbounds GEP with an offset that is nonnegative even when
3418  // interpreted as signed, assume there is no unsigned overflow.
3419  SDNodeFlags Flags;
3420  if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
3421  Flags.setNoUnsignedWrap(true);
3422 
3423  N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
3424  DAG.getConstant(Offset, dl, N.getValueType()), Flags);
3425  }
3426  } else {
3427  unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
3428  MVT IdxTy = MVT::getIntegerVT(IdxSize);
3429  APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
3430 
3431  // If this is a scalar constant or a splat vector of constants,
3432  // handle it quickly.
3433  const auto *CI = dyn_cast<ConstantInt>(Idx);
3434  if (!CI && isa<ConstantDataVector>(Idx) &&
3435  cast<ConstantDataVector>(Idx)->getSplatValue())
3436  CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
3437 
3438  if (CI) {
3439  if (CI->isZero())
3440  continue;
3441  APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
3442  LLVMContext &Context = *DAG.getContext();
3443  SDValue OffsVal = VectorWidth ?
3444  DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
3445  DAG.getConstant(Offs, dl, IdxTy);
3446 
3447  // In an inbounds GEP with an offset that is nonnegative even when
3448  // interpreted as signed, assume there is no unsigned overflow.
3449  SDNodeFlags Flags;
3450  if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
3451  Flags.setNoUnsignedWrap(true);
3452 
3453  N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
3454  continue;
3455  }
3456 
3457  // N = N + Idx * ElementSize;
3458  SDValue IdxN = getValue(Idx);
3459 
3460  if (!IdxN.getValueType().isVector() && VectorWidth) {
3461  EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
3462  IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
3463  }
3464 
3465  // If the index is smaller or larger than intptr_t, truncate or extend
3466  // it.
3467  IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
3468 
3469  // If this is a multiply by a power of two, turn it into a shl
3470  // immediately. This is a very common case.
3471  if (ElementSize != 1) {
3472  if (ElementSize.isPowerOf2()) {
3473  unsigned Amt = ElementSize.logBase2();
3474  IdxN = DAG.getNode(ISD::SHL, dl,
3475  N.getValueType(), IdxN,
3476  DAG.getConstant(Amt, dl, IdxN.getValueType()));
3477  } else {
3478  SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
3479  IdxN = DAG.getNode(ISD::MUL, dl,
3480  N.getValueType(), IdxN, Scale);
3481  }
3482  }
3483 
3484  N = DAG.getNode(ISD::ADD, dl,
3485  N.getValueType(), N, IdxN);
3486  }
3487  }
3488 
3489  setValue(&I, N);
3490 }
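// Worked example (hypothetical IR, 64-bit pointers):
//   %q = getelementptr inbounds {i32, i32}, {i32, i32}* %s, i64 1, i32 1
// The sequential index takes the constant path: Offs = 1 * sizeof({i32,i32})
// = 8. The struct index then adds the field offset 4. Both ADDs carry the
// nuw flag because the GEP is inbounds and the offsets are nonnegative.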
3491 
3492 void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
3493  // If this is a fixed sized alloca in the entry block of the function,
3494  // allocate it statically on the stack.
3495  if (FuncInfo.StaticAllocaMap.count(&I))
3496  return; // getValue will auto-populate this.
3497 
3498  SDLoc dl = getCurSDLoc();
3499  Type *Ty = I.getAllocatedType();
3500  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3501  auto &DL = DAG.getDataLayout();
3502  uint64_t TySize = DL.getTypeAllocSize(Ty);
3503  unsigned Align =
3504  std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
3505 
3506  SDValue AllocSize = getValue(I.getArraySize());
3507 
3508  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
3509  if (AllocSize.getValueType() != IntPtr)
3510  AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
3511 
3512  AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
3513  AllocSize,
3514  DAG.getConstant(TySize, dl, IntPtr));
3515 
3516  // Handle alignment. If the requested alignment is less than or equal to
3517  // the stack alignment, ignore it. If the size is greater than or equal to
3518  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
3519  unsigned StackAlign =
3520  DAG.getSubtarget().getFrameLowering()->getStackAlignment();
3521  if (Align <= StackAlign)
3522  Align = 0;
3523 
3524  // Round the size of the allocation up to the stack alignment size
3525  // by adding SA-1 to the size. This doesn't overflow because we're computing
3526  // an address inside an alloca.
3527  SDNodeFlags Flags;
3528  Flags.setNoUnsignedWrap(true);
3529  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
3530  DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);
3531 
3532  // Mask out the low bits for alignment purposes.
3533  AllocSize =
3534  DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
3535  DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));
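  // Rounding sketch: with StackAlign = 16 and a 20-byte request, the node is
  // handed (20 + 15) & ~15 = 32, so DYNAMIC_STACKALLOC always receives a size
  // that is a multiple of the stack alignment.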
3536 
3537  SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
3538  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
3539  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
3540  setValue(&I, DSA);
3541  DAG.setRoot(DSA.getValue(1));
3542 
3543  assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
3544 }
3545 
3546 void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
3547  if (I.isAtomic())
3548  return visitAtomicLoad(I);
3549 
3550  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3551  const Value *SV = I.getOperand(0);
3552  if (TLI.supportSwiftError()) {
3553  // Swifterror values can come from either a function parameter with
3554  // swifterror attribute or an alloca with swifterror attribute.
3555  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
3556  if (Arg->hasSwiftErrorAttr())
3557  return visitLoadFromSwiftError(I);
3558  }
3559 
3560  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
3561  if (Alloca->isSwiftError())
3562  return visitLoadFromSwiftError(I);
3563  }
3564  }
3565 
3566  SDValue Ptr = getValue(SV);
3567 
3568  Type *Ty = I.getType();
3569 
3570  bool isVolatile = I.isVolatile();
3571  bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
3572  bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
3573  bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
3574  unsigned Alignment = I.getAlignment();
3575 
3576  AAMDNodes AAInfo;
3577  I.getAAMetadata(AAInfo);
3578  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3579 
3580  SmallVector<EVT, 4> ValueVTs;
3581  SmallVector<uint64_t, 4> Offsets;
3582  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
3583  unsigned NumValues = ValueVTs.size();
3584  if (NumValues == 0)
3585  return;
3586 
3587  SDValue Root;
3588  bool ConstantMemory = false;
3589  if (isVolatile || NumValues > MaxParallelChains)
3590  // Serialize volatile loads with other side effects.
3591  Root = getRoot();
3592  else if (AA && AA->pointsToConstantMemory(MemoryLocation(
3593  SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
3594  // Do not serialize (non-volatile) loads of constant memory with anything.
3595  Root = DAG.getEntryNode();
3596  ConstantMemory = true;
3597  } else {
3598  // Do not serialize non-volatile loads against each other.
3599  Root = DAG.getRoot();
3600  }
3601 
3602  SDLoc dl = getCurSDLoc();
3603 
3604  if (isVolatile)
3605  Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
3606 
3607  // An aggregate load cannot wrap around the address space, so offsets to its
3608  // parts don't wrap either.
3609  SDNodeFlags Flags;
3610  Flags.setNoUnsignedWrap(true);
3611 
3612  SmallVector<SDValue, 4> Values(NumValues);
3613  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3614  EVT PtrVT = Ptr.getValueType();
3615  unsigned ChainI = 0;
3616  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3617  // Serializing loads here may result in excessive register pressure, and
3618  // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
3619  // could recover a bit by hoisting nodes upward in the chain by recognizing
3620  // they are side-effect free or do not alias. The optimizer should really
3621  // avoid this case by converting large object/array copies to llvm.memcpy
3622  // (MaxParallelChains should always remain as failsafe).
3623  if (ChainI == MaxParallelChains) {
3624  assert(PendingLoads.empty() && "PendingLoads must be serialized first");
3625  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3626  makeArrayRef(Chains.data(), ChainI));
3627  Root = Chain;
3628  ChainI = 0;
3629  }
3630  SDValue A = DAG.getNode(ISD::ADD, dl,
3631  PtrVT, Ptr,
3632  DAG.getConstant(Offsets[i], dl, PtrVT),
3633  Flags);
3634  auto MMOFlags = MachineMemOperand::MONone;
3635  if (isVolatile)
3636  MMOFlags |= MachineMemOperand::MOVolatile;
3637  if (isNonTemporal)
3638  MMOFlags |= MachineMemOperand::MONonTemporal;
3639  if (isInvariant)
3640  MMOFlags |= MachineMemOperand::MOInvariant;
3641  if (isDereferenceable)
3642  MMOFlags |= MachineMemOperand::MODereferenceable;
3643  MMOFlags |= TLI.getMMOFlags(I);
3644 
3645  SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
3646  MachinePointerInfo(SV, Offsets[i]), Alignment,
3647  MMOFlags, AAInfo, Ranges);
3648 
3649  Values[i] = L;
3650  Chains[ChainI] = L.getValue(1);
3651  }
3652 
3653  if (!ConstantMemory) {
3654  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3655  makeArrayRef(Chains.data(), ChainI));
3656  if (isVolatile)
3657  DAG.setRoot(Chain);
3658  else
3659  PendingLoads.push_back(Chain);
3660  }
3661 
3662  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
3663  DAG.getVTList(ValueVTs), Values));
3664 }
3665 
3666 void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
3668  "call visitStoreToSwiftError when backend supports swifterror");
3669 
3670  SmallVector<EVT, 4> ValueVTs;
3671  SmallVector<uint64_t, 4> Offsets;
3672  const Value *SrcV = I.getOperand(0);
3673  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3674  SrcV->getType(), ValueVTs, &Offsets);
3675  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3676  "expect a single EVT for swifterror");
3677 
3678  SDValue Src = getValue(SrcV);
3679  // Create a virtual register, then update the virtual register.
3680  unsigned VReg; bool CreatedVReg;
3681  std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
3682  // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
3683  // Chain can be getRoot or getControlRoot.
3684  SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
3685  SDValue(Src.getNode(), Src.getResNo()));
3686  DAG.setRoot(CopyNode);
3687  if (CreatedVReg)
3688  FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
3689 }
3690 
3691 void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
3693  "call visitLoadFromSwiftError when backend supports swifterror");
3694 
3695  assert(!I.isVolatile() &&
3696  I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
3698  "Support volatile, non temporal, invariant for load_from_swift_error");
3699 
3700  const Value *SV = I.getOperand(0);
3701  Type *Ty = I.getType();
3702  AAMDNodes AAInfo;
3703  I.getAAMetadata(AAInfo);
3704  assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
3705  SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
3706  "load_from_swift_error should not be constant memory");
3707 
3708  SmallVector<EVT, 4> ValueVTs;
3709  SmallVector<uint64_t, 4> Offsets;
3710  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(), Ty,
3711  ValueVTs, &Offsets);
3712  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3713  "expect a single EVT for swifterror");
3714 
3715  // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
3716  SDValue L = DAG.getCopyFromReg(
3717  getRoot(), getCurSDLoc(),
3718  FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
3719  ValueVTs[0]);
3720 
3721  setValue(&I, L);
3722 }
3723 
3724 void SelectionDAGBuilder::visitStore(const StoreInst &I) {
3725  if (I.isAtomic())
3726  return visitAtomicStore(I);
3727 
3728  const Value *SrcV = I.getOperand(0);
3729  const Value *PtrV = I.getOperand(1);
3730 
3731  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3732  if (TLI.supportSwiftError()) {
3733  // Swifterror values can come from either a function parameter with
3734  // swifterror attribute or an alloca with swifterror attribute.
3735  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
3736  if (Arg->hasSwiftErrorAttr())
3737  return visitStoreToSwiftError(I);
3738  }
3739 
3740  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
3741  if (Alloca->isSwiftError())
3742  return visitStoreToSwiftError(I);
3743  }
3744  }
3745 
3746  SmallVector<EVT, 4> ValueVTs;
3747  SmallVector<uint64_t, 4> Offsets;
3748  ComputeValueVTs(DAG.getTargetLoweringInfo(), DAG.getDataLayout(),
3749  SrcV->getType(), ValueVTs, &Offsets);
3750  unsigned NumValues = ValueVTs.size();
3751  if (NumValues == 0)
3752  return;
3753 
3754  // Get the lowered operands. Note that we do this after
3755  // checking if NumValues is zero, because with zero results
3756  // the operands won't have values in the map.
3757  SDValue Src = getValue(SrcV);
3758  SDValue Ptr = getValue(PtrV);
3759 
3760  SDValue Root = getRoot();
3761  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3762  SDLoc dl = getCurSDLoc();
3763  EVT PtrVT = Ptr.getValueType();
3764  unsigned Alignment = I.getAlignment();
3765  AAMDNodes AAInfo;
3766  I.getAAMetadata(AAInfo);
3767 
3768  auto MMOFlags = MachineMemOperand::MONone;
3769  if (I.isVolatile())
3770  MMOFlags |= MachineMemOperand::MOVolatile;
3771  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
3772  MMOFlags |= MachineMemOperand::MONonTemporal;
3773  MMOFlags |= TLI.getMMOFlags(I);
3774 
3775  // An aggregate store cannot wrap around the address space, so offsets to its
3776  // parts don't wrap either.
3777  SDNodeFlags Flags;
3778  Flags.setNoUnsignedWrap(true);
3779 
3780  unsigned ChainI = 0;
3781  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3782  // See visitLoad comments.
3783  if (ChainI == MaxParallelChains) {
3784  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3785  makeArrayRef(Chains.data(), ChainI));
3786  Root = Chain;
3787  ChainI = 0;
3788  }
3789  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
3790  DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
3791  SDValue St = DAG.getStore(
3792  Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
3793  MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
3794  Chains[ChainI] = St;
3795  }
3796 
3797  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3798  makeArrayRef(Chains.data(), ChainI));
3799  DAG.setRoot(StoreNode);
3800 }
3801 
3802 void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
3803  bool IsCompressing) {
3804  SDLoc sdl = getCurSDLoc();
3805 
3806  auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3807  unsigned& Alignment) {
3808  // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
3809  Src0 = I.getArgOperand(0);
3810  Ptr = I.getArgOperand(1);
3811  Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3812  Mask = I.getArgOperand(3);
3813  };
3814  auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3815  unsigned& Alignment) {
3816  // llvm.masked.compressstore.*(Src0, Ptr, Mask)
3817  Src0 = I.getArgOperand(0);
3818  Ptr = I.getArgOperand(1);
3819  Mask = I.getArgOperand(2);
3820  Alignment = 0;
3821  };
3822 
3823  Value *PtrOperand, *MaskOperand, *Src0Operand;
3824  unsigned Alignment;
3825  if (IsCompressing)
3826  getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3827  else
3828  getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3829 
3830  SDValue Ptr = getValue(PtrOperand);
3831  SDValue Src0 = getValue(Src0Operand);
3832  SDValue Mask = getValue(MaskOperand);
3833 
3834  EVT VT = Src0.getValueType();
3835  if (!Alignment)
3836  Alignment = DAG.getEVTAlignment(VT);
3837 
3838  AAMDNodes AAInfo;
3839  I.getAAMetadata(AAInfo);
3840 
3841  MachineMemOperand *MMO =
3842  DAG.getMachineFunction().
3843  getMachineMemOperand(MachinePointerInfo(PtrOperand),
3844  MachineMemOperand::MOStore, VT.getStoreSize(),
3845  Alignment, AAInfo);
3846  SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
3847  MMO, false /* Truncating */,
3848  IsCompressing);
3849  DAG.setRoot(StoreNode);
3850  setValue(&I, StoreNode);
3851 }
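// For example (hypothetical IR), a call such as
//   call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %val,
//            <4 x i32>* %p, i32 4, <4 x i1> %m)
// reaches this with Alignment = 4, while the compressstore form carries no
// alignment operand, so Alignment = 0 and the EVT's default alignment is
// used instead.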
3852 
3853 // Get a uniform base for the Gather/Scatter intrinsic.
3854 // The first argument of the Gather/Scatter intrinsic is a vector of pointers.
3855 // We try to represent it as a base pointer + vector of indices.
3856 // Usually, the vector of pointers comes from a 'getelementptr' instruction.
3857 // The first operand of the GEP may be a single pointer or a vector of pointers.
3858 // Example:
3859 // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
3860 // or
3861 // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
3862 // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
3863 //
3864 // When the first GEP operand is a single pointer, it is the uniform base we
3865 // are looking for. If the first operand of the GEP is a splat vector, we
3866 // extract the splat value and use it as a uniform base.
3867 // In all other cases the function returns 'false'.
3868 static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
3869  SDValue &Scale, SelectionDAGBuilder* SDB) {
3870  SelectionDAG& DAG = SDB->DAG;
3871  LLVMContext &Context = *DAG.getContext();
3872 
3873  assert(Ptr->getType()->isVectorTy() && "Unexpected pointer type");
3874  const GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
3875  if (!GEP)
3876  return false;
3877 
3878  const Value *GEPPtr = GEP->getPointerOperand();
3879  if (!GEPPtr->getType()->isVectorTy())
3880  Ptr = GEPPtr;
3881  else if (!(Ptr = getSplatValue(GEPPtr)))
3882  return false;
3883 
3884  unsigned FinalIndex = GEP->getNumOperands() - 1;
3885  Value *IndexVal = GEP->getOperand(FinalIndex);
3886 
3887  // Ensure all the other indices are 0.
3888  for (unsigned i = 1; i < FinalIndex; ++i) {
3889  auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
3890  if (!C || !C->isZero())
3891  return false;
3892  }
3893 
3894  // The operands of the GEP may be defined in another basic block.
3895  // In this case we'll not find nodes for the operands.
3896  if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
3897  return false;
3898 
3899  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3900  const DataLayout &DL = DAG.getDataLayout();
3901  Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()),
3902  SDB->getCurSDLoc(), TLI.getPointerTy(DL));
3903  Base = SDB->getValue(Ptr);
3904  Index = SDB->getValue(IndexVal);
3905 
3906  if (!Index.getValueType().isVector()) {
3907  unsigned GEPWidth = GEP->getType()->getVectorNumElements();
3908  EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
3909  Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
3910  }
3911  return true;
3912 }
3913 
3914 void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
3915  SDLoc sdl = getCurSDLoc();
3916 
3917  // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
3918  const Value *Ptr = I.getArgOperand(1);
3919  SDValue Src0 = getValue(I.getArgOperand(0));
3920  SDValue Mask = getValue(I.getArgOperand(3));
3921  EVT VT = Src0.getValueType();
3922  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
3923  if (!Alignment)
3924  Alignment = DAG.getEVTAlignment(VT);
3925  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3926 
3927  AAMDNodes AAInfo;
3928  I.getAAMetadata(AAInfo);
3929 
3930  SDValue Base;
3931  SDValue Index;
3932  SDValue Scale;
3933  const Value *BasePtr = Ptr;
3934  bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
3935 
3936  const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
3937  MachineMemOperand *MMO = DAG.getMachineFunction().
3938  getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
3939  MachineMemOperand::MOStore, VT.getStoreSize(),
3940  Alignment, AAInfo);
3941  if (!UniformBase) {
3942  Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
3943  Index = getValue(Ptr);
3944  Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
3945  }
3946  SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
3947  SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
3948  Ops, MMO);
3949  DAG.setRoot(Scatter);
3950  setValue(&I, Scatter);
3951 }
3952 
3953 void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
3954  SDLoc sdl = getCurSDLoc();
3955 
3956  auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3957  unsigned& Alignment) {
3958  // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
3959  Ptr = I.getArgOperand(0);
3960  Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3961  Mask = I.getArgOperand(2);
3962  Src0 = I.getArgOperand(3);
3963  };
3964  auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3965  unsigned& Alignment) {
3966  // @llvm.masked.expandload.*(Ptr, Mask, Src0)
3967  Ptr = I.getArgOperand(0);
3968  Alignment = 0;
3969  Mask = I.getArgOperand(1);
3970  Src0 = I.getArgOperand(2);
3971  };
3972 
3973  Value *PtrOperand, *MaskOperand, *Src0Operand;
3974  unsigned Alignment;
3975  if (IsExpanding)
3976  getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3977  else
3978  getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3979 
3980  SDValue Ptr = getValue(PtrOperand);
3981  SDValue Src0 = getValue(Src0Operand);
3982  SDValue Mask = getValue(MaskOperand);
3983 
3984  EVT VT = Src0.getValueType();
3985  if (!Alignment)
3986  Alignment = DAG.getEVTAlignment(VT);
3987 
3988  AAMDNodes AAInfo;
3989  I.getAAMetadata(AAInfo);
3990  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3991 
3992  // Do not serialize masked loads of constant memory with anything.
3993  bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
3994  PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
3995  SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
3996 
3997  MachineMemOperand *MMO =
3998  DAG.getMachineFunction().
3999  getMachineMemOperand(MachinePointerInfo(PtrOperand),
4000  MachineMemOperand::MOLoad, VT.getStoreSize(),
4001  Alignment, AAInfo, Ranges);
4002 
4003  SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
4004  ISD::NON_EXTLOAD, IsExpanding);
4005  if (AddToChain) {
4006  SDValue OutChain = Load.getValue(1);
4007  DAG.setRoot(OutChain);
4008  }
4009  setValue(&I, Load);
4010 }
4011 
4012 void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
4013  SDLoc sdl = getCurSDLoc();
4014 
4015  // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
4016  const Value *Ptr = I.getArgOperand(0);
4017  SDValue Src0 = getValue(I.getArgOperand(3));
4018  SDValue Mask = getValue(I.getArgOperand(2));
4019 
4020  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4021  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4022  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
4023  if (!Alignment)
4024  Alignment = DAG.getEVTAlignment(VT);
4025 
4026  AAMDNodes AAInfo;
4027  I.getAAMetadata(AAInfo);
4028  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4029 
4030  SDValue Root = DAG.getRoot();
4031  SDValue Base;
4032  SDValue Index;
4033  SDValue Scale;
4034  const Value *BasePtr = Ptr;
4035  bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
4036  bool ConstantMemory = false;
4037  if (UniformBase &&
4038  AA && AA->pointsToConstantMemory(MemoryLocation(
4039  BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
4040  AAInfo))) {
4041  // Do not serialize (non-volatile) loads of constant memory with anything.
4042  Root = DAG.getEntryNode();
4043  ConstantMemory = true;
4044  }
4045 
4046  MachineMemOperand *MMO =
4047  DAG.getMachineFunction().
4048  getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
4049  MachineMemOperand::MOLoad, VT.getStoreSize(),
4050  Alignment, AAInfo, Ranges);
4051 
4052  if (!UniformBase) {
4053  Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4054  Index = getValue(Ptr);
4055  Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4056  }
4057  SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
4058  SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
4059  Ops, MMO);
4060 
4061  SDValue OutChain = Gather.getValue(1);
4062  if (!ConstantMemory)
4063  PendingLoads.push_back(OutChain);
4064  setValue(&I, Gather);
4065 }
4066 
4067 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
4068  SDLoc dl = getCurSDLoc();
4069  AtomicOrdering SuccessOrder = I.getSuccessOrdering();
4070  AtomicOrdering FailureOrder = I.getFailureOrdering();
4071  SyncScope::ID SSID = I.getSyncScopeID();
4072 
4073  SDValue InChain = getRoot();
4074 
4075  MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
4076  SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
4077  SDValue L = DAG.getAtomicCmpSwap(
4078  ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
4079  getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
4080  getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()),
4081  /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
4082 
4083  SDValue OutChain = L.getValue(2);
4084 
4085  setValue(&I, L);
4086  DAG.setRoot(OutChain);
4087 }
4088 
4089 void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
4090  SDLoc dl = getCurSDLoc();
4091  ISD::NodeType NT;
4092  switch (I.getOperation()) {
4093  default: llvm_unreachable("Unknown atomicrmw operation");
4094  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
4095  case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
4096  case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
4097  case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
4098  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
4099  case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
4100  case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
4101  case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
4102  case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
4103  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
4104  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
4105  }
4106  AtomicOrdering Order = I.getOrdering();
4107  SyncScope::ID SSID = I.getSyncScopeID();
4108 
4109  SDValue InChain = getRoot();
4110 
4111  SDValue L =
4112  DAG.getAtomic(NT, dl,
4113  getValue(I.getValOperand()).getSimpleValueType(),
4114  InChain,
4115  getValue(I.getPointerOperand()),
4116  getValue(I.getValOperand()),
4117  I.getPointerOperand(),
4118  /* Alignment=*/ 0, Order, SSID);
4119 
4120  SDValue OutChain = L.getValue(1);
4121 
4122  setValue(&I, L);
4123  DAG.setRoot(OutChain);
4124 }
4125 
4126 void SelectionDAGBuilder::visitFence(const FenceInst &I) {
4127  SDLoc dl = getCurSDLoc();
4128  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4129  SDValue Ops[3];
4130  Ops[0] = getRoot();
4131  Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
4132  TLI.getFenceOperandTy(DAG.getDataLayout()));
4133  Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
4134  TLI.getFenceOperandTy(DAG.getDataLayout()));
4135  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
4136 }
4137 
4138 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
4139  SDLoc dl = getCurSDLoc();
4140  AtomicOrdering Order = I.getOrdering();
4141  SyncScope::ID SSID = I.getSyncScopeID();
4142 
4143  SDValue InChain = getRoot();
4144 
4145  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4146  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4147 
4148  if (!TLI.supportsUnalignedAtomics() &&
4149  I.getAlignment() < VT.getStoreSize())
4150  report_fatal_error("Cannot generate unaligned atomic load");
4151 
4152  MachineMemOperand *MMO =
4153  DAG.getMachineFunction().
4154  getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
4155  MachineMemOperand::MOVolatile |
4156  MachineMemOperand::MOLoad,
4157  VT.getStoreSize(),
4158  I.getAlignment() ? I.getAlignment() :
4159  DAG.getEVTAlignment(VT),
4160  AAMDNodes(), nullptr, SSID, Order);
4161 
4162  InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
4163  SDValue L =
4164  DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
4165  getValue(I.getPointerOperand()), MMO);
4166 
4167  SDValue OutChain = L.getValue(1);
4168 
4169  setValue(&I, L);
4170  DAG.setRoot(OutChain);
4171 }
4172 
4173 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
4174  SDLoc dl = getCurSDLoc();
4175 
4176  AtomicOrdering Order = I.getOrdering();
4177  SyncScope::ID SSID = I.getSyncScopeID();
4178 
4179  SDValue InChain = getRoot();
4180 
4181  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4182  EVT VT =
4183  TLI.getValueType(DAG.getDataLayout(), I.getValueOperand()->getType());
4184 
4185  if (I.getAlignment() < VT.getStoreSize())
4186  report_fatal_error("Cannot generate unaligned atomic store");
4187 
4188  SDValue OutChain =
4189  DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
4190  InChain,
4191  getValue(I.getPointerOperand()),
4192  getValue(I.getValueOperand()),
4193  I.getPointerOperand(), I.getAlignment(),
4194  Order, SSID);
4195 
4196  DAG.setRoot(OutChain);
4197 }
4198 
4199 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
4200 /// node.
4201 void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
4202  unsigned Intrinsic) {
4203  // Ignore the callsite's attributes. A specific call site may be marked with
4204  // readnone, but the lowering code will expect the chain based on the
4205  // definition.
4206  const Function *F = I.getCalledFunction();
4207  bool HasChain = !F->doesNotAccessMemory();
4208  bool OnlyLoad = HasChain && F->onlyReadsMemory();
4209 
4210  // Build the operand list.
4211  SmallVector<SDValue, 8> Ops;
4212  if (HasChain) { // If this intrinsic has side-effects, chainify it.
4213  if (OnlyLoad) {
4214  // We don't need to serialize loads against other loads.
4215  Ops.push_back(DAG.getRoot());
4216  } else {
4217  Ops.push_back(getRoot());
4218  }
4219  }
4220 
4221  // Info is set by getTgtMemIntrinsic.
4222  TargetLowering::IntrinsicInfo Info;
4223  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4224  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
4225  DAG.getMachineFunction(),
4226  Intrinsic);
4227 
4228  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
4229  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
4230  Info.opc == ISD::INTRINSIC_W_CHAIN)
4231  Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
4232  TLI.getPointerTy(DAG.getDataLayout())));
4233 
4234  // Add all operands of the call to the operand list.
4235  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
4236  SDValue Op = getValue(I.getArgOperand(i));
4237  Ops.push_back(Op);
4238  }
4239 
4240  SmallVector<EVT, 4> ValueVTs;
4241  ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
4242 
4243  if (HasChain)
4244  ValueVTs.push_back(MVT::Other);
4245 
4246  SDVTList VTs = DAG.getVTList(ValueVTs);
4247 
4248  // Create the node.
4249  SDValue Result;
4250  if (IsTgtIntrinsic) {
4251  // This is a target intrinsic that touches memory.
4252  Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs,
4253  Ops, Info.memVT,
4254  MachinePointerInfo(Info.ptrVal, Info.offset), Info.align,
4255  Info.flags, Info.size);
4256  } else if (!HasChain) {
4257  Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
4258  } else if (!I.getType()->isVoidTy()) {
4259  Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
4260  } else {
4261  Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
4262  }
4263 
4264  if (HasChain) {
4265  SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
4266  if (OnlyLoad)
4267  PendingLoads.push_back(Chain);
4268  else
4269  DAG.setRoot(Chain);
4270  }
4271 
4272  if (!I.getType()->isVoidTy()) {
4273  if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
4274  EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
4275  Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
4276  } else
4277  Result = lowerRangeToAssertZExt(DAG, I, Result);
4278 
4279  setValue(&I, Result);
4280  }
4281 }
4282 
4283 /// GetSignificand - Get the significand and build it into a floating-point
4284 /// number with exponent of 1:
4285 ///
4286 /// Op = (Op & 0x007fffff) | 0x3f800000;
4287 ///
4288 /// where Op is the i32 bit representation of the floating-point value.
4289 static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, const SDLoc &dl) {
4290  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4291  DAG.getConstant(0x007fffff, dl, MVT::i32));
4292  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
4293  DAG.getConstant(0x3f800000, dl, MVT::i32));
4294  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
4295 }
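// Worked example: for Op = bits(4.5f) = 0x40900000, the AND keeps the
// fraction bits 0x00100000 and the OR forces the exponent field to 127
// (an exponent of 0), giving 0x3f900000 = 1.125f, i.e. 4.5 with its exponent
// stripped (4.5 = 2^2 * 1.125).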
4296 
4297 /// GetExponent - Get the exponent:
4298 ///
4299 /// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
4300 ///
4301 /// where Op is the i32 bit representation of the floating-point value.
4302 static SDValue GetExponent(SelectionDAG &DAG, SDValue Op,
4303  const TargetLowering &TLI, const SDLoc &dl) {
4304  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4305  DAG.getConstant(0x7f800000, dl, MVT::i32));
4306  SDValue t1 = DAG.getNode(
4307  ISD::SRL, dl, MVT::i32, t0,
4308  DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
4309  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
4310  DAG.getConstant(127, dl, MVT::i32));
4311  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
4312 }
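// Continuing the 4.5f example: (0x40900000 & 0x7f800000) >> 23 = 129, and
// 129 - 127 = 2, so GetExponent returns 2.0f. Together with GetSignificand
// this realizes the decomposition x = 2^e * m used by the log expansions.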
4313 
4314 /// getF32Constant - Get 32-bit floating point constant.
4315 static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
4316  const SDLoc &dl) {
4317  return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
4318  MVT::f32);
4319 }
4320 
4321 static SDValue getLimitedPrecisionExp2(SDValue t0, const SDLoc &dl,
4322  SelectionDAG &DAG) {
4323  // TODO: What fast-math-flags should be set on the floating-point nodes?
4324 
4325  // IntegerPartOfX = (int32_t)t0;
4326  SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
4327 
4328  // FractionalPartOfX = t0 - (float)IntegerPartOfX;
4329  SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4330  SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
4331 
4332  // IntegerPartOfX <<= 23;
4333  IntegerPartOfX = DAG.getNode(
4334  ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4335  DAG.getConstant(23, dl, DAG.getTargetLoweringInfo().getPointerTy(
4336  DAG.getDataLayout())));
4337 
4338  SDValue TwoToFractionalPartOfX;
4339  if (LimitFloatPrecision <= 6) {
4340  // For floating-point precision of 6:
4341  //
4342  // TwoToFractionalPartOfX =
4343  // 0.997535578f +
4344  // (0.735607626f + 0.252464424f * x) * x;
4345  //
4346  // error 0.0144103317, which is 6 bits
4347  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4348  getF32Constant(DAG, 0x3e814304, dl));
4349  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4350  getF32Constant(DAG, 0x3f3c50c8, dl));
4351  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4352  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4353  getF32Constant(DAG, 0x3f7f5e7e, dl));
4354  } else if (LimitFloatPrecision <= 12) {
4355  // For floating-point precision of 12:
4356  //
4357  // TwoToFractionalPartOfX =
4358  // 0.999892986f +
4359  // (0.696457318f +
4360  // (0.224338339f + 0.792043434e-1f * x) * x) * x;
4361  //
4362  // error 0.000107046256, which is 13 to 14 bits
4363  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4364  getF32Constant(DAG, 0x3da235e3, dl));
4365  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4366  getF32Constant(DAG, 0x3e65b8f3, dl));
4367  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4368  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4369  getF32Constant(DAG, 0x3f324b07, dl));
4370  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4371  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4372  getF32Constant(DAG, 0x3f7ff8fd, dl));
4373  } else { // LimitFloatPrecision <= 18
4374  // For floating-point precision of 18:
4375  //
4376  // TwoToFractionalPartOfX =
4377  // 0.999999982f +
4378  // (0.693148872f +
4379  // (0.240227044f +
4380  // (0.554906021e-1f +
4381  // (0.961591928e-2f +
4382  // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4383  // error 2.47208000*10^(-7), which is better than 18 bits
4384  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4385  getF32Constant(DAG, 0x3924b03e, dl));
4386  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4387  getF32Constant(DAG, 0x3ab24b87, dl));
4388  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4389  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4390  getF32Constant(DAG, 0x3c1d8c17, dl));
4391  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4392  SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4393  getF32Constant(DAG, 0x3d634a1d, dl));
4394  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4395  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4396  getF32Constant(DAG, 0x3e75fe14, dl));
4397  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4398  SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4399  getF32Constant(DAG, 0x3f317234, dl));
4400  SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4401  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4402  getF32Constant(DAG, 0x3f800000, dl));
4403  }
4404 
4405  // Add the exponent into the result in integer domain.
4406  SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
4407  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4408  DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
4409 }
4410 
4411 /// expandExp - Lower an exp intrinsic. Handles the special sequences for
4412 /// limited-precision mode.
4413 static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4414  const TargetLowering &TLI) {
4415  if (Op.getValueType() == MVT::f32 &&
4416  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4417 
4418  // Put the exponent in the right bit position for later addition to the
4419  // final result:
4420  //
4421  // #define LOG2OFe 1.4426950f
4422  // t0 = Op * LOG2OFe
4423 
4424  // TODO: What fast-math-flags should be set here?
4425  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
4426  getF32Constant(DAG, 0x3fb8aa3b, dl));
4427  return getLimitedPrecisionExp2(t0, dl, DAG);
4428  }
4429 
4430  // No special expansion.
4431  return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
4432 }
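// The expansion above rests on the identity exp(x) = 2^(x * log2(e));
// 0x3fb8aa3b is the f32 bit pattern of log2(e) ~= 1.4426950f, so t0 feeds
// the shared 2^x polynomial in getLimitedPrecisionExp2.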
4433 
4434 /// expandLog - Lower a log intrinsic. Handles the special sequences for
4435 /// limited-precision mode.
4436 static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4437  const TargetLowering &TLI) {
4438  // TODO: What fast-math-flags should be set on the floating-point nodes?
4439 
4440  if (Op.getValueType() == MVT::f32 &&
4441  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4442  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4443 
4444  // Scale the exponent by log(2) [0.69314718f].
4445  SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4446  SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4447  getF32Constant(DAG, 0x3f317218, dl));
4448 
4449  // Get the significand and build it into a floating-point number with
4450  // exponent of 1.
4451  SDValue X = GetSignificand(DAG, Op1, dl);
4452 
4453  SDValue LogOfMantissa;
4454  if (LimitFloatPrecision <= 6) {
4455  // For floating-point precision of 6:
4456  //
4457  // LogofMantissa =
4458  // -1.1609546f +
4459  // (1.4034025f - 0.23903021f * x) * x;
4460  //
4461  // error 0.0034276066, which is better than 8 bits
4462  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4463  getF32Constant(DAG, 0xbe74c456, dl));
4464  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4465  getF32Constant(DAG, 0x3fb3a2b1, dl));
4466  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4467  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4468  getF32Constant(DAG, 0x3f949a29, dl));
4469  } else if (LimitFloatPrecision <= 12) {
4470  // For floating-point precision of 12:
4471  //
4472  // LogOfMantissa =
4473  // -1.7417939f +
4474  // (2.8212026f +
4475  // (-1.4699568f +
4476  // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
4477  //
4478  // error 0.000061011436, which is 14 bits
4479  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4480  getF32Constant(DAG, 0xbd67b6d6, dl));
4481  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4482  getF32Constant(DAG, 0x3ee4f4b8, dl));
4483  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4484  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4485  getF32Constant(DAG, 0x3fbc278b, dl));
4486  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4487  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4488  getF32Constant(DAG, 0x40348e95, dl));
4489  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4490  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4491  getF32Constant(DAG, 0x3fdef31a, dl));
4492  } else { // LimitFloatPrecision <= 18
4493  // For floating-point precision of 18:
4494  //
4495  // LogOfMantissa =
4496  // -2.1072184f +
4497  // (4.2372794f +
4498  // (-3.7029485f +
4499  // (2.2781945f +
4500  // (-0.87823314f +
4501  // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
4502  //
4503  // error 0.0000023660568, which is better than 18 bits
4504  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4505  getF32Constant(DAG, 0xbc91e5ac, dl));
4506  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4507  getF32Constant(DAG, 0x3e4350aa, dl));
4508  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4509  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4510  getF32Constant(DAG, 0x3f60d3e3, dl));
4511  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4512  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4513  getF32Constant(DAG, 0x4011cdf0, dl));
4514  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4515  SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4516  getF32Constant(DAG, 0x406cfd1c, dl));
4517  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4518  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4519  getF32Constant(DAG, 0x408797cb, dl));
4520  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4521  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4522  getF32Constant(DAG, 0x4006dcab, dl));
4523  }
4524 
4525  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
4526  }
4527 
4528  // No special expansion.
4529  return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
4530 }
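// Sketch of the identity behind this expansion: writing x = 2^e * m with
// m in [1,2), log(x) = e * log(2) + log(m). The exponent term is scaled by
// 0x3f317218 (~0.69314718f = log(2)) and log(m) comes from the minimax
// polynomial selected by LimitFloatPrecision.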
4531 
4532 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
4533 /// limited-precision mode.
4534 static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4535  const TargetLowering &TLI) {
4536  // TODO: What fast-math-flags should be set on the floating-point nodes?
4537 
4538  if (Op.getValueType() == MVT::f32 &&
4539  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4540  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4541 
4542  // Get the exponent.
4543  SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
4544 
4545  // Get the significand and build it into a floating-point number with
4546  // exponent of 1.
4547  SDValue X = GetSignificand(DAG, Op1, dl);
4548 
4549  // Different possible minimax approximations of the significand in
4550  // floating-point, for various degrees of accuracy over [1,2].
4551  SDValue Log2ofMantissa;
4552  if (LimitFloatPrecision <= 6) {
4553  // For floating-point precision of 6:
4554  //
4555  // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
4556  //
4557  // error 0.0049451742, which is more than 7 bits
4558  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4559  getF32Constant(DAG, 0xbeb08fe0, dl));
4560  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4561  getF32Constant(DAG, 0x40019463, dl));
4562  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4563  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4564  getF32Constant(DAG, 0x3fd6633d, dl));
4565  } else if (LimitFloatPrecision <= 12) {
4566  // For floating-point precision of 12:
4567  //
4568  // Log2ofMantissa =
4569  // -2.51285454f +
4570  // (4.07009056f +
4571  // (-2.12067489f +
4572  // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
4573  //
4574  // error 0.0000876136000, which is better than 13 bits
4575  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4576  getF32Constant(DAG, 0xbda7262e, dl));
4577  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4578  getF32Constant(DAG, 0x3f25280b, dl));
4579  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4580  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4581  getF32Constant(DAG, 0x4007b923, dl));
4582  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4583  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4584  getF32Constant(DAG, 0x40823e2f, dl));
4585  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4586  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4587  getF32Constant(DAG, 0x4020d29c, dl));
4588  } else { // LimitFloatPrecision <= 18
4589  // For floating-point precision of 18:
4590  //
4591  // Log2ofMantissa =
4592  // -3.0400495f +
4593  // (6.1129976f +
4594  // (-5.3420409f +
4595  // (3.2865683f +
4596  // (-1.2669343f +
4597  // (0.27515199f -
4598  // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
4599  //
4600  // error 0.0000018516, which is better than 18 bits
4601  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4602  getF32Constant(DAG, 0xbcd2769e, dl));
4603  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4604  getF32Constant(DAG, 0x3e8ce0b9, dl));
4605  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4606  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4607  getF32Constant(DAG, 0x3fa22ae7, dl));
4608  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4609  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4610  getF32Constant(DAG, 0x40525723, dl));
4611  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4612  SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4613  getF32Constant(DAG, 0x40aaf200, dl));
4614  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4615  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4616  getF32Constant(DAG, 0x40c39dad, dl));
4617  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4618  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4619  getF32Constant(DAG, 0x4042902c, dl));
4620  }
4621 
4622  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
4623  }
4624 
4625  // No special expansion.
4626  return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
4627 }
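
// For reference, all of these expansions rely on GetExponent and
// GetSignificand (defined earlier in this file) to split x = 2^e * m with
// m in [1,2). A scalar sketch of that frexp-style split, assuming IEEE-754
// single precision (hypothetical helper, illustration only):
static void splitF32Sketch(float In, float &Exp, float &Mant) {
  uint32_t Bits;
  memcpy(&Bits, &In, sizeof(Bits));
  // Unbiased exponent as a float -- the scalar analogue of GetExponent
  // (shift right by 23, mask off the sign, subtract the bias of 127).
  Exp = (float)(int)((Bits >> 23) & 0xff) - 127.0f;
  // Keep the fraction bits but force the exponent field to 127 so the
  // value lands in [1,2) -- the scalar analogue of GetSignificand.
  Bits = (Bits & 0x007fffff) | 0x3f800000;
  memcpy(&Mant, &Bits, sizeof(Mant));
}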
4628 
4629 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
4630 /// limited-precision mode.
4631 static SDValue expandLog10(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4632  const TargetLowering &TLI) {
4633  // TODO: What fast-math-flags should be set on the floating-point nodes?
4634 
4635  if (Op.getValueType() == MVT::f32 &&
4636  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4637  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4638 
4639  // Scale the exponent by log10(2) [0.30102999f].
4640  SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4641  SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4642  getF32Constant(DAG, 0x3e9a209a, dl));
4643 
4644  // Get the significand and build it into a floating-point number with
4645  // exponent of 1.
4646  SDValue X = GetSignificand(DAG, Op1, dl);
4647 
4648  SDValue Log10ofMantissa;
4649  if (LimitFloatPrecision <= 6) {
4650  // For floating-point precision of 6:
4651  //
4652  // Log10ofMantissa =
4653  // -0.50419619f +
4654  // (0.60948995f - 0.10380950f * x) * x;
4655  //
4656  // error 0.0014886165, which is 6 bits
4657  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4658  getF32Constant(DAG, 0xbdd49a13, dl));
4659  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4660  getF32Constant(DAG, 0x3f1c0789, dl));
4661  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4662  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4663  getF32Constant(DAG, 0x3f011300, dl));
4664  } else if (LimitFloatPrecision <= 12) {
4665  // For floating-point precision of 12:
4666  //
4667  // Log10ofMantissa =
4668  // -0.64831180f +
4669  // (0.91751397f +
4670  // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
4671  //
4672  // error 0.00019228036, which is better than 12 bits
4673  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4674  getF32Constant(DAG, 0x3d431f31, dl));
4675  SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4676  getF32Constant(DAG, 0x3ea21fb2, dl));
4677  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4678  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4679  getF32Constant(DAG, 0x3f6ae232, dl));
4680  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4681  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4682  getF32Constant(DAG, 0x3f25f7c3, dl));
4683  } else { // LimitFloatPrecision <= 18
4684  // For floating-point precision of 18:
4685  //
4686  // Log10ofMantissa =
4687  // -0.84299375f +
4688  // (1.5327582f +
4689  // (-1.0688956f +
4690  // (0.49102474f +
4691  // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
4692  //
4693  // error 0.0000037995730, which is better than 18 bits
4694  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4695  getF32Constant(DAG, 0x3c5d51ce, dl));
4696  SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4697  getF32Constant(DAG, 0x3e00685a, dl));
4698  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4699  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4700  getF32Constant(DAG, 0x3efb6798, dl));
4701  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4702  SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4703  getF32Constant(DAG, 0x3f88d192, dl));
4704  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4705  SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4706  getF32Constant(DAG, 0x3fc4316c, dl));
4707  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4708  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
4709  getF32Constant(DAG, 0x3f57ce70, dl));
4710  }
4711 
4712  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
4713  }
4714 
4715  // No special expansion.
4716  return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
4717 }
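
// For reference: the 0x3e9a209a constant above is log10(2) ~= 0.30102999f,
// and the whole expansion rests on the identity log10(x) = e * log10(2) +
// log10(m) for x = 2^e * m. Worked check: x = 8.0f gives e = 3, m = 1.0f,
// so log10(8) = 3 * 0.30102999f + 0 ~= 0.90309f.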
4718 
4719 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
4720 /// limited-precision mode.
4721 static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4722  const TargetLowering &TLI) {
4723  if (Op.getValueType() == MVT::f32 &&
4724  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
4725  return getLimitedPrecisionExp2(Op, dl, DAG);
4726 
4727  // No special expansion.
4728  return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
4729 }
4730 
4731 /// expandPow - Lower a pow intrinsic. Handles the special sequences for
4732 /// limited-precision mode when the base is exactly 10.0f.
4733 static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
4734  SelectionDAG &DAG, const TargetLowering &TLI) {
4735  bool IsExp10 = false;
4736  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
4737  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4738  if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
4739  APFloat Ten(10.0f);
4740  IsExp10 = LHSC->isExactlyValue(Ten);
4741  }
4742  }
4743 
4744  // TODO: What fast-math-flags should be set on the FMUL node?
4745  if (IsExp10) {
4746  // Put the exponent in the right bit position for later addition to the
4747  // final result:
4748  //
4749  // #define LOG2OF10 3.3219281f
4750  // t0 = Op * LOG2OF10;
4751  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
4752  getF32Constant(DAG, 0x40549a78, dl));
4753  return getLimitedPrecisionExp2(t0, dl, DAG);
4754  }
4755 
4756  // No special expansion.
4757  return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
4758 }
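
// For reference: 0x40549a78 is log2(10) ~= 3.3219281f, so the IsExp10 path
// rewrites pow(10.0f, y) as exp2(y * log2(10)) and reuses the limited-
// precision exp2 expansion. Worked check: pow(10, 2) becomes
// exp2(2 * 3.3219281f) = exp2(6.6438562f) ~= 100.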
4759 
4760 /// ExpandPowI - Expand a llvm.powi intrinsic.
4761 static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
4762  SelectionDAG &DAG) {
4763  // If RHS is a constant, we can expand this out to a multiplication tree;
4764  // otherwise we end up lowering to a call to __powidf2 (for example). When
4765  // optimizing for size, we only do the expansion if it needs a small number
4766  // of multiplies; otherwise we fall back to the libcall.
4767  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4768  // Get the exponent as a positive value.
4769  unsigned Val = RHSC->getSExtValue();
4770  if ((int)Val < 0) Val = -Val;
4771 
4772  // powi(x, 0) -> 1.0
4773  if (Val == 0)
4774  return DAG.getConstantFP(1.0, DL, LHS.getValueType());
4775 
4776  const Function &F = DAG.getMachineFunction().getFunction();
4777  if (!F.optForSize() ||
4778  // If optimizing for size, don't insert too many multiplies.
4779  // This inserts up to 5 multiplies.
4780  countPopulation(Val) + Log2_32(Val) < 7) {
4781  // We use the simple binary decomposition method to generate the multiply
4782  // sequence. There are more optimal ways to do this (for example,
4783  // powi(x,15) generates one more multiply than it should), but this has
4784  // the benefit of being both really simple and much better than a libcall.
4785  SDValue Res; // Logically starts equal to 1.0
4786  SDValue CurSquare = LHS;
4787  // TODO: Intrinsics should have fast-math-flags that propagate to these
4788  // nodes.
4789  while (Val) {
4790  if (Val & 1) {
4791  if (Res.getNode())
4792  Res = DAG.getNode(ISD::FMUL, DL, Res.getValueType(), Res, CurSquare);
4793  else
4794  Res = CurSquare; // 1.0*CurSquare.
4795  }
4796 
4797  CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
4798  CurSquare, CurSquare);
4799  Val >>= 1;
4800  }
4801 
4802  // If the original was negative, invert the result, producing 1/(x*x*x).
4803  if (RHSC->getSExtValue() < 0)
4804  Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
4805  DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
4806  return Res;
4807  }
4808  }
4809 
4810  // Otherwise, expand to a libcall.
4811  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
4812 }
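
// A scalar rendering of the binary-decomposition loop above (hypothetical
// helper, illustration only). For powi(x, 13), 13 = 0b1101 folds the x,
// x^4 and x^8 factors together, costing O(log n) multiplies:
static double powiSketch(double X, int Exp) {
  unsigned Val = (unsigned)Exp;
  if (Exp < 0)
    Val = 0 - Val; // Work with the magnitude, as the DAG version does.
  double Res = 1.0;  // "Logically starts equal to 1.0", as above.
  double CurSquare = X;
  while (Val) {
    if (Val & 1)
      Res *= CurSquare;     // Fold in this power-of-two factor.
    CurSquare *= CurSquare; // x, x^2, x^4, x^8, ...
    Val >>= 1;
  }
  // A negative exponent inverts the result, matching the FDIV above.
  return Exp < 0 ? 1.0 / Res : Res;
}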
4813 
4814 // getUnderlyingArgReg - Find underlying register used for a truncated or
4815 // bitcasted argument.
4816 static unsigned getUnderlyingArgReg(const SDValue &N) {
4817  switch (N.getOpcode()) {
4818  case ISD::CopyFromReg:
4819  return cast<RegisterSDNode>(N.getOperand(1))->getReg();
4820  case ISD::BITCAST:
4821  case ISD::AssertZext:
4822  case ISD::AssertSext:
4823  case ISD::TRUNCATE:
4824  return getUnderlyingArgReg(N.getOperand(0));
4825  default:
4826  return 0;
4827  }
4828 }
4829 
4830 /// If the DbgValueInst is a dbg_value of a function argument, create the
4831 /// corresponding DBG_VALUE machine instruction for it now. At the end of
4832 /// instruction selection, they will be inserted into the entry BB.
4833 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
4834  const Value *V, DILocalVariable *Variable, DIExpression *Expr,
4835  DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
4836  const Argument *Arg = dyn_cast<Argument>(V);
4837  if (!Arg)
4838  return false;
4839 
4840  MachineFunction &MF = DAG.getMachineFunction();
4841  const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
4842 
4843  bool IsIndirect = false;
4844  Optional<MachineOperand> Op;
4845  // Some arguments' frame index is recorded during argument lowering.
4846  int FI = FuncInfo.getArgumentFrameIndex(Arg);
4847  if (FI != std::numeric_limits<int>::max())
4848  Op = MachineOperand::CreateFI(FI);
4849 
4850  if (!Op && N.getNode()) {
4851  unsigned Reg = getUnderlyingArgReg(N);
4852  if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
4853  MachineRegisterInfo &RegInfo = MF.getRegInfo();
4854  unsigned PR = RegInfo.getLiveInPhysReg(Reg);
4855  if (PR)
4856  Reg = PR;
4857  }
4858  if (Reg) {
4859  Op = MachineOperand::CreateReg(Reg, false);
4860  IsIndirect = IsDbgDeclare;
4861  }
4862  }
4863 
4864  if (!Op && N.getNode())
4865  // Check if frame index is available.
4866  if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
4867  if (FrameIndexSDNode *FINode =
4868  dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
4869  Op = MachineOperand::CreateFI(FINode->getIndex());
4870 
4871  if (!Op) {
4872  // Check if ValueMap has reg number.
4873  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
4874  if (VMI != FuncInfo.ValueMap.end()) {
4875  const auto &TLI = DAG.getTargetLoweringInfo();
4876  RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
4877  V->getType(), isABIRegCopy(V));
4878  unsigned NumRegs =
4879  std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0);
4880  if (NumRegs > 1) {
4881  unsigned I = 0;
4882  unsigned Offset = 0;
4883  auto RegisterVT = RFV.RegVTs.begin();
4884  for (auto RegCount : RFV.RegCount) {
4885  unsigned RegisterSize = (RegisterVT++)->getSizeInBits();
4886  for (unsigned E = I + RegCount; I != E; ++I) {
4887  // The vregs are guaranteed to be allocated in sequence.
4888  Op = MachineOperand::CreateReg(VMI->second + I, false);
4889  auto FragmentExpr = DIExpression::createFragmentExpression(
4890  Expr, Offset, RegisterSize);
4891  if (!FragmentExpr)
4892  continue;
4893  FuncInfo.ArgDbgValues.push_back(
4894  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
4895  Op->getReg(), Variable, *FragmentExpr));
4896  Offset += RegisterSize;
4897  }
4898  }
4899  return true;
4900  }
4901  Op = MachineOperand::CreateReg(VMI->second, false);
4902  IsIndirect = IsDbgDeclare;
4903  }
4904  }
4905 
4906  if (!Op)
4907  return false;
4908 
4909  assert(Variable->isValidLocationForIntrinsic(DL) &&
4910  "Expected inlined-at fields to agree");
4911  if (Op->isReg())
4912  FuncInfo.ArgDbgValues.push_back(
4913  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
4914  Op->getReg(), Variable, Expr));
4915  else
4916  FuncInfo.ArgDbgValues.push_back(
4917  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
4918  .add(*Op)
4919  .addImm(0)
4920  .addMetadata(Variable)
4921  .addMetadata(Expr));
4922 
4923  return true;
4924 }
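
// For reference, in the multi-register branch above an argument lowered
// into several vregs is described by one DBG_VALUE per register, each
// carrying a DW_OP_LLVM_fragment whose offset advances by that register's
// size. Illustrative shape (not verbatim output) for an i128 argument
// split across four 32-bit vregs:
//   DBG_VALUE vreg0, ..., !DIExpression(DW_OP_LLVM_fragment, 0, 32)
//   DBG_VALUE vreg1, ..., !DIExpression(DW_OP_LLVM_fragment, 32, 32)
//   DBG_VALUE vreg2, ..., !DIExpression(DW_OP_LLVM_fragment, 64, 32)
//   DBG_VALUE vreg3, ..., !DIExpression(DW_OP_LLVM_fragment, 96, 32)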
4925 
4926 /// Return the appropriate SDDbgValue based on N.
4927 SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
4928  DILocalVariable *Variable,
4929  DIExpression *Expr,
4930  const DebugLoc &dl,
4931  unsigned DbgSDNodeOrder) {
4932  if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
4933  // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
4934  // stack slot locations as such instead of as indirectly addressed
4935  // locations.
4936  return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl,
4937  DbgSDNodeOrder);
4938  }
4939  return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl,
4940  DbgSDNodeOrder);
4941 }
4942 
4943 // VisualStudio defines setjmp as _setjmp
4944 #if defined(_MSC_VER) && defined(setjmp) && \
4945  !defined(setjmp_undefined_for_msvc)
4946 # pragma push_macro("setjmp")
4947 # undef setjmp
4948 # define setjmp_undefined_for_msvc
4949 #endif
4950 
4951 /// Lower the call to the specified intrinsic function. If we want to emit this
4952 /// as a call to a named external function, return the name. Otherwise, lower it
4953 /// and return null.
4954 const char *
4955 SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
4956  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4957  SDLoc sdl = getCurSDLoc();
4958  DebugLoc dl = getCurDebugLoc();
4959  SDValue Res;
4960 
4961  switch (Intrinsic) {
4962  default:
4963  // By default, turn this into a target intrinsic node.
4964  visitTargetIntrinsic(I, Intrinsic);
4965  return nullptr;
4966  case Intrinsic::vastart: visitVAStart(I); return nullptr;
4967  case Intrinsic::vaend: visitVAEnd(I); return nullptr;
4968  case Intrinsic::vacopy: visitVACopy(I); return nullptr;
4969  case Intrinsic::returnaddress:
4970  setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
4971  TLI.getPointerTy(DAG.getDataLayout()),
4972  getValue(I.getArgOperand(0))));
4973  return nullptr;
4974  case Intrinsic::addressofreturnaddress:
4975  setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
4976  TLI.getPointerTy(DAG.getDataLayout())));
4977  return nullptr;
4978  case Intrinsic::frameaddress:
4979  setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
4980  TLI.getPointerTy(DAG.getDataLayout()),
4981  getValue(I.getArgOperand(0))));
4982  return nullptr;
4983  case Intrinsic::read_register: {
4984  Value *Reg = I.getArgOperand(0);
4985  SDValue Chain = getRoot();
4986  SDValue RegName =
4987  DAG.