LLVM  6.0.0svn
SelectionDAGBuilder.cpp
Go to the documentation of this file.
1 //===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements routines for translating from LLVM IR into SelectionDAG IR.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SelectionDAGBuilder.h"
15 #include "llvm/ADT/APFloat.h"
16 #include "llvm/ADT/APInt.h"
17 #include "llvm/ADT/ArrayRef.h"
18 #include "llvm/ADT/BitVector.h"
19 #include "llvm/ADT/DenseMap.h"
20 #include "llvm/ADT/None.h"
21 #include "llvm/ADT/Optional.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallPtrSet.h"
24 #include "llvm/ADT/SmallSet.h"
25 #include "llvm/ADT/SmallVector.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/ADT/Triple.h"
28 #include "llvm/ADT/Twine.h"
33 #include "llvm/Analysis/Loads.h"
38 #include "llvm/CodeGen/Analysis.h"
57 #include "llvm/CodeGen/StackMaps.h"
66 #include "llvm/IR/Argument.h"
67 #include "llvm/IR/Attributes.h"
68 #include "llvm/IR/BasicBlock.h"
69 #include "llvm/IR/CFG.h"
70 #include "llvm/IR/CallSite.h"
71 #include "llvm/IR/CallingConv.h"
72 #include "llvm/IR/Constant.h"
73 #include "llvm/IR/ConstantRange.h"
74 #include "llvm/IR/Constants.h"
75 #include "llvm/IR/DataLayout.h"
77 #include "llvm/IR/DebugLoc.h"
78 #include "llvm/IR/DerivedTypes.h"
79 #include "llvm/IR/Function.h"
81 #include "llvm/IR/InlineAsm.h"
82 #include "llvm/IR/InstrTypes.h"
83 #include "llvm/IR/Instruction.h"
84 #include "llvm/IR/Instructions.h"
85 #include "llvm/IR/IntrinsicInst.h"
86 #include "llvm/IR/Intrinsics.h"
87 #include "llvm/IR/LLVMContext.h"
88 #include "llvm/IR/Metadata.h"
89 #include "llvm/IR/Module.h"
90 #include "llvm/IR/Operator.h"
91 #include "llvm/IR/Statepoint.h"
92 #include "llvm/IR/Type.h"
93 #include "llvm/IR/User.h"
94 #include "llvm/IR/Value.h"
95 #include "llvm/MC/MCContext.h"
96 #include "llvm/MC/MCSymbol.h"
99 #include "llvm/Support/Casting.h"
100 #include "llvm/Support/CodeGen.h"
102 #include "llvm/Support/Compiler.h"
103 #include "llvm/Support/Debug.h"
105 #include "llvm/Support/MathExtras.h"
110 #include <algorithm>
111 #include <cassert>
112 #include <cstddef>
113 #include <cstdint>
114 #include <cstring>
115 #include <iterator>
116 #include <limits>
117 #include <numeric>
118 #include <tuple>
119 #include <utility>
120 #include <vector>
121 
122 using namespace llvm;
123 
124 #define DEBUG_TYPE "isel"
125 
126 /// LimitFloatPrecision - Generate low-precision inline sequences for
127 /// some float libcalls (6, 8 or 12 bits).
128 static unsigned LimitFloatPrecision;
129 
131 LimitFPPrecision("limit-float-precision",
132  cl::desc("Generate low-precision inline sequences "
133  "for some float libcalls"),
134  cl::location(LimitFloatPrecision),
135  cl::init(0));
136 
138  "switch-peel-threshold", cl::Hidden, cl::init(66),
139  cl::desc("Set the case probability threshold for peeling the case from a "
140  "switch statement. A value greater than 100 will void this "
141  "optimization"));
142 
// Limit the width of DAG chains. This is important in general to prevent
// DAG-based analysis from blowing up. For example, alias analysis and
// load clustering may not complete in reasonable time. It is difficult to
// recognize and avoid this situation within each individual analysis, and
// future analyses are likely to have the same behavior. Limiting DAG width is
// the safe approach and will be especially important with global DAGs.
//
// MaxParallelChains default is arbitrarily high to avoid affecting
// optimization, but could be lowered to improve compile time. Any ld-ld-st-st
// sequence over this should have been converted to llvm.memcpy by the
// frontend. It is easy to induce this behavior with .ll code such as:
//   %buffer = alloca [4096 x i8]
//   %data = load [4096 x i8]* %argPtr
//   store [4096 x i8] %data, [4096 x i8]* %buffer
static const unsigned MaxParallelChains = 64;
158 
159 // True if the Value passed requires ABI mangling as it is a parameter to a
160 // function or a return value from a function which is not an intrinsic.
161 static bool isABIRegCopy(const Value *V) {
162  const bool IsRetInst = V && isa<ReturnInst>(V);
163  const bool IsCallInst = V && isa<CallInst>(V);
164  const bool IsInLineAsm =
165  IsCallInst && static_cast<const CallInst *>(V)->isInlineAsm();
166  const bool IsIndirectFunctionCall =
167  IsCallInst && !IsInLineAsm &&
168  !static_cast<const CallInst *>(V)->getCalledFunction();
169  // It is possible that the call instruction is an inline asm statement or an
170  // indirect function call in which case the return value of
171  // getCalledFunction() would be nullptr.
172  const bool IsInstrinsicCall =
173  IsCallInst && !IsInLineAsm && !IsIndirectFunctionCall &&
174  static_cast<const CallInst *>(V)->getCalledFunction()->getIntrinsicID() !=
176 
177  return IsRetInst || (IsCallInst && (!IsInLineAsm && !IsInstrinsicCall));
178 }
179 
180 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
181  const SDValue *Parts, unsigned NumParts,
182  MVT PartVT, EVT ValueVT, const Value *V,
183  bool IsABIRegCopy);
184 
185 /// getCopyFromParts - Create a value that contains the specified legal parts
186 /// combined into the value they represent. If the parts combine to a type
187 /// larger than ValueVT then AssertOp can be used to specify whether the extra
188 /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
189 /// (ISD::AssertSext).
191  const SDValue *Parts, unsigned NumParts,
192  MVT PartVT, EVT ValueVT, const Value *V,
193  Optional<ISD::NodeType> AssertOp = None,
194  bool IsABIRegCopy = false) {
195  if (ValueVT.isVector())
196  return getCopyFromPartsVector(DAG, DL, Parts, NumParts,
197  PartVT, ValueVT, V, IsABIRegCopy);
198 
199  assert(NumParts > 0 && "No parts to assemble!");
200  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
201  SDValue Val = Parts[0];
202 
203  if (NumParts > 1) {
204  // Assemble the value from multiple parts.
205  if (ValueVT.isInteger()) {
206  unsigned PartBits = PartVT.getSizeInBits();
207  unsigned ValueBits = ValueVT.getSizeInBits();
208 
209  // Assemble the power of 2 part.
210  unsigned RoundParts = NumParts & (NumParts - 1) ?
211  1 << Log2_32(NumParts) : NumParts;
212  unsigned RoundBits = PartBits * RoundParts;
213  EVT RoundVT = RoundBits == ValueBits ?
214  ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
215  SDValue Lo, Hi;
216 
217  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
218 
219  if (RoundParts > 2) {
220  Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
221  PartVT, HalfVT, V);
222  Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
223  RoundParts / 2, PartVT, HalfVT, V);
224  } else {
225  Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
226  Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
227  }
228 
229  if (DAG.getDataLayout().isBigEndian())
230  std::swap(Lo, Hi);
231 
232  Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
233 
234  if (RoundParts < NumParts) {
235  // Assemble the trailing non-power-of-2 part.
236  unsigned OddParts = NumParts - RoundParts;
237  EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
238  Hi = getCopyFromParts(DAG, DL,
239  Parts + RoundParts, OddParts, PartVT, OddVT, V);
240 
241  // Combine the round and odd parts.
242  Lo = Val;
243  if (DAG.getDataLayout().isBigEndian())
244  std::swap(Lo, Hi);
245  EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
246  Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
247  Hi =
248  DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
249  DAG.getConstant(Lo.getValueSizeInBits(), DL,
250  TLI.getPointerTy(DAG.getDataLayout())));
251  Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
252  Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
253  }
254  } else if (PartVT.isFloatingPoint()) {
255  // FP split into multiple FP parts (for ppcf128)
256  assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
257  "Unexpected split");
258  SDValue Lo, Hi;
259  Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
260  Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
261  if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
262  std::swap(Lo, Hi);
263  Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
264  } else {
265  // FP split into integer parts (soft fp)
266  assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
267  !PartVT.isVector() && "Unexpected split");
268  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
269  Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V);
270  }
271  }
272 
273  // There is now one part, held in Val. Correct it to match ValueVT.
274  // PartEVT is the type of the register class that holds the value.
275  // ValueVT is the type of the inline asm operation.
276  EVT PartEVT = Val.getValueType();
277 
278  if (PartEVT == ValueVT)
279  return Val;
280 
281  if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
282  ValueVT.bitsLT(PartEVT)) {
283  // For an FP value in an integer part, we need to truncate to the right
284  // width first.
285  PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
286  Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
287  }
288 
289  // Handle types that have the same size.
290  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
291  return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
292 
293  // Handle types with different sizes.
294  if (PartEVT.isInteger() && ValueVT.isInteger()) {
295  if (ValueVT.bitsLT(PartEVT)) {
296  // For a truncate, see if we have any information to
297  // indicate whether the truncated bits will always be
298  // zero or sign-extension.
299  if (AssertOp.hasValue())
300  Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
301  DAG.getValueType(ValueVT));
302  return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
303  }
304  return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
305  }
306 
307  if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
308  // FP_ROUND's are always exact here.
309  if (ValueVT.bitsLT(Val.getValueType()))
310  return DAG.getNode(
311  ISD::FP_ROUND, DL, ValueVT, Val,
312  DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
313 
314  return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
315  }
316 
317  llvm_unreachable("Unknown mismatch!");
318 }
319 
321  const Twine &ErrMsg) {
322  const Instruction *I = dyn_cast_or_null<Instruction>(V);
323  if (!V)
324  return Ctx.emitError(ErrMsg);
325 
326  const char *AsmError = ", possible invalid constraint for vector type";
327  if (const CallInst *CI = dyn_cast<CallInst>(I))
328  if (isa<InlineAsm>(CI->getCalledValue()))
329  return Ctx.emitError(I, ErrMsg + AsmError);
330 
331  return Ctx.emitError(I, ErrMsg);
332 }
333 
334 /// getCopyFromPartsVector - Create a value that contains the specified legal
335 /// parts combined into the value they represent. If the parts combine to a
336 /// type larger than ValueVT then AssertOp can be used to specify whether the
337 /// extra bits are known to be zero (ISD::AssertZext) or sign extended from
338 /// ValueVT (ISD::AssertSext).
340  const SDValue *Parts, unsigned NumParts,
341  MVT PartVT, EVT ValueVT, const Value *V,
342  bool IsABIRegCopy) {
343  assert(ValueVT.isVector() && "Not a vector value");
344  assert(NumParts > 0 && "No parts to assemble!");
345  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
346  SDValue Val = Parts[0];
347 
348  // Handle a multi-element vector.
349  if (NumParts > 1) {
350  EVT IntermediateVT;
351  MVT RegisterVT;
352  unsigned NumIntermediates;
353  unsigned NumRegs;
354 
355  if (IsABIRegCopy) {
357  *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
358  RegisterVT);
359  } else {
360  NumRegs =
361  TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
362  NumIntermediates, RegisterVT);
363  }
364 
365  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
366  NumParts = NumRegs; // Silence a compiler warning.
367  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
368  assert(RegisterVT.getSizeInBits() ==
369  Parts[0].getSimpleValueType().getSizeInBits() &&
370  "Part type sizes don't match!");
371 
372  // Assemble the parts into intermediate operands.
373  SmallVector<SDValue, 8> Ops(NumIntermediates);
374  if (NumIntermediates == NumParts) {
375  // If the register was not expanded, truncate or copy the value,
376  // as appropriate.
377  for (unsigned i = 0; i != NumParts; ++i)
378  Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
379  PartVT, IntermediateVT, V);
380  } else if (NumParts > 0) {
381  // If the intermediate type was expanded, build the intermediate
382  // operands from the parts.
383  assert(NumParts % NumIntermediates == 0 &&
384  "Must expand into a divisible number of parts!");
385  unsigned Factor = NumParts / NumIntermediates;
386  for (unsigned i = 0; i != NumIntermediates; ++i)
387  Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
388  PartVT, IntermediateVT, V);
389  }
390 
391  // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
392  // intermediate operands.
393  EVT BuiltVectorTy =
394  EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
395  (IntermediateVT.isVector()
396  ? IntermediateVT.getVectorNumElements() * NumParts
397  : NumIntermediates));
398  Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
400  DL, BuiltVectorTy, Ops);
401  }
402 
403  // There is now one part, held in Val. Correct it to match ValueVT.
404  EVT PartEVT = Val.getValueType();
405 
406  if (PartEVT == ValueVT)
407  return Val;
408 
409  if (PartEVT.isVector()) {
410  // If the element type of the source/dest vectors are the same, but the
411  // parts vector has more elements than the value vector, then we have a
412  // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
413  // elements we want.
414  if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
415  assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
416  "Cannot narrow, it would be a lossy transformation");
417  return DAG.getNode(
418  ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
419  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
420  }
421 
422  // Vector/Vector bitcast.
423  if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
424  return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
425 
426  assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
427  "Cannot handle this kind of promotion");
428  // Promoted vector extract
429  return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
430 
431  }
432 
433  // Trivial bitcast if the types are the same size and the destination
434  // vector type is legal.
435  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
436  TLI.isTypeLegal(ValueVT))
437  return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
438 
439  if (ValueVT.getVectorNumElements() != 1) {
440  // Certain ABIs require that vectors are passed as integers. For vectors
441  // are the same size, this is an obvious bitcast.
442  if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
443  return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
444  } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
445  // Bitcast Val back the original type and extract the corresponding
446  // vector we want.
447  unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
448  EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
449  ValueVT.getVectorElementType(), Elts);
450  Val = DAG.getBitcast(WiderVecType, Val);
451  return DAG.getNode(
452  ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
453  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
454  }
455 
457  *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
458  return DAG.getUNDEF(ValueVT);
459  }
460 
461  // Handle cases such as i8 -> <1 x i1>
462  EVT ValueSVT = ValueVT.getVectorElementType();
463  if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
464  Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
465  : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
466 
467  return DAG.getBuildVector(ValueVT, DL, Val);
468 }
469 
470 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
471  SDValue Val, SDValue *Parts, unsigned NumParts,
472  MVT PartVT, const Value *V, bool IsABIRegCopy);
473 
474 /// getCopyToParts - Create a series of nodes that contain the specified value
475 /// split into legal parts. If the parts contain more bits than Val, then, for
476 /// integers, ExtendKind can be used to specify how to generate the extra bits.
477 static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
478  SDValue *Parts, unsigned NumParts, MVT PartVT,
479  const Value *V,
480  ISD::NodeType ExtendKind = ISD::ANY_EXTEND,
481  bool IsABIRegCopy = false) {
482  EVT ValueVT = Val.getValueType();
483 
484  // Handle the vector case separately.
485  if (ValueVT.isVector())
486  return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
487  IsABIRegCopy);
488 
489  unsigned PartBits = PartVT.getSizeInBits();
490  unsigned OrigNumParts = NumParts;
491  assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
492  "Copying to an illegal type!");
493 
494  if (NumParts == 0)
495  return;
496 
497  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
498  EVT PartEVT = PartVT;
499  if (PartEVT == ValueVT) {
500  assert(NumParts == 1 && "No-op copy with multiple parts!");
501  Parts[0] = Val;
502  return;
503  }
504 
505  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
506  // If the parts cover more bits than the value has, promote the value.
507  if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
508  assert(NumParts == 1 && "Do not know what to promote to!");
509  Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
510  } else {
511  if (ValueVT.isFloatingPoint()) {
512  // FP values need to be bitcast, then extended if they are being put
513  // into a larger container.
514  ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
515  Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
516  }
517  assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
518  ValueVT.isInteger() &&
519  "Unknown mismatch!");
520  ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
521  Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
522  if (PartVT == MVT::x86mmx)
523  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
524  }
525  } else if (PartBits == ValueVT.getSizeInBits()) {
526  // Different types of the same size.
527  assert(NumParts == 1 && PartEVT != ValueVT);
528  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
529  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
530  // If the parts cover less bits than value has, truncate the value.
531  assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
532  ValueVT.isInteger() &&
533  "Unknown mismatch!");
534  ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
535  Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
536  if (PartVT == MVT::x86mmx)
537  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
538  }
539 
540  // The value may have changed - recompute ValueVT.
541  ValueVT = Val.getValueType();
542  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
543  "Failed to tile the value with PartVT!");
544 
545  if (NumParts == 1) {
546  if (PartEVT != ValueVT) {
548  "scalar-to-vector conversion failed");
549  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
550  }
551 
552  Parts[0] = Val;
553  return;
554  }
555 
556  // Expand the value into multiple parts.
557  if (NumParts & (NumParts - 1)) {
558  // The number of parts is not a power of 2. Split off and copy the tail.
559  assert(PartVT.isInteger() && ValueVT.isInteger() &&
560  "Do not know what to expand to!");
561  unsigned RoundParts = 1 << Log2_32(NumParts);
562  unsigned RoundBits = RoundParts * PartBits;
563  unsigned OddParts = NumParts - RoundParts;
564  SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
565  DAG.getIntPtrConstant(RoundBits, DL));
566  getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V);
567 
568  if (DAG.getDataLayout().isBigEndian())
569  // The odd parts were reversed by getCopyToParts - unreverse them.
570  std::reverse(Parts + RoundParts, Parts + NumParts);
571 
572  NumParts = RoundParts;
573  ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
574  Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
575  }
576 
577  // The number of parts is a power of 2. Repeatedly bisect the value using
578  // EXTRACT_ELEMENT.
579  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
581  ValueVT.getSizeInBits()),
582  Val);
583 
584  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
585  for (unsigned i = 0; i < NumParts; i += StepSize) {
586  unsigned ThisBits = StepSize * PartBits / 2;
587  EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
588  SDValue &Part0 = Parts[i];
589  SDValue &Part1 = Parts[i+StepSize/2];
590 
591  Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
592  ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
593  Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
594  ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
595 
596  if (ThisBits == PartBits && ThisVT != PartVT) {
597  Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
598  Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
599  }
600  }
601  }
602 
603  if (DAG.getDataLayout().isBigEndian())
604  std::reverse(Parts, Parts + OrigNumParts);
605 }
606 
607 
608 /// getCopyToPartsVector - Create a series of nodes that contain the specified
609 /// value split into legal parts.
610 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
611  SDValue Val, SDValue *Parts, unsigned NumParts,
612  MVT PartVT, const Value *V,
613  bool IsABIRegCopy) {
614  EVT ValueVT = Val.getValueType();
615  assert(ValueVT.isVector() && "Not a vector");
616  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
617 
618  if (NumParts == 1) {
619  EVT PartEVT = PartVT;
620  if (PartEVT == ValueVT) {
621  // Nothing to do.
622  } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
623  // Bitconvert vector->vector case.
624  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
625  } else if (PartVT.isVector() &&
626  PartEVT.getVectorElementType() == ValueVT.getVectorElementType() &&
627  PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) {
628  EVT ElementVT = PartVT.getVectorElementType();
629  // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
630  // undef elements.
632  for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i)
633  Ops.push_back(DAG.getNode(
634  ISD::EXTRACT_VECTOR_ELT, DL, ElementVT, Val,
635  DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout()))));
636 
637  for (unsigned i = ValueVT.getVectorNumElements(),
638  e = PartVT.getVectorNumElements(); i != e; ++i)
639  Ops.push_back(DAG.getUNDEF(ElementVT));
640 
641  Val = DAG.getBuildVector(PartVT, DL, Ops);
642 
643  // FIXME: Use CONCAT for 2x -> 4x.
644 
645  //SDValue UndefElts = DAG.getUNDEF(VectorTy);
646  //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts);
647  } else if (PartVT.isVector() &&
648  PartEVT.getVectorElementType().bitsGE(
649  ValueVT.getVectorElementType()) &&
650  PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
651 
652  // Promoted vector extract
653  Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
654  } else {
655  if (ValueVT.getVectorNumElements() == 1) {
656  Val = DAG.getNode(
657  ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
658  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
659  } else {
660  assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
661  "lossy conversion of vector to scalar type");
662  EVT IntermediateType =
663  EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
664  Val = DAG.getBitcast(IntermediateType, Val);
665  Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
666  }
667  }
668 
669  assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
670  Parts[0] = Val;
671  return;
672  }
673 
674  // Handle a multi-element vector.
675  EVT IntermediateVT;
676  MVT RegisterVT;
677  unsigned NumIntermediates;
678  unsigned NumRegs;
679  if (IsABIRegCopy) {
680  NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
681  *DAG.getContext(), ValueVT, IntermediateVT, NumIntermediates,
682  RegisterVT);
683  } else {
684  NumRegs =
685  TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
686  NumIntermediates, RegisterVT);
687  }
688  unsigned NumElements = ValueVT.getVectorNumElements();
689 
690  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
691  NumParts = NumRegs; // Silence a compiler warning.
692  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
693 
694  // Convert the vector to the appropiate type if necessary.
695  unsigned DestVectorNoElts =
696  NumIntermediates *
697  (IntermediateVT.isVector() ? IntermediateVT.getVectorNumElements() : 1);
698  EVT BuiltVectorTy = EVT::getVectorVT(
699  *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
700  if (Val.getValueType() != BuiltVectorTy)
701  Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
702 
703  // Split the vector into intermediate operands.
704  SmallVector<SDValue, 8> Ops(NumIntermediates);
705  for (unsigned i = 0; i != NumIntermediates; ++i) {
706  if (IntermediateVT.isVector())
707  Ops[i] =
708  DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
709  DAG.getConstant(i * (NumElements / NumIntermediates), DL,
710  TLI.getVectorIdxTy(DAG.getDataLayout())));
711  else
712  Ops[i] = DAG.getNode(
713  ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
714  DAG.getConstant(i, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
715  }
716 
717  // Split the intermediate operands into legal parts.
718  if (NumParts == NumIntermediates) {
719  // If the register was not expanded, promote or copy the value,
720  // as appropriate.
721  for (unsigned i = 0; i != NumParts; ++i)
722  getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V);
723  } else if (NumParts > 0) {
724  // If the intermediate type was expanded, split each the value into
725  // legal parts.
726  assert(NumIntermediates != 0 && "division by zero");
727  assert(NumParts % NumIntermediates == 0 &&
728  "Must expand into a divisible number of parts!");
729  unsigned Factor = NumParts / NumIntermediates;
730  for (unsigned i = 0; i != NumIntermediates; ++i)
731  getCopyToParts(DAG, DL, Ops[i], &Parts[i*Factor], Factor, PartVT, V);
732  }
733 }
734 
736  EVT valuevt, bool IsABIMangledValue)
737  : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
738  RegCount(1, regs.size()), IsABIMangled(IsABIMangledValue) {}
739 
741  const DataLayout &DL, unsigned Reg, Type *Ty,
742  bool IsABIMangledValue) {
743  ComputeValueVTs(TLI, DL, Ty, ValueVTs);
744 
745  IsABIMangled = IsABIMangledValue;
746 
747  for (EVT ValueVT : ValueVTs) {
748  unsigned NumRegs = IsABIMangledValue
749  ? TLI.getNumRegistersForCallingConv(Context, ValueVT)
750  : TLI.getNumRegisters(Context, ValueVT);
751  MVT RegisterVT = IsABIMangledValue
752  ? TLI.getRegisterTypeForCallingConv(Context, ValueVT)
753  : TLI.getRegisterType(Context, ValueVT);
754  for (unsigned i = 0; i != NumRegs; ++i)
755  Regs.push_back(Reg + i);
756  RegVTs.push_back(RegisterVT);
757  RegCount.push_back(NumRegs);
758  Reg += NumRegs;
759  }
760 }
761 
763  FunctionLoweringInfo &FuncInfo,
764  const SDLoc &dl, SDValue &Chain,
765  SDValue *Flag, const Value *V) const {
766  // A Value with type {} or [0 x %t] needs no registers.
767  if (ValueVTs.empty())
768  return SDValue();
769 
770  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
771 
772  // Assemble the legal parts into the final values.
773  SmallVector<SDValue, 4> Values(ValueVTs.size());
775  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
776  // Copy the legal parts from the registers.
777  EVT ValueVT = ValueVTs[Value];
778  unsigned NumRegs = RegCount[Value];
779  MVT RegisterVT = IsABIMangled
781  : RegVTs[Value];
782 
783  Parts.resize(NumRegs);
784  for (unsigned i = 0; i != NumRegs; ++i) {
785  SDValue P;
786  if (!Flag) {
787  P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
788  } else {
789  P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
790  *Flag = P.getValue(2);
791  }
792 
793  Chain = P.getValue(1);
794  Parts[i] = P;
795 
796  // If the source register was virtual and if we know something about it,
797  // add an assert node.
799  !RegisterVT.isInteger() || RegisterVT.isVector())
800  continue;
801 
803  FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
804  if (!LOI)
805  continue;
806 
807  unsigned RegSize = RegisterVT.getSizeInBits();
808  unsigned NumSignBits = LOI->NumSignBits;
809  unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
810 
811  if (NumZeroBits == RegSize) {
812  // The current value is a zero.
813  // Explicitly express that as it would be easier for
814  // optimizations to kick in.
815  Parts[i] = DAG.getConstant(0, dl, RegisterVT);
816  continue;
817  }
818 
819  // FIXME: We capture more information than the dag can represent. For
820  // now, just use the tightest assertzext/assertsext possible.
821  bool isSExt = true;
822  EVT FromVT(MVT::Other);
823  if (NumSignBits == RegSize) {
824  isSExt = true; // ASSERT SEXT 1
825  FromVT = MVT::i1;
826  } else if (NumZeroBits >= RegSize - 1) {
827  isSExt = false; // ASSERT ZEXT 1
828  FromVT = MVT::i1;
829  } else if (NumSignBits > RegSize - 8) {
830  isSExt = true; // ASSERT SEXT 8
831  FromVT = MVT::i8;
832  } else if (NumZeroBits >= RegSize - 8) {
833  isSExt = false; // ASSERT ZEXT 8
834  FromVT = MVT::i8;
835  } else if (NumSignBits > RegSize - 16) {
836  isSExt = true; // ASSERT SEXT 16
837  FromVT = MVT::i16;
838  } else if (NumZeroBits >= RegSize - 16) {
839  isSExt = false; // ASSERT ZEXT 16
840  FromVT = MVT::i16;
841  } else if (NumSignBits > RegSize - 32) {
842  isSExt = true; // ASSERT SEXT 32
843  FromVT = MVT::i32;
844  } else if (NumZeroBits >= RegSize - 32) {
845  isSExt = false; // ASSERT ZEXT 32
846  FromVT = MVT::i32;
847  } else {
848  continue;
849  }
850  // Add an assertion node.
851  assert(FromVT != MVT::Other);
852  Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
853  RegisterVT, P, DAG.getValueType(FromVT));
854  }
855 
856  Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
857  NumRegs, RegisterVT, ValueVT, V);
858  Part += NumRegs;
859  Parts.clear();
860  }
861 
862  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
863 }
864 
// NOTE(review): the opening line of this definition (listing line 865: return
// type, function name and leading parameters) is absent from this listing, and
// so is listing line 879 (the '?' arm of the IsABIMangled conditional below).
// `DAG` and `Val` are used in the body but declared on no visible line, so
// they are presumably parameters from the missing line -- restore both lines
// from the upstream source before building.
//
// Visible behavior: for each entry of ValueVTs, split the corresponding result
// of Val into RegCount[Value] parts via getCopyToParts, then emit one
// CopyToReg per entry of Regs. Chain is in/out: it feeds every copy and is
// overwritten with the resulting chain. When Flag is non-null, each copy also
// takes *Flag as an operand and *Flag is updated to result 1 of that copy.
866  const SDLoc &dl, SDValue &Chain, SDValue *Flag,
867  const Value *V,
868  ISD::NodeType PreferredExtendType) const {
869  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
870  ISD::NodeType ExtendKind = PreferredExtendType;
871 
872  // Get the list of the value's legal parts.
873  unsigned NumRegs = Regs.size();
874  SmallVector<SDValue, 8> Parts(NumRegs);
875  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
876  unsigned NumParts = RegCount[Value];
877 
      // NOTE(review): listing line 879 (the value chosen when IsABIMangled is
      // true) is missing between the next two lines.
878  MVT RegisterVT = IsABIMangled
880  : RegVTs[Value];
881 
      // Upgrade an unspecified extension to a zero extension when the target
      // reports it as free. ExtendKind is not reset per value, so once it has
      // been upgraded it stays ZERO_EXTEND for the remaining values.
882  if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
883  ExtendKind = ISD::ZERO_EXTEND;
884 
885  getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value),
886  &Parts[Part], NumParts, RegisterVT, V, ExtendKind);
887  Part += NumParts;
888  }
889 
890  // Copy the parts into the registers.
      // Every copy is given the same incoming Chain; the individual output
      // chains are collected in Chains and combined after the loop.
891  SmallVector<SDValue, 8> Chains(NumRegs);
892  for (unsigned i = 0; i != NumRegs; ++i) {
893  SDValue Part;
894  if (!Flag) {
895  Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
896  } else {
897  Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
898  *Flag = Part.getValue(1);
899  }
900 
901  Chains[i] = Part.getValue(0);
902  }
903 
      // NOTE(review): with a non-null Flag and an empty Regs, NumRegs-1 would
      // index out of range; presumably callers never do that -- confirm.
904  if (NumRegs == 1 || Flag)
905  // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
906  // flagged to it. That is the CopyToReg nodes and the user are considered
907  // a single scheduling unit. If we create a TokenFactor and return it as
908  // chain, then the TokenFactor is both a predecessor (operand) of the
909  // user as well as a successor (the TF operands are flagged to the user).
910  // c1, f1 = CopyToReg
911  // c2, f2 = CopyToReg
912  // c3 = TokenFactor c1, c2
913  // ...
914  // = op c3, ..., f2
915  Chain = Chains[NumRegs-1];
916  else
917  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
918 }
919 
// Appends to Ops the inline-asm operand encoding for this register set: first
// an i32 target constant holding the flag word built from Code and
// Regs.size() (optionally refined with MatchingIdx or a register class ID),
// then one register node per register.
//
// NOTE(review): listing lines 930, 936 and 956 are absent from this listing:
// the remainder of the `else if` condition, the line that presumably declares
// `MRI`, and the second operand of the stack-pointer assert. Restore them from
// the upstream source before building.
920 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
921  unsigned MatchingIdx, const SDLoc &dl,
922  SelectionDAG &DAG,
923  std::vector<SDValue> &Ops) const {
924  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
925 
926  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
927  if (HasMatching)
928  Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
929  else if (!Regs.empty() &&
      // NOTE(review): listing line 930 (rest of the condition) is missing here.
931  // Put the register class of the virtual registers in the flag word. That
932  // way, later passes can recompute register class constraints for inline
933  // assembly as well as normal instructions.
934  // Don't do this for tied operands that can use the regclass information
935  // from the def.
      // NOTE(review): listing line 936 is missing here; `MRI` is not declared
      // on any visible line.
937  const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
938  Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
939  }
940 
941  SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
942  Ops.push_back(Res);
943 
944  if (Code == InlineAsm::Kind_Clobber) {
945  // Clobbers should always have a 1:1 mapping with registers, and may
946  // reference registers that have illegal (e.g. vector) types. Hence, we
947  // shouldn't try to apply any sort of splitting logic to them.
948  assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
949  "No 1:1 mapping from clobbers to regs?");
950  unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
      // SP is only read inside the assert below; the cast silences the
      // unused-variable warning when asserts are compiled out.
951  (void)SP;
952  for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
953  Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
954  assert(
955  (Regs[I] != SP ||
      // NOTE(review): listing line 956 (second operand of the `||`) is missing.
957  "If we clobbered the stack pointer, MFI should know about it.");
958  }
959  return;
960  }
961 
      // Non-clobber operands: each value contributes getNumRegisters(...)
      // consecutive entries of Regs, all typed with that value's RegVTs entry.
962  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
963  unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
964  MVT RegisterVT = RegVTs[Value];
965  for (unsigned i = 0; i != NumRegs; ++i) {
966  assert(Reg < Regs.size() && "Mismatch in # registers expected");
967  unsigned TheReg = Regs[Reg++];
968  Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
969  }
970  }
971 }
972 
// NOTE(review): the opening line of this definition (listing line 973, with
// the function name and the leading parameters) is absent from this listing;
// `aa` and `gfi` are presumably parameters declared there -- confirm against
// the upstream source.
//
// Stores the supplied pointers (aa, gfi, li) in the AA, GFI and LibInfo
// members, caches the DAG's data layout and context in DL and Context, and
// empties LPadToCallSiteMap.
974  const TargetLibraryInfo *li) {
975  AA = aa;
976  GFI = gfi;
977  LibInfo = li;
978  DL = &DAG.getDataLayout();
979  Context = DAG.getContext();
980  LPadToCallSiteMap.clear();
981 }
982 
// NOTE(review): the signature line of this definition (listing line 983) is
// absent from this listing; restore it from the upstream source.
//
// Resets builder state: empties NodeMap, UnusedArgNodeMap, PendingLoads and
// PendingExports, nulls CurInst, clears HasTailCall, rewinds SDNodeOrder to
// LowestSDNodeOrder and clears StatepointLowering. DanglingDebugInfoMap is not
// touched here.
984  NodeMap.clear();
985  UnusedArgNodeMap.clear();
986  PendingLoads.clear();
987  PendingExports.clear();
988  CurInst = nullptr;
989  HasTailCall = false;
990  SDNodeOrder = LowestSDNodeOrder;
991  StatepointLowering.clear();
992 }
993 
// NOTE(review): the signature line of this definition (listing line 994) is
// absent from this listing; restore it from the upstream source.
//
// Empties DanglingDebugInfoMap.
995  DanglingDebugInfoMap.clear();
996 }
997 
// NOTE(review): the signature line of this definition (listing line 998) is
// absent from this listing; restore it from the upstream source.
//
// Returns the DAG root after folding PendingLoads into it:
//  - nothing pending: the current root is returned unchanged;
//  - exactly one pending load: it becomes the root;
//  - several: a TokenFactor over all of them becomes the root.
// PendingLoads is empty on return in every case.
999  if (PendingLoads.empty())
1000  return DAG.getRoot();
1001 
      // A single pending load can serve as the root directly; no TokenFactor
      // is needed.
1002  if (PendingLoads.size() == 1) {
1003  SDValue Root = PendingLoads[0];
1004  DAG.setRoot(Root);
1005  PendingLoads.clear();
1006  return Root;
1007  }
1008 
1009  // Otherwise, we have to make a token factor node.
1010  SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
1011  PendingLoads);
1012  PendingLoads.clear();
1013  DAG.setRoot(Root);
1014  return Root;
1015 }
1016 
// NOTE(review): the signature line of this definition (listing line 1017) is
// absent from this listing; restore it from the upstream source.
//
// Returns the DAG root after folding PendingExports into it. With nothing
// pending the current root is returned unchanged; otherwise a TokenFactor over
// the pending exports (plus the current root when needed, see below) becomes
// the new root and PendingExports is emptied.
1018  SDValue Root = DAG.getRoot();
1019 
1020  if (PendingExports.empty())
1021  return Root;
1022 
1023  // Turn all of the CopyToReg chains into one factored node.
      // The current root is appended unless it is the entry token or some
      // pending export already has it as operand 0.
1024  if (Root.getOpcode() != ISD::EntryToken) {
1025  unsigned i = 0, e = PendingExports.size();
1026  for (; i != e; ++i) {
1027  assert(PendingExports[i].getNode()->getNumOperands() > 1);
1028  if (PendingExports[i].getNode()->getOperand(0) == Root)
1029  break; // Don't add the root if we already indirectly depend on it.
1030  }
1031 
      // i == e means the scan above found no export chained to Root.
1032  if (i == e)
1033  PendingExports.push_back(Root);
1034  }
1035 
1036  Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
1037  PendingExports);
1038  PendingExports.clear();
1039  DAG.setRoot(Root);
1040  return Root;
1041 }
1042 
// NOTE(review): the signature line of this definition (listing line 1043) is
// absent from this listing; `I` is presumably its instruction parameter --
// restore the line from the upstream source.
//
// Lowers one instruction: prepares successor PHI inputs for terminators, bumps
// SDNodeOrder for non-debug instructions, dispatches on the opcode with CurInst
// set for the duration, and afterwards copies the result to its export
// registers where applicable.
1044  // Set up outgoing PHI node register values before emitting the terminator.
1045  if (isa<TerminatorInst>(&I)) {
1046  HandlePHINodesInSuccessorBlocks(I.getParent());
1047  }
1048 
1049  // Increase the SDNodeOrder if dealing with a non-debug instruction.
1050  if (!isa<DbgInfoIntrinsic>(I))
1051  ++SDNodeOrder;
1052 
1053  CurInst = &I;
1054 
1055  visit(I.getOpcode(), I);
1056 
      // Terminators, instructions lowered while HasTailCall is set, and
      // statepoints are excluded from the export copy.
1057  if (!isa<TerminatorInst>(&I) && !HasTailCall &&
1058  !isStatepoint(&I)) // statepoints handle their exports internally
1059  CopyToExportRegsIfNeeded(&I);
1060 
1061  CurInst = nullptr;
1062 }
1063 
1064 void SelectionDAGBuilder::visitPHI(const PHINode &) {
1065  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
1066 }
1067 
// Dispatches I to the visit<OPCODE> handler selected by Opcode.
//
// The case labels are generated by expanding HANDLE_INST over
// llvm/IR/Instruction.def: each entry becomes a case that casts I to the
// entry's class and calls the matching handler. An opcode with no generated
// case falls into `default` and hits llvm_unreachable.
1068 void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
1069  // Note: this doesn't use InstVisitor, because it has to work with
1070  // ConstantExpr's in addition to instructions.
1071  switch (Opcode) {
1072  default: llvm_unreachable("Unknown instruction type encountered!");
1073  // Build the switch statement using the Instruction.def file.
1074 #define HANDLE_INST(NUM, OPCODE, CLASS) \
1075  case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
1076 #include "llvm/IR/Instruction.def"
1077  }
1078 }
1079 
1080 // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
1081 // generate the debug data structures now that we've seen its definition.
//
// When Val has a node, the debug value is attached to it unless
// EmitFuncArgumentDbgValue reports it handled the value; when Val is empty the
// debug info is dropped with a DEBUG message. In both cases the map entry for
// V is reset afterwards.
//
// NOTE(review): the first line of the signature (listing line 1082) is absent
// from this listing; `V` is presumably declared there -- restore it from the
// upstream source.
1083  SDValue Val) {
      // Indexing with operator[] also happens when V has no dangling info;
      // presumably that default-constructs an entry whose getDI() is null --
      // confirm against the map's type.
1084  DanglingDebugInfo &DDI = DanglingDebugInfoMap[V];
1085  if (DDI.getDI()) {
1086  const DbgValueInst *DI = DDI.getDI();
1087  DebugLoc dl = DDI.getdl();
1088  unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
1089  DILocalVariable *Variable = DI->getVariable();
1090  DIExpression *Expr = DI->getExpression();
1091  assert(Variable->isValidLocationForIntrinsic(dl) &&
1092  "Expected inlined-at fields to agree");
1093  SDDbgValue *SDV;
1094  if (Val.getNode()) {
1095  if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
1096  SDV = getDbgValue(Val, Variable, Expr, dl, DbgSDNodeOrder);
1097  DAG.AddDbgValue(SDV, Val.getNode(), false);
1098  }
1099  } else
1100  DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
      // Reset the entry so the same dbg_value is not resolved twice.
1101  DanglingDebugInfoMap[V] = DanglingDebugInfo();
1102  }
1103 }
1104 
1105 /// getCopyFromRegs - If there was virtual register allocated for the value V
1106 /// emit CopyFromReg of the specified type Ty. Return empty SDValue() otherwise.
///
/// The copy is chained to the DAG entry node, and any dangling debug info
/// recorded for V is resolved against the result.
//
// NOTE(review): the signature line (listing line 1107) is absent from this
// listing; `V` and `Ty` are presumably its parameters -- restore it from the
// upstream source.
1108  DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
1109  SDValue Result;
1110 
1111  if (It != FuncInfo.ValueMap.end()) {
1112  unsigned InReg = It->second;
1113 
1114  RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
1115  DAG.getDataLayout(), InReg, Ty, isABIRegCopy(V));
1116  SDValue Chain = DAG.getEntryNode();
1117  Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
1118  V);
1119  resolveDanglingDebugInfo(V, Result);
1120  }
1121 
1122  return Result;
1123 }
1124 
1125 /// getValue - Return an SDValue for the given Value.
///
/// Lookup order: an existing NodeMap entry, then a copy from V's virtual
/// register (if getCopyFromRegs yields one), then a freshly built value from
/// getValueImpl, which is cached in NodeMap.
//
// NOTE(review): the signature line (listing line 1126) is absent from this
// listing; `V` is presumably its parameter -- restore it from the upstream
// source.
1127  // If we already have an SDValue for this value, use it. It's important
1128  // to do this first, so that we don't create a CopyFromReg if we already
1129  // have a regular SDValue.
1130  SDValue &N = NodeMap[V];
1131  if (N.getNode()) return N;
1132 
1133  // If there's a virtual register allocated and initialized for this
1134  // value, use it.
      // Note that a value obtained this way is returned without being stored
      // in NodeMap.
1135  if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
1136  return copyFromReg;
1137 
1138  // Otherwise create a new SDValue and remember it.
      // NodeMap is indexed again instead of assigning through N -- presumably
      // because getValueImpl can insert into NodeMap and invalidate N; confirm.
1139  SDValue Val = getValueImpl(V);
1140  NodeMap[V] = Val;
1141  resolveDanglingDebugInfo(V, Val);
1142  return Val;
1143 }
1144 
1145 // Return true if an SDValue exists for the given Value, i.e. V has an entry
// in NodeMap or a virtual register recorded in FuncInfo.ValueMap. Both lookups
// use find(), so no entry is created by the query.
//
// NOTE(review): the signature line (listing line 1146) is absent from this
// listing; restore it from the upstream source.
1147  return (NodeMap.find(V) != NodeMap.end()) ||
1148  (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
1149 }
1150 
1151 /// getNonRegisterValue - Return an SDValue for the given Value, but
1152 /// don't look in FuncInfo.ValueMap for a virtual register.
//
// NOTE(review): the signature line (listing line 1153) is absent from this
// listing; `V` is presumably its parameter -- restore it from the upstream
// source.
1154  // If we already have an SDValue for this value, use it.
1155  SDValue &N = NodeMap[V];
1156  if (N.getNode()) {
1157  if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
1158  // Remove the debug location from the node as the node is about to be used
1159  // in a location which may differ from the original debug location. This
1160  // is relevant to Constant and ConstantFP nodes because they can appear
1161  // as constant expressions inside PHI nodes.
1162  N->setDebugLoc(DebugLoc());
1163  }
1164  return N;
1165  }
1166 
1167  // Otherwise create a new SDValue and remember it.
      // NodeMap is indexed again instead of assigning through N -- presumably
      // because getValueImpl can insert into NodeMap and invalidate N; confirm.
1168  SDValue Val = getValueImpl(V);
1169  NodeMap[V] = Val;
1170  resolveDanglingDebugInfo(V, Val);
1171  return Val;
1172 }
1173 
1174 /// getValueImpl - Helper function for getValue and getNonRegisterValue.
1175 /// Create an SDValue for the given value.
///
/// Constants are handled by kind (integer, global, null pointer, FP, undef,
/// constant expression, aggregate, data-sequential, zero/undef aggregate,
/// block address, vector). Static allocas become frame indices, and any other
/// instruction is read back from a freshly initialized virtual register.
/// Anything else is unreachable.
//
// NOTE(review): several lines are absent from this listing and must be
// restored from the upstream source before building: 1176 (the signature;
// `V` is presumably its parameter), 1208, 1225, 1243, 1248 and 1270
// (presumably the declarations of the `Constants`, `Ops` and `ValueVTs`
// containers used below), and 1294 (presumably the type of the iterator `SI`).
1177  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1178 
1179  if (const Constant *C = dyn_cast<Constant>(V)) {
1180  EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
1181 
1182  if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
1183  return DAG.getConstant(*CI, getCurSDLoc(), VT);
1184 
1185  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
1186  return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
1187 
      // A null pointer is emitted as constant 0 of the pointer type for the
      // value's address space.
1188  if (isa<ConstantPointerNull>(C)) {
1189  unsigned AS = V->getType()->getPointerAddressSpace();
1190  return DAG.getConstant(0, getCurSDLoc(),
1191  TLI.getPointerTy(DAG.getDataLayout(), AS));
1192  }
1193 
1194  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
1195  return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
1196 
      // Undef of aggregate type is not handled here; it falls through to the
      // struct/array case further down.
1197  if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
1198  return DAG.getUNDEF(VT);
1199 
      // Constant expressions are lowered by the regular opcode visitor, which
      // is expected to record its result in NodeMap.
1200  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
1201  visit(CE->getOpcode(), *CE);
1202  SDValue N1 = NodeMap[V];
1203  assert(N1.getNode() && "visit didn't populate the NodeMap!");
1204  return N1;
1205  }
1206 
1207  if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
      // NOTE(review): listing line 1208 is missing here; `Constants` is not
      // declared on any visible line.
1209  for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
1210  OI != OE; ++OI) {
1211  SDNode *Val = getValue(*OI).getNode();
1212  // If the operand is an empty aggregate, there are no values.
1213  if (!Val) continue;
1214  // Add each leaf value from the operand to the Constants list
1215  // to form a flattened list of all the values.
1216  for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1217  Constants.push_back(SDValue(Val, i));
1218  }
1219 
1220  return DAG.getMergeValues(Constants, getCurSDLoc());
1221  }
1222 
1223  if (const ConstantDataSequential *CDS =
1224  dyn_cast<ConstantDataSequential>(C)) {
      // NOTE(review): listing line 1225 is missing here; `Ops` is not declared
      // on any visible line.
1226  for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
1227  SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
1228  // Add each leaf value from the operand to the Constants list
1229  // to form a flattened list of all the values.
      // The inner i/e shadow the outer loop's; the outer i is only read
      // before this point in the iteration.
1230  for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1231  Ops.push_back(SDValue(Val, i));
1232  }
1233 
      // Array-typed data is returned as merged values; otherwise a
      // BUILD_VECTOR is created and cached in NodeMap.
1234  if (isa<ArrayType>(CDS->getType()))
1235  return DAG.getMergeValues(Ops, getCurSDLoc());
1236  return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
1237  }
1238 
1239  if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
1240  assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
1241  "Unknown struct or array constant!");
1242 
      // NOTE(review): listing line 1243 is missing here; `ValueVTs` is not
      // declared on any visible line.
1244  ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
1245  unsigned NumElts = ValueVTs.size();
1246  if (NumElts == 0)
1247  return SDValue(); // empty struct
      // NOTE(review): listing line 1248 is missing here; `Constants` is
      // indexed below, so it is presumably declared with NumElts elements.
1249  for (unsigned i = 0; i != NumElts; ++i) {
1250  EVT EltVT = ValueVTs[i];
1251  if (isa<UndefValue>(C))
1252  Constants[i] = DAG.getUNDEF(EltVT);
1253  else if (EltVT.isFloatingPoint())
1254  Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
1255  else
1256  Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
1257  }
1258 
1259  return DAG.getMergeValues(Constants, getCurSDLoc());
1260  }
1261 
1262  if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
1263  return DAG.getBlockAddress(BA, VT);
1264 
      // Every other constant kind was handled above, so what remains is cast
      // to a vector type (cast<> asserts on mismatch).
1265  VectorType *VecTy = cast<VectorType>(V->getType());
1266  unsigned NumElements = VecTy->getNumElements();
1267 
1268  // Now that we know the number and type of the elements, get that number of
1269  // elements into the Ops array based on what kind of constant it is.
      // NOTE(review): listing line 1270 is missing here; `Ops` is not declared
      // on any visible line.
1271  if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
1272  for (unsigned i = 0; i != NumElements; ++i)
1273  Ops.push_back(getValue(CV->getOperand(i)));
1274  } else {
1275  assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
1276  EVT EltVT =
1277  TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
1278 
1279  SDValue Op;
1280  if (EltVT.isFloatingPoint())
1281  Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
1282  else
1283  Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
1284  Ops.assign(NumElements, Op);
1285  }
1286 
1287  // Create a BUILD_VECTOR node.
1288  return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
1289  }
1290 
1291  // If this is a static alloca, generate it as the frameindex instead of
1292  // computation.
1293  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
      // NOTE(review): listing line 1294 is missing here; it presumably
      // declares the iterator `SI` initialized by the find() below.
1295  FuncInfo.StaticAllocaMap.find(AI);
1296  if (SI != FuncInfo.StaticAllocaMap.end())
1297  return DAG.getFrameIndex(SI->second,
1298  TLI.getFrameIndexTy(DAG.getDataLayout()));
1299  }
1300 
1301  // If this is an instruction which fast-isel has deferred, select it now.
1302  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
1303  unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
1304 
1305  RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
1306  Inst->getType(), isABIRegCopy(V));
1307  SDValue Chain = DAG.getEntryNode();
1308  return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
1309  }
1310 
1311  llvm_unreachable("Can't get register for value!");
1312 }
1313 
1314 void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
1315  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1316  bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
1317  bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
1318  MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
1319  // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
1320  if (IsMSVCCXX || IsCoreCLR)
1321  CatchPadMBB->setIsEHFuncletEntry();
1322 
1323  DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other, getControlRoot()));
1324 }
1325 
1326 void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
1327  // Update machine-CFG edge.
1328  MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
1329  FuncInfo.MBB->addSuccessor(TargetMBB);
1330 
1331  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1332  bool IsSEH = isAsynchronousEHPersonality(Pers);
1333  if (IsSEH) {
1334  // If this is not a fall-through branch or optimizations are switched off,
1335  // emit the branch.
1336  if (TargetMBB != NextBlock(FuncInfo.MBB) ||
1337  TM.getOptLevel() == CodeGenOpt::None)
1338  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
1339  getControlRoot(), DAG.getBasicBlock(TargetMBB)));
1340  return;
1341  }
1342 
1343  // Figure out the funclet membership for the catchret's successor.
1344  // This will be used by the FuncletLayout pass to determine how to order the
1345  // BB's.
1346  // A 'catchret' returns to the outer scope's color.
1347  Value *ParentPad = I.getCatchSwitchParentPad();
1348  const BasicBlock *SuccessorColor;
1349  if (isa<ConstantTokenNone>(ParentPad))
1350  SuccessorColor = &FuncInfo.Fn->getEntryBlock();
1351  else
1352  SuccessorColor = cast<Instruction>(ParentPad)->getParent();
1353  assert(SuccessorColor && "No parent funclet for catchret!");
1354  MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
1355  assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
1356 
1357  // Create the terminator node.
1358  SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
1359  getControlRoot(), DAG.getBasicBlock(TargetMBB),
1360  DAG.getBasicBlock(SuccessorColorMBB));
1361  DAG.setRoot(Ret);
1362 }
1363 
1364 void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
1365  // Don't emit any special code for the cleanuppad instruction. It just marks
1366  // the start of a funclet.
1367  FuncInfo.MBB->setIsEHFuncletEntry();
1368  FuncInfo.MBB->setIsCleanupFuncletEntry();
1369 }
1370 
1371 /// When an invoke or a cleanupret unwinds to the next EH pad, there are
1372 /// many places it could ultimately go. In the IR, we have a single unwind
1373 /// destination, but in the machine CFG, we enumerate all the possible blocks.
1374 /// This function skips over imaginary basic blocks that hold catchswitch
1375 /// instructions, and finds all the "real" machine
1376 /// basic block destinations. As those destinations may not be successors of
1377 /// EHPadBB, here we also calculate the edge probability to those destinations.
1378 /// The passed-in Prob is the edge probability to EHPadBB.
///
/// Each destination found is appended to UnwindDests together with the
/// probability accumulated along the catchswitch chain leading to it.
//
// NOTE(review): listing lines 1379 (the line carrying the function name) and
// 1385 (the initializer of `Personality`) are absent from this listing;
// restore them from the upstream source before building.
1380  FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
1381  BranchProbability Prob,
1382  SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
1383  &UnwindDests) {
1384  EHPersonality Personality =
1386  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
1387  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
1388 
1389  while (EHPadBB) {
1390  const Instruction *Pad = EHPadBB->getFirstNonPHI();
1391  BasicBlock *NewEHPadBB = nullptr;
1392  if (isa<LandingPadInst>(Pad)) {
1393  // Stop on landingpads. They are not funclets.
1394  UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
1395  break;
1396  } else if (isa<CleanupPadInst>(Pad)) {
1397  // Stop on cleanup pads. Cleanups are always funclet entries for all known
1398  // personalities.
1399  UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
1400  UnwindDests.back().first->setIsEHFuncletEntry();
1401  break;
1402  } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
1403  // Add the catchpad handlers to the possible destinations.
1404  for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
1405  UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
1406  // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
1407  if (IsMSVCCXX || IsCoreCLR)
1408  UnwindDests.back().first->setIsEHFuncletEntry();
1409  }
      // Keep walking: the catchswitch may itself unwind to another pad
      // (null ends the loop).
1410  NewEHPadBB = CatchSwitch->getUnwindDest();
1411  } else {
      // NOTE(review): this `continue` re-tests the loop condition with
      // EHPadBB unchanged, so the loop would not terminate if this arm were
      // ever taken. Presumably a pad block always starts with one of the
      // three instructions handled above -- confirm.
1412  continue;
1413  }
1414 
      // Scale the running probability by the edge to the next pad when
      // branch-probability info is available.
1415  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1416  if (BPI && NewEHPadBB)
1417  Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
1418  EHPadBB = NewEHPadBB;
1419  }
1420 }
1421 
// Lowers a cleanupret: every machine block the unwind edge can reach (as
// computed by findUnwindDestinations) is marked as an EH pad and added as a
// successor of the current block, successor probabilities are normalized, and
// a CLEANUPRET node chained to the control root becomes the DAG root.
//
// NOTE(review): listing lines 1424 (presumably the declaration of
// `UnwindDests`) and 1430 (the ':' arm of the probability conditional) are
// absent from this listing; restore them from the upstream source.
1422 void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
1423  // Update successor info.
1425  auto UnwindDest = I.getUnwindDest();
1426  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1427  BranchProbability UnwindDestProb =
1428  (BPI && UnwindDest)
1429  ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
1431  findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
      // The loop variable shadows the outer `UnwindDest`; inside the loop it
      // names one (block, probability) pair.
1432  for (auto &UnwindDest : UnwindDests) {
1433  UnwindDest.first->setIsEHPad();
1434  addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
1435  }
1436  FuncInfo.MBB->normalizeSuccProbs();
1437 
1438  // Create the terminator node.
1439  SDValue Ret =
1440  DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
1441  DAG.setRoot(Ret);
1442 }
1443 
1444 void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
1445  report_fatal_error("visitCatchSwitch not yet implemented!");
1446 }
1447 
// Lowers an IR `ret`. After the early-out for deoptimizing returns, two cases
// are visible:
//  * !FuncInfo.CanLowerReturn: every piece of the returned value is stored
//    through the pointer read from FuncInfo.DemoteRegister, and Outs is left
//    empty;
//  * otherwise, if there is a return operand, each of its values is split
//    into calling-convention parts that are appended to Outs/OutVals,
//    honoring the sext/zext/inreg attributes on the return.
// A swifterror register is then appended when applicable, TLI.LowerReturn is
// invoked, and the chain it returns becomes the DAG root.
//
// NOTE(review): several lines are absent from this listing and must be
// restored from the upstream source before building: 1452 (presumably the
// declaration of `Outs`), 1462 (the `if` guarding LowerDeoptimizingReturn),
// 1475 (start of the call that fills PtrValueVTs), 1482-1483 and 1508
// (presumably the declarations of `ValueVTs` and `Offsets`), and 1583 (the
// initializer of `CallConv`).
1448 void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
1449  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1450  auto &DL = DAG.getDataLayout();
1451  SDValue Chain = getControlRoot();
1453  SmallVector<SDValue, 8> OutVals;
1454 
1455  // Calls to @llvm.experimental.deoptimize don't generate a return value, so
1456  // lower
1457  //
1458  // %val = call <ty> @llvm.experimental.deoptimize()
1459  // ret <ty> %val
1460  //
1461  // differently.
      // NOTE(review): listing line 1462 (the condition opening this block) is
      // missing here.
1463  LowerDeoptimizingReturn();
1464  return;
1465  }
1466 
1467  if (!FuncInfo.CanLowerReturn) {
1468  unsigned DemoteReg = FuncInfo.DemoteRegister;
1469  const Function *F = I.getParent()->getParent();
1470 
1471  // Emit a store of the return value through the virtual register.
1472  // Leave Outs empty so that LowerReturn won't try to load return
1473  // registers the usual way.
1474  SmallVector<EVT, 1> PtrValueVTs;
      // NOTE(review): listing line 1475 (start of the call whose last
      // argument is PtrValueVTs) is missing here.
1476  PtrValueVTs);
1477 
1478  SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
1479  DemoteReg, PtrValueVTs[0]);
1480  SDValue RetOp = getValue(I.getOperand(0));
1481 
      // NOTE(review): listing lines 1482-1483 are missing here; `ValueVTs`
      // and `Offsets` are not declared on any visible line.
1484  ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
1485  unsigned NumValues = ValueVTs.size();
1486 
1487  // An aggregate return value cannot wrap around the address space, so
1488  // offsets to its parts don't wrap either.
1489  SDNodeFlags Flags;
1490  Flags.setNoUnsignedWrap(true);
1491 
      // One store per value, each at RetPtr + Offsets[i]. All stores use the
      // same incoming Chain and are joined by the TokenFactor below.
1492  SmallVector<SDValue, 4> Chains(NumValues);
1493  for (unsigned i = 0; i != NumValues; ++i) {
1494  SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(),
1495  RetPtr.getValueType(), RetPtr,
1496  DAG.getIntPtrConstant(Offsets[i],
1497  getCurSDLoc()),
1498  Flags);
1499  Chains[i] = DAG.getStore(Chain, getCurSDLoc(),
1500  SDValue(RetOp.getNode(), RetOp.getResNo() + i),
1501  // FIXME: better loc info would be nice.
1502  Add, MachinePointerInfo());
1503  }
1504 
1505  Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
1506  MVT::Other, Chains);
1507  } else if (I.getNumOperands() != 0) {
      // NOTE(review): listing line 1508 is missing here; `ValueVTs` is not
      // declared on any visible line.
1509  ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
1510  unsigned NumValues = ValueVTs.size();
1511  if (NumValues) {
1512  SDValue RetOp = getValue(I.getOperand(0));
1513 
1514  const Function *F = I.getParent()->getParent();
1515 
      // The extension kind comes from the return attributes; sext is checked
      // before zext.
1516  ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
1517  if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1518  Attribute::SExt))
1519  ExtendKind = ISD::SIGN_EXTEND;
1520  else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1521  Attribute::ZExt))
1522  ExtendKind = ISD::ZERO_EXTEND;
1523 
1524  LLVMContext &Context = F->getContext();
1525  bool RetInReg = F->getAttributes().hasAttribute(
1526  AttributeList::ReturnIndex, Attribute::InReg);
1527 
1528  for (unsigned j = 0; j != NumValues; ++j) {
1529  EVT VT = ValueVTs[j];
1530 
      // Integer values with an explicit extension use the type the target
      // reports for extended returns.
1531  if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
1532  VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
1533 
1534  unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, VT);
1535  MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, VT);
1536  SmallVector<SDValue, 4> Parts(NumParts);
1537  getCopyToParts(DAG, getCurSDLoc(),
1538  SDValue(RetOp.getNode(), RetOp.getResNo() + j),
1539  &Parts[0], NumParts, PartVT, &I, ExtendKind, true);
1540 
1541  // 'inreg' on function refers to return value
1542  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1543  if (RetInReg)
1544  Flags.setInReg();
1545 
1546  // Propagate extension type if any
1547  if (ExtendKind == ISD::SIGN_EXTEND)
1548  Flags.setSExt();
1549  else if (ExtendKind == ISD::ZERO_EXTEND)
1550  Flags.setZExt();
1551 
1552  for (unsigned i = 0; i < NumParts; ++i) {
1553  Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
1554  VT, /*isfixed=*/true, 0, 0));
1555  OutVals.push_back(Parts[i]);
1556  }
1557  }
1558  }
1559  }
1560 
1561  // Push in swifterror virtual register as the last element of Outs. This makes
1562  // sure swifterror virtual register will be returned in the swifterror
1563  // physical register.
1564  const Function *F = I.getParent()->getParent();
1565  if (TLI.supportSwiftError() &&
1566  F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
1567  assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
1568  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1569  Flags.setSwiftError();
1570  Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
1571  EVT(TLI.getPointerTy(DL)) /*argvt*/,
1572  true /*isfixed*/, 1 /*origidx*/,
1573  0 /*partOffs*/));
1574  // Create SDNode for the swifterror virtual register.
1575  OutVals.push_back(
1576  DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
1577  &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
1578  EVT(TLI.getPointerTy(DL))));
1579  }
1580 
1581  bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg();
1582  CallingConv::ID CallConv =
      // NOTE(review): listing line 1583 (the initializer of CallConv) is
      // missing here.
1584  Chain = DAG.getTargetLoweringInfo().LowerReturn(
1585  Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
1586 
1587  // Verify that the target's LowerReturn behaved as expected.
1588  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
1589  "LowerReturn didn't return a valid chain!");
1590 
1591  // Update the DAG with the new chain value resulting from return lowering.
1592  DAG.setRoot(Chain);
1593 }
1594 
1595 /// CopyToExportRegsIfNeeded - If the given value has virtual registers
1596 /// created for it, emit nodes to copy the value into the virtual
1597 /// registers.
///
/// Values of empty type, and values with no FuncInfo.ValueMap entry, are left
/// alone.
//
// NOTE(review): the signature line (listing line 1598) is absent from this
// listing; `V` is presumably its parameter -- restore it from the upstream
// source.
1599  // Skip empty types
1600  if (V->getType()->isEmptyTy())
1601  return;
1602 
1603  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
1604  if (VMI != FuncInfo.ValueMap.end()) {
1605  assert(!V->use_empty() && "Unused value assigned virtual registers!");
1606  CopyValueToVirtualRegister(V, VMI->second);
1607  }
1608 }
1609 
1610 /// ExportFromCurrentBlock - If this condition isn't known to be exported from
1611 /// the current basic block, add it to ValueMap now so that we'll get a
1612 /// CopyTo/FromReg.
///
/// Only instructions and arguments are exported; a value FuncInfo already
/// reports as exported is left alone.
//
// NOTE(review): the signature line (listing line 1613) is absent from this
// listing; `V` is presumably its parameter -- restore it from the upstream
// source.
1614  // No need to export constants.
1615  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
1616 
1617  // Already exported?
1618  if (FuncInfo.isExportedInst(V)) return;
1619 
      // Allocate a virtual register for V and emit the copy into it.
1620  unsigned Reg = FuncInfo.InitializeRegForValue(V);
1621  CopyValueToVirtualRegister(V, Reg);
1622 }
1623 
// NOTE(review): the opening line of this definition (listing line 1624, with
// the return type, function name and the `V` parameter) is absent from this
// listing; restore it from the upstream source.
//
// Returns true when V can be exported from FromBB:
//  - an instruction: defined in FromBB, or already exported;
//  - an argument: FromBB is the function's entry block, or already exported;
//  - anything else (a constant): always.
1625  const BasicBlock *FromBB) {
1626  // The operands of the setcc have to be in this block. We don't know
1627  // how to export them from some other block.
1628  if (const Instruction *VI = dyn_cast<Instruction>(V)) {
1629  // Can export from current BB.
1630  if (VI->getParent() == FromBB)
1631  return true;
1632 
1633  // Is already exported, noop.
1634  return FuncInfo.isExportedInst(V);
1635  }
1636 
1637  // If this is an argument, we can export it if the BB is the entry block or
1638  // if it is already exported.
1639  if (isa<Argument>(V)) {
1640  if (FromBB == &FromBB->getParent()->getEntryBlock())
1641  return true;
1642 
1643  // Otherwise, can only export this if it is already exported.
1644  return FuncInfo.isExportedInst(V);
1645  }
1646 
1647  // Otherwise, constants can always be exported.
1648  return true;
1649 }
1650 
1651 /// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
1653 SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
1654  const MachineBasicBlock *Dst) const {
1655  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1656  const BasicBlock *SrcBB = Src->getBasicBlock();
1657  const BasicBlock *DstBB = Dst->getBasicBlock();
1658  if (!BPI) {
1659  // If BPI is not available, set the default probability as 1 / N, where N is
1660  // the number of successors.
1661  auto SuccSize = std::max<uint32_t>(
1662  std::distance(succ_begin(SrcBB), succ_end(SrcBB)), 1);
1663  return BranchProbability(1, SuccSize);
1664  }
1665  return BPI->getEdgeProbability(SrcBB, DstBB);
1666 }
1667 
1668 void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
1669  MachineBasicBlock *Dst,
1670  BranchProbability Prob) {
1671  if (!FuncInfo.BPI)
1672  Src->addSuccessorWithoutProb(Dst);
1673  else {
1674  if (Prob.isUnknown())
1675  Prob = getEdgeProbability(Src, Dst);
1676  Src->addSuccessor(Dst, Prob);
1677  }
1678 }
1679 
1680 static bool InBlock(const Value *V, const BasicBlock *BB) {
1681  if (const Instruction *I = dyn_cast<Instruction>(V))
1682  return I->getParent() == BB;
1683  return true;
1684 }
1685 
1686 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
1687 /// This function emits a branch and is used at the leaves of an OR or an
1688 /// AND operator tree.
///
/// The branch is recorded as a CaseBlock appended to SwitchCases. A comparison
/// leaf whose operands are usable from CurBB is folded into the CaseBlock's
/// condition code; any other leaf is tested against the constant `true`.
//
// NOTE(review): listing line 1690 (the line carrying the function name and
// the `Cond` parameter) is absent from this listing; restore it from the
// upstream source.
1689 void
1691  MachineBasicBlock *TBB,
1692  MachineBasicBlock *FBB,
1693  MachineBasicBlock *CurBB,
1694  MachineBasicBlock *SwitchBB,
1695  BranchProbability TProb,
1696  BranchProbability FProb,
1697  bool InvertCond) {
1698  const BasicBlock *BB = CurBB->getBasicBlock();
1699 
1700  // If the leaf of the tree is a comparison, merge the condition into
1701  // the caseblock.
1702  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
1703  // The operands of the cmp have to be in this block. We don't know
1704  // how to export them from some other block. If this is the first block
1705  // of the sequence, no exporting is needed.
1706  if (CurBB == SwitchBB ||
1707  (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
1708  isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
1709  ISD::CondCode Condition;
      // InvertCond is applied by switching to the inverse predicate rather
      // than by negating the result.
1710  if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
1711  ICmpInst::Predicate Pred =
1712  InvertCond ? IC->getInversePredicate() : IC->getPredicate();
1713  Condition = getICmpCondCode(Pred);
1714  } else {
1715  const FCmpInst *FC = cast<FCmpInst>(Cond);
1716  FCmpInst::Predicate Pred =
1717  InvertCond ? FC->getInversePredicate() : FC->getPredicate();
1718  Condition = getFCmpCondCode(Pred);
      // With NoNaNsFPMath set, the condition code is replaced by its
      // NaN-free counterpart.
1719  if (TM.Options.NoNaNsFPMath)
1720  Condition = getFCmpCodeWithoutNaN(Condition);
1721  }
1722 
1723  CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
1724  TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1725  SwitchCases.push_back(CB);
1726  return;
1727  }
1728  }
1729 
1730  // Create a CaseBlock record representing this branch.
      // Fallback: compare Cond against `true`, using SETNE instead of SETEQ
      // when the condition is inverted.
1731  ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
1732  CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
1733  nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1734  SwitchCases.push_back(CB);
1735 }
1736 
1737 /// FindMergedConditions - If Cond is an expression like
1739  MachineBasicBlock *TBB,
1740  MachineBasicBlock *FBB,
1741  MachineBasicBlock *CurBB,
1742  MachineBasicBlock *SwitchBB,
1744  BranchProbability TProb,
1745  BranchProbability FProb,
1746  bool InvertCond) {
1747  // Skip over not part of the tree and remember to invert op and operands at
1748  // next level.
1749  if (BinaryOperator::isNot(Cond) && Cond->hasOneUse()) {
1750  const Value *CondOp = BinaryOperator::getNotArgument(Cond);
1751  if (InBlock(CondOp, CurBB->getBasicBlock())) {
1752  FindMergedConditions(CondOp, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
1753  !InvertCond);
1754  return;
1755  }
1756  }
1757 
1758  const Instruction *BOp = dyn_cast<Instruction>(Cond);
1759  // Compute the effective opcode for Cond, taking into account whether it needs
1760  // to be inverted, e.g.
1761  // and (not (or A, B)), C
1762  // gets lowered as
1763  // and (and (not A, not B), C)
1764  unsigned BOpc = 0;
1765  if (BOp) {
1766  BOpc = BOp->getOpcode();
1767  if (InvertCond) {
1768  if (BOpc == Instruction::And)
1769  BOpc = Instruction::Or;
1770  else if (BOpc == Instruction::Or)
1771  BOpc = Instruction::And;
1772  }
1773  }
1774 
1775  // If this node is not part of the or/and tree, emit it as a branch.
1776  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
1777  BOpc != Opc || !BOp->hasOneUse() ||
1778  BOp->getParent() != CurBB->getBasicBlock() ||
1779  !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
1780  !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
1781  EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
1782  TProb, FProb, InvertCond);
1783  return;
1784  }
1785 
1786  // Create TmpBB after CurBB.
1787  MachineFunction::iterator BBI(CurBB);
1788  MachineFunction &MF = DAG.getMachineFunction();
1790  CurBB->getParent()->insert(++BBI, TmpBB);
1791 
1792  if (Opc == Instruction::Or) {
1793  // Codegen X | Y as:
1794  // BB1:
1795  // jmp_if_X TBB
1796  // jmp TmpBB
1797  // TmpBB:
1798  // jmp_if_Y TBB
1799  // jmp FBB
1800  //
1801 
1802  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1803  // The requirement is that
1804  // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
1805  // = TrueProb for original BB.
1806  // Assuming the original probabilities are A and B, one choice is to set
1807  // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
1808  // A/(1+B) and 2B/(1+B). This choice assumes that
1809  // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
1810  // Another choice is to assume TrueProb for BB1 equals to TrueProb for
1811  // TmpBB, but the math is more complicated.
1812 
1813  auto NewTrueProb = TProb / 2;
1814  auto NewFalseProb = TProb / 2 + FProb;
1815  // Emit the LHS condition.
1816  FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
1817  NewTrueProb, NewFalseProb, InvertCond);
1818 
1819  // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
1820  SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
1821  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1822  // Emit the RHS condition into TmpBB.
1823  FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1824  Probs[0], Probs[1], InvertCond);
1825  } else {
1826  assert(Opc == Instruction::And && "Unknown merge op!");
1827  // Codegen X & Y as:
1828  // BB1:
1829  // jmp_if_X TmpBB
1830  // jmp FBB
1831  // TmpBB:
1832  // jmp_if_Y TBB
1833  // jmp FBB
1834  //
1835  // This requires creation of TmpBB after CurBB.
1836 
1837  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1838  // The requirement is that
1839  // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
1840  // = FalseProb for original BB.
1841  // Assuming the original probabilities are A and B, one choice is to set
1842  // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
1843  // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
1844  // TrueProb for BB1 * FalseProb for TmpBB.
1845 
1846  auto NewTrueProb = TProb + FProb / 2;
1847  auto NewFalseProb = FProb / 2;
1848  // Emit the LHS condition.
1849  FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
1850  NewTrueProb, NewFalseProb, InvertCond);
1851 
1852  // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
1853  SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
1854  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1855  // Emit the RHS condition into TmpBB.
1856  FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1857  Probs[0], Probs[1], InvertCond);
1858  }
1859 }
1860 
1861 /// If the set of cases should be emitted as a series of branches, return true.
1862 /// If we should emit this as a bunch of and/or'd together conditions, return
1863 /// false.
1864 bool
1865 SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
1866  if (Cases.size() != 2) return true;
1867 
1868  // If this is two comparisons of the same values or'd or and'd together, they
1869  // will get folded into a single comparison, so don't emit two blocks.
1870  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
1871  Cases[0].CmpRHS == Cases[1].CmpRHS) ||
1872  (Cases[0].CmpRHS == Cases[1].CmpLHS &&
1873  Cases[0].CmpLHS == Cases[1].CmpRHS)) {
1874  return false;
1875  }
1876 
1877  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
1878  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
1879  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
1880  Cases[0].CC == Cases[1].CC &&
1881  isa<Constant>(Cases[0].CmpRHS) &&
1882  cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
1883  if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
1884  return false;
1885  if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
1886  return false;
1887  }
1888 
1889  return true;
1890 }
1891 
1892 void SelectionDAGBuilder::visitBr(const BranchInst &I) {
1893  MachineBasicBlock *BrMBB = FuncInfo.MBB;
1894 
1895  // Update machine-CFG edges.
1896  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1897 
1898  if (I.isUnconditional()) {
1899  // Update machine-CFG edges.
1900  BrMBB->addSuccessor(Succ0MBB);
1901 
1902  // If this is not a fall-through branch or optimizations are switched off,
1903  // emit the branch.
1904  if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
1905  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
1906  MVT::Other, getControlRoot(),
1907  DAG.getBasicBlock(Succ0MBB)));
1908 
1909  return;
1910  }
1911 
1912  // If this condition is one of the special cases we handle, do special stuff
1913  // now.
1914  const Value *CondVal = I.getCondition();
1915  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
1916 
1917  // If this is a series of conditions that are or'd or and'd together, emit
1918  // this as a sequence of branches instead of setcc's with and/or operations.
1919  // As long as jumps are not expensive, this should improve performance.
1920  // For example, instead of something like:
1921  // cmp A, B
1922  // C = seteq
1923  // cmp D, E
1924  // F = setle
1925  // or C, F
1926  // jnz foo
1927  // Emit:
1928  // cmp A, B
1929  // je foo
1930  // cmp D, E
1931  // jle foo
1932  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
1933  Instruction::BinaryOps Opcode = BOp->getOpcode();
1934  if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
1936  (Opcode == Instruction::And || Opcode == Instruction::Or)) {
1937  FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
1938  Opcode,
1939  getEdgeProbability(BrMBB, Succ0MBB),
1940  getEdgeProbability(BrMBB, Succ1MBB),
1941  /*InvertCond=*/false);
1942  // If the compares in later blocks need to use values not currently
1943  // exported from this block, export them now. This block should always
1944  // be the first entry.
1945  assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
1946 
1947  // Allow some cases to be rejected.
1948  if (ShouldEmitAsBranches(SwitchCases)) {
1949  for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
1950  ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
1951  ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
1952  }
1953 
1954  // Emit the branch for this block.
1955  visitSwitchCase(SwitchCases[0], BrMBB);
1956  SwitchCases.erase(SwitchCases.begin());
1957  return;
1958  }
1959 
1960  // Okay, we decided not to do this, remove any inserted MBB's and clear
1961  // SwitchCases.
1962  for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
1963  FuncInfo.MF->erase(SwitchCases[i].ThisBB);
1964 
1965  SwitchCases.clear();
1966  }
1967  }
1968 
1969  // Create a CaseBlock record representing this branch.
1970  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
1971  nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
1972 
1973  // Use visitSwitchCase to actually insert the fast branch sequence for this
1974  // cond branch.
1975  visitSwitchCase(CB, BrMBB);
1976 }
1977 
1978 /// visitSwitchCase - Emits the necessary code to represent a single node in
1979 /// the binary search tree resulting from lowering a switch instruction.
1981  MachineBasicBlock *SwitchBB) {
1982  SDValue Cond;
1983  SDValue CondLHS = getValue(CB.CmpLHS);
1984  SDLoc dl = CB.DL;
1985 
1986  // Build the setcc now.
1987  if (!CB.CmpMHS) {
1988  // Fold "(X == true)" to X and "(X == false)" to !X to
1989  // handle common cases produced by branch lowering.
1990  if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
1991  CB.CC == ISD::SETEQ)
1992  Cond = CondLHS;
1993  else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
1994  CB.CC == ISD::SETEQ) {
1995  SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
1996  Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
1997  } else
1998  Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
1999  } else {
2000  assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
2001 
2002  const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
2003  const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
2004 
2005  SDValue CmpOp = getValue(CB.CmpMHS);
2006  EVT VT = CmpOp.getValueType();
2007 
2008  if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
2009  Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
2010  ISD::SETLE);
2011  } else {
2012  SDValue SUB = DAG.getNode(ISD::SUB, dl,
2013  VT, CmpOp, DAG.getConstant(Low, dl, VT));
2014  Cond = DAG.getSetCC(dl, MVT::i1, SUB,
2015  DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
2016  }
2017  }
2018 
2019  // Update successor info
2020  addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
2021  // TrueBB and FalseBB are always different unless the incoming IR is
2022  // degenerate. This only happens when running llc on weird IR.
2023  if (CB.TrueBB != CB.FalseBB)
2024  addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
2025  SwitchBB->normalizeSuccProbs();
2026 
2027  // If the lhs block is the next block, invert the condition so that we can
2028  // fall through to the lhs instead of the rhs block.
2029  if (CB.TrueBB == NextBlock(SwitchBB)) {
2030  std::swap(CB.TrueBB, CB.FalseBB);
2031  SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
2032  Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
2033  }
2034 
2035  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2036  MVT::Other, getControlRoot(), Cond,
2037  DAG.getBasicBlock(CB.TrueBB));
2038 
2039  // Insert the false branch. Do this even if it's a fall through branch,
2040  // this makes it easier to do DAG optimizations which require inverting
2041  // the branch condition.
2042  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2043  DAG.getBasicBlock(CB.FalseBB));
2044 
2045  DAG.setRoot(BrCond);
2046 }
2047 
2048 /// visitJumpTable - Emit JumpTable node in the current MBB
2050  // Emit the code for the jump table
2051  assert(JT.Reg != -1U && "Should lower JT Header first!");
2053  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
2054  JT.Reg, PTy);
2055  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
2056  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
2057  MVT::Other, Index.getValue(1),
2058  Table, Index);
2059  DAG.setRoot(BrJumpTable);
2060 }
2061 
2062 /// visitJumpTableHeader - This function emits necessary code to produce index
2063 /// in the JumpTable from switch case.
2065  JumpTableHeader &JTH,
2066  MachineBasicBlock *SwitchBB) {
2067  SDLoc dl = getCurSDLoc();
2068 
2069  // Subtract the lowest switch case value from the value being switched on and
2070  // conditional branch to default mbb if the result is greater than the
2071  // difference between smallest and largest cases.
2072  SDValue SwitchOp = getValue(JTH.SValue);
2073  EVT VT = SwitchOp.getValueType();
2074  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2075  DAG.getConstant(JTH.First, dl, VT));
2076 
2077  // The SDNode we just created, which holds the value being switched on minus
2078  // the smallest case value, needs to be copied to a virtual register so it
2079  // can be used as an index into the jump table in a subsequent basic block.
2080  // This value may be smaller or larger than the target's pointer type, and
2081  // therefore require extension or truncating.
2082  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2083  SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
2084 
2085  unsigned JumpTableReg =
2086  FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
2087  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
2088  JumpTableReg, SwitchOp);
2089  JT.Reg = JumpTableReg;
2090 
2091  // Emit the range check for the jump table, and branch to the default block
2092  // for the switch statement if the value being switched on exceeds the largest
2093  // case in the switch.
2094  SDValue CMP = DAG.getSetCC(
2095  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2096  Sub.getValueType()),
2097  Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
2098 
2099  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2100  MVT::Other, CopyTo, CMP,
2101  DAG.getBasicBlock(JT.Default));
2102 
2103  // Avoid emitting unnecessary branches to the next block.
2104  if (JT.MBB != NextBlock(SwitchBB))
2105  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2106  DAG.getBasicBlock(JT.MBB));
2107 
2108  DAG.setRoot(BrCond);
2109 }
2110 
2111 /// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
2112 /// variable if there exists one.
2114  SDValue &Chain) {
2115  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2116  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2117  MachineFunction &MF = DAG.getMachineFunction();
2118  Value *Global = TLI.getSDagStackGuard(*MF.getFunction()->getParent());
2119  MachineSDNode *Node =
2120  DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
2121  if (Global) {
2122  MachinePointerInfo MPInfo(Global);
2126  *MemRefs = MF.getMachineMemOperand(MPInfo, Flags, PtrTy.getSizeInBits() / 8,
2127  DAG.getEVTAlignment(PtrTy));
2128  Node->setMemRefs(MemRefs, MemRefs + 1);
2129  }
2130  return SDValue(Node, 0);
2131 }
2132 
2133 /// Codegen a new tail for a stack protector check ParentMBB which has had its
2134 /// tail spliced into a stack protector check success bb.
2135 ///
2136 /// For a high level explanation of how this fits into the stack protector
2137 /// generation see the comment on the declaration of class
2138 /// StackProtectorDescriptor.
2139 void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
2140  MachineBasicBlock *ParentBB) {
2141 
2142  // First create the loads to the guard/stack slot for the comparison.
2143  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2144  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2145 
2146  MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
2147  int FI = MFI.getStackProtectorIndex();
2148 
2149  SDValue Guard;
2150  SDLoc dl = getCurSDLoc();
2151  SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
2152  const Module &M = *ParentBB->getParent()->getFunction()->getParent();
2153  unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
2154 
2155  // Generate code to load the content of the guard slot.
2156  SDValue StackSlot = DAG.getLoad(
2157  PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
2160 
2161  // Retrieve guard check function, nullptr if instrumentation is inlined.
2162  if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
2163  // The target provides a guard check function to validate the guard value.
2164  // Generate a call to that function with the content of the guard slot as
2165  // argument.
2166  auto *Fn = cast<Function>(GuardCheck);
2167  FunctionType *FnTy = Fn->getFunctionType();
2168  assert(FnTy->getNumParams() == 1 && "Invalid function signature");
2169 
2172  Entry.Node = StackSlot;
2173  Entry.Ty = FnTy->getParamType(0);
2174  if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
2175  Entry.IsInReg = true;
2176  Args.push_back(Entry);
2177 
2179  CLI.setDebugLoc(getCurSDLoc())
2180  .setChain(DAG.getEntryNode())
2181  .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
2182  getValue(GuardCheck), std::move(Args));
2183 
2184  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
2185  DAG.setRoot(Result.second);
2186  return;
2187  }
2188 
2189  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
2190  // Otherwise, emit a volatile load to retrieve the stack guard value.
2191  SDValue Chain = DAG.getEntryNode();
2192  if (TLI.useLoadStackGuardNode()) {
2193  Guard = getLoadStackGuard(DAG, dl, Chain);
2194  } else {
2195  const Value *IRGuard = TLI.getSDagStackGuard(M);
2196  SDValue GuardPtr = getValue(IRGuard);
2197 
2198  Guard =
2199  DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
2201  }
2202 
2203  // Perform the comparison via a subtract/getsetcc.
2204  EVT VT = Guard.getValueType();
2205  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, StackSlot);
2206 
2207  SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
2208  *DAG.getContext(),
2209  Sub.getValueType()),
2210  Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
2211 
2212  // If the sub is not 0, then we know the guard/stackslot do not equal, so
2213  // branch to failure MBB.
2214  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2215  MVT::Other, StackSlot.getOperand(0),
2216  Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
2217  // Otherwise branch to success MBB.
2218  SDValue Br = DAG.getNode(ISD::BR, dl,
2219  MVT::Other, BrCond,
2220  DAG.getBasicBlock(SPD.getSuccessMBB()));
2221 
2222  DAG.setRoot(Br);
2223 }
2224 
2225 /// Codegen the failure basic block for a stack protector check.
2226 ///
2227 /// A failure stack protector machine basic block consists simply of a call to
2228 /// __stack_chk_fail().
2229 ///
2230 /// For a high level explanation of how this fits into the stack protector
2231 /// generation see the comment on the declaration of class
2232 /// StackProtectorDescriptor.
2233 void
2234 SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
2235  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2236  SDValue Chain =
2237  TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
2238  None, false, getCurSDLoc(), false, false).second;
2239  DAG.setRoot(Chain);
2240 }
2241 
2242 /// visitBitTestHeader - This function emits necessary code to produce value
2243 /// suitable for "bit tests"
2245  MachineBasicBlock *SwitchBB) {
2246  SDLoc dl = getCurSDLoc();
2247 
2248  // Subtract the minimum value
2249  SDValue SwitchOp = getValue(B.SValue);
2250  EVT VT = SwitchOp.getValueType();
2251  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2252  DAG.getConstant(B.First, dl, VT));
2253 
2254  // Check range
2255  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2256  SDValue RangeCmp = DAG.getSetCC(
2257  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2258  Sub.getValueType()),
2259  Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
2260 
2261  // Determine the type of the test operands.
2262  bool UsePtrType = false;
2263  if (!TLI.isTypeLegal(VT))
2264  UsePtrType = true;
2265  else {
2266  for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
2267  if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
2268  // Switch table case range are encoded into series of masks.
2269  // Just use pointer type, it's guaranteed to fit.
2270  UsePtrType = true;
2271  break;
2272  }
2273  }
2274  if (UsePtrType) {
2275  VT = TLI.getPointerTy(DAG.getDataLayout());
2276  Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
2277  }
2278 
2279  B.RegVT = VT.getSimpleVT();
2280  B.Reg = FuncInfo.CreateReg(B.RegVT);
2281  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
2282 
2283  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
2284 
2285  addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
2286  addSuccessorWithProb(SwitchBB, MBB, B.Prob);
2287  SwitchBB->normalizeSuccProbs();
2288 
2289  SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
2290  MVT::Other, CopyTo, RangeCmp,
2291  DAG.getBasicBlock(B.Default));
2292 
2293  // Avoid emitting unnecessary branches to the next block.
2294  if (MBB != NextBlock(SwitchBB))
2295  BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
2296  DAG.getBasicBlock(MBB));
2297 
2298  DAG.setRoot(BrRange);
2299 }
2300 
2301 /// visitBitTestCase - this function produces one "bit test"
2303  MachineBasicBlock* NextMBB,
2304  BranchProbability BranchProbToNext,
2305  unsigned Reg,
2306  BitTestCase &B,
2307  MachineBasicBlock *SwitchBB) {
2308  SDLoc dl = getCurSDLoc();
2309  MVT VT = BB.RegVT;
2310  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
2311  SDValue Cmp;
2312  unsigned PopCount = countPopulation(B.Mask);
2313  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2314  if (PopCount == 1) {
2315  // Testing for a single bit; just compare the shift count with what it
2316  // would need to be to shift a 1 bit in that position.
2317  Cmp = DAG.getSetCC(
2318  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2319  ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
2320  ISD::SETEQ);
2321  } else if (PopCount == BB.Range) {
2322  // There is only one zero bit in the range, test for it directly.
2323  Cmp = DAG.getSetCC(
2324  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2325  ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
2326  ISD::SETNE);
2327  } else {
2328  // Make desired shift
2329  SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
2330  DAG.getConstant(1, dl, VT), ShiftOp);
2331 
2332  // Emit bit tests and jumps
2333  SDValue AndOp = DAG.getNode(ISD::AND, dl,
2334  VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
2335  Cmp = DAG.getSetCC(
2336  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2337  AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
2338  }
2339 
2340  // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
2341  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
2342  // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
2343  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
2344  // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
2345  // one as they are relative probabilities (and thus work more like weights),
2346  // and hence we need to normalize them to let the sum of them become one.
2347  SwitchBB->normalizeSuccProbs();
2348 
2349  SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
2350  MVT::Other, getControlRoot(),
2351  Cmp, DAG.getBasicBlock(B.TargetBB));
2352 
2353  // Avoid emitting unnecessary branches to the next block.
2354  if (NextMBB != NextBlock(SwitchBB))
2355  BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
2356  DAG.getBasicBlock(NextMBB));
2357 
2358  DAG.setRoot(BrAnd);
2359 }
2360 
2361 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
2362  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
2363 
2364  // Retrieve successors. Look through artificial IR level blocks like
2365  // catchswitch for successors.
2366  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
2367  const BasicBlock *EHPadBB = I.getSuccessor(1);
2368 
2369  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
2370  // have to do anything here to lower funclet bundles.
2372  {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
2373  "Cannot lower invokes with arbitrary operand bundles yet!");
2374 
2375  const Value *Callee(I.getCalledValue());
2376  const Function *Fn = dyn_cast<Function>(Callee);
2377  if (isa<InlineAsm>(Callee))
2378  visitInlineAsm(&I);
2379  else if (Fn && Fn->isIntrinsic()) {
2380  switch (Fn->getIntrinsicID()) {
2381  default:
2382  llvm_unreachable("Cannot invoke this intrinsic");
2383  case Intrinsic::donothing:
2384  // Ignore invokes to @llvm.donothing: jump directly to the next BB.
2385  break;
2386  case Intrinsic::experimental_patchpoint_void:
2387  case Intrinsic::experimental_patchpoint_i64:
2388  visitPatchpoint(&I, EHPadBB);
2389  break;
2390  case Intrinsic::experimental_gc_statepoint:
2391  LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
2392  break;
2393  }
2395  // Currently we do not lower any intrinsic calls with deopt operand bundles.
2396  // Eventually we will support lowering the @llvm.experimental.deoptimize
2397  // intrinsic, and right now there are no plans to support other intrinsics
2398  // with deopt state.
2399  LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
2400  } else {
2401  LowerCallTo(&I, getValue(Callee), false, EHPadBB);
2402  }
2403 
2404  // If the value of the invoke is used outside of its defining block, make it
2405  // available as a virtual register.
2406  // We already took care of the exported value for the statepoint instruction
2407  // during call to the LowerStatepoint.
2408  if (!isStatepoint(I)) {
2409  CopyToExportRegsIfNeeded(&I);
2410  }
2411 
2413  BranchProbabilityInfo *BPI = FuncInfo.BPI;
2414  BranchProbability EHPadBBProb =
2415  BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
2417  findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
2418 
2419  // Update successor info.
2420  addSuccessorWithProb(InvokeMBB, Return);
2421  for (auto &UnwindDest : UnwindDests) {
2422  UnwindDest.first->setIsEHPad();
2423  addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
2424  }
2425  InvokeMBB->normalizeSuccProbs();
2426 
2427  // Drop into normal successor.
2428  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
2429  MVT::Other, getControlRoot(),
2430  DAG.getBasicBlock(Return)));
2431 }
2432 
2433 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
2434  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
2435 }
2436 
2437 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
2438  assert(FuncInfo.MBB->isEHPad() &&
2439  "Call to landingpad not in landing pad!");
2440 
2441  MachineBasicBlock *MBB = FuncInfo.MBB;
2442  addLandingPadInfo(LP, *MBB);
2443 
2444  // If there aren't registers to copy the values into (e.g., during SjLj
2445  // exceptions), then don't bother to create these DAG nodes.
2446  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2447  const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
2448  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
2449  TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
2450  return;
2451 
2452  // If landingpad's return type is token type, we don't create DAG nodes
2453  // for its exception pointer and selector value. The extraction of exception
2454  // pointer or selector value from token type landingpads is not currently
2455  // supported.
2456  if (LP.getType()->isTokenTy())
2457  return;
2458 
2460  SDLoc dl = getCurSDLoc();
2461  ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
2462  assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
2463 
2464  // Get the two live-in registers as SDValues. The physregs have already been
2465  // copied into virtual registers.
2466  SDValue Ops[2];
2467  if (FuncInfo.ExceptionPointerVirtReg) {
2468  Ops[0] = DAG.getZExtOrTrunc(
2469  DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2470  FuncInfo.ExceptionPointerVirtReg,
2471  TLI.getPointerTy(DAG.getDataLayout())),
2472  dl, ValueVTs[0]);
2473  } else {
2474  Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
2475  }
2476  Ops[1] = DAG.getZExtOrTrunc(
2477  DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2478  FuncInfo.ExceptionSelectorVirtReg,
2479  TLI.getPointerTy(DAG.getDataLayout())),
2480  dl, ValueVTs[1]);
2481 
2482  // Merge into one.
2483  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
2484  DAG.getVTList(ValueVTs), Ops);
2485  setValue(&LP, Res);
2486 }
2487 
2488 void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
2489 #ifndef NDEBUG
2490  for (const CaseCluster &CC : Clusters)
2491  assert(CC.Low == CC.High && "Input clusters must be single-case");
2492 #endif
2493 
2494  std::sort(Clusters.begin(), Clusters.end(),
2495  [](const CaseCluster &a, const CaseCluster &b) {
2496  return a.Low->getValue().slt(b.Low->getValue());
2497  });
2498 
2499  // Merge adjacent clusters with the same destination.
2500  const unsigned N = Clusters.size();
2501  unsigned DstIndex = 0;
2502  for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
2503  CaseCluster &CC = Clusters[SrcIndex];
2504  const ConstantInt *CaseVal = CC.Low;
2505  MachineBasicBlock *Succ = CC.MBB;
2506 
2507  if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
2508  (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
2509  // If this case has the same successor and is a neighbour, merge it into
2510  // the previous cluster.
2511  Clusters[DstIndex - 1].High = CaseVal;
2512  Clusters[DstIndex - 1].Prob += CC.Prob;
2513  } else {
2514  std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
2515  sizeof(Clusters[SrcIndex]));
2516  }
2517  }
2518  Clusters.resize(DstIndex);
2519 }
2520 
2522  MachineBasicBlock *Last) {
2523  // Update JTCases.
2524  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
2525  if (JTCases[i].first.HeaderBB == First)
2526  JTCases[i].first.HeaderBB = Last;
2527 
2528  // Update BitTestCases.
2529  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
2530  if (BitTestCases[i].Parent == First)
2531  BitTestCases[i].Parent = Last;
2532 }
2533 
2534 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
2535  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
2536 
2537  // Update machine-CFG edges with unique successors.
2539  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
2540  BasicBlock *BB = I.getSuccessor(i);
2541  bool Inserted = Done.insert(BB).second;
2542  if (!Inserted)
2543  continue;
2544 
2545  MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
2546  addSuccessorWithProb(IndirectBrMBB, Succ);
2547  }
2548  IndirectBrMBB->normalizeSuccProbs();
2549 
2550  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
2551  MVT::Other, getControlRoot(),
2552  getValue(I.getAddress())));
2553 }
2554 
2555 void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
2556  if (DAG.getTarget().Options.TrapUnreachable)
2557  DAG.setRoot(
2558  DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
2559 }
2560 
2561 void SelectionDAGBuilder::visitFSub(const User &I) {
2562  // -0.0 - X --> fneg
2563  Type *Ty = I.getType();
2564  if (isa<Constant>(I.getOperand(0)) &&
2566  SDValue Op2 = getValue(I.getOperand(1));
2567  setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
2568  Op2.getValueType(), Op2));
2569  return;
2570  }
2571 
2572  visitBinary(I, ISD::FSUB);
2573 }
2574 
2575 /// Checks if the given instruction performs a vector reduction, in which case
2576 /// we have the freedom to alter the elements in the result as long as the
2577 /// reduction of them stays unchanged.
2578 static bool isVectorReductionOp(const User *I) {
2579  const Instruction *Inst = dyn_cast<Instruction>(I);
2580  if (!Inst || !Inst->getType()->isVectorTy())
2581  return false;
2582 
2583  auto OpCode = Inst->getOpcode();
2584  switch (OpCode) {
2585  case Instruction::Add:
2586  case Instruction::Mul:
2587  case Instruction::And:
2588  case Instruction::Or:
2589  case Instruction::Xor:
2590  break;
2591  case Instruction::FAdd:
2592  case Instruction::FMul:
2593  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2594  if (FPOp->getFastMathFlags().isFast())
2595  break;
2597  default:
2598  return false;
2599  }
2600 
2601  unsigned ElemNum = Inst->getType()->getVectorNumElements();
2602  unsigned ElemNumToReduce = ElemNum;
2603 
2604  // Do DFS search on the def-use chain from the given instruction. We only
2605  // allow four kinds of operations during the search until we reach the
2606  // instruction that extracts the first element from the vector:
2607  //
2608  // 1. The reduction operation of the same opcode as the given instruction.
2609  //
2610  // 2. PHI node.
2611  //
2612  // 3. ShuffleVector instruction together with a reduction operation that
2613  // does a partial reduction.
2614  //
2615  // 4. ExtractElement that extracts the first element from the vector, and we
2616  // stop searching the def-use chain here.
2617  //
2618  // 3 & 4 above perform a reduction on all elements of the vector. We push defs
2619  // from 1-3 to the stack to continue the DFS. The given instruction is not
2620  // a reduction operation if we meet any other instructions other than those
2621  // listed above.
2622 
2623  SmallVector<const User *, 16> UsersToVisit{Inst};
2625  bool ReduxExtracted = false;
2626 
2627  while (!UsersToVisit.empty()) {
2628  auto User = UsersToVisit.back();
2629  UsersToVisit.pop_back();
2630  if (!Visited.insert(User).second)
2631  continue;
2632 
2633  for (const auto &U : User->users()) {
2634  auto Inst = dyn_cast<Instruction>(U);
2635  if (!Inst)
2636  return false;
2637 
2638  if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
2639  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2640  if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
2641  return false;
2642  UsersToVisit.push_back(U);
2643  } else if (const ShuffleVectorInst *ShufInst =
2644  dyn_cast<ShuffleVectorInst>(U)) {
2645  // Detect the following pattern: A ShuffleVector instruction together
2646  // with a reduction that do partial reduction on the first and second
2647  // ElemNumToReduce / 2 elements, and store the result in
2648  // ElemNumToReduce / 2 elements in another vector.
2649 
2650  unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
2651  if (ResultElements < ElemNum)
2652  return false;
2653 
2654  if (ElemNumToReduce == 1)
2655  return false;
2656  if (!isa<UndefValue>(U->getOperand(1)))
2657  return false;
2658  for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
2659  if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
2660  return false;
2661  for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
2662  if (ShufInst->getMaskValue(i) != -1)
2663  return false;
2664 
2665  // There is only one user of this ShuffleVector instruction, which
2666  // must be a reduction operation.
2667  if (!U->hasOneUse())
2668  return false;
2669 
2670  auto U2 = dyn_cast<Instruction>(*U->user_begin());
2671  if (!U2 || U2->getOpcode() != OpCode)
2672  return false;
2673 
2674  // Check operands of the reduction operation.
2675  if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
2676  (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
2677  UsersToVisit.push_back(U2);
2678  ElemNumToReduce /= 2;
2679  } else
2680  return false;
2681  } else if (isa<ExtractElementInst>(U)) {
2682  // At this moment we should have reduced all elements in the vector.
2683  if (ElemNumToReduce != 1)
2684  return false;
2685 
2686  const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
2687  if (!Val || Val->getZExtValue() != 0)
2688  return false;
2689 
2690  ReduxExtracted = true;
2691  } else
2692  return false;
2693  }
2694  }
2695  return ReduxExtracted;
2696 }
2697 
2698 void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) {
2699  SDValue Op1 = getValue(I.getOperand(0));
2700  SDValue Op2 = getValue(I.getOperand(1));
2701 
2702  bool nuw = false;
2703  bool nsw = false;
2704  bool exact = false;
2705  bool vec_redux = false;
2706  FastMathFlags FMF;
2707 
2708  if (const OverflowingBinaryOperator *OFBinOp =
2709  dyn_cast<const OverflowingBinaryOperator>(&I)) {
2710  nuw = OFBinOp->hasNoUnsignedWrap();
2711  nsw = OFBinOp->hasNoSignedWrap();
2712  }
2713  if (const PossiblyExactOperator *ExactOp =
2714  dyn_cast<const PossiblyExactOperator>(&I))
2715  exact = ExactOp->isExact();
2716  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(&I))
2717  FMF = FPOp->getFastMathFlags();
2718 
2719  if (isVectorReductionOp(&I)) {
2720  vec_redux = true;
2721  DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
2722  }
2723 
2724  SDNodeFlags Flags;
2725  Flags.setExact(exact);
2726  Flags.setNoSignedWrap(nsw);
2727  Flags.setNoUnsignedWrap(nuw);
2728  Flags.setVectorReduction(vec_redux);
2729  Flags.setAllowReciprocal(FMF.allowReciprocal());
2730  Flags.setAllowContract(FMF.allowContract());
2731  Flags.setNoInfs(FMF.noInfs());
2732  Flags.setNoNaNs(FMF.noNaNs());
2733  Flags.setNoSignedZeros(FMF.noSignedZeros());
2734  Flags.setUnsafeAlgebra(FMF.isFast());
2735 
2736  SDValue BinNodeValue = DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(),
2737  Op1, Op2, Flags);
2738  setValue(&I, BinNodeValue);
2739 }
2740 
2741 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
2742  SDValue Op1 = getValue(I.getOperand(0));
2743  SDValue Op2 = getValue(I.getOperand(1));
2744 
2745  EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
2746  Op2.getValueType(), DAG.getDataLayout());
2747 
2748  // Coerce the shift amount to the right type if we can.
2749  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
2750  unsigned ShiftSize = ShiftTy.getSizeInBits();
2751  unsigned Op2Size = Op2.getValueSizeInBits();
2752  SDLoc DL = getCurSDLoc();
2753 
2754  // If the operand is smaller than the shift count type, promote it.
2755  if (ShiftSize > Op2Size)
2756  Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
2757 
2758  // If the operand is larger than the shift count type but the shift
2759  // count type has enough bits to represent any shift value, truncate
2760  // it now. This is a common case and it exposes the truncate to
2761  // optimization early.
2762  else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
2763  Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
2764  // Otherwise we'll need to temporarily settle for some other convenient
2765  // type. Type legalization will make adjustments once the shiftee is split.
2766  else
2767  Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
2768  }
2769 
2770  bool nuw = false;
2771  bool nsw = false;
2772  bool exact = false;
2773 
2774  if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
2775 
2776  if (const OverflowingBinaryOperator *OFBinOp =
2777  dyn_cast<const OverflowingBinaryOperator>(&I)) {
2778  nuw = OFBinOp->hasNoUnsignedWrap();
2779  nsw = OFBinOp->hasNoSignedWrap();
2780  }
2781  if (const PossiblyExactOperator *ExactOp =
2782  dyn_cast<const PossiblyExactOperator>(&I))
2783  exact = ExactOp->isExact();
2784  }
2785  SDNodeFlags Flags;
2786  Flags.setExact(exact);
2787  Flags.setNoSignedWrap(nsw);
2788  Flags.setNoUnsignedWrap(nuw);
2789  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
2790  Flags);
2791  setValue(&I, Res);
2792 }
2793 
2794 void SelectionDAGBuilder::visitSDiv(const User &I) {
2795  SDValue Op1 = getValue(I.getOperand(0));
2796  SDValue Op2 = getValue(I.getOperand(1));
2797 
2798  SDNodeFlags Flags;
2799  Flags.setExact(isa<PossiblyExactOperator>(&I) &&
2800  cast<PossiblyExactOperator>(&I)->isExact());
2801  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
2802  Op2, Flags));
2803 }
2804 
2805 void SelectionDAGBuilder::visitICmp(const User &I) {
2807  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2808  predicate = IC->getPredicate();
2809  else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
2810  predicate = ICmpInst::Predicate(IC->getPredicate());
2811  SDValue Op1 = getValue(I.getOperand(0));
2812  SDValue Op2 = getValue(I.getOperand(1));
2813  ISD::CondCode Opcode = getICmpCondCode(predicate);
2814 
2815  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2816  I.getType());
2817  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
2818 }
2819 
2820 void SelectionDAGBuilder::visitFCmp(const User &I) {
2822  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
2823  predicate = FC->getPredicate();
2824  else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
2825  predicate = FCmpInst::Predicate(FC->getPredicate());
2826  SDValue Op1 = getValue(I.getOperand(0));
2827  SDValue Op2 = getValue(I.getOperand(1));
2828  ISD::CondCode Condition = getFCmpCondCode(predicate);
2829 
2830  // FIXME: Fcmp instructions have fast-math-flags in IR, so we should use them.
2831  // FIXME: We should propagate the fast-math-flags to the DAG node itself for
2832  // further optimization, but currently FMF is only applicable to binary nodes.
2833  if (TM.Options.NoNaNsFPMath)
2834  Condition = getFCmpCodeWithoutNaN(Condition);
2835  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2836  I.getType());
2837  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
2838 }
2839 
2840 // Check if the condition of the select has one use or two users that are both
2841 // selects with the same condition.
2842 static bool hasOnlySelectUsers(const Value *Cond) {
2843  return llvm::all_of(Cond->users(), [](const Value *V) {
2844  return isa<SelectInst>(V);
2845  });
2846 }
2847 
2848 void SelectionDAGBuilder::visitSelect(const User &I) {
2851  ValueVTs);
2852  unsigned NumValues = ValueVTs.size();
2853  if (NumValues == 0) return;
2854 
2855  SmallVector<SDValue, 4> Values(NumValues);
2856  SDValue Cond = getValue(I.getOperand(0));
2857  SDValue LHSVal = getValue(I.getOperand(1));
2858  SDValue RHSVal = getValue(I.getOperand(2));
2859  auto BaseOps = {Cond};
2860  ISD::NodeType OpCode = Cond.getValueType().isVector() ?
2862 
2863  // Min/max matching is only viable if all output VTs are the same.
2864  if (std::equal(ValueVTs.begin(), ValueVTs.end(), ValueVTs.begin())) {
2865  EVT VT = ValueVTs[0];
2866  LLVMContext &Ctx = *DAG.getContext();
2867  auto &TLI = DAG.getTargetLoweringInfo();
2868 
2869  // We care about the legality of the operation after it has been type
2870  // legalized.
2871  while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
2872  VT != TLI.getTypeToTransformTo(Ctx, VT))
2873  VT = TLI.getTypeToTransformTo(Ctx, VT);
2874 
2875  // If the vselect is legal, assume we want to leave this as a vector setcc +
2876  // vselect. Otherwise, if this is going to be scalarized, we want to see if
2877  // min/max is legal on the scalar type.
2878  bool UseScalarMinMax = VT.isVector() &&
2879  !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
2880 
2881  Value *LHS, *RHS;
2882  auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
2884  switch (SPR.Flavor) {
2885  case SPF_UMAX: Opc = ISD::UMAX; break;
2886  case SPF_UMIN: Opc = ISD::UMIN; break;
2887  case SPF_SMAX: Opc = ISD::SMAX; break;
2888  case SPF_SMIN: Opc = ISD::SMIN; break;
2889  case SPF_FMINNUM:
2890  switch (SPR.NaNBehavior) {
2891  case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2892  case SPNB_RETURNS_NAN: Opc = ISD::FMINNAN; break;
2893  case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
2894  case SPNB_RETURNS_ANY: {
2895  if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
2896  Opc = ISD::FMINNUM;
2897  else if (TLI.isOperationLegalOrCustom(ISD::FMINNAN, VT))
2898  Opc = ISD::FMINNAN;
2899  else if (UseScalarMinMax)
2900  Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
2902  break;
2903  }
2904  }
2905  break;
2906  case SPF_FMAXNUM:
2907  switch (SPR.NaNBehavior) {
2908  case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2909  case SPNB_RETURNS_NAN: Opc = ISD::FMAXNAN; break;
2910  case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
2911  case SPNB_RETURNS_ANY:
2912 
2913  if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
2914  Opc = ISD::FMAXNUM;
2915  else if (TLI.isOperationLegalOrCustom(ISD::FMAXNAN, VT))
2916  Opc = ISD::FMAXNAN;
2917  else if (UseScalarMinMax)
2918  Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
2920  break;
2921  }
2922  break;
2923  default: break;
2924  }
2925 
2926  if (Opc != ISD::DELETED_NODE &&
2927  (TLI.isOperationLegalOrCustom(Opc, VT) ||
2928  (UseScalarMinMax &&
2929  TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
2930  // If the underlying comparison instruction is used by any other
2931  // instruction, the consumed instructions won't be destroyed, so it is
2932  // not profitable to convert to a min/max.
2933  hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
2934  OpCode = Opc;
2935  LHSVal = getValue(LHS);
2936  RHSVal = getValue(RHS);
2937  BaseOps = {};
2938  }
2939  }
2940 
2941  for (unsigned i = 0; i != NumValues; ++i) {
2942  SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
2943  Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
2944  Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
2945  Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
2946  LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
2947  Ops);
2948  }
2949 
2950  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
2951  DAG.getVTList(ValueVTs), Values));
2952 }
2953 
2954 void SelectionDAGBuilder::visitTrunc(const User &I) {
2955  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
2956  SDValue N = getValue(I.getOperand(0));
2957  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2958  I.getType());
2959  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
2960 }
2961 
2962 void SelectionDAGBuilder::visitZExt(const User &I) {
2963  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2964  // ZExt also can't be a cast to bool for same reason. So, nothing much to do
2965  SDValue N = getValue(I.getOperand(0));
2966  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2967  I.getType());
2968  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
2969 }
2970 
2971 void SelectionDAGBuilder::visitSExt(const User &I) {
2972  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
2973  // SExt also can't be a cast to bool for same reason. So, nothing much to do
2974  SDValue N = getValue(I.getOperand(0));
2975  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2976  I.getType());
2977  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
2978 }
2979 
2980 void SelectionDAGBuilder::visitFPTrunc(const User &I) {
2981  // FPTrunc is never a no-op cast, no need to check
2982  SDValue N = getValue(I.getOperand(0));
2983  SDLoc dl = getCurSDLoc();
2984  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2985  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
2986  setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
2987  DAG.getTargetConstant(
2988  0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
2989 }
2990 
2991 void SelectionDAGBuilder::visitFPExt(const User &I) {
2992  // FPExt is never a no-op cast, no need to check
2993  SDValue N = getValue(I.getOperand(0));
2994  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2995  I.getType());
2996  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
2997 }
2998 
2999 void SelectionDAGBuilder::visitFPToUI(const User &I) {
3000  // FPToUI is never a no-op cast, no need to check
3001  SDValue N = getValue(I.getOperand(0));
3002  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3003  I.getType());
3004  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
3005 }
3006 
3007 void SelectionDAGBuilder::visitFPToSI(const User &I) {
3008  // FPToSI is never a no-op cast, no need to check
3009  SDValue N = getValue(I.getOperand(0));
3010  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3011  I.getType());
3012  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
3013 }
3014 
3015 void SelectionDAGBuilder::visitUIToFP(const User &I) {
3016  // UIToFP is never a no-op cast, no need to check
3017  SDValue N = getValue(I.getOperand(0));
3018  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3019  I.getType());
3020  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
3021 }
3022 
3023 void SelectionDAGBuilder::visitSIToFP(const User &I) {
3024  // SIToFP is never a no-op cast, no need to check
3025  SDValue N = getValue(I.getOperand(0));
3026  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3027  I.getType());
3028  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
3029 }
3030 
3031 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
3032  // What to do depends on the size of the integer and the size of the pointer.
3033  // We can either truncate, zero extend, or no-op, accordingly.
3034  SDValue N = getValue(I.getOperand(0));
3035  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3036  I.getType());
3037  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3038 }
3039 
3040 void SelectionDAGBuilder::visitIntToPtr(const User &I) {
3041  // What to do depends on the size of the integer and the size of the pointer.
3042  // We can either truncate, zero extend, or no-op, accordingly.
3043  SDValue N = getValue(I.getOperand(0));
3044  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3045  I.getType());
3046  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3047 }
3048 
3049 void SelectionDAGBuilder::visitBitCast(const User &I) {
3050  SDValue N = getValue(I.getOperand(0));
3051  SDLoc dl = getCurSDLoc();
3052  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3053  I.getType());
3054 
3055  // BitCast assures us that source and destination are the same size so this is
3056  // either a BITCAST or a no-op.
3057  if (DestVT != N.getValueType())
3058  setValue(&I, DAG.getNode(ISD::BITCAST, dl,
3059  DestVT, N)); // convert types.
3060  // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
3061  // might fold any kind of constant expression to an integer constant and that
3062  // is not what we are looking for. Only recognize a bitcast of a genuine
3063  // constant integer as an opaque constant.
3064  else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
3065  setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
3066  /*isOpaque*/true));
3067  else
3068  setValue(&I, N); // noop cast.
3069 }
3070 
3071 void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
3072  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3073  const Value *SV = I.getOperand(0);
3074  SDValue N = getValue(SV);
3075  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3076 
3077  unsigned SrcAS = SV->getType()->getPointerAddressSpace();
3078  unsigned DestAS = I.getType()->getPointerAddressSpace();
3079 
3080  if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3081  N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
3082 
3083  setValue(&I, N);
3084 }
3085 
3086 void SelectionDAGBuilder::visitInsertElement(const User &I) {
3087  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3088  SDValue InVec = getValue(I.getOperand(0));
3089  SDValue InVal = getValue(I.getOperand(1));
3090  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
3091  TLI.getVectorIdxTy(DAG.getDataLayout()));
3092  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
3093  TLI.getValueType(DAG.getDataLayout(), I.getType()),
3094  InVec, InVal, InIdx));
3095 }
3096 
3097 void SelectionDAGBuilder::visitExtractElement(const User &I) {
3098  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3099  SDValue InVec = getValue(I.getOperand(0));
3100  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
3101  TLI.getVectorIdxTy(DAG.getDataLayout()));
3102  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
3103  TLI.getValueType(DAG.getDataLayout(), I.getType()),
3104  InVec, InIdx));
3105 }
3106 
3107 void SelectionDAGBuilder::visitShuffleVector(const User &I) {
3108  SDValue Src1 = getValue(I.getOperand(0));
3109  SDValue Src2 = getValue(I.getOperand(1));
3110  SDLoc DL = getCurSDLoc();
3111 
3113  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
3114  unsigned MaskNumElts = Mask.size();
3115 
3116  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3117  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3118  EVT SrcVT = Src1.getValueType();
3119  unsigned SrcNumElts = SrcVT.getVectorNumElements();
3120 
3121  if (SrcNumElts == MaskNumElts) {
3122  setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
3123  return;
3124  }
3125 
3126  // Normalize the shuffle vector since mask and vector length don't match.
3127  if (SrcNumElts < MaskNumElts) {
3128  // Mask is longer than the source vectors. We can use concatenate vector to
3129  // make the mask and vectors lengths match.
3130 
3131  if (MaskNumElts % SrcNumElts == 0) {
3132  // Mask length is a multiple of the source vector length.
3133  // Check if the shuffle is some kind of concatenation of the input
3134  // vectors.
3135  unsigned NumConcat = MaskNumElts / SrcNumElts;
3136  bool IsConcat = true;
3137  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
3138  for (unsigned i = 0; i != MaskNumElts; ++i) {
3139  int Idx = Mask[i];
3140  if (Idx < 0)
3141  continue;
3142  // Ensure the indices in each SrcVT sized piece are sequential and that
3143  // the same source is used for the whole piece.
3144  if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
3145  (ConcatSrcs[i / SrcNumElts] >= 0 &&
3146  ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
3147  IsConcat = false;
3148  break;
3149  }
3150  // Remember which source this index came from.
3151  ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
3152  }
3153 
3154  // The shuffle is concatenating multiple vectors together. Just emit
3155  // a CONCAT_VECTORS operation.
3156  if (IsConcat) {
3157  SmallVector<SDValue, 8> ConcatOps;
3158  for (auto Src : ConcatSrcs) {
3159  if (Src < 0)
3160  ConcatOps.push_back(DAG.getUNDEF(SrcVT));
3161  else if (Src == 0)
3162  ConcatOps.push_back(Src1);
3163  else
3164  ConcatOps.push_back(Src2);
3165  }
3166  setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
3167  return;
3168  }
3169  }
3170 
3171  unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
3172  unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
3173  EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
3174  PaddedMaskNumElts);
3175 
3176  // Pad both vectors with undefs to make them the same length as the mask.
3177  SDValue UndefVal = DAG.getUNDEF(SrcVT);
3178 
3179  SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
3180  SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
3181  MOps1[0] = Src1;
3182  MOps2[0] = Src2;
3183 
3184  Src1 = Src1.isUndef()
3185  ? DAG.getUNDEF(PaddedVT)
3186  : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
3187  Src2 = Src2.isUndef()
3188  ? DAG.getUNDEF(PaddedVT)
3189  : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
3190 
3191  // Readjust mask for new input vector length.
3192  SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
3193  for (unsigned i = 0; i != MaskNumElts; ++i) {
3194  int Idx = Mask[i];
3195  if (Idx >= (int)SrcNumElts)
3196  Idx -= SrcNumElts - PaddedMaskNumElts;
3197  MappedOps[i] = Idx;
3198  }
3199 
3200  SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
3201 
3202  // If the concatenated vector was padded, extract a subvector with the
3203  // correct number of elements.
3204  if (MaskNumElts != PaddedMaskNumElts)
3205  Result = DAG.getNode(
3206  ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
3207  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
3208 
3209  setValue(&I, Result);
3210  return;
3211  }
3212 
3213  if (SrcNumElts > MaskNumElts) {
3214  // Analyze the access pattern of the vector to see if we can extract
3215  // two subvectors and do the shuffle.
3216  int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
3217  bool CanExtract = true;
3218  for (int Idx : Mask) {
3219  unsigned Input = 0;
3220  if (Idx < 0)
3221  continue;
3222 
3223  if (Idx >= (int)SrcNumElts) {
3224  Input = 1;
3225  Idx -= SrcNumElts;
3226  }
3227 
3228  // If all the indices come from the same MaskNumElts sized portion of
3229  // the sources we can use extract. Also make sure the extract wouldn't
3230  // extract past the end of the source.
3231  int NewStartIdx = alignDown(Idx, MaskNumElts);
3232  if (NewStartIdx + MaskNumElts > SrcNumElts ||
3233  (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
3234  CanExtract = false;
3235  // Make sure we always update StartIdx as we use it to track if all
3236  // elements are undef.
3237  StartIdx[Input] = NewStartIdx;
3238  }
3239 
3240  if (StartIdx[0] < 0 && StartIdx[1] < 0) {
3241  setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
3242  return;
3243  }
3244  if (CanExtract) {
3245  // Extract appropriate subvector and generate a vector shuffle
3246  for (unsigned Input = 0; Input < 2; ++Input) {
3247  SDValue &Src = Input == 0 ? Src1 : Src2;
3248  if (StartIdx[Input] < 0)
3249  Src = DAG.getUNDEF(VT);
3250  else {
3251  Src = DAG.getNode(
3252  ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
3253  DAG.getConstant(StartIdx[Input], DL,
3254  TLI.getVectorIdxTy(DAG.getDataLayout())));
3255  }
3256  }
3257 
3258  // Calculate new mask.
3259  SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
3260  for (int &Idx : MappedOps) {
3261  if (Idx >= (int)SrcNumElts)
3262  Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
3263  else if (Idx >= 0)
3264  Idx -= StartIdx[0];
3265  }
3266 
3267  setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
3268  return;
3269  }
3270  }
3271 
3272  // We can't use either concat vectors or extract subvectors so fall back to
3273  // replacing the shuffle with extract and build vector.
3274  // to insert and build vector.
3275  EVT EltVT = VT.getVectorElementType();
3276  EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
3278  for (int Idx : Mask) {
3279  SDValue Res;
3280 
3281  if (Idx < 0) {
3282  Res = DAG.getUNDEF(EltVT);
3283  } else {
3284  SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
3285  if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
3286 
3287  Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3288  EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
3289  }
3290 
3291  Ops.push_back(Res);
3292  }
3293 
3294  setValue(&I, DAG.getBuildVector(VT, DL, Ops));
3295 }
3296 
3297 void SelectionDAGBuilder::visitInsertValue(const User &I) {
3298  ArrayRef<unsigned> Indices;
3299  if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
3300  Indices = IV->getIndices();
3301  else
3302  Indices = cast<ConstantExpr>(&I)->getIndices();
3303 
3304  const Value *Op0 = I.getOperand(0);
3305  const Value *Op1 = I.getOperand(1);
3306  Type *AggTy = I.getType();
3307  Type *ValTy = Op1->getType();
3308  bool IntoUndef = isa<UndefValue>(Op0);
3309  bool FromUndef = isa<UndefValue>(Op1);
3310 
3311  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3312 
3313  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3314  SmallVector<EVT, 4> AggValueVTs;
3315  ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
3316  SmallVector<EVT, 4> ValValueVTs;
3317  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3318 
3319  unsigned NumAggValues = AggValueVTs.size();
3320  unsigned NumValValues = ValValueVTs.size();
3321  SmallVector<SDValue, 4> Values(NumAggValues);
3322 
3323  // Ignore an insertvalue that produces an empty object
3324  if (!NumAggValues) {
3325  setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3326  return;
3327  }
3328 
3329  SDValue Agg = getValue(Op0);
3330  unsigned i = 0;
3331  // Copy the beginning value(s) from the original aggregate.
3332  for (; i != LinearIndex; ++i)
3333  Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3334  SDValue(Agg.getNode(), Agg.getResNo() + i);
3335  // Copy values from the inserted value(s).
3336  if (NumValValues) {
3337  SDValue Val = getValue(Op1);
3338  for (; i != LinearIndex + NumValValues; ++i)
3339  Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3340  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
3341  }
3342  // Copy remaining value(s) from the original aggregate.
3343  for (; i != NumAggValues; ++i)
3344  Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3345  SDValue(Agg.getNode(), Agg.getResNo() + i);
3346 
3347  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3348  DAG.getVTList(AggValueVTs), Values));
3349 }
3350 
3351 void SelectionDAGBuilder::visitExtractValue(const User &I) {
3352  ArrayRef<unsigned> Indices;
3353  if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
3354  Indices = EV->getIndices();
3355  else
3356  Indices = cast<ConstantExpr>(&I)->getIndices();
3357 
3358  const Value *Op0 = I.getOperand(0);
3359  Type *AggTy = Op0->getType();
3360  Type *ValTy = I.getType();
3361  bool OutOfUndef = isa<UndefValue>(Op0);
3362 
3363  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3364 
3365  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3366  SmallVector<EVT, 4> ValValueVTs;
3367  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3368 
3369  unsigned NumValValues = ValValueVTs.size();
3370 
3371  // Ignore a extractvalue that produces an empty object
3372  if (!NumValValues) {
3373  setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3374  return;
3375  }
3376 
3377  SmallVector<SDValue, 4> Values(NumValValues);
3378 
3379  SDValue Agg = getValue(Op0);
3380  // Copy out the selected value(s).
3381  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
3382  Values[i - LinearIndex] =
3383  OutOfUndef ?
3384  DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
3385  SDValue(Agg.getNode(), Agg.getResNo() + i);
3386 
3387  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3388  DAG.getVTList(ValValueVTs), Values));
3389 }
3390 
3391 void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
3392  Value *Op0 = I.getOperand(0);
3393  // Note that the pointer operand may be a vector of pointers. Take the scalar
3394  // element which holds a pointer.
3395  unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
3396  SDValue N = getValue(Op0);
3397  SDLoc dl = getCurSDLoc();
3398 
3399  // Normalize Vector GEP - all scalar operands should be converted to the
3400  // splat vector.
3401  unsigned VectorWidth = I.getType()->isVectorTy() ?
3402  cast<VectorType>(I.getType())->getVectorNumElements() : 0;
3403 
3404  if (VectorWidth && !N.getValueType().isVector()) {
3405  LLVMContext &Context = *DAG.getContext();
3406  EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
3407  N = DAG.getSplatBuildVector(VT, dl, N);
3408  }
3409 
3410  for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
3411  GTI != E; ++GTI) {
3412  const Value *Idx = GTI.getOperand();
3413  if (StructType *StTy = GTI.getStructTypeOrNull()) {
3414  unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
3415  if (Field) {
3416  // N = N + Offset
3417  uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
3418 
3419  // In an inbounds GEP with an offset that is nonnegative even when
3420  // interpreted as signed, assume there is no unsigned overflow.
3421  SDNodeFlags Flags;
3422  if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
3423  Flags.setNoUnsignedWrap(true);
3424 
3425  N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
3426  DAG.getConstant(Offset, dl, N.getValueType()), Flags);
3427  }
3428  } else {
3429  MVT PtrTy =
3431  unsigned PtrSize = PtrTy.getSizeInBits();
3432  APInt ElementSize(PtrSize, DL->getTypeAllocSize(GTI.getIndexedType()));
3433 
3434  // If this is a scalar constant or a splat vector of constants,
3435  // handle it quickly.
3436  const auto *CI = dyn_cast<ConstantInt>(Idx);
3437  if (!CI && isa<ConstantDataVector>(Idx) &&
3438  cast<ConstantDataVector>(Idx)->getSplatValue())
3439  CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
3440 
3441  if (CI) {
3442  if (CI->isZero())
3443  continue;
3444  APInt Offs = ElementSize * CI->getValue().sextOrTrunc(PtrSize);
3445  LLVMContext &Context = *DAG.getContext();
3446  SDValue OffsVal = VectorWidth ?
3447  DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, PtrTy, VectorWidth)) :
3448  DAG.getConstant(Offs, dl, PtrTy);
3449 
3450  // In an inbouds GEP with an offset that is nonnegative even when
3451  // interpreted as signed, assume there is no unsigned overflow.
3452  SDNodeFlags Flags;
3453  if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
3454  Flags.setNoUnsignedWrap(true);
3455 
3456  N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
3457  continue;
3458  }
3459 
3460  // N = N + Idx * ElementSize;
3461  SDValue IdxN = getValue(Idx);
3462 
3463  if (!IdxN.getValueType().isVector() && VectorWidth) {
3464  EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
3465  IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
3466  }
3467 
3468  // If the index is smaller or larger than intptr_t, truncate or extend
3469  // it.
3470  IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
3471 
3472  // If this is a multiply by a power of two, turn it into a shl
3473  // immediately. This is a very common case.
3474  if (ElementSize != 1) {
3475  if (ElementSize.isPowerOf2()) {
3476  unsigned Amt = ElementSize.logBase2();
3477  IdxN = DAG.getNode(ISD::SHL, dl,
3478  N.getValueType(), IdxN,
3479  DAG.getConstant(Amt, dl, IdxN.getValueType()));
3480  } else {
3481  SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
3482  IdxN = DAG.getNode(ISD::MUL, dl,
3483  N.getValueType(), IdxN, Scale);
3484  }
3485  }
3486 
3487  N = DAG.getNode(ISD::ADD, dl,
3488  N.getValueType(), N, IdxN);
3489  }
3490  }
3491 
3492  setValue(&I, N);
3493 }
3494 
3495 void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
3496  // If this is a fixed sized alloca in the entry block of the function,
3497  // allocate it statically on the stack.
3498  if (FuncInfo.StaticAllocaMap.count(&I))
3499  return; // getValue will auto-populate this.
3500 
3501  SDLoc dl = getCurSDLoc();
3502  Type *Ty = I.getAllocatedType();
3503  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3504  auto &DL = DAG.getDataLayout();
3505  uint64_t TySize = DL.getTypeAllocSize(Ty);
3506  unsigned Align =
3507  std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
3508 
3509  SDValue AllocSize = getValue(I.getArraySize());
3510 
3511  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
3512  if (AllocSize.getValueType() != IntPtr)
3513  AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
3514 
3515  AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
3516  AllocSize,
3517  DAG.getConstant(TySize, dl, IntPtr));
3518 
3519  // Handle alignment. If the requested alignment is less than or equal to
3520  // the stack alignment, ignore it. If the size is greater than or equal to
3521  // the stack alignment, we note this in the DYNAMIC_STACKALLOC node.
3522  unsigned StackAlign =
3524  if (Align <= StackAlign)
3525  Align = 0;
3526 
3527  // Round the size of the allocation up to the stack alignment size
3528  // by add SA-1 to the size. This doesn't overflow because we're computing
3529  // an address inside an alloca.
3530  SDNodeFlags Flags;
3531  Flags.setNoUnsignedWrap(true);
3532  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
3533  DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);
3534 
3535  // Mask out the low bits for alignment purposes.
3536  AllocSize =
3537  DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
3538  DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));
3539 
3540  SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
3541  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
3542  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
3543  setValue(&I, DSA);
3544  DAG.setRoot(DSA.getValue(1));
3545 
3546  assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
3547 }
3548 
3549 void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
3550  if (I.isAtomic())
3551  return visitAtomicLoad(I);
3552 
3553  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3554  const Value *SV = I.getOperand(0);
3555  if (TLI.supportSwiftError()) {
3556  // Swifterror values can come from either a function parameter with
3557  // swifterror attribute or an alloca with swifterror attribute.
3558  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
3559  if (Arg->hasSwiftErrorAttr())
3560  return visitLoadFromSwiftError(I);
3561  }
3562 
3563  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
3564  if (Alloca->isSwiftError())
3565  return visitLoadFromSwiftError(I);
3566  }
3567  }
3568 
3569  SDValue Ptr = getValue(SV);
3570 
3571  Type *Ty = I.getType();
3572 
3573  bool isVolatile = I.isVolatile();
3574  bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
3575  bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
3576  bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
3577  unsigned Alignment = I.getAlignment();
3578 
3579  AAMDNodes AAInfo;
3580  I.getAAMetadata(AAInfo);
3581  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3582 
3585  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
3586  unsigned NumValues = ValueVTs.size();
3587  if (NumValues == 0)
3588  return;
3589 
3590  SDValue Root;
3591  bool ConstantMemory = false;
3592  if (isVolatile || NumValues > MaxParallelChains)
3593  // Serialize volatile loads with other side effects.
3594  Root = getRoot();
3595  else if (AA && AA->pointsToConstantMemory(MemoryLocation(
3596  SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
3597  // Do not serialize (non-volatile) loads of constant memory with anything.
3598  Root = DAG.getEntryNode();
3599  ConstantMemory = true;
3600  } else {
3601  // Do not serialize non-volatile loads against each other.
3602  Root = DAG.getRoot();
3603  }
3604 
3605  SDLoc dl = getCurSDLoc();
3606 
3607  if (isVolatile)
3608  Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
3609 
3610  // An aggregate load cannot wrap around the address space, so offsets to its
3611  // parts don't wrap either.
3612  SDNodeFlags Flags;
3613  Flags.setNoUnsignedWrap(true);
3614 
3615  SmallVector<SDValue, 4> Values(NumValues);
3616  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3617  EVT PtrVT = Ptr.getValueType();
3618  unsigned ChainI = 0;
3619  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3620  // Serializing loads here may result in excessive register pressure, and
3621  // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
3622  // could recover a bit by hoisting nodes upward in the chain by recognizing
3623  // they are side-effect free or do not alias. The optimizer should really
3624  // avoid this case by converting large object/array copies to llvm.memcpy
3625  // (MaxParallelChains should always remain as failsafe).
3626  if (ChainI == MaxParallelChains) {
3627  assert(PendingLoads.empty() && "PendingLoads must be serialized first");
3628  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3629  makeArrayRef(Chains.data(), ChainI));
3630  Root = Chain;
3631  ChainI = 0;
3632  }
3633  SDValue A = DAG.getNode(ISD::ADD, dl,
3634  PtrVT, Ptr,
3635  DAG.getConstant(Offsets[i], dl, PtrVT),
3636  Flags);
3637  auto MMOFlags = MachineMemOperand::MONone;
3638  if (isVolatile)
3639  MMOFlags |= MachineMemOperand::MOVolatile;
3640  if (isNonTemporal)
3642  if (isInvariant)
3643  MMOFlags |= MachineMemOperand::MOInvariant;
3644  if (isDereferenceable)
3646  MMOFlags |= TLI.getMMOFlags(I);
3647 
3648  SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
3649  MachinePointerInfo(SV, Offsets[i]), Alignment,
3650  MMOFlags, AAInfo, Ranges);
3651 
3652  Values[i] = L;
3653  Chains[ChainI] = L.getValue(1);
3654  }
3655 
3656  if (!ConstantMemory) {
3657  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3658  makeArrayRef(Chains.data(), ChainI));
3659  if (isVolatile)
3660  DAG.setRoot(Chain);
3661  else
3662  PendingLoads.push_back(Chain);
3663  }
3664 
3665  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
3666  DAG.getVTList(ValueVTs), Values));
3667 }
3668 
3669 void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
3671  "call visitStoreToSwiftError when backend supports swifterror");
3672 
3675  const Value *SrcV = I.getOperand(0);
3677  SrcV->getType(), ValueVTs, &Offsets);
3678  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3679  "expect a single EVT for swifterror");
3680 
3681  SDValue Src = getValue(SrcV);
3682  // Create a virtual register, then update the virtual register.
3683  unsigned VReg; bool CreatedVReg;
3684  std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
3685  // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
3686  // Chain can be getRoot or getControlRoot.
3687  SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
3688  SDValue(Src.getNode(), Src.getResNo()));
3689  DAG.setRoot(CopyNode);
3690  if (CreatedVReg)
3691  FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
3692 }
3693 
3694 void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
3696  "call visitLoadFromSwiftError when backend supports swifterror");
3697 
3698  assert(!I.isVolatile() &&
3699  I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
3701  "Support volatile, non temporal, invariant for load_from_swift_error");
3702 
3703  const Value *SV = I.getOperand(0);
3704  Type *Ty = I.getType();
3705  AAMDNodes AAInfo;
3706  I.getAAMetadata(AAInfo);
3707  assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
3708  SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
3709  "load_from_swift_error should not be constant memory");
3710 
3714  ValueVTs, &Offsets);
3715  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3716  "expect a single EVT for swifterror");
3717 
3718  // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
3719  SDValue L = DAG.getCopyFromReg(
3720  getRoot(), getCurSDLoc(),
3721  FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
3722  ValueVTs[0]);
3723 
3724  setValue(&I, L);
3725 }
3726 
3727 void SelectionDAGBuilder::visitStore(const StoreInst &I) {
3728  if (I.isAtomic())
3729  return visitAtomicStore(I);
3730 
3731  const Value *SrcV = I.getOperand(0);
3732  const Value *PtrV = I.getOperand(1);
3733 
3734  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3735  if (TLI.supportSwiftError()) {
3736  // Swifterror values can come from either a function parameter with
3737  // swifterror attribute or an alloca with swifterror attribute.
3738  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
3739  if (Arg->hasSwiftErrorAttr())
3740  return visitStoreToSwiftError(I);
3741  }
3742 
3743  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
3744  if (Alloca->isSwiftError())
3745  return visitStoreToSwiftError(I);
3746  }
3747  }
3748 
3752  SrcV->getType(), ValueVTs, &Offsets);
3753  unsigned NumValues = ValueVTs.size();
3754  if (NumValues == 0)
3755  return;
3756 
3757  // Get the lowered operands. Note that we do this after
3758  // checking if NumResults is zero, because with zero results
3759  // the operands won't have values in the map.
3760  SDValue Src = getValue(SrcV);
3761  SDValue Ptr = getValue(PtrV);
3762 
3763  SDValue Root = getRoot();
3764  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3765  SDLoc dl = getCurSDLoc();
3766  EVT PtrVT = Ptr.getValueType();
3767  unsigned Alignment = I.getAlignment();
3768  AAMDNodes AAInfo;
3769  I.getAAMetadata(AAInfo);
3770 
3771  auto MMOFlags = MachineMemOperand::MONone;
3772  if (I.isVolatile())
3773  MMOFlags |= MachineMemOperand::MOVolatile;
3774  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
3776  MMOFlags |= TLI.getMMOFlags(I);
3777 
3778  // An aggregate load cannot wrap around the address space, so offsets to its
3779  // parts don't wrap either.
3780  SDNodeFlags Flags;
3781  Flags.setNoUnsignedWrap(true);
3782 
3783  unsigned ChainI = 0;
3784  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3785  // See visitLoad comments.
3786  if (ChainI == MaxParallelChains) {
3787  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3788  makeArrayRef(Chains.data(), ChainI));
3789  Root = Chain;
3790  ChainI = 0;
3791  }
3792  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
3793  DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
3794  SDValue St = DAG.getStore(
3795  Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
3796  MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
3797  Chains[ChainI] = St;
3798  }
3799 
3800  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3801  makeArrayRef(Chains.data(), ChainI));
3802  DAG.setRoot(StoreNode);
3803 }
3804 
3805 void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
3806  bool IsCompressing) {
3807  SDLoc sdl = getCurSDLoc();
3808 
3809  auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3810  unsigned& Alignment) {
3811  // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
3812  Src0 = I.getArgOperand(0);
3813  Ptr = I.getArgOperand(1);
3814  Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3815  Mask = I.getArgOperand(3);
3816  };
3817  auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3818  unsigned& Alignment) {
3819  // llvm.masked.compressstore.*(Src0, Ptr, Mask)
3820  Src0 = I.getArgOperand(0);
3821  Ptr = I.getArgOperand(1);
3822  Mask = I.getArgOperand(2);
3823  Alignment = 0;
3824  };
3825 
3826  Value *PtrOperand, *MaskOperand, *Src0Operand;
3827  unsigned Alignment;
3828  if (IsCompressing)
3829  getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3830  else
3831  getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3832 
3833  SDValue Ptr = getValue(PtrOperand);
3834  SDValue Src0 = getValue(Src0Operand);
3835  SDValue Mask = getValue(MaskOperand);
3836 
3837  EVT VT = Src0.getValueType();
3838  if (!Alignment)
3839  Alignment = DAG.getEVTAlignment(VT);
3840 
3841  AAMDNodes AAInfo;
3842  I.getAAMetadata(AAInfo);
3843 
3844  MachineMemOperand *MMO =
3845  DAG.getMachineFunction().
3846  getMachineMemOperand(MachinePointerInfo(PtrOperand),
3848  Alignment, AAInfo);
3849  SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
3850  MMO, false /* Truncating */,
3851  IsCompressing);
3852  DAG.setRoot(StoreNode);
3853  setValue(&I, StoreNode);
3854 }
3855 
3856 // Get a uniform base for the Gather/Scatter intrinsic.
3857 // The first argument of the Gather/Scatter intrinsic is a vector of pointers.
3858 // We try to represent it as a base pointer + vector of indices.
3859 // Usually, the vector of pointers comes from a 'getelementptr' instruction.
3860 // The first operand of the GEP may be a single pointer or a vector of pointers
3861 // Example:
3862 // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
3863 // or
3864 // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
3865 // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
3866 //
3867 // When the first GEP operand is a single pointer - it is the uniform base we
3868 // are looking for. If first operand of the GEP is a splat vector - we
3869 // extract the splat value and use it as a uniform base.
3870 // In all other cases the function returns 'false'.
3871 static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
3872  SelectionDAGBuilder* SDB) {
3873  SelectionDAG& DAG = SDB->DAG;
3874  LLVMContext &Context = *DAG.getContext();
3875 
3876  assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
3878  if (!GEP)
3879  return false;
3880 
3881  const Value *GEPPtr = GEP->getPointerOperand();
3882  if (!GEPPtr->getType()->isVectorTy())
3883  Ptr = GEPPtr;
3884  else if (!(Ptr = getSplatValue(GEPPtr)))
3885  return false;
3886 
3887  unsigned FinalIndex = GEP->getNumOperands() - 1;
3888  Value *IndexVal = GEP->getOperand(FinalIndex);
3889 
3890  // Ensure all the other indices are 0.
3891  for (unsigned i = 1; i < FinalIndex; ++i) {
3892  auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
3893  if (!C || !C->isZero())
3894  return false;
3895  }
3896 
3897  // The operands of the GEP may be defined in another basic block.
3898  // In this case we'll not find nodes for the operands.
3899  if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
3900  return false;
3901 
3902  Base = SDB->getValue(Ptr);
3903  Index = SDB->getValue(IndexVal);
3904 
3905  if (!Index.getValueType().isVector()) {
3906  unsigned GEPWidth = GEP->getType()->getVectorNumElements();
3907  EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
3908  Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
3909  }
3910  return true;
3911 }
3912 
3913 void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
3914  SDLoc sdl = getCurSDLoc();
3915 
3916  // llvm.masked.scatter.*(Src0, Ptrs, alignemt, Mask)
3917  const Value *Ptr = I.getArgOperand(1);
3918  SDValue Src0 = getValue(I.getArgOperand(0));
3919  SDValue Mask = getValue(I.getArgOperand(3));
3920  EVT VT = Src0.getValueType();
3921  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
3922  if (!Alignment)
3923  Alignment = DAG.getEVTAlignment(VT);
3924  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3925 
3926  AAMDNodes AAInfo;
3927  I.getAAMetadata(AAInfo);
3928 
3929  SDValue Base;
3930  SDValue Index;
3931  const Value *BasePtr = Ptr;
3932  bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
3933 
3934  const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
3936  getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
3937  MachineMemOperand::MOStore, VT.getStoreSize(),
3938  Alignment, AAInfo);
3939  if (!UniformBase) {
3940  Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
3941  Index = getValue(Ptr);
3942  }
3943  SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index };
3944  SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
3945  Ops, MMO);
3946  DAG.setRoot(Scatter);
3947  setValue(&I, Scatter);
3948 }
3949 
3950 void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
3951  SDLoc sdl = getCurSDLoc();
3952 
3953  auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3954  unsigned& Alignment) {
3955  // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
3956  Ptr = I.getArgOperand(0);
3957  Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
3958  Mask = I.getArgOperand(2);
3959  Src0 = I.getArgOperand(3);
3960  };
3961  auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3962  unsigned& Alignment) {
3963  // @llvm.masked.expandload.*(Ptr, Mask, Src0)
3964  Ptr = I.getArgOperand(0);
3965  Alignment = 0;
3966  Mask = I.getArgOperand(1);
3967  Src0 = I.getArgOperand(2);
3968  };
3969 
3970  Value *PtrOperand, *MaskOperand, *Src0Operand;
3971  unsigned Alignment;
3972  if (IsExpanding)
3973  getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3974  else
3975  getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3976 
3977  SDValue Ptr = getValue(PtrOperand);
3978  SDValue Src0 = getValue(Src0Operand);
3979  SDValue Mask = getValue(MaskOperand);
3980 
3981  EVT VT = Src0.getValueType();
3982  if (!Alignment)
3983  Alignment = DAG.getEVTAlignment(VT);
3984 
3985  AAMDNodes AAInfo;
3986  I.getAAMetadata(AAInfo);
3987  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3988 
3989  // Do not serialize masked loads of constant memory with anything.
3990  bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
3991  PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
3992  SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
3993 
3994  MachineMemOperand *MMO =
3995  DAG.getMachineFunction().
3996  getMachineMemOperand(MachinePointerInfo(PtrOperand),
3998  Alignment, AAInfo, Ranges);
3999 
4000  SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
4001  ISD::NON_EXTLOAD, IsExpanding);
4002  if (AddToChain) {
4003  SDValue OutChain = Load.getValue(1);
4004  DAG.setRoot(OutChain);
4005  }
4006  setValue(&I, Load);
4007 }
4008 
4009 void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
4010  SDLoc sdl = getCurSDLoc();
4011 
4012  // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
4013  const Value *Ptr = I.getArgOperand(0);
4014  SDValue Src0 = getValue(I.getArgOperand(3));
4015  SDValue Mask = getValue(I.getArgOperand(2));
4016 
4017  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4018  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4019  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
4020  if (!Alignment)
4021  Alignment = DAG.getEVTAlignment(VT);
4022 
4023  AAMDNodes AAInfo;
4024  I.getAAMetadata(AAInfo);
4025  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4026 
4027  SDValue Root = DAG.getRoot();
4028  SDValue Base;
4029  SDValue Index;
4030  const Value *BasePtr = Ptr;
4031  bool UniformBase = getUniformBase(BasePtr, Base, Index, this);
4032  bool ConstantMemory = false;
4033  if (UniformBase &&
4034  AA && AA->pointsToConstantMemory(MemoryLocation(
4035  BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
4036  AAInfo))) {
4037  // Do not serialize (non-volatile) loads of constant memory with anything.
4038  Root = DAG.getEntryNode();
4039  ConstantMemory = true;
4040  }
4041 
4042  MachineMemOperand *MMO =
4043  DAG.getMachineFunction().
4044  getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
4046  Alignment, AAInfo, Ranges);
4047 
4048  if (!UniformBase) {
4049  Base = DAG.getTargetConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4050  Index = getValue(Ptr);
4051  }
4052  SDValue Ops[] = { Root, Src0, Mask, Base, Index };
4053  SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
4054  Ops, MMO);
4055 
4056  SDValue OutChain = Gather.getValue(1);
4057  if (!ConstantMemory)
4058  PendingLoads.push_back(OutChain);
4059  setValue(&I, Gather);
4060 }
4061 
4062 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
4063  SDLoc dl = getCurSDLoc();
4064  AtomicOrdering SuccessOrder = I.getSuccessOrdering();
4065  AtomicOrdering FailureOrder = I.getFailureOrdering();
4066  SyncScope::ID SSID = I.getSyncScopeID();
4067 
4068  SDValue InChain = getRoot();
4069 
4070  MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
4071  SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
4072  SDValue L = DAG.getAtomicCmpSwap(
4073  ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
4074  getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
4076  /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
4077 
4078  SDValue OutChain = L.getValue(2);
4079 
4080  setValue(&I, L);
4081  DAG.setRoot(OutChain);
4082 }
4083 
4084 void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
4085  SDLoc dl = getCurSDLoc();
4086  ISD::NodeType NT;
4087  switch (I.getOperation()) {
4088  default: llvm_unreachable("Unknown atomicrmw operation");
4089  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
4090  case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
4091  case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
4092  case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
4093  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
4094  case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
4095  case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
4096  case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
4097  case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
4098  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
4099  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
4100  }
4101  AtomicOrdering Order = I.getOrdering();
4102  SyncScope::ID SSID = I.getSyncScopeID();
4103 
4104  SDValue InChain = getRoot();
4105 
4106  SDValue L =
4107  DAG.getAtomic(NT, dl,
4108  getValue(I.getValOperand()).getSimpleValueType(),
4109  InChain,
4110  getValue(I.getPointerOperand()),
4111  getValue(I.getValOperand()),
4112  I.getPointerOperand(),
4113  /* Alignment=*/ 0, Order, SSID);
4114 
4115  SDValue OutChain = L.getValue(1);
4116 
4117  setValue(&I, L);
4118  DAG.setRoot(OutChain);
4119 }
4120 
4121 void SelectionDAGBuilder::visitFence(const FenceInst &I) {
4122  SDLoc dl = getCurSDLoc();
4123  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4124  SDValue Ops[3];
4125  Ops[0] = getRoot();
4126  Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
4127  TLI.getFenceOperandTy(DAG.getDataLayout()));
4128  Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
4129  TLI.getFenceOperandTy(DAG.getDataLayout()));
4130  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
4131 }
4132 
4133 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
4134  SDLoc dl = getCurSDLoc();
4135  AtomicOrdering Order = I.getOrdering();
4136  SyncScope::ID SSID = I.getSyncScopeID();
4137 
4138  SDValue InChain = getRoot();
4139 
4140  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4141  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4142 
4143  if (I.getAlignment() < VT.getSizeInBits() / 8)
4144  report_fatal_error("Cannot generate unaligned atomic load");
4145 
4146  MachineMemOperand *MMO =
4147  DAG.getMachineFunction().
4148  getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
4151  VT.getStoreSize(),
4152  I.getAlignment() ? I.getAlignment() :
4153  DAG.getEVTAlignment(VT),
4154  AAMDNodes(), nullptr, SSID, Order);
4155 
4156  InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
4157  SDValue L =
4158  DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
4159  getValue(I.getPointerOperand()), MMO);
4160 
4161  SDValue OutChain = L.getValue(1);
4162 
4163  setValue(&I, L);
4164  DAG.setRoot(OutChain);
4165 }
4166 
4167 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
4168  SDLoc dl = getCurSDLoc();
4169 
4170  AtomicOrdering Order = I.getOrdering();
4171  SyncScope::ID SSID = I.getSyncScopeID();
4172 
4173  SDValue InChain = getRoot();
4174 
4175  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4176  EVT VT =
4178 
4179  if (I.getAlignment() < VT.getSizeInBits() / 8)
4180  report_fatal_error("Cannot generate unaligned atomic store");
4181 
4182  SDValue OutChain =
4183  DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
4184  InChain,
4185  getValue(I.getPointerOperand()),
4186  getValue(I.getValueOperand()),
4188  Order, SSID);
4189 
4190  DAG.setRoot(OutChain);
4191 }
4192 
4193 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
4194 /// node.
4195 void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
4196  unsigned Intrinsic) {
4197  // Ignore the callsite's attributes. A specific call site may be marked with
4198  // readnone, but the lowering code will expect the chain based on the
4199  // definition.
4200  const Function *F = I.getCalledFunction();
4201  bool HasChain = !F->doesNotAccessMemory();
4202  bool OnlyLoad = HasChain && F->onlyReadsMemory();
4203 
4204  // Build the operand list.
4206  if (HasChain) { // If this intrinsic has side-effects, chainify it.
4207  if (OnlyLoad) {
4208  // We don't need to serialize loads against other loads.
4209  Ops.push_back(DAG.getRoot());
4210  } else {
4211  Ops.push_back(getRoot());
4212  }
4213  }
4214 
4215  // Info is set by getTgtMemInstrinsic
4216  TargetLowering::IntrinsicInfo Info;
4217  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4218  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic);
4219 
4220  // Add the intrinsic ID as an integer operand if it's not a target intrinsic.
4221  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
4222  Info.opc == ISD::INTRINSIC_W_CHAIN)
4223  Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
4224  TLI.getPointerTy(DAG.getDataLayout())));
4225 
4226  // Add all operands of the call to the operand list.
4227  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
4228  SDValue Op = getValue(I.getArgOperand(i));
4229  Ops.push_back(Op);
4230  }
4231 
4233  ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
4234 
4235  if (HasChain)
4236  ValueVTs.push_back(MVT::Other);
4237 
4238  SDVTList VTs = DAG.getVTList(ValueVTs);
4239 
4240  // Create the node.
4241  SDValue Result;
4242  if (IsTgtIntrinsic) {
4243  // This is target intrinsic that touches memory
4244  Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(),
4245  VTs, Ops, Info.memVT,
4246  MachinePointerInfo(Info.ptrVal, Info.offset),
4247  Info.align, Info.vol,
4248  Info.readMem, Info.writeMem, Info.size);
4249  } else if (!HasChain) {
4250  Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
4251  } else if (!I.getType()->isVoidTy()) {
4252  Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
4253  } else {
4254  Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
4255  }
4256 
4257  if (HasChain) {
4258  SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
4259  if (OnlyLoad)
4260  PendingLoads.push_back(Chain);
4261  else
4262  DAG.setRoot(Chain);
4263  }
4264 
4265  if (!I.getType()->isVoidTy()) {
4266  if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
4267  EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
4268  Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
4269  } else
4270  Result = lowerRangeToAssertZExt(DAG, I, Result);
4271 
4272  setValue(&I, Result);
4273  }
4274 }
4275 
4276 /// GetSignificand - Get the significand and build it into a floating-point
4277 /// number with exponent of 1:
4278 ///
4279 /// Op = (Op & 0x007fffff) | 0x3f800000;
4280 ///
4281 /// where Op is the hexadecimal representation of floating point value.
4283  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4284  DAG.getConstant(0x007fffff, dl, MVT::i32));
4285  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
4286  DAG.getConstant(0x3f800000, dl, MVT::i32));
4287  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
4288 }
4289 
4290 /// GetExponent - Get the exponent:
4291 ///
4292 /// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
4293 ///
4294 /// where Op is the hexadecimal representation of floating point value.
4296  const TargetLowering &TLI, const SDLoc &dl) {
4297  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4298  DAG.getConstant(0x7f800000, dl, MVT::i32));
4299  SDValue t1 = DAG.getNode(
4300  ISD::SRL, dl, MVT::i32, t0,
4301  DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
4302  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
4303  DAG.getConstant(127, dl, MVT::i32));
4304  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
4305 }
4306 
4307 /// getF32Constant - Get 32-bit floating point constant.
4308 static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
4309  const SDLoc &dl) {
4310  return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
4311  MVT::f32);
4312 }
4313 
4315  SelectionDAG &DAG) {
4316  // TODO: What fast-math-flags should be set on the floating-point nodes?
4317 
4318  // IntegerPartOfX = ((int32_t)(t0);
4319  SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
4320 
4321  // FractionalPartOfX = t0 - (float)IntegerPartOfX;
4322  SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4323  SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
4324 
4325  // IntegerPartOfX <<= 23;
4326  IntegerPartOfX = DAG.getNode(
4327  ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4329  DAG.getDataLayout())));
4330 
4331  SDValue TwoToFractionalPartOfX;
4332  if (LimitFloatPrecision <= 6) {
4333  // For floating-point precision of 6:
4334  //
4335  // TwoToFractionalPartOfX =
4336  // 0.997535578f +
4337  // (0.735607626f + 0.252464424f * x) * x;
4338  //
4339  // error 0.0144103317, which is 6 bits
4340  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4341  getF32Constant(DAG, 0x3e814304, dl));
4342  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4343  getF32Constant(DAG, 0x3f3c50c8, dl));
4344  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4345  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4346  getF32Constant(DAG, 0x3f7f5e7e, dl));
4347  } else if (LimitFloatPrecision <= 12) {
4348  // For floating-point precision of 12:
4349  //
4350  // TwoToFractionalPartOfX =
4351  // 0.999892986f +
4352  // (0.696457318f +
4353  // (0.224338339f + 0.792043434e-1f * x) * x) * x;
4354  //
4355  // error 0.000107046256, which is 13 to 14 bits
4356  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4357  getF32Constant(DAG, 0x3da235e3, dl));
4358  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4359  getF32Constant(DAG, 0x3e65b8f3, dl));
4360  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4361  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4362  getF32Constant(DAG, 0x3f324b07, dl));
4363  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4364  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4365  getF32Constant(DAG, 0x3f7ff8fd, dl));
4366  } else { // LimitFloatPrecision <= 18
4367  // For floating-point precision of 18:
4368  //
4369  // TwoToFractionalPartOfX =
4370  // 0.999999982f +
4371  // (0.693148872f +
4372  // (0.240227044f +
4373  // (0.554906021e-1f +
4374  // (0.961591928e-2f +
4375  // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4376  // error 2.47208000*10^(-7), which is better than 18 bits
4377  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4378  getF32Constant(DAG, 0x3924b03e, dl));
4379  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4380  getF32Constant(DAG, 0x3ab24b87, dl));
4381  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4382  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4383  getF32Constant(DAG, 0x3c1d8c17, dl));
4384  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4385  SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4386  getF32Constant(DAG, 0x3d634a1d, dl));
4387  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4388  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4389  getF32Constant(DAG, 0x3e75fe14, dl));
4390  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4391  SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4392  getF32Constant(DAG, 0x3f317234, dl));
4393  SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4394  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4395  getF32Constant(DAG, 0x3f800000, dl));
4396  }
4397 
4398  // Add the exponent into the result in integer domain.
4399  SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
4400  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4401  DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
4402 }
4403 
4404 /// expandExp - Lower an exp intrinsic. Handles the special sequences for
4405 /// limited-precision mode.
4406 static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4407  const TargetLowering &TLI) {
4408  if (Op.getValueType() == MVT::f32 &&
4409  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4410 
4411  // Put the exponent in the right bit position for later addition to the
4412  // final result:
4413  //
4414  // #define LOG2OFe 1.4426950f
4415  // t0 = Op * LOG2OFe
4416 
4417  // TODO: What fast-math-flags should be set here?
4418  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
4419  getF32Constant(DAG, 0x3fb8aa3b, dl));
4420  return getLimitedPrecisionExp2(t0, dl, DAG);
4421  }
4422 
4423  // No special expansion.
4424  return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
4425 }
4426 
4427 /// expandLog - Lower a log intrinsic. Handles the special sequences for
4428 /// limited-precision mode.
4429 static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4430  const TargetLowering &TLI) {
4431  // TODO: What fast-math-flags should be set on the floating-point nodes?
4432 
4433  if (Op.getValueType() == MVT::f32 &&
4434  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4435  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4436 
4437  // Scale the exponent by log(2) [0.69314718f].
4438  SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4439  SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4440  getF32Constant(DAG, 0x3f317218, dl));
4441 
4442  // Get the significand and build it into a floating-point number with
4443  // exponent of 1.
4444  SDValue X = GetSignificand(DAG, Op1, dl);
4445 
4446  SDValue LogOfMantissa;
4447  if (LimitFloatPrecision <= 6) {
4448  // For floating-point precision of 6:
4449  //
4450  // LogofMantissa =
4451  // -1.1609546f +
4452  // (1.4034025f - 0.23903021f * x) * x;
4453  //
4454  // error 0.0034276066, which is better than 8 bits
4455  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4456  getF32Constant(DAG, 0xbe74c456, dl));
4457  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4458  getF32Constant(DAG, 0x3fb3a2b1, dl));
4459  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4460  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4461  getF32Constant(DAG, 0x3f949a29, dl));
4462  } else if (LimitFloatPrecision <= 12) {
4463  // For floating-point precision of 12:
4464  //
4465  // LogOfMantissa =
4466  // -1.7417939f +
4467  // (2.8212026f +
4468  // (-1.4699568f +
4469  // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
4470  //
4471  // error 0.000061011436, which is 14 bits
4472  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4473  getF32Constant(DAG, 0xbd67b6d6, dl));
4474  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4475  getF32Constant(DAG, 0x3ee4f4b8, dl));
4476  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4477  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4478  getF32Constant(DAG, 0x3fbc278b, dl));
4479  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4480  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4481  getF32Constant(DAG, 0x40348e95, dl));
4482  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4483  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4484  getF32Constant(DAG, 0x3fdef31a, dl));
4485  } else { // LimitFloatPrecision <= 18
4486  // For floating-point precision of 18:
4487  //
4488  // LogOfMantissa =
4489  // -2.1072184f +
4490  // (4.2372794f +
4491  // (-3.7029485f +
4492  // (2.2781945f +
4493  // (-0.87823314f +
4494  // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
4495  //
4496  // error 0.0000023660568, which is better than 18 bits
4497  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4498  getF32Constant(DAG, 0xbc91e5ac, dl));
4499  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4500  getF32Constant(DAG, 0x3e4350aa, dl));
4501  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4502  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4503  getF32Constant(DAG, 0x3f60d3e3, dl));
4504  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4505  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4506  getF32Constant(DAG, 0x4011cdf0, dl));
4507  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4508  SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4509  getF32Constant(DAG, 0x406cfd1c, dl));
4510  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4511  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4512  getF32Constant(DAG, 0x408797cb, dl));
4513  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4514  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4515  getF32Constant(DAG, 0x4006dcab, dl));
4516  }
4517 
4518  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
4519  }
4520 
4521  // No special expansion.
4522  return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
4523 }
4524 
4525 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
4526 /// limited-precision mode.
4527 static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4528  const TargetLowering &TLI) {
4529  // TODO: What fast-math-flags should be set on the floating-point nodes?
4530 
4531  if (Op.getValueType() == MVT::f32 &&
4532  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4533  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4534 
4535  // Get the exponent.
4536  SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
4537 
4538  // Get the significand and build it into a floating-point number with
4539  // exponent of 1.
4540  SDValue X = GetSignificand(DAG, Op1, dl);
4541 
4542  // Different possible minimax approximations of significand in
4543  // floating-point for various degrees of accuracy over [1,2].
4544  SDValue Log2ofMantissa;
4545  if (LimitFloatPrecision <= 6) {
4546  // For floating-point precision of 6:
4547  //
4548  // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
4549  //
4550  // error 0.0049451742, which is more than 7 bits
4551  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4552  getF32Constant(DAG, 0xbeb08fe0, dl));
4553  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4554  getF32Constant(DAG, 0x40019463, dl));
4555  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4556  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4557  getF32Constant(DAG, 0x3fd6633d, dl));
4558  } else if (LimitFloatPrecision <= 12) {
4559  // For floating-point precision of 12:
4560  //
4561  // Log2ofMantissa =
4562  // -2.51285454f +
4563  // (4.07009056f +
4564  // (-2.12067489f +
4565  // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
4566  //
4567  // error 0.0000876136000, which is better than 13 bits
4568  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4569  getF32Constant(DAG, 0xbda7262e, dl));
4570  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4571  getF32Constant(DAG, 0x3f25280b, dl));
4572  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4573  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4574  getF32Constant(DAG, 0x4007b923, dl));
4575  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4576  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4577  getF32Constant(DAG, 0x40823e2f, dl));
4578  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4579  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4580  getF32Constant(DAG, 0x4020d29c, dl));
4581  } else { // LimitFloatPrecision <= 18
4582  // For floating-point precision of 18:
4583  //
4584  // Log2ofMantissa =
4585  // -3.0400495f +
4586  // (6.1129976f +
4587  // (-5.3420409f +
4588  // (3.2865683f +
4589  // (-1.2669343f +
4590  // (0.27515199f -
4591  // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
4592  //
4593  // error 0.0000018516, which is better than 18 bits
4594  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4595  getF32Constant(DAG, 0xbcd2769e, dl));
4596  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4597  getF32Constant(DAG, 0x3e8ce0b9, dl));
4598  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4599  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4600  getF32Constant(DAG, 0x3fa22ae7, dl));
4601  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4602  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4603  getF32Constant(DAG, 0x40525723, dl));
4604  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4605  SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4606  getF32Constant(DAG, 0x40aaf200, dl));
4607  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4608  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4609  getF32Constant(DAG, 0x40c39dad, dl));
4610  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4611  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4612  getF32Constant(DAG, 0x4042902c, dl));
4613  }
4614 
4615  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
4616  }
4617 
4618  // No special expansion.
4619  return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
4620 }
4621 
4622 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
4623 /// limited-precision mode.
4625  const TargetLowering &TLI) {
4626  // TODO: What fast-math-flags should be set on the floating-point nodes?
4627 
4628  if (Op.getValueType() == MVT::f32 &&
4629  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4630  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4631 
4632  // Scale the exponent by log10(2) [0.30102999f].
4633  SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4634  SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4635  getF32Constant(DAG, 0x3e9a209a, dl));
4636 
4637  // Get the significand and build it into a floating-point number with
4638  // exponent of 1.
4639  SDValue X = GetSignificand(DAG, Op1, dl);
4640 
4641  SDValue Log10ofMantissa;
4642  if (LimitFloatPrecision <= 6) {
4643  // For floating-point precision of 6:
4644  //
4645  // Log10ofMantissa =
4646  // -0.50419619f +
4647  // (0.60948995f - 0.10380950f * x) * x;
4648  //
4649  // error 0.0014886165, which is 6 bits
4650  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4651  getF32Constant(DAG, 0xbdd49a13, dl));
4652  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4653  getF32Constant(DAG, 0x3f1c0789, dl));
4654  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4655  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4656  getF32Constant(DAG, 0x3f011300, dl));
4657  } else if (LimitFloatPrecision <= 12) {
4658  // For floating-point precision of 12:
4659  //
4660  // Log10ofMantissa =
4661  // -0.64831180f +
4662  // (0.91751397f +
4663  // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
4664  //
4665  // error 0.00019228036, which is better than 12 bits
4666  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4667  getF32Constant(DAG, 0x3d431f31, dl));
4668  SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4669  getF32Constant(DAG, 0x3ea21fb2, dl));
4670  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4671  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4672  getF32Constant(DAG, 0x3f6ae232, dl));
4673  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4674  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4675  getF32Constant(DAG, 0x3f25f7c3, dl));
4676  } else { // LimitFloatPrecision <= 18
4677  // For floating-point precision of 18:
4678  //
4679  // Log10ofMantissa =
4680  // -0.84299375f +
4681  // (1.5327582f +
4682  // (-1.0688956f +
4683  // (0.49102474f +
4684  // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
4685  //
4686  // error 0.0000037995730, which is better than 18 bits
4687  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4688  getF32Constant(DAG, 0x3c5d51ce, dl));
4689  SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4690  getF32Constant(DAG, 0x3e00685a, dl));
4691  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4692  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4693  getF32Constant(DAG, 0x3efb6798, dl));
4694  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4695  SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4696  getF32Constant(DAG, 0x3f88d192, dl));
4697  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4698  SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4699  getF32Constant(DAG, 0x3fc4316c, dl));
4700  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4701  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
4702  getF32Constant(DAG, 0x3f57ce70, dl));
4703  }
4704 
4705  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
4706  }
4707 
4708  // No special expansion.
4709  return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
4710 }
4711 
4712 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
4713 /// limited-precision mode.
4714 static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4715  const TargetLowering &TLI) {
4716  if (Op.getValueType() == MVT::f32 &&
4717  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
4718  return getLimitedPrecisionExp2(Op, dl, DAG);
4719 
4720  // No special expansion.
4721  return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
4722 }
4723 
4724 /// visitPow - Lower a pow intrinsic. Handles the special sequences for
4725 /// limited-precision mode with x == 10.0f.
4726 static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
4727  SelectionDAG &DAG, const TargetLowering &TLI) {
4728  bool IsExp10 = false;
4729  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
4730  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4731  if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
4732  APFloat Ten(10.0f);
4733  IsExp10 = LHSC->isExactlyValue(Ten);
4734  }
4735  }
4736 
4737  // TODO: What fast-math-flags should be set on the FMUL node?
4738  if (IsExp10) {
4739  // Put the exponent in the right bit position for later addition to the
4740  // final result:
4741  //
4742  // #define LOG2OF10 3.3219281f
4743  // t0 = Op * LOG2OF10;
4744  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
4745  getF32Constant(DAG, 0x40549a78, dl));
4746  return getLimitedPrecisionExp2(t0, dl, DAG);
4747  }
4748 
4749  // No special expansion.
4750  return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
4751 }
4752 
4753 /// ExpandPowI - Expand a llvm.powi intrinsic.
4754 static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
4755  SelectionDAG &DAG) {
4756  // If RHS is a constant, we can expand this out to a multiplication tree,
4757  // otherwise we end up lowering to a call to __powidf2 (for example). When
4758  // optimizing for size, we only want to do this if the expansion would produce
4759  // a small number of multiplies, otherwise we do the full expansion.
4760  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4761  // Get the exponent as a positive value.
4762  unsigned Val = RHSC->getSExtValue();
4763  if ((int)Val < 0) Val = -Val;
4764 
4765  // powi(x, 0) -> 1.0
4766  if (Val == 0)
4767  return DAG.getConstantFP(1.0, DL, LHS.getValueType());
4768 
4769  const Function *F = DAG.getMachineFunction().getFunction();
4770  if (!F->optForSize() ||
4771  // If optimizing for size, don't insert too many multiplies.
4772  // This inserts up to 5 multiplies.
4773  countPopulation(Val) + Log2_32(Val) < 7) {
4774  // We use the simple binary decomposition method to generate the multiply
4775  // sequence. There are more optimal ways to do this (for example,
4776  // powi(x,15) generates one more multiply than it should), but this has
4777  // the benefit of being both really simple and much better than a libcall.
4778  SDValue Res; // Logically starts equal to 1.0
4779  SDValue CurSquare = LHS;
4780  // TODO: Intrinsics should have fast-math-flags that propagate to these
4781  // nodes.
4782  while (Val) {
4783  if (Val & 1) {
4784  if (Res.getNode())
4785  Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
4786  else
4787  Res = CurSquare; // 1.0*CurSquare.
4788  }
4789 
4790  CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
4791  CurSquare, CurSquare);
4792  Val >>= 1;
4793  }
4794 
4795  // If the original was negative, invert the result, producing 1/(x*x*x).
4796  if (RHSC->getSExtValue() < 0)
4797  Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
4798  DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
4799  return Res;
4800  }
4801  }
4802 
4803  // Otherwise, expand to a libcall.
4804  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
4805 }
4806 
4807 // getUnderlyingArgReg - Find underlying register used for a truncated or
4808 // bitcasted argument.
4809 static unsigned getUnderlyingArgReg(const SDValue &N) {
4810  switch (N.getOpcode()) {
4811  case ISD::CopyFromReg:
4812  return cast<RegisterSDNode>(N.getOperand(1))->getReg();
4813  case ISD::BITCAST:
4814  case ISD::AssertZext:
4815  case ISD::AssertSext:
4816  case ISD::TRUNCATE:
4817  return getUnderlyingArgReg(N.getOperand(0));
4818  default:
4819  return 0;
4820  }
4821 }
4822 
4823 /// If the DbgValueInst is a dbg_value of a function argument, create the
4824 /// corresponding DBG_VALUE machine instruction for it now. At the end of
4825 /// instruction selection, they will be inserted to the entry BB.
4826 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
4827  const Value *V, DILocalVariable *Variable, DIExpression *Expr,
4828  DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
4829  const Argument *Arg = dyn_cast<Argument>(V);
4830  if (!Arg)
4831  return false;
4832 
4833  MachineFunction &MF = DAG.getMachineFunction();
4834  const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
4835 
4836  bool IsIndirect = false;
4838  // Some arguments' frame index is recorded during argument lowering.
4839  int FI = FuncInfo.getArgumentFrameIndex(Arg);
4840  if (FI != std::numeric_limits<int>::max())
4841  Op = MachineOperand::CreateFI(FI);
4842 
4843  if (!Op && N.getNode()) {
4844  unsigned Reg = getUnderlyingArgReg(N);
4845  if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
4846  MachineRegisterInfo &RegInfo = MF.getRegInfo();
4847  unsigned PR = RegInfo.getLiveInPhysReg(Reg);
4848  if (PR)
4849  Reg = PR;
4850  }
4851  if (Reg) {
4852  Op = MachineOperand::CreateReg(Reg, false);
4853  IsIndirect = IsDbgDeclare;
4854  }
4855  }
4856 
4857  if (!Op) {
4858  // Check if ValueMap has reg number.
4859  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
4860  if (VMI != FuncInfo.ValueMap.end()) {
4861  const auto &TLI = DAG.getTargetLoweringInfo();
4862  RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
4863  V->getType(), isABIRegCopy(V));
4864  unsigned NumRegs =
4865  std::accumulate(RFV.RegCount.begin(), RFV.RegCount.end(), 0);
4866  if (NumRegs > 1) {
4867  unsigned I = 0;
4868  unsigned Offset = 0;
4869  auto RegisterVT = RFV.RegVTs.begin();
4870  for (auto RegCount : RFV.RegCount) {
4871  unsigned RegisterSize = (RegisterVT++)->getSizeInBits();
4872  for (unsigned E = I + RegCount; I != E; ++I) {
4873  // The vregs are guaranteed to be allocated in sequence.
4874  Op = MachineOperand::CreateReg(VMI->second + I, false);
4875  auto FragmentExpr = DIExpression::createFragmentExpression(
4876  Expr, Offset, RegisterSize);
4877  if (!FragmentExpr)
4878  continue;
4879  FuncInfo.ArgDbgValues.push_back(
4880  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
4881  Op->getReg(), Variable, *FragmentExpr));
4882  Offset += RegisterSize;
4883  }
4884  }
4885  return true;
4886  }
4887  Op = MachineOperand::CreateReg(VMI->second, false);
4888  IsIndirect = IsDbgDeclare;
4889  }
4890  }
4891 
4892  if (!Op && N.getNode())
4893  // Check if frame index is available.
4894  if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
4895  if (FrameIndexSDNode *FINode =
4896  dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
4897  Op = MachineOperand::CreateFI(FINode->getIndex());
4898 
4899  if (!Op)
4900  return false;
4901 
4902  assert(Variable->isValidLocationForIntrinsic(DL) &&
4903  "Expected inlined-at fields to agree");
4904  if (Op->isReg())
4905  FuncInfo.ArgDbgValues.push_back(
4906  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsIndirect,
4907  Op->getReg(), Variable, Expr));
4908  else
4909  FuncInfo.ArgDbgValues.push_back(
4910  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE))
4911  .add(*Op)
4912  .addImm(0)
4913  .addMetadata(Variable)
4914  .addMetadata(Expr));
4915 
4916  return true;
4917 }
4918 
4919 /// Return the appropriate SDDbgValue based on N.
4920 SDDbgValue *SelectionDAGBuilder::getDbgValue(SDValue N,
4921  DILocalVariable *Variable,
4922  DIExpression *Expr,
4923  const DebugLoc &dl,
4924  unsigned DbgSDNodeOrder) {
4925  if (auto *FISDN = dyn_cast<FrameIndexSDNode>(N.getNode())) {
4926  // Construct a FrameIndexDbgValue for FrameIndexSDNodes so we can describe
4927  // stack slot locations as such instead of as indirectly addressed
4928  // locations.
4929  return DAG.getFrameIndexDbgValue(Variable, Expr, FISDN->getIndex(), dl,
4930  DbgSDNodeOrder);
4931  }
4932  return DAG.getDbgValue(Variable, Expr, N.getNode(), N.getResNo(), false, dl,
4933  DbgSDNodeOrder);
4934 }
4935 
4936 // VisualStudio defines setjmp as _setjmp
4937 #if defined(_MSC_VER) && defined(setjmp) && \
4938  !defined(setjmp_undefined_for_msvc)
4939 # pragma push_macro("setjmp")
4940 # undef setjmp
4941 # define setjmp_undefined_for_msvc
4942 #endif
4943 
4944 /// Lower the call to the specified intrinsic function. If we want to emit this
4945 /// as a call to a named external function, return the name. Otherwise, lower it
4946 /// and return null.
4947 const char *
4948 SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) {
4949  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4950  SDLoc sdl = getCurSDLoc();
4951  DebugLoc dl = getCurDebugLoc();
4952  SDValue Res;
4953 
4954  switch (Intrinsic) {
4955  default:
4956  // By default, turn this into a target intrinsic node.
4957  visitTargetIntrinsic(I, Intrinsic);
4958  return nullptr;
4959  case Intrinsic::vastart: visitVAStart(I); return nullptr;
4960  case Intrinsic::vaend: visitVAEnd(I); return nullptr;
4961  case Intrinsic::vacopy: visitVACopy(I); return nullptr;
4962  case Intrinsic::returnaddress:
4963  setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl,
4964  TLI.getPointerTy(DAG.getDataLayout()),
4965  getValue(I.getArgOperand(0))));
4966  return nullptr;
4967  case Intrinsic::addressofreturnaddress:
4968  setValue(&I, DAG.getNode(ISD::ADDROFRETURNADDR, sdl,
4969  TLI.getPointerTy(DAG.getDataLayout())));
4970  return nullptr;
4971  case Intrinsic::frameaddress:
4972  setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl,
4973  TLI.getPointerTy(DAG.getDataLayout()),
4974  getValue(I.getArgOperand(0))));
4975  return nullptr;
4976  case Intrinsic::read_register: {
4977  Value *Reg = I.getArgOperand(0);
4978  SDValue Chain = getRoot();
4979  SDValue RegName =
4980  DAG.getMDNode(cast<MDNode>(cast<MetadataAsValue>(Reg)->getMetadata()));
4981  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4982  Res = DAG.getNode(ISD::READ_REGISTER, sdl,
4983  DAG.getVTList(VT, MVT::Other), Chain, RegName);
4984  setValue(&I, Res);
4985  DAG.setRoot(Res.getValue(1));
4986  return nullptr;
4987  }
4988  case Intrinsic::write_register: {
4989  Value *Reg = I.getArgOperand(0);
4990  Value *RegValue = I.getArgOperand(1);
4991  SDValue Chain = getRoot();
4992  SDValue RegName =