LLVM  8.0.0svn
SelectionDAGBuilder.cpp
Go to the documentation of this file.
1 //===- SelectionDAGBuilder.cpp - Selection-DAG building -------------------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This implements routines for translating from LLVM IR into SelectionDAG IR.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "SelectionDAGBuilder.h"
15 #include "SDNodeDbgValue.h"
16 #include "llvm/ADT/APFloat.h"
17 #include "llvm/ADT/APInt.h"
18 #include "llvm/ADT/ArrayRef.h"
19 #include "llvm/ADT/BitVector.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/None.h"
22 #include "llvm/ADT/Optional.h"
23 #include "llvm/ADT/STLExtras.h"
24 #include "llvm/ADT/SmallPtrSet.h"
25 #include "llvm/ADT/SmallSet.h"
26 #include "llvm/ADT/SmallVector.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/ADT/Triple.h"
29 #include "llvm/ADT/Twine.h"
34 #include "llvm/Analysis/Loads.h"
39 #include "llvm/CodeGen/Analysis.h"
57 #include "llvm/CodeGen/StackMaps.h"
66 #include "llvm/IR/Argument.h"
67 #include "llvm/IR/Attributes.h"
68 #include "llvm/IR/BasicBlock.h"
69 #include "llvm/IR/CFG.h"
70 #include "llvm/IR/CallSite.h"
71 #include "llvm/IR/CallingConv.h"
72 #include "llvm/IR/Constant.h"
73 #include "llvm/IR/ConstantRange.h"
74 #include "llvm/IR/Constants.h"
75 #include "llvm/IR/DataLayout.h"
77 #include "llvm/IR/DebugLoc.h"
78 #include "llvm/IR/DerivedTypes.h"
79 #include "llvm/IR/Function.h"
81 #include "llvm/IR/InlineAsm.h"
82 #include "llvm/IR/InstrTypes.h"
83 #include "llvm/IR/Instruction.h"
84 #include "llvm/IR/Instructions.h"
85 #include "llvm/IR/IntrinsicInst.h"
86 #include "llvm/IR/Intrinsics.h"
87 #include "llvm/IR/LLVMContext.h"
88 #include "llvm/IR/Metadata.h"
89 #include "llvm/IR/Module.h"
90 #include "llvm/IR/Operator.h"
91 #include "llvm/IR/PatternMatch.h"
92 #include "llvm/IR/Statepoint.h"
93 #include "llvm/IR/Type.h"
94 #include "llvm/IR/User.h"
95 #include "llvm/IR/Value.h"
96 #include "llvm/MC/MCContext.h"
97 #include "llvm/MC/MCSymbol.h"
100 #include "llvm/Support/Casting.h"
101 #include "llvm/Support/CodeGen.h"
103 #include "llvm/Support/Compiler.h"
104 #include "llvm/Support/Debug.h"
107 #include "llvm/Support/MathExtras.h"
112 #include <algorithm>
113 #include <cassert>
114 #include <cstddef>
115 #include <cstdint>
116 #include <cstring>
117 #include <iterator>
118 #include <limits>
119 #include <numeric>
120 #include <tuple>
121 #include <utility>
122 #include <vector>
123 
124 using namespace llvm;
125 using namespace PatternMatch;
126 
127 #define DEBUG_TYPE "isel"
128 
129 /// LimitFloatPrecision - Generate low-precision inline sequences for
130 /// some float libcalls (6, 8 or 12 bits).
/// This variable is the cl::location storage of the "limit-float-precision"
/// option declared right after it, which initialises it to 0.
131 static unsigned LimitFloatPrecision;
132 
// Hidden command-line option "limit-float-precision"; its value is stored in
// LimitFloatPrecision and defaults to 0.
// NOTE(review): the first line of this declaration (listing line 133, which
// carries the option's type) is missing from this extract -- restore it from
// the upstream file before building.
134  LimitFPPrecision("limit-float-precision",
135  cl::desc("Generate low-precision inline sequences "
136  "for some float libcalls"),
137  cl::location(LimitFloatPrecision), cl::Hidden,
138  cl::init(0));
139 
// Hidden command-line option "switch-peel-threshold", default 66. Per its
// description it is a case-probability threshold; a value above 100 disables
// the peeling optimization.
// NOTE(review): the first line of this declaration (listing line 140, which
// carries the option's type and variable name) is missing from this extract.
141  "switch-peel-threshold", cl::Hidden, cl::init(66),
142  cl::desc("Set the case probability threshold for peeling the case from a "
143  "switch statement. A value greater than 100 will void this "
144  "optimization"));
145 
// MaxParallelChains: file-local cap (64) on the width of DAG chains.
// NOTE(review): the code that enforces this cap is not in this part of the
// file; see the uses of MaxParallelChains further down.
//
146 // Limit the width of DAG chains. This is important in general to prevent
147 // DAG-based analysis from blowing up. For example, alias analysis and
148 // load clustering may not complete in reasonable time. It is difficult to
149 // recognize and avoid this situation within each individual analysis, and
150 // future analyses are likely to have the same behavior. Limiting DAG width is
151 // the safe approach and will be especially important with global DAGs.
152 //
153 // MaxParallelChains default is arbitrarily high to avoid affecting
154 // optimization, but could be lowered to improve compile time. Any ld-ld-st-st
155 // sequence over this should have been converted to llvm.memcpy by the
156 // frontend. It is easy to induce this behavior with .ll code such as:
157 // %buffer = alloca [4096 x i8]
158 // %data = load [4096 x i8]* %argPtr
159 // store [4096 x i8] %data, [4096 x i8]* %buffer
160 static const unsigned MaxParallelChains = 64;
161 
162 // Return the calling convention if the Value passed requires ABI mangling as it
163 // is a parameter to a function or a return value from a function which is not
164 // an intrinsic.
//
// Result by case, as implemented below:
//   - V is a ReturnInst: the calling convention of the function reached via
//     getParent()->getParent().
//   - V is a CallInst that is neither inline asm nor a call to an intrinsic
//     (this includes indirect calls): the call's own calling convention.
//   - anything else: None.
//
// NOTE(review): the signature line (listing line 165) is missing from this
// extract; from the body, the function takes a value named V and returns an
// optional calling-convention id. Restore the line from upstream.
166  if (auto *R = dyn_cast<ReturnInst>(V))
167  return R->getParent()->getParent()->getCallingConv();
168 
169  if (auto *CI = dyn_cast<CallInst>(V)) {
170  const bool IsInlineAsm = CI->isInlineAsm();
171  const bool IsIndirectFunctionCall =
172  !IsInlineAsm && !CI->getCalledFunction();
173 
174  // It is possible that the call instruction is an inline asm statement or an
175  // indirect function call in which case the return value of
176  // getCalledFunction() would be nullptr.
     // The && chain below therefore only dereferences getCalledFunction()
     // once both of those cases have been ruled out.
177  const bool IsInstrinsicCall =
178  !IsInlineAsm && !IsIndirectFunctionCall &&
179  CI->getCalledFunction()->getIntrinsicID() != Intrinsic::not_intrinsic;
180 
181  if (!IsInlineAsm && !IsInstrinsicCall)
182  return CI->getCallingConv();
183  }
184 
185  return None;
186 }
187 
// Forward declaration of getCopyFromPartsVector, needed because
// getCopyFromParts calls it for vector value types before it is defined.
// NOTE(review): the final line of this declaration (listing line 191) is
// missing from this extract; the definition's last parameter is
// Optional<CallingConv::ID>.
188 static SDValue getCopyFromPartsVector(SelectionDAG &DAG, const SDLoc &DL,
189  const SDValue *Parts, unsigned NumParts,
190  MVT PartVT, EVT ValueVT, const Value *V,
192 
193 /// getCopyFromParts - Create a value that contains the specified legal parts
194 /// combined into the value they represent. If the parts combine to a type
195 /// larger than ValueVT then AssertOp can be used to specify whether the extra
196 /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
197 /// (ISD::AssertSext).
///
/// \param Parts     Array of NumParts part values; NumParts must be > 0 for
///                  non-vector ValueVT (asserted).
/// \param PartVT    Type of each part.
/// \param ValueVT   Type of the value to produce. Vector types are delegated
///                  to getCopyFromPartsVector.
/// \param V         Only forwarded to the recursive / vector helper calls.
/// \param CC        Forwarded to getCopyFromPartsVector and to some of the
///                  recursive calls (the two half-assembly calls omit it).
/// \param AssertOp  If set and an integer truncation is needed, the value is
///                  wrapped in this node before the TRUNCATE.
/// \returns a node of type ValueVT; hits llvm_unreachable if no conversion
///          from the assembled part type to ValueVT is known.
///
/// NOTE(review): the first line of this definition (listing line 198: return
/// type, name and the leading DAG/DL parameters) is missing from this
/// extract; restore it from upstream.
199  const SDValue *Parts, unsigned NumParts,
200  MVT PartVT, EVT ValueVT, const Value *V,
201  Optional<CallingConv::ID> CC = None,
202  Optional<ISD::NodeType> AssertOp = None) {
203  if (ValueVT.isVector())
204  return getCopyFromPartsVector(DAG, DL, Parts, NumParts, PartVT, ValueVT, V,
205  CC);
206 
207  assert(NumParts > 0 && "No parts to assemble!");
208  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
209  SDValue Val = Parts[0];
210 
211  if (NumParts > 1) {
212  // Assemble the value from multiple parts.
213  if (ValueVT.isInteger()) {
214  unsigned PartBits = PartVT.getSizeInBits();
215  unsigned ValueBits = ValueVT.getSizeInBits();
216 
217  // Assemble the power of 2 part.
     // RoundParts is NumParts itself when that is already a power of two,
     // otherwise 1 << Log2_32(NumParts).
218  unsigned RoundParts = NumParts & (NumParts - 1) ?
219  1 << Log2_32(NumParts) : NumParts;
220  unsigned RoundBits = PartBits * RoundParts;
221  EVT RoundVT = RoundBits == ValueBits ?
222  ValueVT : EVT::getIntegerVT(*DAG.getContext(), RoundBits);
223  SDValue Lo, Hi;
224 
225  EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
226 
     // More than two round parts: recursively build each half. Exactly two:
     // the parts themselves are the halves, bitcast to HalfVT.
227  if (RoundParts > 2) {
228  Lo = getCopyFromParts(DAG, DL, Parts, RoundParts / 2,
229  PartVT, HalfVT, V);
230  Hi = getCopyFromParts(DAG, DL, Parts + RoundParts / 2,
231  RoundParts / 2, PartVT, HalfVT, V);
232  } else {
233  Lo = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[0]);
234  Hi = DAG.getNode(ISD::BITCAST, DL, HalfVT, Parts[1]);
235  }
236 
     // On big-endian targets the first half of Parts is the high half.
237  if (DAG.getDataLayout().isBigEndian())
238  std::swap(Lo, Hi);
239 
240  Val = DAG.getNode(ISD::BUILD_PAIR, DL, RoundVT, Lo, Hi);
241 
242  if (RoundParts < NumParts) {
243  // Assemble the trailing non-power-of-2 part.
244  unsigned OddParts = NumParts - RoundParts;
245  EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
246  Hi = getCopyFromParts(DAG, DL, Parts + RoundParts, OddParts, PartVT,
247  OddVT, V, CC);
248 
249  // Combine the round and odd parts.
     // Result = zext(Lo) | (anyext(Hi) << bitwidth(Lo)), computed in an
     // integer type wide enough for all NumParts parts.
250  Lo = Val;
251  if (DAG.getDataLayout().isBigEndian())
252  std::swap(Lo, Hi);
253  EVT TotalVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
254  Hi = DAG.getNode(ISD::ANY_EXTEND, DL, TotalVT, Hi);
255  Hi =
256  DAG.getNode(ISD::SHL, DL, TotalVT, Hi,
257  DAG.getConstant(Lo.getValueSizeInBits(), DL,
258  TLI.getPointerTy(DAG.getDataLayout())));
259  Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, TotalVT, Lo);
260  Val = DAG.getNode(ISD::OR, DL, TotalVT, Lo, Hi);
261  }
262  } else if (PartVT.isFloatingPoint()) {
263  // FP split into multiple FP parts (for ppcf128)
264  assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 &&
265  "Unexpected split");
266  SDValue Lo, Hi;
267  Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]);
268  Hi = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[1]);
269  if (TLI.hasBigEndianPartOrdering(ValueVT, DAG.getDataLayout()))
270  std::swap(Lo, Hi);
271  Val = DAG.getNode(ISD::BUILD_PAIR, DL, ValueVT, Lo, Hi);
272  } else {
273  // FP split into integer parts (soft fp)
     // Assemble an integer of ValueVT's width first; the same-size BITCAST
     // further down converts it to the FP type.
274  assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
275  !PartVT.isVector() && "Unexpected split");
276  EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
277  Val = getCopyFromParts(DAG, DL, Parts, NumParts, PartVT, IntVT, V, CC);
278  }
279  }
280 
281  // There is now one part, held in Val. Correct it to match ValueVT.
282  // PartEVT is the type of the register class that holds the value.
283  // ValueVT is the type of the inline asm operation.
284  EVT PartEVT = Val.getValueType();
285 
286  if (PartEVT == ValueVT)
287  return Val;
288 
289  if (PartEVT.isInteger() && ValueVT.isFloatingPoint() &&
290  ValueVT.bitsLT(PartEVT)) {
291  // For an FP value in an integer part, we need to truncate to the right
292  // width first.
293  PartEVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
294  Val = DAG.getNode(ISD::TRUNCATE, DL, PartEVT, Val);
295  }
296 
297  // Handle types that have the same size.
298  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits())
299  return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
300 
301  // Handle types with different sizes.
302  if (PartEVT.isInteger() && ValueVT.isInteger()) {
303  if (ValueVT.bitsLT(PartEVT)) {
304  // For a truncate, see if we have any information to
305  // indicate whether the truncated bits will always be
306  // zero or sign-extension.
307  if (AssertOp.hasValue())
308  Val = DAG.getNode(*AssertOp, DL, PartEVT, Val,
309  DAG.getValueType(ValueVT));
310  return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
311  }
312  return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val);
313  }
314 
315  if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
316  // FP_ROUND's are always exact here.
317  if (ValueVT.bitsLT(Val.getValueType()))
318  return DAG.getNode(
319  ISD::FP_ROUND, DL, ValueVT, Val,
320  DAG.getTargetConstant(1, DL, TLI.getPointerTy(DAG.getDataLayout())));
321 
322  return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val);
323  }
324 
325  llvm_unreachable("Unknown mismatch!");
326 }
327 
// Emit ErrMsg as an error through Ctx. When V is an instruction the error is
// attached to it, and when that instruction is an inline-asm call a hint
// about vector constraints is appended to the message.
// NOTE(review): the first signature line (listing line 328: return type, name
// and the Ctx / V parameters) is missing from this extract; restore it from
// upstream.
329  const Twine &ErrMsg) {
330  const Instruction *I = dyn_cast_or_null<Instruction>(V);
     // Guard on I, not V: a non-null V that is not an Instruction (e.g. an
     // argument or constant) leaves I null, and both dyn_cast<CallInst>(I)
     // and emitError(I, ...) below require a non-null instruction.
331  if (!I)
332  return Ctx.emitError(ErrMsg);
333 
334  const char *AsmError = ", possible invalid constraint for vector type";
335  if (const CallInst *CI = dyn_cast<CallInst>(I))
336  if (isa<InlineAsm>(CI->getCalledValue()))
337  return Ctx.emitError(I, ErrMsg + AsmError);
338 
339  return Ctx.emitError(I, ErrMsg);
340 }
341 
342 /// getCopyFromPartsVector - Create a value that contains the specified legal
343 /// parts combined into the value they represent. If the parts combine to a
344 /// type larger than ValueVT then AssertOp can be used to specify whether the
345 /// extra bits are known to be zero (ISD::AssertZext) or sign extended from
346 /// ValueVT (ISD::AssertSext).
///
/// NOTE(review): the visible parameter list has no AssertOp parameter and the
/// body never refers to one; the AssertOp sentence above looks carried over
/// from getCopyFromParts.
///
/// \param Parts     Array of NumParts part values (NumParts > 0, asserted).
/// \param PartVT    Type of each part; asserted to equal the register type
///                  reported by the target's vector type breakdown.
/// \param ValueVT   Vector type to produce (asserted to be a vector).
/// \param V         Forwarded to getCopyFromParts and used when reporting the
///                  "non-trivial scalar-to-vector conversion" error.
/// \param CallConv  When set, the calling-convention-specific type breakdown
///                  is used instead of the generic one.
/// \returns a node of type ValueVT, or UNDEF of ValueVT after reporting an
///          error for an unsupported scalar-to-vector conversion.
///
/// NOTE(review): listing lines 347 (first signature line), 366 (start of the
/// calling-convention breakdown call that assigns NumRegs), 409 (the
/// alternative opcode of the ?: below) and 466 (callee of the error report)
/// are missing from this extract; restore them from upstream.
348  const SDValue *Parts, unsigned NumParts,
349  MVT PartVT, EVT ValueVT, const Value *V,
350  Optional<CallingConv::ID> CallConv) {
351  assert(ValueVT.isVector() && "Not a vector value");
352  assert(NumParts > 0 && "No parts to assemble!");
353  const bool IsABIRegCopy = CallConv.hasValue();
354 
355  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
356  SDValue Val = Parts[0];
357 
358  // Handle a multi-element vector.
359  if (NumParts > 1) {
360  EVT IntermediateVT;
361  MVT RegisterVT;
362  unsigned NumIntermediates;
363  unsigned NumRegs;
364 
365  if (IsABIRegCopy) {
367  *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
368  NumIntermediates, RegisterVT);
369  } else {
370  NumRegs =
371  TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
372  NumIntermediates, RegisterVT);
373  }
374 
375  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
376  NumParts = NumRegs; // Silence a compiler warning.
377  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
378  assert(RegisterVT.getSizeInBits() ==
379  Parts[0].getSimpleValueType().getSizeInBits() &&
380  "Part type sizes don't match!");
381 
382  // Assemble the parts into intermediate operands.
383  SmallVector<SDValue, 8> Ops(NumIntermediates);
384  if (NumIntermediates == NumParts) {
385  // If the register was not expanded, truncate or copy the value,
386  // as appropriate.
387  for (unsigned i = 0; i != NumParts; ++i)
388  Ops[i] = getCopyFromParts(DAG, DL, &Parts[i], 1,
389  PartVT, IntermediateVT, V);
390  } else if (NumParts > 0) {
391  // If the intermediate type was expanded, build the intermediate
392  // operands from the parts.
393  assert(NumParts % NumIntermediates == 0 &&
394  "Must expand into a divisible number of parts!");
395  unsigned Factor = NumParts / NumIntermediates;
396  for (unsigned i = 0; i != NumIntermediates; ++i)
397  Ops[i] = getCopyFromParts(DAG, DL, &Parts[i * Factor], Factor,
398  PartVT, IntermediateVT, V);
399  }
400 
401  // Build a vector with BUILD_VECTOR or CONCAT_VECTORS from the
402  // intermediate operands.
403  EVT BuiltVectorTy =
404  EVT::getVectorVT(*DAG.getContext(), IntermediateVT.getScalarType(),
405  (IntermediateVT.isVector()
406  ? IntermediateVT.getVectorNumElements() * NumParts
407  : NumIntermediates));
408  Val = DAG.getNode(IntermediateVT.isVector() ? ISD::CONCAT_VECTORS
410  DL, BuiltVectorTy, Ops);
411  }
412 
413  // There is now one part, held in Val. Correct it to match ValueVT.
414  EVT PartEVT = Val.getValueType();
415 
416  if (PartEVT == ValueVT)
417  return Val;
418 
419  if (PartEVT.isVector()) {
420  // If the element type of the source/dest vectors are the same, but the
421  // parts vector has more elements than the value vector, then we have a
422  // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the
423  // elements we want.
424  if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) {
425  assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() &&
426  "Cannot narrow, it would be a lossy transformation");
427  return DAG.getNode(
428  ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
429  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
430  }
431 
432  // Vector/Vector bitcast.
433  if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits())
434  return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
435 
436  assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() &&
437  "Cannot handle this kind of promotion");
438  // Promoted vector extract
439  return DAG.getAnyExtOrTrunc(Val, DL, ValueVT);
440 
441  }
442 
     // From here on the single part is a scalar (PartEVT is not a vector).
443  // Trivial bitcast if the types are the same size and the destination
444  // vector type is legal.
445  if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() &&
446  TLI.isTypeLegal(ValueVT))
447  return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
448 
449  if (ValueVT.getVectorNumElements() != 1) {
450  // Certain ABIs require that vectors are passed as integers. For vectors
451  // that are the same size, this is an obvious bitcast.
452  if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) {
453  return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
454  } else if (ValueVT.getSizeInBits() < PartEVT.getSizeInBits()) {
455  // Bitcast Val back the original type and extract the corresponding
456  // vector we want.
457  unsigned Elts = PartEVT.getSizeInBits() / ValueVT.getScalarSizeInBits();
458  EVT WiderVecType = EVT::getVectorVT(*DAG.getContext(),
459  ValueVT.getVectorElementType(), Elts);
460  Val = DAG.getBitcast(WiderVecType, Val);
461  return DAG.getNode(
462  ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val,
463  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
464  }
465 
     // The scalar part is narrower than the vector: report an error and
     // return UNDEF so lowering can continue.
467  *DAG.getContext(), V, "non-trivial scalar-to-vector conversion");
468  return DAG.getUNDEF(ValueVT);
469  }
470 
471  // Handle cases such as i8 -> <1 x i1>
472  EVT ValueSVT = ValueVT.getVectorElementType();
473  if (ValueVT.getVectorNumElements() == 1 && ValueSVT != PartEVT)
474  Val = ValueVT.isFloatingPoint() ? DAG.getFPExtendOrRound(Val, DL, ValueSVT)
475  : DAG.getAnyExtOrTrunc(Val, DL, ValueSVT);
476 
477  return DAG.getBuildVector(ValueVT, DL, Val);
478 }
479 
// Forward declaration of getCopyToPartsVector, needed because getCopyToParts
// calls it for vector values before it is defined.
480 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &dl,
481  SDValue Val, SDValue *Parts, unsigned NumParts,
482  MVT PartVT, const Value *V,
483  Optional<CallingConv::ID> CallConv);
484 
485 /// getCopyToParts - Create a series of nodes that contain the specified value
486 /// split into legal parts. If the parts contain more bits than Val, then, for
487 /// integers, ExtendKind can be used to specify how to generate the extra bits.
///
/// \param Val         Value to split. Vector values are delegated to
///                    getCopyToPartsVector.
/// \param Parts       Output array; entries [0, NumParts) are written.
/// \param NumParts    Number of parts to produce; 0 is a no-op.
/// \param PartVT      Type of each part (asserted legal for the target).
/// \param V           Only forwarded to the recursive / vector helper calls
///                    and the error report.
/// \param CallConv    Forwarded to the recursive / vector helper calls.
/// \param ExtendKind  Node kind used when Val must be widened to fill the
///                    parts.
///
/// On big-endian data layouts the order of the produced parts is reversed at
/// the end.
///
/// NOTE(review): listing lines 558 (callee of the "scalar-to-vector
/// conversion failed" report) and 592 (start of the integer-type expression
/// used for the BITCAST of Parts[0]) are missing from this extract; restore
/// them from upstream.
488 static void getCopyToParts(SelectionDAG &DAG, const SDLoc &DL, SDValue Val,
489  SDValue *Parts, unsigned NumParts, MVT PartVT,
490  const Value *V,
491  Optional<CallingConv::ID> CallConv = None,
492  ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
493  EVT ValueVT = Val.getValueType();
494 
495  // Handle the vector case separately.
496  if (ValueVT.isVector())
497  return getCopyToPartsVector(DAG, DL, Val, Parts, NumParts, PartVT, V,
498  CallConv);
499 
500  unsigned PartBits = PartVT.getSizeInBits();
     // Remember the requested count: NumParts is reduced below when it is not
     // a power of two, but the final big-endian reversal covers all parts.
501  unsigned OrigNumParts = NumParts;
502  assert(DAG.getTargetLoweringInfo().isTypeLegal(PartVT) &&
503  "Copying to an illegal type!");
504 
505  if (NumParts == 0)
506  return;
507 
508  assert(!ValueVT.isVector() && "Vector case handled elsewhere");
509  EVT PartEVT = PartVT;
510  if (PartEVT == ValueVT) {
511  assert(NumParts == 1 && "No-op copy with multiple parts!");
512  Parts[0] = Val;
513  return;
514  }
515 
     // Step 1: make Val exactly NumParts * PartBits wide (extend, bitcast or
     // truncate as needed).
516  if (NumParts * PartBits > ValueVT.getSizeInBits()) {
517  // If the parts cover more bits than the value has, promote the value.
518  if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) {
519  assert(NumParts == 1 && "Do not know what to promote to!");
520  Val = DAG.getNode(ISD::FP_EXTEND, DL, PartVT, Val);
521  } else {
522  if (ValueVT.isFloatingPoint()) {
523  // FP values need to be bitcast, then extended if they are being put
524  // into a larger container.
525  ValueVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
526  Val = DAG.getNode(ISD::BITCAST, DL, ValueVT, Val);
527  }
528  assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
529  ValueVT.isInteger() &&
530  "Unknown mismatch!");
531  ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
532  Val = DAG.getNode(ExtendKind, DL, ValueVT, Val);
533  if (PartVT == MVT::x86mmx)
534  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
535  }
536  } else if (PartBits == ValueVT.getSizeInBits()) {
537  // Different types of the same size.
538  assert(NumParts == 1 && PartEVT != ValueVT);
539  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
540  } else if (NumParts * PartBits < ValueVT.getSizeInBits()) {
541  // If the parts cover less bits than value has, truncate the value.
542  assert((PartVT.isInteger() || PartVT == MVT::x86mmx) &&
543  ValueVT.isInteger() &&
544  "Unknown mismatch!");
545  ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
546  Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
547  if (PartVT == MVT::x86mmx)
548  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
549  }
550 
551  // The value may have changed - recompute ValueVT.
552  ValueVT = Val.getValueType();
553  assert(NumParts * PartBits == ValueVT.getSizeInBits() &&
554  "Failed to tile the value with PartVT!");
555 
556  if (NumParts == 1) {
557  if (PartEVT != ValueVT) {
559  "scalar-to-vector conversion failed");
560  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
561  }
562 
563  Parts[0] = Val;
564  return;
565  }
566 
567  // Expand the value into multiple parts.
568  if (NumParts & (NumParts - 1)) {
569  // The number of parts is not a power of 2. Split off and copy the tail.
570  assert(PartVT.isInteger() && ValueVT.isInteger() &&
571  "Do not know what to expand to!");
572  unsigned RoundParts = 1 << Log2_32(NumParts);
573  unsigned RoundBits = RoundParts * PartBits;
574  unsigned OddParts = NumParts - RoundParts;
     // The tail is the bits above RoundBits; it is copied recursively into
     // Parts[RoundParts ..].
575  SDValue OddVal = DAG.getNode(ISD::SRL, DL, ValueVT, Val,
576  DAG.getIntPtrConstant(RoundBits, DL));
577  getCopyToParts(DAG, DL, OddVal, Parts + RoundParts, OddParts, PartVT, V,
578  CallConv);
579 
580  if (DAG.getDataLayout().isBigEndian())
581  // The odd parts were reversed by getCopyToParts - unreverse them.
582  std::reverse(Parts + RoundParts, Parts + NumParts);
583 
584  NumParts = RoundParts;
585  ValueVT = EVT::getIntegerVT(*DAG.getContext(), NumParts * PartBits);
586  Val = DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val);
587  }
588 
589  // The number of parts is a power of 2. Repeatedly bisect the value using
590  // EXTRACT_ELEMENT.
591  Parts[0] = DAG.getNode(ISD::BITCAST, DL,
593  ValueVT.getSizeInBits()),
594  Val);
595 
     // Each pass halves StepSize: the value held at Parts[i] is split into
     // its element 0 (kept at Parts[i]) and element 1 (stored at
     // Parts[i + StepSize/2]).
596  for (unsigned StepSize = NumParts; StepSize > 1; StepSize /= 2) {
597  for (unsigned i = 0; i < NumParts; i += StepSize) {
598  unsigned ThisBits = StepSize * PartBits / 2;
599  EVT ThisVT = EVT::getIntegerVT(*DAG.getContext(), ThisBits);
600  SDValue &Part0 = Parts[i];
601  SDValue &Part1 = Parts[i+StepSize/2];
602 
     // Part1 must be computed first: both extracts read the old Part0.
603  Part1 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
604  ThisVT, Part0, DAG.getIntPtrConstant(1, DL));
605  Part0 = DAG.getNode(ISD::EXTRACT_ELEMENT, DL,
606  ThisVT, Part0, DAG.getIntPtrConstant(0, DL));
607 
608  if (ThisBits == PartBits && ThisVT != PartVT) {
609  Part0 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part0);
610  Part1 = DAG.getNode(ISD::BITCAST, DL, PartVT, Part1);
611  }
612  }
613  }
614 
615  if (DAG.getDataLayout().isBigEndian())
616  std::reverse(Parts, Parts + OrigNumParts);
617 }
618 
// Widen vector Val to PartVT when PartVT is a vector with the same element
// type and more elements: the original elements are followed by UNDEF
// elements. Returns an empty SDValue when PartVT is not a vector or this
// widening does not apply.
// NOTE(review): listing lines 619 (first signature line; callers in this file
// invoke this as widenVectorToPartType(DAG, Val, DL, PartVT)) and 632 (the
// declaration of the Ops container used below) are missing from this extract;
// restore them from upstream.
620  SDValue Val, const SDLoc &DL, EVT PartVT) {
621  if (!PartVT.isVector())
622  return SDValue();
623 
624  EVT ValueVT = Val.getValueType();
625  unsigned PartNumElts = PartVT.getVectorNumElements();
626  unsigned ValueNumElts = ValueVT.getVectorNumElements();
627  if (PartNumElts > ValueNumElts &&
628  PartVT.getVectorElementType() == ValueVT.getVectorElementType()) {
629  EVT ElementVT = PartVT.getVectorElementType();
630  // Vector widening case, e.g. <2 x float> -> <4 x float>. Shuffle in
631  // undef elements.
633  DAG.ExtractVectorElements(Val, Ops);
634  SDValue EltUndef = DAG.getUNDEF(ElementVT);
     // Pad with undef up to the part's element count.
635  for (unsigned i = ValueNumElts, e = PartNumElts; i != e; ++i)
636  Ops.push_back(EltUndef);
637 
638  // FIXME: Use CONCAT for 2x -> 4x.
639  return DAG.getBuildVector(PartVT, DL, Ops);
640  }
641 
642  return SDValue();
643 }
644 
645 /// getCopyToPartsVector - Create a series of nodes that contain the specified
646 /// value split into legal parts.
///
/// \param Val       Vector value to split (asserted to be a vector).
/// \param Parts     Output array; entries [0, NumParts) are written.
/// \param NumParts  Number of parts. For more than one part it is asserted to
///                  equal the register count of the target's vector type
///                  breakdown.
/// \param PartVT    Type of each part.
/// \param V         Only forwarded to getCopyToParts.
/// \param CallConv  When set, the calling-convention-specific type breakdown
///                  is used instead of the generic one; also forwarded to
///                  getCopyToParts.
647 static void getCopyToPartsVector(SelectionDAG &DAG, const SDLoc &DL,
648  SDValue Val, SDValue *Parts, unsigned NumParts,
649  MVT PartVT, const Value *V,
650  Optional<CallingConv::ID> CallConv) {
651  EVT ValueVT = Val.getValueType();
652  assert(ValueVT.isVector() && "Not a vector");
653  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
654  const bool IsABIRegCopy = CallConv.hasValue();
655 
     // Single part: convert the whole vector to PartVT in one step.
656  if (NumParts == 1) {
657  EVT PartEVT = PartVT;
658  if (PartEVT == ValueVT) {
659  // Nothing to do.
660  } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) {
661  // Bitconvert vector->vector case.
662  Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val);
663  } else if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, PartVT)) {
664  Val = Widened;
665  } else if (PartVT.isVector() &&
666  PartEVT.getVectorElementType().bitsGE(
667  ValueVT.getVectorElementType()) &&
668  PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) {
669 
670  // Promoted vector extract
671  Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
672  } else {
673  if (ValueVT.getVectorNumElements() == 1) {
674  Val = DAG.getNode(
675  ISD::EXTRACT_VECTOR_ELT, DL, PartVT, Val,
676  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
677  } else {
     // Bitcast the vector to an integer of the same width, then
     // any-extend/truncate that integer to PartVT.
678  assert(PartVT.getSizeInBits() > ValueVT.getSizeInBits() &&
679  "lossy conversion of vector to scalar type");
680  EVT IntermediateType =
681  EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
682  Val = DAG.getBitcast(IntermediateType, Val);
683  Val = DAG.getAnyExtOrTrunc(Val, DL, PartVT);
684  }
685  }
686 
687  assert(Val.getValueType() == PartVT && "Unexpected vector part value type");
688  Parts[0] = Val;
689  return;
690  }
691 
692  // Handle a multi-element vector.
693  EVT IntermediateVT;
694  MVT RegisterVT;
695  unsigned NumIntermediates;
696  unsigned NumRegs;
697  if (IsABIRegCopy) {
698  NumRegs = TLI.getVectorTypeBreakdownForCallingConv(
699  *DAG.getContext(), CallConv.getValue(), ValueVT, IntermediateVT,
700  NumIntermediates, RegisterVT);
701  } else {
702  NumRegs =
703  TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT,
704  NumIntermediates, RegisterVT);
705  }
706 
707  assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!");
708  NumParts = NumRegs; // Silence a compiler warning.
709  assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!");
710 
711  unsigned IntermediateNumElts = IntermediateVT.isVector() ?
712  IntermediateVT.getVectorNumElements() : 1;
713 
714  // Convert the vector to the appropriate type if necessary.
715  unsigned DestVectorNoElts = NumIntermediates * IntermediateNumElts;
716 
717  EVT BuiltVectorTy = EVT::getVectorVT(
718  *DAG.getContext(), IntermediateVT.getScalarType(), DestVectorNoElts);
719  MVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
720  if (ValueVT != BuiltVectorTy) {
     // Widen with undef elements when that applies, then bitcast to the
     // vector type the intermediates are extracted from.
721  if (SDValue Widened = widenVectorToPartType(DAG, Val, DL, BuiltVectorTy))
722  Val = Widened;
723 
724  Val = DAG.getNode(ISD::BITCAST, DL, BuiltVectorTy, Val);
725  }
726 
727  // Split the vector into intermediate operands.
728  SmallVector<SDValue, 8> Ops(NumIntermediates);
729  for (unsigned i = 0; i != NumIntermediates; ++i) {
730  if (IntermediateVT.isVector()) {
731  Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val,
732  DAG.getConstant(i * IntermediateNumElts, DL, IdxVT));
733  } else {
734  Ops[i] = DAG.getNode(
735  ISD::EXTRACT_VECTOR_ELT, DL, IntermediateVT, Val,
736  DAG.getConstant(i, DL, IdxVT));
737  }
738  }
739 
740  // Split the intermediate operands into legal parts.
741  if (NumParts == NumIntermediates) {
742  // If the register was not expanded, promote or copy the value,
743  // as appropriate.
744  for (unsigned i = 0; i != NumParts; ++i)
745  getCopyToParts(DAG, DL, Ops[i], &Parts[i], 1, PartVT, V, CallConv);
746  } else if (NumParts > 0) {
747  // If the intermediate type was expanded, split each value into
748  // legal parts.
749  assert(NumIntermediates != 0 && "division by zero");
750  assert(NumParts % NumIntermediates == 0 &&
751  "Must expand into a divisible number of parts!");
752  unsigned Factor = NumParts / NumIntermediates;
753  for (unsigned i = 0; i != NumIntermediates; ++i)
754  getCopyToParts(DAG, DL, Ops[i], &Parts[i * Factor], Factor, PartVT, V,
755  CallConv);
756  }
757 }
758 
// Constructor for a single value: records one value type (valuevt), one
// register type (regvt), the given register list (regs), a single register
// count equal to regs.size(), and the optional calling convention CC.
// NOTE(review): the first line of this constructor (listing line 759, which
// declares the regs and regvt parameters) is missing from this extract;
// presumably this is RegsForValue::RegsForValue -- confirm against upstream.
760  EVT valuevt, Optional<CallingConv::ID> CC)
761  : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs),
762  RegCount(1, regs.size()), CallConv(CC) {}
763 
// Constructor from an IR type: computes the value types of Ty, then for each
// value type appends its register type, its register count, and that many
// consecutive register numbers starting at Reg. When isABIMangled() is true
// the calling-convention-specific register count and type queries are used.
// NOTE(review): listing lines 764 (first signature line, declaring at least
// the Context and TLI parameters used below) and 766 (end of the parameter
// list, declaring CC) are missing from this extract; presumably this is
// RegsForValue::RegsForValue -- confirm against upstream.
765  const DataLayout &DL, unsigned Reg, Type *Ty,
767  ComputeValueVTs(TLI, DL, Ty, ValueVTs);
768 
     // CallConv must be set before the loop: isABIMangled() is queried there.
     // NOTE(review): presumably isABIMangled() reads CallConv -- its
     // definition is not in this file section.
769  CallConv = CC;
770 
771  for (EVT ValueVT : ValueVTs) {
772  unsigned NumRegs =
773  isABIMangled()
774  ? TLI.getNumRegistersForCallingConv(Context, CC.getValue(), ValueVT)
775  : TLI.getNumRegisters(Context, ValueVT);
776  MVT RegisterVT =
777  isABIMangled()
778  ? TLI.getRegisterTypeForCallingConv(Context, CC.getValue(), ValueVT)
779  : TLI.getRegisterType(Context, ValueVT);
780  for (unsigned i = 0; i != NumRegs; ++i)
781  Regs.push_back(Reg + i);
782  RegVTs.push_back(RegisterVT);
783  RegCount.push_back(NumRegs);
     // The next value type continues with the following register numbers.
784  Reg += NumRegs;
785  }
786 }
787 
// Emit CopyFromReg nodes for every register of this object, assemble the
// parts of each value type with getCopyFromParts, and return the values
// merged into one MERGE_VALUES node. Returns an empty SDValue when there are
// no value types.
//
// Chain is updated to the chain result of each copy in turn. If Flag is
// non-null each copy consumes *Flag and *Flag is updated to the copy's glue
// result. V is forwarded to getCopyFromParts.
//
// NOTE(review): listing lines 788 (first signature line; the body uses a DAG
// parameter declared there), 800 (declaration of the Parts container), 825
// (first half of the condition that skips the assert-node logic) and 829
// (declaration of LOI) are missing from this extract; presumably this is
// RegsForValue::getCopyFromRegs -- restore the lines from upstream.
789  FunctionLoweringInfo &FuncInfo,
790  const SDLoc &dl, SDValue &Chain,
791  SDValue *Flag, const Value *V) const {
792  // A Value with type {} or [0 x %t] needs no registers.
793  if (ValueVTs.empty())
794  return SDValue();
795 
796  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
797 
798  // Assemble the legal parts into the final values.
799  SmallVector<SDValue, 4> Values(ValueVTs.size());
     // 'Value' indexes value types; 'Part' is the index into Regs of the
     // first register belonging to the current value.
801  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
802  // Copy the legal parts from the registers.
803  EVT ValueVT = ValueVTs[Value];
804  unsigned NumRegs = RegCount[Value];
805  MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
806  *DAG.getContext(),
807  CallConv.getValue(), RegVTs[Value])
808  : RegVTs[Value];
809 
810  Parts.resize(NumRegs);
811  for (unsigned i = 0; i != NumRegs; ++i) {
812  SDValue P;
813  if (!Flag) {
814  P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT);
815  } else {
816  P = DAG.getCopyFromReg(Chain, dl, Regs[Part+i], RegisterVT, *Flag);
817  *Flag = P.getValue(2);
818  }
819 
820  Chain = P.getValue(1);
821  Parts[i] = P;
822 
823  // If the source register was virtual and if we know something about it,
824  // add an assert node.
826  !RegisterVT.isInteger())
827  continue;
828 
830  FuncInfo.GetLiveOutRegInfo(Regs[Part+i]);
831  if (!LOI)
832  continue;
833 
834  unsigned RegSize = RegisterVT.getScalarSizeInBits();
835  unsigned NumSignBits = LOI->NumSignBits;
836  unsigned NumZeroBits = LOI->Known.countMinLeadingZeros();
837 
838  if (NumZeroBits == RegSize) {
839  // The current value is a zero.
840  // Explicitly express that as it would be easier for
841  // optimizations to kick in.
842  Parts[i] = DAG.getConstant(0, dl, RegisterVT);
843  continue;
844  }
845 
846  // FIXME: We capture more information than the dag can represent. For
847  // now, just use the tightest assertzext/assertsext possible.
     // Known leading zeros take priority over known sign bits.
848  bool isSExt;
849  EVT FromVT(MVT::Other);
850  if (NumZeroBits) {
851  FromVT = EVT::getIntegerVT(*DAG.getContext(), RegSize - NumZeroBits);
852  isSExt = false;
853  } else if (NumSignBits > 1) {
854  FromVT =
855  EVT::getIntegerVT(*DAG.getContext(), RegSize - NumSignBits + 1);
856  isSExt = true;
857  } else {
858  continue;
859  }
860  // Add an assertion node.
861  assert(FromVT != MVT::Other);
862  Parts[i] = DAG.getNode(isSExt ? ISD::AssertSext : ISD::AssertZext, dl,
863  RegisterVT, P, DAG.getValueType(FromVT));
864  }
865 
866  Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(), NumRegs,
867  RegisterVT, ValueVT, V, CallConv);
868  Part += NumRegs;
869  Parts.clear();
870  }
871 
872  return DAG.getNode(ISD::MERGE_VALUES, dl, DAG.getVTList(ValueVTs), Values);
873 }
874 
// Split Val into legal parts with getCopyToParts and emit one CopyToReg per
// register of this object.
//
// Chain is read as the input chain of every copy and is only updated after
// all copies are emitted: to the last copy's chain when there is a single
// register or Flag is non-null, otherwise to a TokenFactor of all copy
// chains. If Flag is non-null each copy consumes *Flag and *Flag is updated
// to the copy's glue result. PreferredExtendType seeds the extension kind;
// once it is switched to ZERO_EXTEND below it stays so for later values.
//
// NOTE(review): the first signature line (listing line 875; the body uses the
// DAG and Val parameters declared there) is missing from this extract;
// presumably this is RegsForValue::getCopyToRegs -- restore it from upstream.
876  const SDLoc &dl, SDValue &Chain, SDValue *Flag,
877  const Value *V,
878  ISD::NodeType PreferredExtendType) const {
879  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
880  ISD::NodeType ExtendKind = PreferredExtendType;
881 
882  // Get the list of the value's legal parts.
883  unsigned NumRegs = Regs.size();
884  SmallVector<SDValue, 8> Parts(NumRegs);
885  for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) {
886  unsigned NumParts = RegCount[Value];
887 
888  MVT RegisterVT = isABIMangled() ? TLI.getRegisterTypeForCallingConv(
889  *DAG.getContext(),
890  CallConv.getValue(), RegVTs[Value])
891  : RegVTs[Value];
892 
     // Prefer zero-extension over any-extension when the target reports it
     // as free for this value and register type.
893  if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT))
894  ExtendKind = ISD::ZERO_EXTEND;
895 
896  getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part],
897  NumParts, RegisterVT, V, CallConv, ExtendKind);
898  Part += NumParts;
899  }
900 
901  // Copy the parts into the registers.
902  SmallVector<SDValue, 8> Chains(NumRegs);
903  for (unsigned i = 0; i != NumRegs; ++i) {
904  SDValue Part;
905  if (!Flag) {
906  Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i]);
907  } else {
908  Part = DAG.getCopyToReg(Chain, dl, Regs[i], Parts[i], *Flag);
909  *Flag = Part.getValue(1);
910  }
911 
912  Chains[i] = Part.getValue(0);
913  }
914 
915  if (NumRegs == 1 || Flag)
916  // If NumRegs > 1 && Flag is used then the use of the last CopyToReg is
917  // flagged to it. That is the CopyToReg nodes and the user are considered
918  // a single scheduling unit. If we create a TokenFactor and return it as
919  // chain, then the TokenFactor is both a predecessor (operand) of the
920  // user as well as a successor (the TF operands are flagged to the user).
921  // c1, f1 = CopyToReg
922  // c2, f2 = CopyToReg
923  // c3 = TokenFactor c1, c2
924  // ...
925  // = op c3, ..., f2
926  Chain = Chains[NumRegs-1];
927  else
928  Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chains);
929 }
930 
/// Append the inline-asm operand encoding for these registers to Ops: first a
/// flag word (emitted as an i32 target constant), then one register node per
/// register.
///
/// \param Code         operand kind passed to InlineAsm::getFlagWord.
/// \param HasMatching  when true, MatchingIdx is folded into the flag word.
/// \param MatchingIdx  index used only when HasMatching is true.
/// \param Ops          output list that receives the new operands.
///
/// NOTE(review): this listing skips source lines 941, 947 and 967, so the rest
/// of the `else if` condition, the declaration of MRI and part of the assert
/// inside the clobber loop are not visible here.
931 void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching,
932  unsigned MatchingIdx, const SDLoc &dl,
933  SelectionDAG &DAG,
934  std::vector<SDValue> &Ops) const {
935  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
936 
 // The flag word starts out encoding the operand kind and register count.
937  unsigned Flag = InlineAsm::getFlagWord(Code, Regs.size());
938  if (HasMatching)
939  Flag = InlineAsm::getFlagWordForMatchingOp(Flag, MatchingIdx);
940  else if (!Regs.empty() &&
942  // Put the register class of the virtual registers in the flag word. That
943  // way, later passes can recompute register class constraints for inline
944  // assembly as well as normal instructions.
945  // Don't do this for tied operands that can use the regclass information
946  // from the def.
948  const TargetRegisterClass *RC = MRI.getRegClass(Regs.front());
949  Flag = InlineAsm::getFlagWordForRegClass(Flag, RC->getID());
950  }
951 
952  SDValue Res = DAG.getTargetConstant(Flag, dl, MVT::i32);
953  Ops.push_back(Res);
954 
955  if (Code == InlineAsm::Kind_Clobber) {
956  // Clobbers should always have a 1:1 mapping with registers, and may
957  // reference registers that have illegal (e.g. vector) types. Hence, we
958  // shouldn't try to apply any sort of splitting logic to them.
959  assert(Regs.size() == RegVTs.size() && Regs.size() == ValueVTs.size() &&
960  "No 1:1 mapping from clobbers to regs?");
961  unsigned SP = TLI.getStackPointerRegisterToSaveRestore();
 // SP is only read inside the assert below; the cast silences the
 // unused-variable warning when asserts are compiled out.
962  (void)SP;
963  for (unsigned I = 0, E = ValueVTs.size(); I != E; ++I) {
964  Ops.push_back(DAG.getRegister(Regs[I], RegVTs[I]));
965  assert(
966  (Regs[I] != SP ||
968  "If we clobbered the stack pointer, MFI should know about it.");
969  }
970  return;
971  }
972 
 // Non-clobber operands: each value contributes as many register operands as
 // the target reports for its type, all typed with that value's register VT.
973  for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) {
974  unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]);
975  MVT RegisterVT = RegVTs[Value];
976  for (unsigned i = 0; i != NumRegs; ++i) {
977  assert(Reg < Regs.size() && "Mismatch in # registers expected");
978  unsigned TheReg = Regs[Reg++];
979  Ops.push_back(DAG.getRegister(TheReg, RegisterVT));
980  }
981  }
982 }
983 
 // Purpose: fill OutVec with one (register, register size) pair per entry of
 // Regs, where the size is RegisterVT.getSizeInBits() of the value the
 // register belongs to, then return OutVec.
 // NOTE(review): the signature and the declaration of OutVec are not present
 // in this listing.
987  unsigned I = 0;
988  for (auto CountAndVT : zip_first(RegCount, RegVTs)) {
 // This local RegCount shadows the RegCount sequence iterated above; inside
 // the loop body the name refers to the per-value register count.
989  unsigned RegCount = std::get<0>(CountAndVT);
990  MVT RegisterVT = std::get<1>(CountAndVT);
991  unsigned RegisterSize = RegisterVT.getSizeInBits();
 // I keeps running across values, so it walks Regs exactly once overall.
992  for (unsigned E = I + RegCount; I != E; ++I)
993  OutVec.push_back(std::make_pair(Regs[I], RegisterSize));
994  }
995  return OutVec;
996 }
997 
999  const TargetLibraryInfo *li) {
 // Purpose: store the supplied analysis pointers, cache the data layout and
 // LLVM context obtained from DAG, and empty LPadToCallSiteMap.
 // NOTE(review): the opening line of this signature (declaring aa and gfi) is
 // not present in this listing.
1000  AA = aa;
1001  GFI = gfi;
1002  LibInfo = li;
1003  DL = &DAG.getDataLayout();
1004  Context = DAG.getContext();
1005  LPadToCallSiteMap.clear();
1006 }
1007 
 // Purpose: reset the builder's working state: the value/node maps, the
 // pending load and export lists, the current instruction, the tail-call flag,
 // the node order counter and the statepoint lowering state.
 // DanglingDebugInfoMap is not touched here.
 // NOTE(review): the signature line is not present in this listing.
1009  NodeMap.clear();
1010  UnusedArgNodeMap.clear();
1011  PendingLoads.clear();
1012  PendingExports.clear();
1013  CurInst = nullptr;
1014  HasTailCall = false;
1015  SDNodeOrder = LowestSDNodeOrder;
1016  StatepointLowering.clear();
1017 }
1018 
 // Purpose: discard every entry recorded in DanglingDebugInfoMap.
 // NOTE(review): the signature line is not present in this listing.
1020  DanglingDebugInfoMap.clear();
1021 }
1022 
 // Purpose: return the DAG root after folding in all pending loads. With no
 // pending loads the current root is returned unchanged; otherwise the new
 // root is built from PendingLoads alone, installed with DAG.setRoot, and
 // PendingLoads is emptied.
 // NOTE(review): the signature line is not present in this listing.
1024  if (PendingLoads.empty())
1025  return DAG.getRoot();
1026 
 // A single pending load becomes the root directly; no TokenFactor needed.
1027  if (PendingLoads.size() == 1) {
1028  SDValue Root = PendingLoads[0];
1029  DAG.setRoot(Root);
1030  PendingLoads.clear();
1031  return Root;
1032  }
1033 
1034  // Otherwise, we have to make a token factor node.
1035  // If we have >= 2^16 loads then split across multiple token factors as
1036  // there's a 64k limit on the number of SDNode operands.
1037  SDValue Root;
1038  size_t Limit = (1 << 16) - 1;
 // Each pass replaces the last Limit entries with one TokenFactor over them,
 // shrinking the list by Limit - 1 until it fits in a single node.
1039  while (PendingLoads.size() > Limit) {
1040  unsigned SliceIdx = PendingLoads.size() - Limit;
1041  auto ExtractedTFs = ArrayRef<SDValue>(PendingLoads).slice(SliceIdx, Limit);
1042  SDValue NewTF =
1043  DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, ExtractedTFs);
1044  PendingLoads.erase(PendingLoads.begin() + SliceIdx, PendingLoads.end());
1045  PendingLoads.emplace_back(NewTF);
1046  }
1047  Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, PendingLoads);
1048  PendingLoads.clear();
1049  DAG.setRoot(Root);
1050  return Root;
1051 }
1052 
 // Purpose: return the DAG root after merging in all pending exports. With no
 // pending exports the current root is returned unchanged; otherwise a
 // TokenFactor over PendingExports becomes the new root and the list is
 // emptied.
 // NOTE(review): the signature line is not present in this listing.
1054  SDValue Root = DAG.getRoot();
1055 
1056  if (PendingExports.empty())
1057  return Root;
1058 
1059  // Turn all of the CopyToReg chains into one factored node.
1060  if (Root.getOpcode() != ISD::EntryToken) {
 // Look for an export whose first operand is already the current root.
1061  unsigned i = 0, e = PendingExports.size();
1062  for (; i != e; ++i) {
1063  assert(PendingExports[i].getNode()->getNumOperands() > 1);
1064  if (PendingExports[i].getNode()->getOperand(0) == Root)
1065  break; // Don't add the root if we already indirectly depend on it.
1066  }
1067 
 // i == e means the loop ran to completion without finding such an export,
 // so the root has to be added explicitly to keep it in the new TokenFactor.
1068  if (i == e)
1069  PendingExports.push_back(Root);
1070  }
1071 
1072  Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other,
1073  PendingExports);
1074  PendingExports.clear();
1075  DAG.setRoot(Root);
1076  return Root;
1077 }
1078 
 // Purpose: lower one IR instruction I. It prepares successor PHI inputs for
 // terminators, advances the node order, dispatches on the opcode, transfers
 // fast-math flags to the resulting node, and copies the result to its export
 // registers when needed. CurInst points at I for the duration of the call.
 // NOTE(review): the signature line is not present in this listing.
1080  // Set up outgoing PHI node register values before emitting the terminator.
1081  if (I.isTerminator()) {
1082  HandlePHINodesInSuccessorBlocks(I.getParent());
1083  }
1084 
1085  // Increase the SDNodeOrder if dealing with a non-debug instruction.
1086  if (!isa<DbgInfoIntrinsic>(I))
1087  ++SDNodeOrder;
1088 
1089  CurInst = &I;
1090 
 // Dispatch to the opcode-based overload, which selects the visit* handler.
1091  visit(I.getOpcode(), I);
1092 
1093  if (auto *FPMO = dyn_cast<FPMathOperator>(&I)) {
1094  // Propagate the fast-math-flags of this IR instruction to the DAG node that
1095  // maps to this instruction.
1096  // TODO: We could handle all flags (nsw, etc) here.
1097  // TODO: If an IR instruction maps to >1 node, only the final node will have
1098  // flags set.
1099  if (SDNode *Node = getNodeForIRValue(&I)) {
1100  SDNodeFlags IncomingFlags;
1101  IncomingFlags.copyFMF(*FPMO);
 // A node with no defined flags takes the incoming ones wholesale; a node
 // that already has flags is narrowed to the intersection.
1102  if (!Node->getFlags().isDefined())
1103  Node->setFlags(IncomingFlags);
1104  else
1105  Node->intersectFlagsWith(IncomingFlags);
1106  }
1107  }
1108 
1109  if (!I.isTerminator() && !HasTailCall &&
1110  !isStatepoint(&I)) // statepoints handle their exports internally
1111  CopyToExportRegsIfNeeded(&I);
1112 
1113  CurInst = nullptr;
1114 }
1115 
/// Handler that must never run: the builder does not visit PHI nodes, so
/// reaching this function hits llvm_unreachable.
1116 void SelectionDAGBuilder::visitPHI(const PHINode &) {
1117  llvm_unreachable("SelectionDAGBuilder shouldn't visit PHI nodes!");
1118 }
1119 
/// Dispatch on Opcode to the matching visit<OPCODE> handler, passing I cast to
/// the handler's instruction class. An opcode with no generated case reaches
/// llvm_unreachable.
1120 void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) {
1121  // Note: this doesn't use InstVisitor, because it has to work with
1122  // ConstantExpr's in addition to instructions.
1123  switch (Opcode) {
1124  default: llvm_unreachable("Unknown instruction type encountered!");
1125  // Build the switch statement using the Instruction.def file.
 // Each HANDLE_INST entry in the included file expands to one `case` label
 // that calls the corresponding handler and breaks out of the switch.
1126 #define HANDLE_INST(NUM, OPCODE, CLASS) \
1127  case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break;
1128 #include "llvm/IR/Instruction.def"
1129  }
1130 }
1131 
1133  const DIExpression *Expr) {
 // Purpose: remove from every vector in DanglingDebugInfoMap the entries
 // that describe the same variable as Variable and whose expression fragment
 // overlaps Expr.
 // NOTE(review): the opening line of this signature (declaring Variable) is
 // not present in this listing.
1134  auto isMatchingDbgValue = [&](DanglingDebugInfo &DDI) {
1135  const DbgValueInst *DI = DDI.getDI();
1136  DIVariable *DanglingVariable = DI->getVariable();
1137  DIExpression *DanglingExpr = DI->getExpression();
1138  if (DanglingVariable == Variable && Expr->fragmentsOverlap(DanglingExpr)) {
1139  LLVM_DEBUG(dbgs() << "Dropping dangling debug info for " << *DI << "\n");
1140  return true;
1141  }
1142  return false;
1143  };
1144 
 // The map entries themselves are kept; only matching vector elements go.
1145  for (auto &DDIMI : DanglingDebugInfoMap) {
1146  DanglingDebugInfoVector &DDIV = DDIMI.second;
1147  DDIV.erase(remove_if(DDIV, isMatchingDbgValue), DDIV.end());
1148  }
1149 }
1150 
1151 // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V,
1152 // generate the debug data structures now that we've seen its definition.
// Every dangling entry recorded for V is either attached to Val as an
// SDDbgValue, handled by EmitFuncArgumentDbgValue, or dropped; the entry
// list for V is emptied afterwards.
// NOTE(review): the opening line of this signature (declaring V) is not
// present in this listing.
1154  SDValue Val) {
1155  auto DanglingDbgInfoIt = DanglingDebugInfoMap.find(V);
1156  if (DanglingDbgInfoIt == DanglingDebugInfoMap.end())
1157  return;
1158 
1159  DanglingDebugInfoVector &DDIV = DanglingDbgInfoIt->second;
1160  for (auto &DDI : DDIV) {
1161  const DbgValueInst *DI = DDI.getDI();
1162  assert(DI && "Ill-formed DanglingDebugInfo");
1163  DebugLoc dl = DDI.getdl();
 // NOTE(review): Val.getNode() is dereferenced on the next line, yet it is
 // tested for null a few lines further down. If a null Val can reach this
 // point, this line faults before that check runs. Confirm against callers.
1164  unsigned ValSDNodeOrder = Val.getNode()->getIROrder();
1165  unsigned DbgSDNodeOrder = DDI.getSDNodeOrder();
1166  DILocalVariable *Variable = DI->getVariable();
1167  DIExpression *Expr = DI->getExpression();
1168  assert(Variable->isValidLocationForIntrinsic(dl) &&
1169  "Expected inlined-at fields to agree");
1170  SDDbgValue *SDV;
1171  if (Val.getNode()) {
 // A true return from EmitFuncArgumentDbgValue means nothing more is
 // emitted here; only a false return falls into the generic path.
1172  if (!EmitFuncArgumentDbgValue(V, Variable, Expr, dl, false, Val)) {
1173  LLVM_DEBUG(dbgs() << "Resolve dangling debug info [order="
1174  << DbgSDNodeOrder << "] for:\n " << *DI << "\n");
1175  LLVM_DEBUG(dbgs() << " By mapping to:\n "; Val.dump());
1176  // Increase the SDNodeOrder for the DbgValue here to make sure it is
1177  // inserted after the definition of Val when emitting the instructions
1178  // after ISel. An alternative could be to teach
1179  // ScheduleDAGSDNodes::EmitSchedule to delay the insertion properly.
1180  LLVM_DEBUG(if (ValSDNodeOrder > DbgSDNodeOrder) dbgs()
1181  << "changing SDNodeOrder from " << DbgSDNodeOrder << " to "
1182  << ValSDNodeOrder << "\n");
1183  SDV = getDbgValue(Val, Variable, Expr, dl,
1184  std::max(DbgSDNodeOrder, ValSDNodeOrder));
1185  DAG.AddDbgValue(SDV, Val.getNode(), false);
1186  } else
1187  LLVM_DEBUG(dbgs() << "Resolved dangling debug info for " << *DI
1188  << "in EmitFuncArgumentDbgValue\n");
1189  } else
1190  LLVM_DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
1191  }
1192  DDIV.clear();
1193 }
1194 
1195 /// getCopyFromRegs - If a virtual register is recorded for the value V in
1196 /// FuncInfo.ValueMap, emit a copy from it for type Ty and resolve any
/// dangling debug info for V against the result. Return an empty SDValue()
/// otherwise.
/// NOTE(review): the signature line is not present in this listing.
1198  DenseMap<const Value *, unsigned>::iterator It = FuncInfo.ValueMap.find(V);
1199  SDValue Result;
1200 
1201  if (It != FuncInfo.ValueMap.end()) {
1202  unsigned InReg = It->second;
1203 
1204  RegsForValue RFV(*DAG.getContext(), DAG.getTargetLoweringInfo(),
1205  DAG.getDataLayout(), InReg, Ty,
1206  None); // This is not an ABI copy.
 // The copy is chained to the DAG entry node, and no flag is requested
 // (the nullptr argument).
1207  SDValue Chain = DAG.getEntryNode();
1208  Result = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr,
1209  V);
1210  resolveDanglingDebugInfo(V, Result);
1211  }
1212 
1213  return Result;
1214 }
1215 
1216 /// getValue - Return an SDValue for the given Value. Lookup order: the
/// NodeMap cache, then a copy from a recorded virtual register, then a freshly
/// built value from getValueImpl (which is cached and has its dangling debug
/// info resolved).
/// NOTE(review): the signature line is not present in this listing.
1218  // If we already have an SDValue for this value, use it. It's important
1219  // to do this first, so that we don't create a CopyFromReg if we already
1220  // have a regular SDValue.
1221  SDValue &N = NodeMap[V];
1222  if (N.getNode()) return N;
1223 
1224  // If there's a virtual register allocated and initialized for this
1225  // value, use it.
 // Note that a value obtained this way is returned without being stored in
 // NodeMap.
1226  if (SDValue copyFromReg = getCopyFromRegs(V, V->getType()))
1227  return copyFromReg;
1228 
1229  // Otherwise create a new SDValue and remember it.
1230  SDValue Val = getValueImpl(V);
 // NodeMap is indexed again here instead of assigning through N.
1231  NodeMap[V] = Val;
1232  resolveDanglingDebugInfo(V, Val);
1233  return Val;
1234 }
1235 
1236 // Return true if V has an entry in NodeMap or in FuncInfo.ValueMap. Only
// key presence is tested; the mapped values are not inspected.
// NOTE(review): the signature line is not present in this listing.
1238  return (NodeMap.find(V) != NodeMap.end()) ||
1239  (FuncInfo.ValueMap.find(V) != FuncInfo.ValueMap.end());
1240 }
1241 
1242 /// getNonRegisterValue - Return an SDValue for the given Value, but
1243 /// don't look in FuncInfo.ValueMap for a virtual register. A cached integer
/// or floating-point constant node has its debug location cleared before it is
/// returned.
/// NOTE(review): the signature line is not present in this listing.
1245  // If we already have an SDValue for this value, use it.
1246  SDValue &N = NodeMap[V];
1247  if (N.getNode()) {
1248  if (isa<ConstantSDNode>(N) || isa<ConstantFPSDNode>(N)) {
1249  // Remove the debug location from the node as the node is about to be used
1250  // in a location which may differ from the original debug location. This
1251  // is relevant to Constant and ConstantFP nodes because they can appear
1252  // as constant expressions inside PHI nodes.
1253  N->setDebugLoc(DebugLoc());
1254  }
1255  return N;
1256  }
1257 
1258  // Otherwise create a new SDValue and remember it.
1259  SDValue Val = getValueImpl(V);
1260  NodeMap[V] = Val;
1261  resolveDanglingDebugInfo(V, Val);
1262  return Val;
1263 }
1264 
1265 /// getValueImpl - Helper function for getValue and getNonRegisterValue.
1266 /// Create an SDValue for the given value. Constants are handled by kind,
/// static allocas become frame indices, and any other instruction is read
/// back from a freshly initialized virtual register. Any other kind of value
/// reaches llvm_unreachable.
/// NOTE(review): this listing omits the signature line and several
/// declaration lines (the Constants, Ops and ValueVTs vectors, the Offsets
/// list is not used here, and the type of SI); those names are used below
/// without a visible declaration.
1268  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1269 
1270  if (const Constant *C = dyn_cast<Constant>(V)) {
1271  EVT VT = TLI.getValueType(DAG.getDataLayout(), V->getType(), true);
1272 
1273  if (const ConstantInt *CI = dyn_cast<ConstantInt>(C))
1274  return DAG.getConstant(*CI, getCurSDLoc(), VT);
1275 
1276  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
1277  return DAG.getGlobalAddress(GV, getCurSDLoc(), VT);
1278 
 // A null pointer is a zero constant of the pointer type for its own
 // address space.
1279  if (isa<ConstantPointerNull>(C)) {
1280  unsigned AS = V->getType()->getPointerAddressSpace();
1281  return DAG.getConstant(0, getCurSDLoc(),
1282  TLI.getPointerTy(DAG.getDataLayout(), AS));
1283  }
1284 
1285  if (const ConstantFP *CFP = dyn_cast<ConstantFP>(C))
1286  return DAG.getConstantFP(*CFP, getCurSDLoc(), VT);
1287 
 // Aggregate undef values are not handled here; they fall through to the
 // struct/array case further down.
1288  if (isa<UndefValue>(C) && !V->getType()->isAggregateType())
1289  return DAG.getUNDEF(VT);
1290 
 // Constant expressions are lowered by the opcode visitor, which is expected
 // to record its result in NodeMap under V.
1291  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
1292  visit(CE->getOpcode(), *CE);
1293  SDValue N1 = NodeMap[V];
1294  assert(N1.getNode() && "visit didn't populate the NodeMap!");
1295  return N1;
1296  }
1297 
1298  if (isa<ConstantStruct>(C) || isa<ConstantArray>(C)) {
1300  for (User::const_op_iterator OI = C->op_begin(), OE = C->op_end();
1301  OI != OE; ++OI) {
1302  SDNode *Val = getValue(*OI).getNode();
1303  // If the operand is an empty aggregate, there are no values.
1304  if (!Val) continue;
1305  // Add each leaf value from the operand to the Constants list
1306  // to form a flattened list of all the values.
1307  for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1308  Constants.push_back(SDValue(Val, i));
1309  }
1310 
1311  return DAG.getMergeValues(Constants, getCurSDLoc());
1312  }
1313 
1314  if (const ConstantDataSequential *CDS =
1315  dyn_cast<ConstantDataSequential>(C)) {
1317  for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) {
1318  SDNode *Val = getValue(CDS->getElementAsConstant(i)).getNode();
1319  // Add each leaf value from the operand to the Constants list
1320  // to form a flattened list of all the values.
 // The inner i and e shadow the outer loop's variables of the same name.
1321  for (unsigned i = 0, e = Val->getNumValues(); i != e; ++i)
1322  Ops.push_back(SDValue(Val, i));
1323  }
1324 
 // Array-typed data merges its elements; any other sequential type is
 // turned into a build-vector, which is also cached in NodeMap.
1325  if (isa<ArrayType>(CDS->getType()))
1326  return DAG.getMergeValues(Ops, getCurSDLoc());
1327  return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
1328  }
1329 
1330  if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
1331  assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
1332  "Unknown struct or array constant!");
1333 
1335  ComputeValueVTs(TLI, DAG.getDataLayout(), C->getType(), ValueVTs);
1336  unsigned NumElts = ValueVTs.size();
1337  if (NumElts == 0)
1338  return SDValue(); // empty struct
 // One leaf per flattened element type: undef for an undef aggregate,
 // otherwise a floating-point or integer zero.
1340  for (unsigned i = 0; i != NumElts; ++i) {
1341  EVT EltVT = ValueVTs[i];
1342  if (isa<UndefValue>(C))
1343  Constants[i] = DAG.getUNDEF(EltVT);
1344  else if (EltVT.isFloatingPoint())
1345  Constants[i] = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
1346  else
1347  Constants[i] = DAG.getConstant(0, getCurSDLoc(), EltVT);
1348  }
1349 
1350  return DAG.getMergeValues(Constants, getCurSDLoc());
1351  }
1352 
1353  if (const BlockAddress *BA = dyn_cast<BlockAddress>(C))
1354  return DAG.getBlockAddress(BA, VT);
1355 
 // Every remaining constant is required to be vector-typed; cast<> asserts
 // that.
1356  VectorType *VecTy = cast<VectorType>(V->getType());
1357  unsigned NumElements = VecTy->getNumElements();
1358 
1359  // Now that we know the number and type of the elements, get that number of
1360  // elements into the Ops array based on what kind of constant it is.
1362  if (const ConstantVector *CV = dyn_cast<ConstantVector>(C)) {
1363  for (unsigned i = 0; i != NumElements; ++i)
1364  Ops.push_back(getValue(CV->getOperand(i)));
1365  } else {
1366  assert(isa<ConstantAggregateZero>(C) && "Unknown vector constant!");
1367  EVT EltVT =
1368  TLI.getValueType(DAG.getDataLayout(), VecTy->getElementType());
1369 
1370  SDValue Op;
1371  if (EltVT.isFloatingPoint())
1372  Op = DAG.getConstantFP(0, getCurSDLoc(), EltVT);
1373  else
1374  Op = DAG.getConstant(0, getCurSDLoc(), EltVT);
1375  Ops.assign(NumElements, Op);
1376  }
1377 
1378  // Create a BUILD_VECTOR node.
1379  return NodeMap[V] = DAG.getBuildVector(VT, getCurSDLoc(), Ops);
1380  }
1381 
1382  // If this is a static alloca, generate it as the frameindex instead of
1383  // computation.
1384  if (const AllocaInst *AI = dyn_cast<AllocaInst>(V)) {
1386  FuncInfo.StaticAllocaMap.find(AI);
 // An alloca missing from StaticAllocaMap falls through to the generic
 // instruction path below.
1387  if (SI != FuncInfo.StaticAllocaMap.end())
1388  return DAG.getFrameIndex(SI->second,
1389  TLI.getFrameIndexTy(DAG.getDataLayout()));
1390  }
1391 
1392  // If this is an instruction which fast-isel has deferred, select it now.
1393  if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
1394  unsigned InReg = FuncInfo.InitializeRegForValue(Inst);
1395 
1396  RegsForValue RFV(*DAG.getContext(), TLI, DAG.getDataLayout(), InReg,
1397  Inst->getType(), getABIRegCopyCC(V));
1398  SDValue Chain = DAG.getEntryNode();
1399  return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, nullptr, V);
1400  }
1401 
1402  llvm_unreachable("Can't get register for value!");
1403 }
1404 
1405 void SelectionDAGBuilder::visitCatchPad(const CatchPadInst &I) {
1406  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1407  bool IsMSVCCXX = Pers == EHPersonality::MSVC_CXX;
1408  bool IsCoreCLR = Pers == EHPersonality::CoreCLR;
1409  bool IsSEH = isAsynchronousEHPersonality(Pers);
1410  bool IsWasmCXX = Pers == EHPersonality::Wasm_CXX;
1411  MachineBasicBlock *CatchPadMBB = FuncInfo.MBB;
1412  if (!IsSEH)
1413  CatchPadMBB->setIsEHScopeEntry();
1414  // In MSVC C++ and CoreCLR, catchblocks are funclets and need prologues.
1415  if (IsMSVCCXX || IsCoreCLR)
1416  CatchPadMBB->setIsEHFuncletEntry();
1417  // Wasm does not need catchpads anymore
1418  if (!IsWasmCXX)
1419  DAG.setRoot(DAG.getNode(ISD::CATCHPAD, getCurSDLoc(), MVT::Other,
1420  getControlRoot()));
1421 }
1422 
1423 void SelectionDAGBuilder::visitCatchRet(const CatchReturnInst &I) {
1424  // Update machine-CFG edge.
1425  MachineBasicBlock *TargetMBB = FuncInfo.MBBMap[I.getSuccessor()];
1426  FuncInfo.MBB->addSuccessor(TargetMBB);
1427 
1428  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1429  bool IsSEH = isAsynchronousEHPersonality(Pers);
1430  if (IsSEH) {
1431  // If this is not a fall-through branch or optimizations are switched off,
1432  // emit the branch.
1433  if (TargetMBB != NextBlock(FuncInfo.MBB) ||
1434  TM.getOptLevel() == CodeGenOpt::None)
1435  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other,
1436  getControlRoot(), DAG.getBasicBlock(TargetMBB)));
1437  return;
1438  }
1439 
1440  // Figure out the funclet membership for the catchret's successor.
1441  // This will be used by the FuncletLayout pass to determine how to order the
1442  // BB's.
1443  // A 'catchret' returns to the outer scope's color.
1444  Value *ParentPad = I.getCatchSwitchParentPad();
1445  const BasicBlock *SuccessorColor;
1446  if (isa<ConstantTokenNone>(ParentPad))
1447  SuccessorColor = &FuncInfo.Fn->getEntryBlock();
1448  else
1449  SuccessorColor = cast<Instruction>(ParentPad)->getParent();
1450  assert(SuccessorColor && "No parent funclet for catchret!");
1451  MachineBasicBlock *SuccessorColorMBB = FuncInfo.MBBMap[SuccessorColor];
1452  assert(SuccessorColorMBB && "No MBB for SuccessorColor!");
1453 
1454  // Create the terminator node.
1455  SDValue Ret = DAG.getNode(ISD::CATCHRET, getCurSDLoc(), MVT::Other,
1456  getControlRoot(), DAG.getBasicBlock(TargetMBB),
1457  DAG.getBasicBlock(SuccessorColorMBB));
1458  DAG.setRoot(Ret);
1459 }
1460 
1461 void SelectionDAGBuilder::visitCleanupPad(const CleanupPadInst &CPI) {
1462  // Don't emit any special code for the cleanuppad instruction. It just marks
1463  // the start of an EH scope/funclet.
1464  FuncInfo.MBB->setIsEHScopeEntry();
1465  auto Pers = classifyEHPersonality(FuncInfo.Fn->getPersonalityFn());
1466  if (Pers != EHPersonality::Wasm_CXX) {
1467  FuncInfo.MBB->setIsEHFuncletEntry();
1468  FuncInfo.MBB->setIsCleanupFuncletEntry();
1469  }
1470 }
1471 
1472 /// When an invoke or a cleanupret unwinds to the next EH pad, there are
1473 /// many places it could ultimately go. In the IR, we have a single unwind
1474 /// destination, but in the machine CFG, we enumerate all the possible blocks.
1475 /// This function skips over imaginary basic blocks that hold catchswitch
1476 /// instructions, and finds all the "real" machine
1477 /// basic block destinations. As those destinations may not be successors of
1478 /// EHPadBB, here we also calculate the edge probability to those destinations.
1479 /// The passed-in Prob is the edge probability to EHPadBB.
/// Results are appended to UnwindDests as (machine block, probability) pairs;
/// the destination blocks are also marked as EH scope / funclet entries here.
/// NOTE(review): this listing omits the opening line of the signature and the
/// line that completes the Personality initializer.
1481  FunctionLoweringInfo &FuncInfo, const BasicBlock *EHPadBB,
1482  BranchProbability Prob,
1483  SmallVectorImpl<std::pair<MachineBasicBlock *, BranchProbability>>
1484  &UnwindDests) {
1485  EHPersonality Personality =
1487  bool IsMSVCCXX = Personality == EHPersonality::MSVC_CXX;
1488  bool IsCoreCLR = Personality == EHPersonality::CoreCLR;
1489  bool IsWasmCXX = Personality == EHPersonality::Wasm_CXX;
1490  bool IsSEH = isAsynchronousEHPersonality(Personality);
1491 
 // Follow the chain of catchswitch unwind destinations until a landingpad or
 // cleanuppad is reached, or the chain ends (EHPadBB becomes null).
1492  while (EHPadBB) {
1493  const Instruction *Pad = EHPadBB->getFirstNonPHI();
1494  BasicBlock *NewEHPadBB = nullptr;
1495  if (isa<LandingPadInst>(Pad)) {
1496  // Stop on landingpads. They are not funclets.
1497  UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
1498  break;
1499  } else if (isa<CleanupPadInst>(Pad)) {
1500  // Stop on cleanup pads. Cleanups are always funclet entries for all known
1501  // personalities.
1502  UnwindDests.emplace_back(FuncInfo.MBBMap[EHPadBB], Prob);
1503  UnwindDests.back().first->setIsEHScopeEntry();
1504  if (!IsWasmCXX)
1505  UnwindDests.back().first->setIsEHFuncletEntry();
1506  break;
1507  } else if (auto *CatchSwitch = dyn_cast<CatchSwitchInst>(Pad)) {
1508  // Add the catchpad handlers to the possible destinations.
1509  for (const BasicBlock *CatchPadBB : CatchSwitch->handlers()) {
1510  UnwindDests.emplace_back(FuncInfo.MBBMap[CatchPadBB], Prob);
1511  // For MSVC++ and the CLR, catchblocks are funclets and need prologues.
1512  if (IsMSVCCXX || IsCoreCLR)
1513  UnwindDests.back().first->setIsEHFuncletEntry();
1514  if (!IsSEH)
1515  UnwindDests.back().first->setIsEHScopeEntry();
1516  }
1517  NewEHPadBB = CatchSwitch->getUnwindDest();
1518  } else {
 // NOTE(review): EHPadBB is only reassigned at the bottom of the loop, so
 // this `continue` re-tests the same block. A pad of any other kind would
 // never let the loop exit. Confirm such pads cannot reach this function.
1519  continue;
1520  }
1521 
 // Scale the running probability by the edge into the next pad; without BPI
 // the probability is passed through unchanged.
1522  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1523  if (BPI && NewEHPadBB)
1524  Prob *= BPI->getEdgeProbability(EHPadBB, NewEHPadBB);
1525  EHPadBB = NewEHPadBB;
1526  }
1527 }
1528 
/// Lower a cleanupret: add every machine block it can unwind to as a
/// successor of the current block (marking each as an EH pad), normalize the
/// successor probabilities, and make a CLEANUPRET node the new DAG root.
/// NOTE(review): this listing omits the declaration of UnwindDests and the
/// false arm of the UnwindDestProb conditional.
1529 void SelectionDAGBuilder::visitCleanupRet(const CleanupReturnInst &I) {
1530  // Update successor info.
1532  auto UnwindDest = I.getUnwindDest();
1533  BranchProbabilityInfo *BPI = FuncInfo.BPI;
 // The BPI lookup is used only when both BPI and an unwind destination
 // exist.
1534  BranchProbability UnwindDestProb =
1535  (BPI && UnwindDest)
1536  ? BPI->getEdgeProbability(FuncInfo.MBB->getBasicBlock(), UnwindDest)
1538  findUnwindDestinations(FuncInfo, UnwindDest, UnwindDestProb, UnwindDests);
 // The loop variable below shadows the outer UnwindDest; inside the loop it
 // is a (machine block, probability) pair.
1539  for (auto &UnwindDest : UnwindDests) {
1540  UnwindDest.first->setIsEHPad();
1541  addSuccessorWithProb(FuncInfo.MBB, UnwindDest.first, UnwindDest.second);
1542  }
1543  FuncInfo.MBB->normalizeSuccProbs();
1544 
1545  // Create the terminator node.
1546  SDValue Ret =
1547  DAG.getNode(ISD::CLEANUPRET, getCurSDLoc(), MVT::Other, getControlRoot());
1548  DAG.setRoot(Ret);
1549 }
1550 
/// Lowering of catchswitch is not implemented; reaching this handler reports
/// a fatal error. The instruction argument is unused.
1551 void SelectionDAGBuilder::visitCatchSwitch(const CatchSwitchInst &CSI) {
1552  report_fatal_error("visitCatchSwitch not yet implemented!");
1553 }
1554 
/// Lower a return instruction. Depending on FuncInfo.CanLowerReturn the
/// returned value is either stored through the demote register or split into
/// register parts collected in Outs/OutVals; the target's LowerReturn then
/// produces the chain that becomes the new DAG root.
/// NOTE(review): this listing omits several source lines of this function
/// (among them the declarations of Outs, ValueVTs and Offsets, the condition
/// guarding LowerDeoptimizingReturn, the middle arguments of the first
/// ComputeValueVTs call, the tail of the getStore call and the initializer of
/// CallConv).
1555 void SelectionDAGBuilder::visitRet(const ReturnInst &I) {
1556  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
1557  auto &DL = DAG.getDataLayout();
1558  SDValue Chain = getControlRoot();
1560  SmallVector<SDValue, 8> OutVals;
1561 
1562  // Calls to @llvm.experimental.deoptimize don't generate a return value, so
1563  // lower
1564  //
1565  // %val = call <ty> @llvm.experimental.deoptimize()
1566  // ret <ty> %val
1567  //
1568  // differently.
1570  LowerDeoptimizingReturn();
1571  return;
1572  }
1573 
1574  if (!FuncInfo.CanLowerReturn) {
1575  unsigned DemoteReg = FuncInfo.DemoteRegister;
1576  const Function *F = I.getParent()->getParent();
1577 
1578  // Emit a store of the return value through the virtual register.
1579  // Leave Outs empty so that LowerReturn won't try to load return
1580  // registers the usual way.
1581  SmallVector<EVT, 1> PtrValueVTs;
1582  ComputeValueVTs(TLI, DL,
1585  PtrValueVTs);
1586 
1587  SDValue RetPtr = DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(),
1588  DemoteReg, PtrValueVTs[0]);
1589  SDValue RetOp = getValue(I.getOperand(0));
1590 
1593  ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs, &Offsets);
1594  unsigned NumValues = ValueVTs.size();
1595 
 // One store per flattened leaf value, each at its own offset from RetPtr
 // and each chained to the incoming Chain; the stores are joined below.
1596  SmallVector<SDValue, 4> Chains(NumValues);
1597  for (unsigned i = 0; i != NumValues; ++i) {
1598  // An aggregate return value cannot wrap around the address space, so
1599  // offsets to its parts don't wrap either.
1600  SDValue Ptr = DAG.getObjectPtrOffset(getCurSDLoc(), RetPtr, Offsets[i]);
1601  Chains[i] = DAG.getStore(
1602  Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i),
1603  // FIXME: better loc info would be nice.
1605  }
1606 
1607  Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(),
1608  MVT::Other, Chains);
1609  } else if (I.getNumOperands() != 0) {
1611  ComputeValueVTs(TLI, DL, I.getOperand(0)->getType(), ValueVTs);
1612  unsigned NumValues = ValueVTs.size();
1613  if (NumValues) {
1614  SDValue RetOp = getValue(I.getOperand(0));
1615 
1616  const Function *F = I.getParent()->getParent();
1617 
 // The function's return attributes choose the extension kind; SExt is
 // checked before ZExt.
1618  ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
1619  if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1620  Attribute::SExt))
1621  ExtendKind = ISD::SIGN_EXTEND;
1622  else if (F->getAttributes().hasAttribute(AttributeList::ReturnIndex,
1623  Attribute::ZExt))
1624  ExtendKind = ISD::ZERO_EXTEND;
1625 
1626  LLVMContext &Context = F->getContext();
1627  bool RetInReg = F->getAttributes().hasAttribute(
1628  AttributeList::ReturnIndex, Attribute::InReg);
1629 
1630  for (unsigned j = 0; j != NumValues; ++j) {
1631  EVT VT = ValueVTs[j];
1632 
 // Only integer values with an explicit extension are widened to the
 // target's extended-return type.
1633  if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger())
1634  VT = TLI.getTypeForExtReturn(Context, VT, ExtendKind);
1635 
1636  CallingConv::ID CC = F->getCallingConv();
1637 
1638  unsigned NumParts = TLI.getNumRegistersForCallingConv(Context, CC, VT);
1639  MVT PartVT = TLI.getRegisterTypeForCallingConv(Context, CC, VT);
1640  SmallVector<SDValue, 4> Parts(NumParts);
1641  getCopyToParts(DAG, getCurSDLoc(),
1642  SDValue(RetOp.getNode(), RetOp.getResNo() + j),
1643  &Parts[0], NumParts, PartVT, &I, CC, ExtendKind);
1644 
1645  // 'inreg' on function refers to return value
1646  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1647  if (RetInReg)
1648  Flags.setInReg();
1649 
1650  // Propagate extension type if any
1651  if (ExtendKind == ISD::SIGN_EXTEND)
1652  Flags.setSExt();
1653  else if (ExtendKind == ISD::ZERO_EXTEND)
1654  Flags.setZExt();
1655 
 // Outs and OutVals grow in lockstep: one descriptor and one value per
 // part.
1656  for (unsigned i = 0; i < NumParts; ++i) {
1657  Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(),
1658  VT, /*isfixed=*/true, 0, 0));
1659  OutVals.push_back(Parts[i]);
1660  }
1661  }
1662  }
1663  }
1664 
1665  // Push in swifterror virtual register as the last element of Outs. This makes
1666  // sure swifterror virtual register will be returned in the swifterror
1667  // physical register.
1668  const Function *F = I.getParent()->getParent();
1669  if (TLI.supportSwiftError() &&
1670  F->getAttributes().hasAttrSomewhere(Attribute::SwiftError)) {
1671  assert(FuncInfo.SwiftErrorArg && "Need a swift error argument");
1672  ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
1673  Flags.setSwiftError();
1674  Outs.push_back(ISD::OutputArg(Flags, EVT(TLI.getPointerTy(DL)) /*vt*/,
1675  EVT(TLI.getPointerTy(DL)) /*argvt*/,
1676  true /*isfixed*/, 1 /*origidx*/,
1677  0 /*partOffs*/));
1678  // Create SDNode for the swifterror virtual register.
1679  OutVals.push_back(
1680  DAG.getRegister(FuncInfo.getOrCreateSwiftErrorVRegUseAt(
1681  &I, FuncInfo.MBB, FuncInfo.SwiftErrorArg).first,
1682  EVT(TLI.getPointerTy(DL))));
1683  }
1684 
1685  bool isVarArg = DAG.getMachineFunction().getFunction().isVarArg();
1686  CallingConv::ID CallConv =
1688  Chain = DAG.getTargetLoweringInfo().LowerReturn(
1689  Chain, CallConv, isVarArg, Outs, OutVals, getCurSDLoc(), DAG);
1690 
1691  // Verify that the target's LowerReturn behaved as expected.
1692  assert(Chain.getNode() && Chain.getValueType() == MVT::Other &&
1693  "LowerReturn didn't return a valid chain!");
1694 
1695  // Update the DAG with the new chain value resulting from return lowering.
1696  DAG.setRoot(Chain);
1697 }
1698 
1699 /// CopyToExportRegsIfNeeded - If the given value has virtual registers
1700 /// created for it, emit nodes to copy the value into the virtual
1701 /// registers. Values of empty type and values with no entry in
/// FuncInfo.ValueMap are left alone.
/// NOTE(review): the signature line is not present in this listing.
1703  // Skip empty types
1704  if (V->getType()->isEmptyTy())
1705  return;
1706 
1707  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
1708  if (VMI != FuncInfo.ValueMap.end()) {
1709  assert(!V->use_empty() && "Unused value assigned virtual registers!");
1710  CopyValueToVirtualRegister(V, VMI->second);
1711  }
1712 }
1713 
1714 /// ExportFromCurrentBlock - If this condition isn't known to be exported from
1715 /// the current basic block, add it to ValueMap now so that we'll get a
1716 /// CopyTo/FromReg. Only instructions and arguments are exported; a value that
/// FuncInfo already reports as exported is left untouched.
/// NOTE(review): the signature line is not present in this listing.
1718  // No need to export constants.
1719  if (!isa<Instruction>(V) && !isa<Argument>(V)) return;
1720 
1721  // Already exported?
1722  if (FuncInfo.isExportedInst(V)) return;
1723 
 // Allocate a register for V and emit the copy into it.
1724  unsigned Reg = FuncInfo.InitializeRegForValue(V);
1725  CopyValueToVirtualRegister(V, Reg);
1726 }
1727 
1729  const BasicBlock *FromBB) {
 // Purpose: report whether V can be exported from FromBB. Instructions
 // qualify when they live in FromBB or are already exported; arguments qualify
 // when FromBB is the entry block or they are already exported; every other
 // kind of value qualifies unconditionally.
 // NOTE(review): the opening line of this signature (declaring V) is not
 // present in this listing.
1730  // The operands of the setcc have to be in this block. We don't know
1731  // how to export them from some other block.
1732  if (const Instruction *VI = dyn_cast<Instruction>(V)) {
1733  // Can export from current BB.
1734  if (VI->getParent() == FromBB)
1735  return true;
1736 
1737  // Is already exported, noop.
1738  return FuncInfo.isExportedInst(V);
1739  }
1740 
1741  // If this is an argument, we can export it if the BB is the entry block or
1742  // if it is already exported.
1743  if (isa<Argument>(V)) {
1744  if (FromBB == &FromBB->getParent()->getEntryBlock())
1745  return true;
1746 
1747  // Otherwise, can only export this if it is already exported.
1748  return FuncInfo.isExportedInst(V);
1749  }
1750 
1751  // Otherwise, constants can always be exported.
1752  return true;
1753 }
1754 
1755 /// Return branch probability calculated by BranchProbabilityInfo for IR blocks.
// The machine blocks are mapped back to their IR basic blocks before the
// query, so the result describes the IR-level edge Src -> Dst.
// NOTE(review): listing number 1756 is absent; the line that would hold this
// function's return type appears to be missing from this extract.
1757 SelectionDAGBuilder::getEdgeProbability(const MachineBasicBlock *Src,
1758  const MachineBasicBlock *Dst) const {
1759  BranchProbabilityInfo *BPI = FuncInfo.BPI;
1760  const BasicBlock *SrcBB = Src->getBasicBlock();
1761  const BasicBlock *DstBB = Dst->getBasicBlock();
1762  if (!BPI) {
1763  // If BPI is not available, set the default probability as 1 / N, where N is
1764  // the number of successors.
// The max with 1 keeps the denominator non-zero when SrcBB has no successors.
1765  auto SuccSize = std::max<uint32_t>(succ_size(SrcBB), 1);
1766  return BranchProbability(1, SuccSize);
1767  }
1768  return BPI->getEdgeProbability(SrcBB, DstBB);
1769 }
1770 
1771 void SelectionDAGBuilder::addSuccessorWithProb(MachineBasicBlock *Src,
1772  MachineBasicBlock *Dst,
1773  BranchProbability Prob) {
1774  if (!FuncInfo.BPI)
1775  Src->addSuccessorWithoutProb(Dst);
1776  else {
1777  if (Prob.isUnknown())
1778  Prob = getEdgeProbability(Src, Dst);
1779  Src->addSuccessor(Dst, Prob);
1780  }
1781 }
1782 
1783 static bool InBlock(const Value *V, const BasicBlock *BB) {
1784  if (const Instruction *I = dyn_cast<Instruction>(V))
1785  return I->getParent() == BB;
1786  return true;
1787 }
1788 
1789 /// EmitBranchForMergedCondition - Helper method for FindMergedConditions.
1790 /// This function emits a branch and is used at the leaves of an OR or an
1791 /// AND operator tree.
// The branch is not emitted directly: a CaseBlock describing it is appended
// to SwitchCases. When InvertCond is set, the comparison predicate (or the
// SETEQ/SETNE test against 'true') is inverted.
// NOTE(review): listing number 1793 is absent; the line with the function
// name and the declaration of Cond appears to be missing from this extract.
1792 void
1794  MachineBasicBlock *TBB,
1795  MachineBasicBlock *FBB,
1796  MachineBasicBlock *CurBB,
1797  MachineBasicBlock *SwitchBB,
1798  BranchProbability TProb,
1799  BranchProbability FProb,
1800  bool InvertCond) {
1801  const BasicBlock *BB = CurBB->getBasicBlock();
1802 
1803  // If the leaf of the tree is a comparison, merge the condition into
1804  // the caseblock.
1805  if (const CmpInst *BOp = dyn_cast<CmpInst>(Cond)) {
1806  // The operands of the cmp have to be in this block. We don't know
1807  // how to export them from some other block. If this is the first block
1808  // of the sequence, no exporting is needed.
1809  if (CurBB == SwitchBB ||
1810  (isExportableFromCurrentBlock(BOp->getOperand(0), BB) &&
1811  isExportableFromCurrentBlock(BOp->getOperand(1), BB))) {
1812  ISD::CondCode Condition;
1813  if (const ICmpInst *IC = dyn_cast<ICmpInst>(Cond)) {
1814  ICmpInst::Predicate Pred =
1815  InvertCond ? IC->getInversePredicate() : IC->getPredicate();
1816  Condition = getICmpCondCode(Pred);
1817  } else {
// A CmpInst that is not an ICmpInst is treated as an FCmpInst (cast<>).
1818  const FCmpInst *FC = cast<FCmpInst>(Cond);
1819  FCmpInst::Predicate Pred =
1820  InvertCond ? FC->getInversePredicate() : FC->getPredicate();
1821  Condition = getFCmpCondCode(Pred);
1822  if (TM.Options.NoNaNsFPMath)
1823  Condition = getFCmpCodeWithoutNaN(Condition);
1824  }
1825 
1826  CaseBlock CB(Condition, BOp->getOperand(0), BOp->getOperand(1), nullptr,
1827  TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1828  SwitchCases.push_back(CB);
1829  return;
1830  }
1831  }
1832 
// Fallback for non-compare leaves and for compares whose operands cannot be
// exported: test the condition value itself against the constant 'true'.
1833  // Create a CaseBlock record representing this branch.
1834  ISD::CondCode Opc = InvertCond ? ISD::SETNE : ISD::SETEQ;
1835  CaseBlock CB(Opc, Cond, ConstantInt::getTrue(*DAG.getContext()),
1836  nullptr, TBB, FBB, CurBB, getCurSDLoc(), TProb, FProb);
1837  SwitchCases.push_back(CB);
1838 }
1839 
// Recursively walk a tree of and/or (and one-use 'not') operations rooted at
// Cond and lower it into a chain of conditional branches. Leaves are handed to
// EmitBranchForMergedCondition; each interior and/or node gets a new block
// (TmpBB) inserted after CurBB to hold the right-hand side test.
// NOTE(review): listing numbers 1840, 1845 and 1890 are absent. The first
// signature line (name and declaration of Cond), the declaration of the Opc
// parameter, and the statement that defines TmpBB appear to be missing from
// this extract.
1841  MachineBasicBlock *TBB,
1842  MachineBasicBlock *FBB,
1843  MachineBasicBlock *CurBB,
1844  MachineBasicBlock *SwitchBB,
1846  BranchProbability TProb,
1847  BranchProbability FProb,
1848  bool InvertCond) {
1849  // Skip over not part of the tree and remember to invert op and operands at
1850  // next level.
1851  Value *NotCond;
1852  if (match(Cond, m_OneUse(m_Not(m_Value(NotCond)))) &&
1853  InBlock(NotCond, CurBB->getBasicBlock())) {
1854  FindMergedConditions(NotCond, TBB, FBB, CurBB, SwitchBB, Opc, TProb, FProb,
1855  !InvertCond);
1856  return;
1857  }
1858 
1859  const Instruction *BOp = dyn_cast<Instruction>(Cond);
1860  // Compute the effective opcode for Cond, taking into account whether it needs
1861  // to be inverted, e.g.
1862  // and (not (or A, B)), C
1863  // gets lowered as
1864  // and (and (not A, not B), C)
1865  unsigned BOpc = 0;
1866  if (BOp) {
1867  BOpc = BOp->getOpcode();
1868  if (InvertCond) {
// Under inversion And and Or swap roles (De Morgan).
1869  if (BOpc == Instruction::And)
1870  BOpc = Instruction::Or;
1871  else if (BOpc == Instruction::Or)
1872  BOpc = Instruction::And;
1873  }
1874  }
1875 
1876  // If this node is not part of the or/and tree, emit it as a branch.
// A node stays in the tree only if it is an instruction with the same
// effective opcode as Opc, has a single use, lives in CurBB's IR block, and
// both of its operands pass InBlock for that block.
1877  if (!BOp || !(isa<BinaryOperator>(BOp) || isa<CmpInst>(BOp)) ||
1878  BOpc != unsigned(Opc) || !BOp->hasOneUse() ||
1879  BOp->getParent() != CurBB->getBasicBlock() ||
1880  !InBlock(BOp->getOperand(0), CurBB->getBasicBlock()) ||
1881  !InBlock(BOp->getOperand(1), CurBB->getBasicBlock())) {
1882  EmitBranchForMergedCondition(Cond, TBB, FBB, CurBB, SwitchBB,
1883  TProb, FProb, InvertCond);
1884  return;
1885  }
1886 
1887  // Create TmpBB after CurBB.
1888  MachineFunction::iterator BBI(CurBB);
1889  MachineFunction &MF = DAG.getMachineFunction();
1891  CurBB->getParent()->insert(++BBI, TmpBB);
1892 
1893  if (Opc == Instruction::Or) {
1894  // Codegen X | Y as:
1895  // BB1:
1896  // jmp_if_X TBB
1897  // jmp TmpBB
1898  // TmpBB:
1899  // jmp_if_Y TBB
1900  // jmp FBB
1901  //
1902 
1903  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1904  // The requirement is that
1905  // TrueProb for BB1 + (FalseProb for BB1 * TrueProb for TmpBB)
1906  // = TrueProb for original BB.
1907  // Assuming the original probabilities are A and B, one choice is to set
1908  // BB1's probabilities to A/2 and A/2+B, and set TmpBB's probabilities to
1909  // A/(1+B) and 2B/(1+B). This choice assumes that
1910  // TrueProb for BB1 == FalseProb for BB1 * TrueProb for TmpBB.
1911  // Another choice is to assume TrueProb for BB1 equals to TrueProb for
1912  // TmpBB, but the math is more complicated.
1913 
1914  auto NewTrueProb = TProb / 2;
1915  auto NewFalseProb = TProb / 2 + FProb;
1916  // Emit the LHS condition.
1917  FindMergedConditions(BOp->getOperand(0), TBB, TmpBB, CurBB, SwitchBB, Opc,
1918  NewTrueProb, NewFalseProb, InvertCond);
1919 
1920  // Normalize A/2 and B to get A/(1+B) and 2B/(1+B).
1921  SmallVector<BranchProbability, 2> Probs{TProb / 2, FProb};
1922  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1923  // Emit the RHS condition into TmpBB.
1924  FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1925  Probs[0], Probs[1], InvertCond);
1926  } else {
1927  assert(Opc == Instruction::And && "Unknown merge op!");
1928  // Codegen X & Y as:
1929  // BB1:
1930  // jmp_if_X TmpBB
1931  // jmp FBB
1932  // TmpBB:
1933  // jmp_if_Y TBB
1934  // jmp FBB
1935  //
1936  // This requires creation of TmpBB after CurBB.
1937 
1938  // We have flexibility in setting Prob for BB1 and Prob for TmpBB.
1939  // The requirement is that
1940  // FalseProb for BB1 + (TrueProb for BB1 * FalseProb for TmpBB)
1941  // = FalseProb for original BB.
1942  // Assuming the original probabilities are A and B, one choice is to set
1943  // BB1's probabilities to A+B/2 and B/2, and set TmpBB's probabilities to
1944  // 2A/(1+A) and B/(1+A). This choice assumes that FalseProb for BB1 ==
1945  // TrueProb for BB1 * FalseProb for TmpBB.
1946 
1947  auto NewTrueProb = TProb + FProb / 2;
1948  auto NewFalseProb = FProb / 2;
1949  // Emit the LHS condition.
1950  FindMergedConditions(BOp->getOperand(0), TmpBB, FBB, CurBB, SwitchBB, Opc,
1951  NewTrueProb, NewFalseProb, InvertCond);
1952 
1953  // Normalize A and B/2 to get 2A/(1+A) and B/(1+A).
1954  SmallVector<BranchProbability, 2> Probs{TProb, FProb / 2};
1955  BranchProbability::normalizeProbabilities(Probs.begin(), Probs.end());
1956  // Emit the RHS condition into TmpBB.
1957  FindMergedConditions(BOp->getOperand(1), TBB, FBB, TmpBB, SwitchBB, Opc,
1958  Probs[0], Probs[1], InvertCond);
1959  }
1960 }
1961 
1962 /// If the set of cases should be emitted as a series of branches, return true.
1963 /// If we should emit this as a bunch of and/or'd together conditions, return
1964 /// false.
1965 bool
1966 SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector<CaseBlock> &Cases) {
1967  if (Cases.size() != 2) return true;
1968 
1969  // If this is two comparisons of the same values or'd or and'd together, they
1970  // will get folded into a single comparison, so don't emit two blocks.
1971  if ((Cases[0].CmpLHS == Cases[1].CmpLHS &&
1972  Cases[0].CmpRHS == Cases[1].CmpRHS) ||
1973  (Cases[0].CmpRHS == Cases[1].CmpLHS &&
1974  Cases[0].CmpLHS == Cases[1].CmpRHS)) {
1975  return false;
1976  }
1977 
1978  // Handle: (X != null) | (Y != null) --> (X|Y) != 0
1979  // Handle: (X == null) & (Y == null) --> (X|Y) == 0
1980  if (Cases[0].CmpRHS == Cases[1].CmpRHS &&
1981  Cases[0].CC == Cases[1].CC &&
1982  isa<Constant>(Cases[0].CmpRHS) &&
1983  cast<Constant>(Cases[0].CmpRHS)->isNullValue()) {
1984  if (Cases[0].CC == ISD::SETEQ && Cases[0].TrueBB == Cases[1].ThisBB)
1985  return false;
1986  if (Cases[0].CC == ISD::SETNE && Cases[0].FalseBB == Cases[1].ThisBB)
1987  return false;
1988  }
1989 
1990  return true;
1991 }
1992 
// Lower an IR branch instruction. Unconditional branches add one CFG edge and
// emit an ISD::BR unless the target is the fall-through block (and
// optimization is enabled). Conditional branches on a one-use and/or are first
// offered to FindMergedConditions; otherwise a single CaseBlock testing the
// condition against 'true' is lowered through visitSwitchCase.
1993 void SelectionDAGBuilder::visitBr(const BranchInst &I) {
1994  MachineBasicBlock *BrMBB = FuncInfo.MBB;
1995 
1996  // Update machine-CFG edges.
1997  MachineBasicBlock *Succ0MBB = FuncInfo.MBBMap[I.getSuccessor(0)];
1998 
1999  if (I.isUnconditional()) {
2000  // Update machine-CFG edges.
2001  BrMBB->addSuccessor(Succ0MBB);
2002 
2003  // If this is not a fall-through branch or optimizations are switched off,
2004  // emit the branch.
2005  if (Succ0MBB != NextBlock(BrMBB) || TM.getOptLevel() == CodeGenOpt::None)
2006  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
2007  MVT::Other, getControlRoot(),
2008  DAG.getBasicBlock(Succ0MBB)));
2009 
2010  return;
2011  }
2012 
2013  // If this condition is one of the special cases we handle, do special stuff
2014  // now.
2015  const Value *CondVal = I.getCondition();
2016  MachineBasicBlock *Succ1MBB = FuncInfo.MBBMap[I.getSuccessor(1)];
2017 
2018  // If this is a series of conditions that are or'd or and'd together, emit
2019  // this as a sequence of branches instead of setcc's with and/or operations.
2020  // As long as jumps are not expensive, this should improve performance.
2021  // For example, instead of something like:
2022  // cmp A, B
2023  // C = seteq
2024  // cmp D, E
2025  // F = setle
2026  // or C, F
2027  // jnz foo
2028  // Emit:
2029  // cmp A, B
2030  // je foo
2031  // cmp D, E
2032  // jle foo
2033  if (const BinaryOperator *BOp = dyn_cast<BinaryOperator>(CondVal)) {
2034  Instruction::BinaryOps Opcode = BOp->getOpcode();
// NOTE(review): listing number 2036 is absent between the next two lines, so
// one conjunct of this condition appears to be missing from this extract.
2035  if (!DAG.getTargetLoweringInfo().isJumpExpensive() && BOp->hasOneUse() &&
2037  (Opcode == Instruction::And || Opcode == Instruction::Or)) {
2038  FindMergedConditions(BOp, Succ0MBB, Succ1MBB, BrMBB, BrMBB,
2039  Opcode,
2040  getEdgeProbability(BrMBB, Succ0MBB),
2041  getEdgeProbability(BrMBB, Succ1MBB),
2042  /*InvertCond=*/false);
2043  // If the compares in later blocks need to use values not currently
2044  // exported from this block, export them now. This block should always
2045  // be the first entry.
2046  assert(SwitchCases[0].ThisBB == BrMBB && "Unexpected lowering!");
2047 
2048  // Allow some cases to be rejected.
2049  if (ShouldEmitAsBranches(SwitchCases)) {
// Entry 0 belongs to this block and is lowered right below; only the
// operands of the later entries need exporting.
2050  for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i) {
2051  ExportFromCurrentBlock(SwitchCases[i].CmpLHS);
2052  ExportFromCurrentBlock(SwitchCases[i].CmpRHS);
2053  }
2054 
2055  // Emit the branch for this block.
2056  visitSwitchCase(SwitchCases[0], BrMBB);
2057  SwitchCases.erase(SwitchCases.begin());
2058  return;
2059  }
2060 
2061  // Okay, we decided not to do this, remove any inserted MBB's and clear
2062  // SwitchCases.
// The loop starts at 1 because entry 0's ThisBB is BrMBB itself (asserted
// above) and must not be erased.
2063  for (unsigned i = 1, e = SwitchCases.size(); i != e; ++i)
2064  FuncInfo.MF->erase(SwitchCases[i].ThisBB);
2065 
2066  SwitchCases.clear();
2067  }
2068  }
2069 
2070  // Create a CaseBlock record representing this branch.
2071  CaseBlock CB(ISD::SETEQ, CondVal, ConstantInt::getTrue(*DAG.getContext()),
2072  nullptr, Succ0MBB, Succ1MBB, BrMBB, getCurSDLoc());
2073 
2074  // Use visitSwitchCase to actually insert the fast branch sequence for this
2075  // cond branch.
2076  visitSwitchCase(CB, BrMBB);
2077 }
2078 
2079 /// visitSwitchCase - Emits the necessary code to represent a single node in
2080 /// the binary search tree resulting from lowering a switch instruction.
// Builds the condition for CB, adds the true/false CFG edges to SwitchBB, and
// sets the DAG root to a BRCOND to the true block followed by a BR to the
// false block.
// NOTE(review): listing number 2081 is absent; the first signature line
// (function name and the declaration of CB) appears to be missing from this
// extract.
2082  MachineBasicBlock *SwitchBB) {
2083  SDValue Cond;
2084  SDValue CondLHS = getValue(CB.CmpLHS);
2085  SDLoc dl = CB.DL;
2086 
2087  // Build the setcc now.
2088  if (!CB.CmpMHS) {
2089  // Fold "(X == true)" to X and "(X == false)" to !X to
2090  // handle common cases produced by branch lowering.
2091  if (CB.CmpRHS == ConstantInt::getTrue(*DAG.getContext()) &&
2092  CB.CC == ISD::SETEQ)
2093  Cond = CondLHS;
2094  else if (CB.CmpRHS == ConstantInt::getFalse(*DAG.getContext()) &&
2095  CB.CC == ISD::SETEQ) {
// !X is materialised as X xor 1.
2096  SDValue True = DAG.getConstant(1, dl, CondLHS.getValueType());
2097  Cond = DAG.getNode(ISD::XOR, dl, CondLHS.getValueType(), CondLHS, True);
2098  } else
2099  Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
2100  } else {
// Range form: CmpLHS and CmpRHS are constant bounds and CmpMHS is the value
// being tested.
2101  assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");
2102 
2103  const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
2104  const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();
2105 
2106  SDValue CmpOp = getValue(CB.CmpMHS);
2107  EVT VT = CmpOp.getValueType();
2108 
// When the lower bound is the minimum signed value, a single signed
// "<= High" test is emitted; otherwise the range check is done as
// (CmpOp - Low) <=u (High - Low).
2109  if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
2110  Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, dl, VT),
2111  ISD::SETLE);
2112  } else {
2113  SDValue SUB = DAG.getNode(ISD::SUB, dl,
2114  VT, CmpOp, DAG.getConstant(Low, dl, VT));
2115  Cond = DAG.getSetCC(dl, MVT::i1, SUB,
2116  DAG.getConstant(High-Low, dl, VT), ISD::SETULE);
2117  }
2118  }
2119 
2120  // Update successor info
2121  addSuccessorWithProb(SwitchBB, CB.TrueBB, CB.TrueProb);
2122  // TrueBB and FalseBB are always different unless the incoming IR is
2123  // degenerate. This only happens when running llc on weird IR.
2124  if (CB.TrueBB != CB.FalseBB)
2125  addSuccessorWithProb(SwitchBB, CB.FalseBB, CB.FalseProb);
2126  SwitchBB->normalizeSuccProbs();
2127 
2128  // If the lhs block is the next block, invert the condition so that we can
2129  // fall through to the lhs instead of the rhs block.
// Note that this swaps the TrueBB/FalseBB fields of the caller's CB in place.
2130  if (CB.TrueBB == NextBlock(SwitchBB)) {
2131  std::swap(CB.TrueBB, CB.FalseBB);
2132  SDValue True = DAG.getConstant(1, dl, Cond.getValueType());
2133  Cond = DAG.getNode(ISD::XOR, dl, Cond.getValueType(), Cond, True);
2134  }
2135 
2136  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2137  MVT::Other, getControlRoot(), Cond,
2138  DAG.getBasicBlock(CB.TrueBB));
2139 
2140  // Insert the false branch. Do this even if it's a fall through branch,
2141  // this makes it easier to do DAG optimizations which require inverting
2142  // the branch condition.
2143  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2144  DAG.getBasicBlock(CB.FalseBB));
2145 
2146  DAG.setRoot(BrCond);
2147 }
2148 
2149 /// visitJumpTable - Emit JumpTable node in the current MBB
// NOTE(review): listing numbers 2150 and 2153 are absent. The function
// signature (declaring JT) and the statement that defines PTy appear to be
// missing from this extract.
2151  // Emit the code for the jump table
// JT.Reg == -1U means the header has not been lowered yet, so there is no
// index register to read from.
2152  assert(JT.Reg != -1U && "Should lower JT Header first!");
2154  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
2155  JT.Reg, PTy);
2156  SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
// The BR_JT node is chained on result 1 of the register copy.
2157  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
2158  MVT::Other, Index.getValue(1),
2159  Table, Index);
2160  DAG.setRoot(BrJumpTable);
2161 }
2162 
2163 /// visitJumpTableHeader - This function emits necessary code to produce index
2164 /// in the JumpTable from switch case.
// NOTE(review): listing number 2165 is absent; the first signature line
// (function name and the declaration of JT) appears to be missing from this
// extract.
2166  JumpTableHeader &JTH,
2167  MachineBasicBlock *SwitchBB) {
2168  SDLoc dl = getCurSDLoc();
2169 
2170  // Subtract the lowest switch case value from the value being switched on and
2171  // conditional branch to default mbb if the result is greater than the
2172  // difference between smallest and largest cases.
2173  SDValue SwitchOp = getValue(JTH.SValue);
2174  EVT VT = SwitchOp.getValueType();
2175  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2176  DAG.getConstant(JTH.First, dl, VT));
2177 
2178  // The SDNode we just created, which holds the value being switched on minus
2179  // the smallest case value, needs to be copied to a virtual register so it
2180  // can be used as an index into the jump table in a subsequent basic block.
2181  // This value may be smaller or larger than the target's pointer type, and
2182  // therefore require extension or truncating.
2183  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2184  SwitchOp = DAG.getZExtOrTrunc(Sub, dl, TLI.getPointerTy(DAG.getDataLayout()));
2185 
2186  unsigned JumpTableReg =
2187  FuncInfo.CreateReg(TLI.getPointerTy(DAG.getDataLayout()));
2188  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl,
2189  JumpTableReg, SwitchOp);
// Record the register on JT so the later jump-table lowering can read it.
2190  JT.Reg = JumpTableReg;
2191 
2192  // Emit the range check for the jump table, and branch to the default block
2193  // for the switch statement if the value being switched on exceeds the largest
2194  // case in the switch.
// The comparison uses the un-extended Sub value in its original type VT.
2195  SDValue CMP = DAG.getSetCC(
2196  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2197  Sub.getValueType()),
2198  Sub, DAG.getConstant(JTH.Last - JTH.First, dl, VT), ISD::SETUGT);
2199 
2200  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2201  MVT::Other, CopyTo, CMP,
2202  DAG.getBasicBlock(JT.Default));
2203 
2204  // Avoid emitting unnecessary branches to the next block.
2205  if (JT.MBB != NextBlock(SwitchBB))
2206  BrCond = DAG.getNode(ISD::BR, dl, MVT::Other, BrCond,
2207  DAG.getBasicBlock(JT.MBB));
2208 
2209  DAG.setRoot(BrCond);
2210 }
2211 
2212 /// Create a LOAD_STACK_GUARD node, and let it carry the target specific global
2213 /// variable if there exists one.
// Returns result 0 of the created machine node.
// NOTE(review): listing numbers 2214 and 2224-2226 are absent. The first
// signature line (declaring DAG and DL) and the statements that define Flags
// and begin the definition of MemRef appear to be missing from this extract.
2215  SDValue &Chain) {
2216  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2217  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2218  MachineFunction &MF = DAG.getMachineFunction();
2219  Value *Global = TLI.getSDagStackGuard(*MF.getFunction().getParent());
2220  MachineSDNode *Node =
2221  DAG.getMachineNode(TargetOpcode::LOAD_STACK_GUARD, DL, PtrTy, Chain);
// A memory operand is attached only when the target reports a guard global.
2222  if (Global) {
2223  MachinePointerInfo MPInfo(Global);
2227  MPInfo, Flags, PtrTy.getSizeInBits() / 8, DAG.getEVTAlignment(PtrTy));
2228  DAG.setNodeMemRefs(Node, {MemRef});
2229  }
2230  return SDValue(Node, 0);
2231 }
2232 
2233 /// Codegen a new tail for a stack protector check ParentMBB which has had its
2234 /// tail spliced into a stack protector check success bb.
2235 ///
2236 /// For a high level explanation of how this fits into the stack protector
2237 /// generation see the comment on the declaration of class
2238 /// StackProtectorDescriptor.
// NOTE(review): listing numbers 2258-2259, 2273-2274, 2281 and 2303 are
// absent. The tails of both DAG.getLoad calls and the declarations of Args,
// Entry and CLI appear to be missing from this extract.
2239 void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD,
2240  MachineBasicBlock *ParentBB) {
2241 
2242  // First create the loads to the guard/stack slot for the comparison.
2243  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2244  EVT PtrTy = TLI.getPointerTy(DAG.getDataLayout());
2245 
2246  MachineFrameInfo &MFI = ParentBB->getParent()->getFrameInfo();
2247  int FI = MFI.getStackProtectorIndex();
2248 
2249  SDValue Guard;
2250  SDLoc dl = getCurSDLoc();
2251  SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy);
2252  const Module &M = *ParentBB->getParent()->getFunction().getParent();
2253  unsigned Align = DL->getPrefTypeAlignment(Type::getInt8PtrTy(M.getContext()));
2254 
2255  // Generate code to load the content of the guard slot.
2256  SDValue GuardVal = DAG.getLoad(
2257  PtrTy, dl, DAG.getEntryNode(), StackSlotPtr,
2260 
2261  if (TLI.useStackGuardXorFP())
2262  GuardVal = TLI.emitStackGuardXorFP(DAG, GuardVal, dl);
2263 
2264  // Retrieve guard check function, nullptr if instrumentation is inlined.
2265  if (const Value *GuardCheck = TLI.getSSPStackGuardCheck(M)) {
2266  // The target provides a guard check function to validate the guard value.
2267  // Generate a call to that function with the content of the guard slot as
2268  // argument.
2269  auto *Fn = cast<Function>(GuardCheck);
2270  FunctionType *FnTy = Fn->getFunctionType();
2271  assert(FnTy->getNumParams() == 1 && "Invalid function signature");
2272 
2275  Entry.Node = GuardVal;
2276  Entry.Ty = FnTy->getParamType(0);
2277  if (Fn->hasAttribute(1, Attribute::AttrKind::InReg))
2278  Entry.IsInReg = true;
2279  Args.push_back(Entry);
2280 
2282  CLI.setDebugLoc(getCurSDLoc())
2283  .setChain(DAG.getEntryNode())
2284  .setCallee(Fn->getCallingConv(), FnTy->getReturnType(),
2285  getValue(GuardCheck), std::move(Args));
2286 
// The call's output chain becomes the root; no compare/branch is emitted on
// this path.
2287  std::pair<SDValue, SDValue> Result = TLI.LowerCallTo(CLI);
2288  DAG.setRoot(Result.second);
2289  return;
2290  }
2291 
2292  // If useLoadStackGuardNode returns true, generate LOAD_STACK_GUARD.
2293  // Otherwise, emit a volatile load to retrieve the stack guard value.
2294  SDValue Chain = DAG.getEntryNode();
2295  if (TLI.useLoadStackGuardNode()) {
2296  Guard = getLoadStackGuard(DAG, dl, Chain);
2297  } else {
2298  const Value *IRGuard = TLI.getSDagStackGuard(M);
2299  SDValue GuardPtr = getValue(IRGuard);
2300 
2301  Guard =
2302  DAG.getLoad(PtrTy, dl, Chain, GuardPtr, MachinePointerInfo(IRGuard, 0),
2304  }
2305 
2306  // Perform the comparison via a subtract/getsetcc.
2307  EVT VT = Guard.getValueType();
2308  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, Guard, GuardVal);
2309 
2310  SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(DAG.getDataLayout(),
2311  *DAG.getContext(),
2312  Sub.getValueType()),
2313  Sub, DAG.getConstant(0, dl, VT), ISD::SETNE);
2314 
2315  // If the sub is not 0, then we know the guard/stackslot do not equal, so
2316  // branch to failure MBB.
// The branch is chained on operand 0 of GuardVal.
2317  SDValue BrCond = DAG.getNode(ISD::BRCOND, dl,
2318  MVT::Other, GuardVal.getOperand(0),
2319  Cmp, DAG.getBasicBlock(SPD.getFailureMBB()));
2320  // Otherwise branch to success MBB.
2321  SDValue Br = DAG.getNode(ISD::BR, dl,
2322  MVT::Other, BrCond,
2323  DAG.getBasicBlock(SPD.getSuccessMBB()));
2324 
2325  DAG.setRoot(Br);
2326 }
2327 
2328 /// Codegen the failure basic block for a stack protector check.
2329 ///
2330 /// A failure stack protector machine basic block consists simply of a call to
2331 /// __stack_chk_fail().
2332 ///
2333 /// For a high level explanation of how this fits into the stack protector
2334 /// generation see the comment on the declaration of class
2335 /// StackProtectorDescriptor.
2336 void
2337 SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
2338  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2339  SDValue Chain =
2340  TLI.makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL, MVT::isVoid,
2341  None, false, getCurSDLoc(), false, false).second;
2342  DAG.setRoot(Chain);
2343 }
2344 
2345 /// visitBitTestHeader - This function emits necessary code to produce value
2346 /// suitable for "bit tests"
// NOTE(review): listing number 2347 is absent; the first signature line
// (function name and the declaration of B) appears to be missing from this
// extract.
2348  MachineBasicBlock *SwitchBB) {
2349  SDLoc dl = getCurSDLoc();
2350 
2351  // Subtract the minimum value
2352  SDValue SwitchOp = getValue(B.SValue);
2353  EVT VT = SwitchOp.getValueType();
2354  SDValue Sub = DAG.getNode(ISD::SUB, dl, VT, SwitchOp,
2355  DAG.getConstant(B.First, dl, VT));
2356 
2357  // Check range
2358  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2359  SDValue RangeCmp = DAG.getSetCC(
2360  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(),
2361  Sub.getValueType()),
2362  Sub, DAG.getConstant(B.Range, dl, VT), ISD::SETUGT);
2363 
2364  // Determine the type of the test operands.
// The pointer type is used when VT is not legal for the target or when some
// case mask does not fit in VT's bit width.
2365  bool UsePtrType = false;
2366  if (!TLI.isTypeLegal(VT))
2367  UsePtrType = true;
2368  else {
2369  for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
2370  if (!isUIntN(VT.getSizeInBits(), B.Cases[i].Mask)) {
2371  // Switch table case range are encoded into series of masks.
2372  // Just use pointer type, it's guaranteed to fit.
2373  UsePtrType = true;
2374  break;
2375  }
2376  }
2377  if (UsePtrType) {
2378  VT = TLI.getPointerTy(DAG.getDataLayout());
2379  Sub = DAG.getZExtOrTrunc(Sub, dl, VT);
2380  }
2381 
// Record the chosen type and a fresh register on B, then copy the adjusted
// switch value into that register.
2382  B.RegVT = VT.getSimpleVT();
2383  B.Reg = FuncInfo.CreateReg(B.RegVT);
2384  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), dl, B.Reg, Sub);
2385 
2386  MachineBasicBlock* MBB = B.Cases[0].ThisBB;
2387 
2388  addSuccessorWithProb(SwitchBB, B.Default, B.DefaultProb);
2389  addSuccessorWithProb(SwitchBB, MBB, B.Prob);
2390  SwitchBB->normalizeSuccProbs();
2391 
// Out-of-range values branch to the default block.
2392  SDValue BrRange = DAG.getNode(ISD::BRCOND, dl,
2393  MVT::Other, CopyTo, RangeCmp,
2394  DAG.getBasicBlock(B.Default));
2395 
2396  // Avoid emitting unnecessary branches to the next block.
2397  if (MBB != NextBlock(SwitchBB))
2398  BrRange = DAG.getNode(ISD::BR, dl, MVT::Other, BrRange,
2399  DAG.getBasicBlock(MBB));
2400 
2401  DAG.setRoot(BrRange);
2402 }
2403 
2404 /// visitBitTestCase - this function produces one "bit test"
// Reads the value from Reg, builds a test against B.Mask, and branches to
// B.TargetBB when the test succeeds and to NextMBB otherwise.
// NOTE(review): listing number 2405 is absent; the first signature line
// (function name and the declaration of BB) appears to be missing from this
// extract.
2406  MachineBasicBlock* NextMBB,
2407  BranchProbability BranchProbToNext,
2408  unsigned Reg,
2409  BitTestCase &B,
2410  MachineBasicBlock *SwitchBB) {
2411  SDLoc dl = getCurSDLoc();
2412  MVT VT = BB.RegVT;
2413  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), dl, Reg, VT);
2414  SDValue Cmp;
2415  unsigned PopCount = countPopulation(B.Mask);
2416  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2417  if (PopCount == 1) {
2418  // Testing for a single bit; just compare the shift count with what it
2419  // would need to be to shift a 1 bit in that position.
2420  Cmp = DAG.getSetCC(
2421  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2422  ShiftOp, DAG.getConstant(countTrailingZeros(B.Mask), dl, VT),
2423  ISD::SETEQ);
2424  } else if (PopCount == BB.Range) {
2425  // There is only one zero bit in the range, test for it directly.
2426  Cmp = DAG.getSetCC(
2427  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2428  ShiftOp, DAG.getConstant(countTrailingOnes(B.Mask), dl, VT),
2429  ISD::SETNE);
2430  } else {
// General case: test ((1 << ShiftOp) & Mask) != 0.
2431  // Make desired shift
2432  SDValue SwitchVal = DAG.getNode(ISD::SHL, dl, VT,
2433  DAG.getConstant(1, dl, VT), ShiftOp);
2434 
2435  // Emit bit tests and jumps
2436  SDValue AndOp = DAG.getNode(ISD::AND, dl,
2437  VT, SwitchVal, DAG.getConstant(B.Mask, dl, VT));
2438  Cmp = DAG.getSetCC(
2439  dl, TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT),
2440  AndOp, DAG.getConstant(0, dl, VT), ISD::SETNE);
2441  }
2442 
2443  // The branch probability from SwitchBB to B.TargetBB is B.ExtraProb.
2444  addSuccessorWithProb(SwitchBB, B.TargetBB, B.ExtraProb);
2445  // The branch probability from SwitchBB to NextMBB is BranchProbToNext.
2446  addSuccessorWithProb(SwitchBB, NextMBB, BranchProbToNext);
2447  // It is not guaranteed that the sum of B.ExtraProb and BranchProbToNext is
2448  // one as they are relative probabilities (and thus work more like weights),
2449  // and hence we need to normalize them to let the sum of them become one.
2450  SwitchBB->normalizeSuccProbs();
2451 
2452  SDValue BrAnd = DAG.getNode(ISD::BRCOND, dl,
2453  MVT::Other, getControlRoot(),
2454  Cmp, DAG.getBasicBlock(B.TargetBB));
2455 
2456  // Avoid emitting unnecessary branches to the next block.
2457  if (NextMBB != NextBlock(SwitchBB))
2458  BrAnd = DAG.getNode(ISD::BR, dl, MVT::Other, BrAnd,
2459  DAG.getBasicBlock(NextMBB));
2460 
2461  DAG.setRoot(BrAnd);
2462 }
2463 
// Lower an IR invoke: dispatch on the callee kind (inline asm, a small set of
// intrinsics, or an ordinary call), export the result if it has a register
// assigned, record the normal and unwind CFG edges, and finish with a branch
// to the normal destination.
// NOTE(review): listing numbers 2474, 2497, 2515 and 2519 are absent. The
// opening of the operand-bundle assert, the "else if" that guards the
// deopt-bundle path, the declaration of UnwindDests, and the else-arm of the
// EHPadBBProb initialiser appear to be missing from this extract.
2464 void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) {
2465  MachineBasicBlock *InvokeMBB = FuncInfo.MBB;
2466 
2467  // Retrieve successors. Look through artificial IR level blocks like
2468  // catchswitch for successors.
2469  MachineBasicBlock *Return = FuncInfo.MBBMap[I.getSuccessor(0)];
2470  const BasicBlock *EHPadBB = I.getSuccessor(1);
2471 
2472  // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
2473  // have to do anything here to lower funclet bundles.
2475  {LLVMContext::OB_deopt, LLVMContext::OB_funclet}) &&
2476  "Cannot lower invokes with arbitrary operand bundles yet!");
2477 
2478  const Value *Callee(I.getCalledValue());
2479  const Function *Fn = dyn_cast<Function>(Callee);
2480  if (isa<InlineAsm>(Callee))
2481  visitInlineAsm(&I);
2482  else if (Fn && Fn->isIntrinsic()) {
2483  switch (Fn->getIntrinsicID()) {
2484  default:
2485  llvm_unreachable("Cannot invoke this intrinsic");
2486  case Intrinsic::donothing:
2487  // Ignore invokes to @llvm.donothing: jump directly to the next BB.
2488  break;
2489  case Intrinsic::experimental_patchpoint_void:
2490  case Intrinsic::experimental_patchpoint_i64:
2491  visitPatchpoint(&I, EHPadBB);
2492  break;
2493  case Intrinsic::experimental_gc_statepoint:
2494  LowerStatepoint(ImmutableStatepoint(&I), EHPadBB);
2495  break;
2496  }
2498  // Currently we do not lower any intrinsic calls with deopt operand bundles.
2499  // Eventually we will support lowering the @llvm.experimental.deoptimize
2500  // intrinsic, and right now there are no plans to support other intrinsics
2501  // with deopt state.
2502  LowerCallSiteWithDeoptBundle(&I, getValue(Callee), EHPadBB);
2503  } else {
2504  LowerCallTo(&I, getValue(Callee), false, EHPadBB);
2505  }
2506 
2507  // If the value of the invoke is used outside of its defining block, make it
2508  // available as a virtual register.
2509  // We already took care of the exported value for the statepoint instruction
2510  // during call to the LowerStatepoint.
2511  if (!isStatepoint(I)) {
2512  CopyToExportRegsIfNeeded(&I);
2513  }
2514 
2516  BranchProbabilityInfo *BPI = FuncInfo.BPI;
2517  BranchProbability EHPadBBProb =
2518  BPI ? BPI->getEdgeProbability(InvokeMBB->getBasicBlock(), EHPadBB)
2520  findUnwindDestinations(FuncInfo, EHPadBB, EHPadBBProb, UnwindDests);
2521 
2522  // Update successor info.
2523  addSuccessorWithProb(InvokeMBB, Return);
// Every unwind destination is marked as an EH pad before its edge is added.
2524  for (auto &UnwindDest : UnwindDests) {
2525  UnwindDest.first->setIsEHPad();
2526  addSuccessorWithProb(InvokeMBB, UnwindDest.first, UnwindDest.second);
2527  }
2528  InvokeMBB->normalizeSuccProbs();
2529 
2530  // Drop into normal successor.
2531  DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(),
2532  MVT::Other, getControlRoot(),
2533  DAG.getBasicBlock(Return)));
2534 }
2535 
2536 void SelectionDAGBuilder::visitResume(const ResumeInst &RI) {
2537  llvm_unreachable("SelectionDAGBuilder shouldn't visit resume instructions!");
2538 }
2539 
// Lower a landingpad instruction into a MERGE_VALUES node holding the
// exception pointer and the selector value, read from the virtual registers
// recorded in FuncInfo.
// NOTE(review): listing number 2559 is absent; the declaration of ValueVTs
// appears to be missing from this extract.
2540 void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) {
2541  assert(FuncInfo.MBB->isEHPad() &&
2542  "Call to landingpad not in landing pad!");
2543 
2544  // If there aren't registers to copy the values into (e.g., during SjLj
2545  // exceptions), then don't bother to create these DAG nodes.
2546  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
2547  const Constant *PersonalityFn = FuncInfo.Fn->getPersonalityFn();
2548  if (TLI.getExceptionPointerRegister(PersonalityFn) == 0 &&
2549  TLI.getExceptionSelectorRegister(PersonalityFn) == 0)
2550  return;
2551 
2552  // If landingpad's return type is token type, we don't create DAG nodes
2553  // for its exception pointer and selector value. The extraction of exception
2554  // pointer or selector value from token type landingpads is not currently
2555  // supported.
2556  if (LP.getType()->isTokenTy())
2557  return;
2558 
2560  SDLoc dl = getCurSDLoc();
2561  ComputeValueVTs(TLI, DAG.getDataLayout(), LP.getType(), ValueVTs);
2562  assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported");
2563 
2564  // Get the two live-in registers as SDValues. The physregs have already been
2565  // copied into virtual registers.
2566  SDValue Ops[2];
2567  if (FuncInfo.ExceptionPointerVirtReg) {
2568  Ops[0] = DAG.getZExtOrTrunc(
2569  DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2570  FuncInfo.ExceptionPointerVirtReg,
2571  TLI.getPointerTy(DAG.getDataLayout())),
2572  dl, ValueVTs[0]);
2573  } else {
// No exception-pointer register recorded: use a zero constant of pointer type.
2574  Ops[0] = DAG.getConstant(0, dl, TLI.getPointerTy(DAG.getDataLayout()));
2575  }
2576  Ops[1] = DAG.getZExtOrTrunc(
2577  DAG.getCopyFromReg(DAG.getEntryNode(), dl,
2578  FuncInfo.ExceptionSelectorVirtReg,
2579  TLI.getPointerTy(DAG.getDataLayout())),
2580  dl, ValueVTs[1]);
2581 
2582  // Merge into one.
2583  SDValue Res = DAG.getNode(ISD::MERGE_VALUES, dl,
2584  DAG.getVTList(ValueVTs), Ops);
2585  setValue(&LP, Res);
2586 }
2587 
2588 void SelectionDAGBuilder::sortAndRangeify(CaseClusterVector &Clusters) {
2589 #ifndef NDEBUG
2590  for (const CaseCluster &CC : Clusters)
2591  assert(CC.Low == CC.High && "Input clusters must be single-case");
2592 #endif
2593 
2594  llvm::sort(Clusters, [](const CaseCluster &a, const CaseCluster &b) {
2595  return a.Low->getValue().slt(b.Low->getValue());
2596  });
2597 
2598  // Merge adjacent clusters with the same destination.
2599  const unsigned N = Clusters.size();
2600  unsigned DstIndex = 0;
2601  for (unsigned SrcIndex = 0; SrcIndex < N; ++SrcIndex) {
2602  CaseCluster &CC = Clusters[SrcIndex];
2603  const ConstantInt *CaseVal = CC.Low;
2604  MachineBasicBlock *Succ = CC.MBB;
2605 
2606  if (DstIndex != 0 && Clusters[DstIndex - 1].MBB == Succ &&
2607  (CaseVal->getValue() - Clusters[DstIndex - 1].High->getValue()) == 1) {
2608  // If this case has the same successor and is a neighbour, merge it into
2609  // the previous cluster.
2610  Clusters[DstIndex - 1].High = CaseVal;
2611  Clusters[DstIndex - 1].Prob += CC.Prob;
2612  } else {
2613  std::memmove(&Clusters[DstIndex++], &Clusters[SrcIndex],
2614  sizeof(Clusters[SrcIndex]));
2615  }
2616  }
2617  Clusters.resize(DstIndex);
2618 }
2619 
2621  MachineBasicBlock *Last) {
  // NOTE(review): the first line of this function's signature is not visible
  // in this listing (the embedded numbering skips it); only the trailing
  // parameter Last is shown. First, used below, is presumably the other
  // MachineBasicBlock* parameter - confirm.
  //
  // Retargets every recorded jump-table header and bit-test parent that
  // currently refers to block First so that it refers to block Last instead.
2622  // Update JTCases.
2623  for (unsigned i = 0, e = JTCases.size(); i != e; ++i)
2624  if (JTCases[i].first.HeaderBB == First)
2625  JTCases[i].first.HeaderBB = Last;
2626 
2627  // Update BitTestCases.
2628  for (unsigned i = 0, e = BitTestCases.size(); i != e; ++i)
2629  if (BitTestCases[i].Parent == First)
2630  BitTestCases[i].Parent = Last;
2631 }
2632 
// Lowers an indirectbr: adds each distinct successor block once as a
// machine-CFG successor of the current block, normalizes the successor
// probabilities, and emits a BRIND node on the branch address.
2633 void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) {
2634  MachineBasicBlock *IndirectBrMBB = FuncInfo.MBB;
2635 
2636  // Update machine-CFG edges with unique successors.
  // NOTE(review): the declaration of Done is not visible in this listing (the
  // embedded numbering skips a line here). It is used as a set of BasicBlock
  // pointers whose insert() reports whether the element was new - confirm.
2638  for (unsigned i = 0, e = I.getNumSuccessors(); i != e; ++i) {
2639  BasicBlock *BB = I.getSuccessor(i);
2640  bool Inserted = Done.insert(BB).second;
  // A block listed more than once as a destination is only added once.
2641  if (!Inserted)
2642  continue;
2643 
2644  MachineBasicBlock *Succ = FuncInfo.MBBMap[BB];
2645  addSuccessorWithProb(IndirectBrMBB, Succ);
2646  }
2647  IndirectBrMBB->normalizeSuccProbs();
2648 
2649  DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(),
2650  MVT::Other, getControlRoot(),
2651  getValue(I.getAddress())));
2652 }
2653 
// Lowers an unreachable instruction. Nothing is emitted unless the target
// option TrapUnreachable is set; otherwise a TRAP node is emitted, except in
// the noreturn-call case handled below.
2654 void SelectionDAGBuilder::visitUnreachable(const UnreachableInst &I) {
2655  if (!DAG.getTarget().Options.TrapUnreachable)
2656  return;
2657 
2658  // We may be able to ignore unreachable behind a noreturn call.
  // NOTE(review): two lines are missing from this listing (the embedded
  // numbering skips 2659 and 2662). The three closing braces below imply an
  // additional enclosing `if (...) {` around this check, and the expression
  // starting with std::prev is the initializer of PredI, whose declaration
  // head is not shown. Confirm both against the real file.
2660  const BasicBlock &BB = *I.getParent();
2661  if (&I != &BB.front()) {
2663  std::prev(BasicBlock::const_iterator(&I));
  // If the instruction immediately before the unreachable is a call that
  // does not return, no trap is emitted.
2664  if (const CallInst *Call = dyn_cast<CallInst>(&*PredI)) {
2665  if (Call->doesNotReturn())
2666  return;
2667  }
2668  }
2669  }
2670 
2671  DAG.setRoot(DAG.getNode(ISD::TRAP, getCurSDLoc(), MVT::Other, DAG.getRoot()));
2672 }
2673 
// Lowers fsub. When the special case below matches, the subtraction is emitted
// as an FNEG of the second operand; otherwise it is lowered as a generic
// binary FSUB via visitBinary.
2674 void SelectionDAGBuilder::visitFSub(const User &I) {
2675  // -0.0 - X --> fneg
2676  Type *Ty = I.getType();
  // NOTE(review): the second half of this condition is missing from this
  // listing (the embedded numbering skips line 2678). Judging by the comment
  // above and the otherwise unused Ty, it presumably checks that operand 0 is
  // the negative-zero constant of type Ty - confirm.
2677  if (isa<Constant>(I.getOperand(0)) &&
2679  SDValue Op2 = getValue(I.getOperand(1));
2680  setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(),
2681  Op2.getValueType(), Op2));
2682  return;
2683  }
2684 
2685  visitBinary(I, ISD::FSUB);
2686 }
2687 
2688 /// Checks if the given instruction performs a vector reduction, in which case
2689 /// we have the freedom to alter the elements in the result as long as the
2690 /// reduction of them stays unchanged.
///
/// Returns false for non-instructions, non-vector types, unsupported opcodes
/// and non-power-of-2 element counts. Otherwise returns true only if the walk
/// over the users never meets a disallowed instruction and reaches an
/// extractelement of element 0 after the vector has been fully reduced.
2691 static bool isVectorReductionOp(const User *I) {
2692  const Instruction *Inst = dyn_cast<Instruction>(I);
2693  if (!Inst || !Inst->getType()->isVectorTy())
2694  return false;
2695 
2696  auto OpCode = Inst->getOpcode();
2697  switch (OpCode) {
2698  case Instruction::Add:
2699  case Instruction::Mul:
2700  case Instruction::And:
2701  case Instruction::Or:
2702  case Instruction::Xor:
2703  break;
2704  case Instruction::FAdd:
2705  case Instruction::FMul:
  // FP add/mul only qualify when the operation carries the "fast" flag set;
  // otherwise control continues into the default case and the op is rejected.
2706  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2707  if (FPOp->getFastMathFlags().isFast())
2708  break;
2710  default:
2711  return false;
2712  }
2713 
2714  unsigned ElemNum = Inst->getType()->getVectorNumElements();
2715  // Ensure the reduction size is a power of 2.
2716  if (!isPowerOf2_32(ElemNum))
2717  return false;
2718 
  // Number of leading elements that still hold partial results; halved each
  // time a shuffle + reduction step is recognized below.
2719  unsigned ElemNumToReduce = ElemNum;
2720 
2721  // Do DFS search on the def-use chain from the given instruction. We only
2722  // allow four kinds of operations during the search until we reach the
2723  // instruction that extracts the first element from the vector:
2724  //
2725  // 1. The reduction operation of the same opcode as the given instruction.
2726  //
2727  // 2. PHI node.
2728  //
2729  // 3. ShuffleVector instruction together with a reduction operation that
2730  // does a partial reduction.
2731  //
2732  // 4. ExtractElement that extracts the first element from the vector, and we
2733  // stop searching the def-use chain here.
2734  //
2735  // 3 & 4 above perform a reduction on all elements of the vector. We push defs
2736  // from 1-3 to the stack to continue the DFS. The given instruction is not
2737  // a reduction operation if we meet any other instructions other than those
2738  // listed above.
2739 
2740  SmallVector<const User *, 16> UsersToVisit{Inst};
  // NOTE(review): the declaration of Visited is not visible in this listing
  // (the embedded numbering skips line 2741). It is used below as a set of
  // already-expanded users - confirm its exact type.
2742  bool ReduxExtracted = false;
2743 
2744  while (!UsersToVisit.empty()) {
2745  auto User = UsersToVisit.back();
2746  UsersToVisit.pop_back();
  // Skip users that were already expanded (PHI nodes can create cycles).
2747  if (!Visited.insert(User).second)
2748  continue;
2749 
2750  for (const auto &U : User->users()) {
2751  auto Inst = dyn_cast<Instruction>(U);
2752  if (!Inst)
2753  return false;
2754 
2755  if (Inst->getOpcode() == OpCode || isa<PHINode>(U)) {
  // A non-PHI FP operation in the chain must itself be "fast".
2756  if (const FPMathOperator *FPOp = dyn_cast<const FPMathOperator>(Inst))
2757  if (!isa<PHINode>(FPOp) && !FPOp->getFastMathFlags().isFast())
2758  return false;
2759  UsersToVisit.push_back(U);
2760  } else if (const ShuffleVectorInst *ShufInst =
2761  dyn_cast<ShuffleVectorInst>(U)) {
2762  // Detect the following pattern: A ShuffleVector instruction together
2763  // with a reduction that do partial reduction on the first and second
2764  // ElemNumToReduce / 2 elements, and store the result in
2765  // ElemNumToReduce / 2 elements in another vector.
2766 
2767  unsigned ResultElements = ShufInst->getType()->getVectorNumElements();
2768  if (ResultElements < ElemNum)
2769  return false;
2770 
2771  if (ElemNumToReduce == 1)
2772  return false;
2773  if (!isa<UndefValue>(U->getOperand(1)))
2774  return false;
  // The low half of the mask must select the upper half of the elements
  // still to be reduced...
2775  for (unsigned i = 0; i < ElemNumToReduce / 2; ++i)
2776  if (ShufInst->getMaskValue(i) != int(i + ElemNumToReduce / 2))
2777  return false;
  // ...and every remaining mask element must be undefined (-1).
2778  for (unsigned i = ElemNumToReduce / 2; i < ElemNum; ++i)
2779  if (ShufInst->getMaskValue(i) != -1)
2780  return false;
2781 
2782  // There is only one user of this ShuffleVector instruction, which
2783  // must be a reduction operation.
2784  if (!U->hasOneUse())
2785  return false;
2786 
2787  auto U2 = dyn_cast<Instruction>(*U->user_begin());
2788  if (!U2 || U2->getOpcode() != OpCode)
2789  return false;
2790 
2791  // Check operands of the reduction operation.
  // It must combine the shuffle with the shuffle's own input, in either
  // operand order.
2792  if ((U2->getOperand(0) == U->getOperand(0) && U2->getOperand(1) == U) ||
2793  (U2->getOperand(1) == U->getOperand(0) && U2->getOperand(0) == U)) {
2794  UsersToVisit.push_back(U2);
2795  ElemNumToReduce /= 2;
2796  } else
2797  return false;
2798  } else if (isa<ExtractElementInst>(U)) {
2799  // At this moment we should have reduced all elements in the vector.
2800  if (ElemNumToReduce != 1)
2801  return false;
2802 
  // Only an extract of element 0 with a constant index is accepted.
2803  const ConstantInt *Val = dyn_cast<ConstantInt>(U->getOperand(1));
2804  if (!Val || !Val->isZero())
2805  return false;
2806 
2807  ReduxExtracted = true;
2808  } else
2809  return false;
2810  }
2811  }
2812  return ReduxExtracted;
2813 }
2814 
2815 void SelectionDAGBuilder::visitUnary(const User &I, unsigned Opcode) {
2816  SDNodeFlags Flags;
2817 
2818  SDValue Op = getValue(I.getOperand(0));
2819  SDValue UnNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op.getValueType(),
2820  Op, Flags);
2821  setValue(&I, UnNodeValue);
2822 }
2823 
2824 void SelectionDAGBuilder::visitBinary(const User &I, unsigned Opcode) {
2825  SDNodeFlags Flags;
2826  if (auto *OFBinOp = dyn_cast<OverflowingBinaryOperator>(&I)) {
2827  Flags.setNoSignedWrap(OFBinOp->hasNoSignedWrap());
2828  Flags.setNoUnsignedWrap(OFBinOp->hasNoUnsignedWrap());
2829  }
2830  if (auto *ExactOp = dyn_cast<PossiblyExactOperator>(&I)) {
2831  Flags.setExact(ExactOp->isExact());
2832  }
2833  if (isVectorReductionOp(&I)) {
2834  Flags.setVectorReduction(true);
2835  LLVM_DEBUG(dbgs() << "Detected a reduction operation:" << I << "\n");
2836  }
2837 
2838  SDValue Op1 = getValue(I.getOperand(0));
2839  SDValue Op2 = getValue(I.getOperand(1));
2840  SDValue BinNodeValue = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(),
2841  Op1, Op2, Flags);
2842  setValue(&I, BinNodeValue);
2843 }
2844 
2845 void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) {
2846  SDValue Op1 = getValue(I.getOperand(0));
2847  SDValue Op2 = getValue(I.getOperand(1));
2848 
2849  EVT ShiftTy = DAG.getTargetLoweringInfo().getShiftAmountTy(
2850  Op1.getValueType(), DAG.getDataLayout());
2851 
2852  // Coerce the shift amount to the right type if we can.
2853  if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) {
2854  unsigned ShiftSize = ShiftTy.getSizeInBits();
2855  unsigned Op2Size = Op2.getValueSizeInBits();
2856  SDLoc DL = getCurSDLoc();
2857 
2858  // If the operand is smaller than the shift count type, promote it.
2859  if (ShiftSize > Op2Size)
2860  Op2 = DAG.getNode(ISD::ZERO_EXTEND, DL, ShiftTy, Op2);
2861 
2862  // If the operand is larger than the shift count type but the shift
2863  // count type has enough bits to represent any shift value, truncate
2864  // it now. This is a common case and it exposes the truncate to
2865  // optimization early.
2866  else if (ShiftSize >= Log2_32_Ceil(Op2.getValueSizeInBits()))
2867  Op2 = DAG.getNode(ISD::TRUNCATE, DL, ShiftTy, Op2);
2868  // Otherwise we'll need to temporarily settle for some other convenient
2869  // type. Type legalization will make adjustments once the shiftee is split.
2870  else
2871  Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32);
2872  }
2873 
2874  bool nuw = false;
2875  bool nsw = false;
2876  bool exact = false;
2877 
2878  if (Opcode == ISD::SRL || Opcode == ISD::SRA || Opcode == ISD::SHL) {
2879 
2880  if (const OverflowingBinaryOperator *OFBinOp =
2881  dyn_cast<const OverflowingBinaryOperator>(&I)) {
2882  nuw = OFBinOp->hasNoUnsignedWrap();
2883  nsw = OFBinOp->hasNoSignedWrap();
2884  }
2885  if (const PossiblyExactOperator *ExactOp =
2886  dyn_cast<const PossiblyExactOperator>(&I))
2887  exact = ExactOp->isExact();
2888  }
2889  SDNodeFlags Flags;
2890  Flags.setExact(exact);
2891  Flags.setNoSignedWrap(nsw);
2892  Flags.setNoUnsignedWrap(nuw);
2893  SDValue Res = DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2,
2894  Flags);
2895  setValue(&I, Res);
2896 }
2897 
2898 void SelectionDAGBuilder::visitSDiv(const User &I) {
2899  SDValue Op1 = getValue(I.getOperand(0));
2900  SDValue Op2 = getValue(I.getOperand(1));
2901 
2902  SDNodeFlags Flags;
2903  Flags.setExact(isa<PossiblyExactOperator>(&I) &&
2904  cast<PossiblyExactOperator>(&I)->isExact());
2905  setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1,
2906  Op2, Flags));
2907 }
2908 
// Lowers an integer comparison, given either as an ICmpInst or as a constant
// expression, to a SETCC node whose result type is the value type of I.
2909 void SelectionDAGBuilder::visitICmp(const User &I) {
  // NOTE(review): the declaration (and initial value) of predicate is not
  // visible in this listing (the embedded numbering skips line 2910). If I is
  // neither an ICmpInst nor a ConstantExpr, that initial value is what reaches
  // getICmpCondCode below - confirm against the real file.
2911  if (const ICmpInst *IC = dyn_cast<ICmpInst>(&I))
2912  predicate = IC->getPredicate();
2913  else if (const ConstantExpr *IC = dyn_cast<ConstantExpr>(&I))
2914  predicate = ICmpInst::Predicate(IC->getPredicate());
2915  SDValue Op1 = getValue(I.getOperand(0));
2916  SDValue Op2 = getValue(I.getOperand(1));
  // Translate the IR predicate into the corresponding ISD condition code.
2917  ISD::CondCode Opcode = getICmpCondCode(predicate);
2918 
2919  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2920  I.getType());
2921  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode));
2922 }
2923 
// Lowers a floating-point comparison, given either as an FCmpInst or as a
// constant expression, to a SETCC node whose result type is the value type
// of I.
2924 void SelectionDAGBuilder::visitFCmp(const User &I) {
  // NOTE(review): the declaration (and initial value) of predicate is not
  // visible in this listing (the embedded numbering skips line 2925). If I is
  // neither an FCmpInst nor a ConstantExpr, that initial value is what reaches
  // getFCmpCondCode below - confirm against the real file.
2926  if (const FCmpInst *FC = dyn_cast<FCmpInst>(&I))
2927  predicate = FC->getPredicate();
2928  else if (const ConstantExpr *FC = dyn_cast<ConstantExpr>(&I))
2929  predicate = FCmpInst::Predicate(FC->getPredicate());
2930  SDValue Op1 = getValue(I.getOperand(0));
2931  SDValue Op2 = getValue(I.getOperand(1));
2932 
2933  ISD::CondCode Condition = getFCmpCondCode(predicate);
  // When NaNs are excluded, either by the operation's own no-NaNs flag or by
  // the global NoNaNsFPMath option, use the NaN-free variant of the condition.
2934  auto *FPMO = dyn_cast<FPMathOperator>(&I);
2935  if ((FPMO && FPMO->hasNoNaNs()) || TM.Options.NoNaNsFPMath)
2936  Condition = getFCmpCodeWithoutNaN(Condition);
2937 
2938  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
2939  I.getType());
2940  setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition));
2941 }
2942 
2943 // Check if the condition of the select has one use or two users that are both
2944 // selects with the same condition.
2945 static bool hasOnlySelectUsers(const Value *Cond) {
2946  return llvm::all_of(Cond->users(), [](const Value *V) {
2947  return isa<SelectInst>(V);
2948  });
2949 }
2950 
// Lowers a select. One node is produced per value of the (possibly aggregate)
// result and the nodes are combined with MERGE_VALUES. When all result value
// types are identical and the select matches a min/max pattern that is legal
// (or custom) on the target, the corresponding min/max opcode is used instead
// of a select on the condition.
2951 void SelectionDAGBuilder::visitSelect(const User &I) {
  // NOTE(review): several lines of this function are missing from this
  // listing (the embedded numbering skips 2952-2953, 2964, 2986, 3004 and
  // 3022). In particular the declaration of ValueVTs and the beginning of the
  // call that ends in "ValueVTs);" below are not shown - confirm against the
  // real file.
2954  ValueVTs);
2955  unsigned NumValues = ValueVTs.size();
2956  if (NumValues == 0) return;
2957 
2958  SmallVector<SDValue, 4> Values(NumValues);
2959  SDValue Cond = getValue(I.getOperand(0));
2960  SDValue LHSVal = getValue(I.getOperand(1));
2961  SDValue RHSVal = getValue(I.getOperand(2));
  // Operands that precede the two values in every emitted node; for a plain
  // select this is just the condition.
2962  auto BaseOps = {Cond};
  // NOTE(review): both arms of this conditional expression are missing from
  // the listing; it chooses the opcode depending on whether the condition is
  // a vector.
2963  ISD::NodeType OpCode = Cond.getValueType().isVector() ?
2965 
2966  // Min/max matching is only viable if all output VTs are the same.
2967  if (is_splat(ValueVTs)) {
2968  EVT VT = ValueVTs[0];
2969  LLVMContext &Ctx = *DAG.getContext();
2970  auto &TLI = DAG.getTargetLoweringInfo();
2971 
2972  // We care about the legality of the operation after it has been type
2973  // legalized.
2974  while (TLI.getTypeAction(Ctx, VT) != TargetLoweringBase::TypeLegal &&
2975  VT != TLI.getTypeToTransformTo(Ctx, VT))
2976  VT = TLI.getTypeToTransformTo(Ctx, VT);
2977 
2978  // If the vselect is legal, assume we want to leave this as a vector setcc +
2979  // vselect. Otherwise, if this is going to be scalarized, we want to see if
2980  // min/max is legal on the scalar type.
2981  bool UseScalarMinMax = VT.isVector() &&
2982  !TLI.isOperationLegalOrCustom(ISD::VSELECT, VT);
2983 
2984  Value *LHS, *RHS;
2985  auto SPR = matchSelectPattern(const_cast<User*>(&I), LHS, RHS);
  // NOTE(review): the declaration of Opc is missing from the listing. The
  // check after the switch compares it with ISD::DELETED_NODE, so that is
  // presumably its initial "no match" value - confirm.
2987  switch (SPR.Flavor) {
2988  case SPF_UMAX: Opc = ISD::UMAX; break;
2989  case SPF_UMIN: Opc = ISD::UMIN; break;
2990  case SPF_SMAX: Opc = ISD::SMAX; break;
2991  case SPF_SMIN: Opc = ISD::SMIN; break;
2992  case SPF_FMINNUM:
2993  switch (SPR.NaNBehavior) {
2994  case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
2995  case SPNB_RETURNS_NAN: Opc = ISD::FMINIMUM; break;
2996  case SPNB_RETURNS_OTHER: Opc = ISD::FMINNUM; break;
2997  case SPNB_RETURNS_ANY: {
  // Either NaN behavior is acceptable: prefer FMINNUM, then FMINIMUM, on
  // the (legalized) type, then fall back to the scalar type.
2998  if (TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT))
2999  Opc = ISD::FMINNUM;
3000  else if (TLI.isOperationLegalOrCustom(ISD::FMINIMUM, VT))
3001  Opc = ISD::FMINIMUM;
3002  else if (UseScalarMinMax)
  // NOTE(review): both arms of this conditional expression are missing
  // from the listing.
3003  Opc = TLI.isOperationLegalOrCustom(ISD::FMINNUM, VT.getScalarType()) ?
3005  break;
3006  }
3007  }
3008  break;
3009  case SPF_FMAXNUM:
3010  switch (SPR.NaNBehavior) {
3011  case SPNB_NA: llvm_unreachable("No NaN behavior for FP op?");
3012  case SPNB_RETURNS_NAN: Opc = ISD::FMAXIMUM; break;
3013  case SPNB_RETURNS_OTHER: Opc = ISD::FMAXNUM; break;
3014  case SPNB_RETURNS_ANY:
3015 
  // Mirror of the FMINNUM case above, for the maximum operations.
3016  if (TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT))
3017  Opc = ISD::FMAXNUM;
3018  else if (TLI.isOperationLegalOrCustom(ISD::FMAXIMUM, VT))
3019  Opc = ISD::FMAXIMUM;
3020  else if (UseScalarMinMax)
  // NOTE(review): both arms of this conditional expression are missing
  // from the listing.
3021  Opc = TLI.isOperationLegalOrCustom(ISD::FMAXNUM, VT.getScalarType()) ?
3023  break;
3024  }
3025  break;
3026  default: break;
3027  }
3028 
3029  if (Opc != ISD::DELETED_NODE &&
3030  (TLI.isOperationLegalOrCustom(Opc, VT) ||
3031  (UseScalarMinMax &&
3032  TLI.isOperationLegalOrCustom(Opc, VT.getScalarType()))) &&
3033  // If the underlying comparison instruction is used by any other
3034  // instruction, the consumed instructions won't be destroyed, so it is
3035  // not profitable to convert to a min/max.
3036  hasOnlySelectUsers(cast<SelectInst>(I).getCondition())) {
  // Switch to the min/max opcode on the matched operands. Min/max nodes
  // take only the two values, so the condition operand is dropped.
3037  OpCode = Opc;
3038  LHSVal = getValue(LHS);
3039  RHSVal = getValue(RHS);
3040  BaseOps = {};
3041  }
3042  }
3043 
  // Emit one node per result value, pairing the i-th results of both inputs.
3044  for (unsigned i = 0; i != NumValues; ++i) {
3045  SmallVector<SDValue, 3> Ops(BaseOps.begin(), BaseOps.end());
3046  Ops.push_back(SDValue(LHSVal.getNode(), LHSVal.getResNo() + i));
3047  Ops.push_back(SDValue(RHSVal.getNode(), RHSVal.getResNo() + i));
3048  Values[i] = DAG.getNode(OpCode, getCurSDLoc(),
3049  LHSVal.getNode()->getValueType(LHSVal.getResNo()+i),
3050  Ops);
3051  }
3052 
3053  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3054  DAG.getVTList(ValueVTs), Values));
3055 }
3056 
3057 void SelectionDAGBuilder::visitTrunc(const User &I) {
3058  // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest).
3059  SDValue N = getValue(I.getOperand(0));
3060  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3061  I.getType());
3062  setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N));
3063 }
3064 
3065 void SelectionDAGBuilder::visitZExt(const User &I) {
3066  // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3067  // ZExt also can't be a cast to bool for same reason. So, nothing much to do
3068  SDValue N = getValue(I.getOperand(0));
3069  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3070  I.getType());
3071  setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N));
3072 }
3073 
3074 void SelectionDAGBuilder::visitSExt(const User &I) {
3075  // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest).
3076  // SExt also can't be a cast to bool for same reason. So, nothing much to do
3077  SDValue N = getValue(I.getOperand(0));
3078  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3079  I.getType());
3080  setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N));
3081 }
3082 
3083 void SelectionDAGBuilder::visitFPTrunc(const User &I) {
3084  // FPTrunc is never a no-op cast, no need to check
3085  SDValue N = getValue(I.getOperand(0));
3086  SDLoc dl = getCurSDLoc();
3087  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3088  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3089  setValue(&I, DAG.getNode(ISD::FP_ROUND, dl, DestVT, N,
3090  DAG.getTargetConstant(
3091  0, dl, TLI.getPointerTy(DAG.getDataLayout()))));
3092 }
3093 
3094 void SelectionDAGBuilder::visitFPExt(const User &I) {
3095  // FPExt is never a no-op cast, no need to check
3096  SDValue N = getValue(I.getOperand(0));
3097  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3098  I.getType());
3099  setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N));
3100 }
3101 
3102 void SelectionDAGBuilder::visitFPToUI(const User &I) {
3103  // FPToUI is never a no-op cast, no need to check
3104  SDValue N = getValue(I.getOperand(0));
3105  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3106  I.getType());
3107  setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N));
3108 }
3109 
3110 void SelectionDAGBuilder::visitFPToSI(const User &I) {
3111  // FPToSI is never a no-op cast, no need to check
3112  SDValue N = getValue(I.getOperand(0));
3113  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3114  I.getType());
3115  setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N));
3116 }
3117 
3118 void SelectionDAGBuilder::visitUIToFP(const User &I) {
3119  // UIToFP is never a no-op cast, no need to check
3120  SDValue N = getValue(I.getOperand(0));
3121  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3122  I.getType());
3123  setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N));
3124 }
3125 
3126 void SelectionDAGBuilder::visitSIToFP(const User &I) {
3127  // SIToFP is never a no-op cast, no need to check
3128  SDValue N = getValue(I.getOperand(0));
3129  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3130  I.getType());
3131  setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N));
3132 }
3133 
3134 void SelectionDAGBuilder::visitPtrToInt(const User &I) {
3135  // What to do depends on the size of the integer and the size of the pointer.
3136  // We can either truncate, zero extend, or no-op, accordingly.
3137  SDValue N = getValue(I.getOperand(0));
3138  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3139  I.getType());
3140  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3141 }
3142 
3143 void SelectionDAGBuilder::visitIntToPtr(const User &I) {
3144  // What to do depends on the size of the integer and the size of the pointer.
3145  // We can either truncate, zero extend, or no-op, accordingly.
3146  SDValue N = getValue(I.getOperand(0));
3147  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3148  I.getType());
3149  setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT));
3150 }
3151 
3152 void SelectionDAGBuilder::visitBitCast(const User &I) {
3153  SDValue N = getValue(I.getOperand(0));
3154  SDLoc dl = getCurSDLoc();
3155  EVT DestVT = DAG.getTargetLoweringInfo().getValueType(DAG.getDataLayout(),
3156  I.getType());
3157 
3158  // BitCast assures us that source and destination are the same size so this is
3159  // either a BITCAST or a no-op.
3160  if (DestVT != N.getValueType())
3161  setValue(&I, DAG.getNode(ISD::BITCAST, dl,
3162  DestVT, N)); // convert types.
3163  // Check if the original LLVM IR Operand was a ConstantInt, because getValue()
3164  // might fold any kind of constant expression to an integer constant and that
3165  // is not what we are looking for. Only recognize a bitcast of a genuine
3166  // constant integer as an opaque constant.
3167  else if(ConstantInt *C = dyn_cast<ConstantInt>(I.getOperand(0)))
3168  setValue(&I, DAG.getConstant(C->getValue(), dl, DestVT, /*isTarget=*/false,
3169  /*isOpaque*/true));
3170  else
3171  setValue(&I, N); // noop cast.
3172 }
3173 
3174 void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) {
3175  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3176  const Value *SV = I.getOperand(0);
3177  SDValue N = getValue(SV);
3178  EVT DestVT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3179 
3180  unsigned SrcAS = SV->getType()->getPointerAddressSpace();
3181  unsigned DestAS = I.getType()->getPointerAddressSpace();
3182 
3183  if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS))
3184  N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS);
3185 
3186  setValue(&I, N);
3187 }
3188 
3189 void SelectionDAGBuilder::visitInsertElement(const User &I) {
3190  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3191  SDValue InVec = getValue(I.getOperand(0));
3192  SDValue InVal = getValue(I.getOperand(1));
3193  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), getCurSDLoc(),
3194  TLI.getVectorIdxTy(DAG.getDataLayout()));
3195  setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(),
3196  TLI.getValueType(DAG.getDataLayout(), I.getType()),
3197  InVec, InVal, InIdx));
3198 }
3199 
3200 void SelectionDAGBuilder::visitExtractElement(const User &I) {
3201  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3202  SDValue InVec = getValue(I.getOperand(0));
3203  SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), getCurSDLoc(),
3204  TLI.getVectorIdxTy(DAG.getDataLayout()));
3205  setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(),
3206  TLI.getValueType(DAG.getDataLayout(), I.getType()),
3207  InVec, InIdx));
3208 }
3209 
// Lowers shufflevector. Four strategies are tried in order:
//  - mask and sources have the same length: emit a vector shuffle directly;
//  - mask longer than the sources: emit CONCAT_VECTORS if the mask is a pure
//    concatenation, otherwise pad the sources with undef and shuffle;
//  - mask shorter than the sources: extract one subvector per source and
//    shuffle those, when every used index falls in one aligned window;
//  - otherwise: extract each element individually and build a vector.
3210 void SelectionDAGBuilder::visitShuffleVector(const User &I) {
3211  SDValue Src1 = getValue(I.getOperand(0));
3212  SDValue Src2 = getValue(I.getOperand(1));
3213  SDLoc DL = getCurSDLoc();
3214 
  // NOTE(review): the declaration of Mask is not visible in this listing (the
  // embedded numbering skips line 3215). It is filled with the shuffle mask
  // and read as a sequence of int indices, where negative means undef -
  // confirm its exact type.
3216  ShuffleVectorInst::getShuffleMask(cast<Constant>(I.getOperand(2)), Mask);
3217  unsigned MaskNumElts = Mask.size();
3218 
3219  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3220  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
3221  EVT SrcVT = Src1.getValueType();
3222  unsigned SrcNumElts = SrcVT.getVectorNumElements();
3223 
  // Simple case: the result has as many elements as each source.
3224  if (SrcNumElts == MaskNumElts) {
3225  setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, Mask));
3226  return;
3227  }
3228 
3229  // Normalize the shuffle vector since mask and vector length don't match.
3230  if (SrcNumElts < MaskNumElts) {
3231  // Mask is longer than the source vectors. We can use concatenate vector to
3232  // make the mask and vectors lengths match.
3233 
3234  if (MaskNumElts % SrcNumElts == 0) {
3235  // Mask length is a multiple of the source vector length.
3236  // Check if the shuffle is some kind of concatenation of the input
3237  // vectors.
3238  unsigned NumConcat = MaskNumElts / SrcNumElts;
3239  bool IsConcat = true;
  // For each SrcVT-sized piece of the result: -1 while unknown/undef,
  // otherwise 0 or 1 for the source it is taken from.
3240  SmallVector<int, 8> ConcatSrcs(NumConcat, -1);
3241  for (unsigned i = 0; i != MaskNumElts; ++i) {
3242  int Idx = Mask[i];
3243  if (Idx < 0)
3244  continue;
3245  // Ensure the indices in each SrcVT sized piece are sequential and that
3246  // the same source is used for the whole piece.
3247  if ((Idx % SrcNumElts != (i % SrcNumElts)) ||
3248  (ConcatSrcs[i / SrcNumElts] >= 0 &&
3249  ConcatSrcs[i / SrcNumElts] != (int)(Idx / SrcNumElts))) {
3250  IsConcat = false;
3251  break;
3252  }
3253  // Remember which source this index came from.
3254  ConcatSrcs[i / SrcNumElts] = Idx / SrcNumElts;
3255  }
3256 
3257  // The shuffle is concatenating multiple vectors together. Just emit
3258  // a CONCAT_VECTORS operation.
3259  if (IsConcat) {
3260  SmallVector<SDValue, 8> ConcatOps;
3261  for (auto Src : ConcatSrcs) {
3262  if (Src < 0)
3263  ConcatOps.push_back(DAG.getUNDEF(SrcVT));
3264  else if (Src == 0)
3265  ConcatOps.push_back(Src1);
3266  else
3267  ConcatOps.push_back(Src2);
3268  }
3269  setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, ConcatOps));
3270  return;
3271  }
3272  }
3273 
  // Not a pure concatenation: round the mask length up to a multiple of the
  // source length so each source can be widened by concatenating undefs.
3274  unsigned PaddedMaskNumElts = alignTo(MaskNumElts, SrcNumElts);
3275  unsigned NumConcat = PaddedMaskNumElts / SrcNumElts;
3276  EVT PaddedVT = EVT::getVectorVT(*DAG.getContext(), VT.getScalarType(),
3277  PaddedMaskNumElts);
3278 
3279  // Pad both vectors with undefs to make them the same length as the mask.
3280  SDValue UndefVal = DAG.getUNDEF(SrcVT);
3281 
3282  SmallVector<SDValue, 8> MOps1(NumConcat, UndefVal);
3283  SmallVector<SDValue, 8> MOps2(NumConcat, UndefVal);
3284  MOps1[0] = Src1;
3285  MOps2[0] = Src2;
3286 
  // An undef source stays a single undef of the padded type instead of a
  // concatenation of undefs.
3287  Src1 = Src1.isUndef()
3288  ? DAG.getUNDEF(PaddedVT)
3289  : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps1);
3290  Src2 = Src2.isUndef()
3291  ? DAG.getUNDEF(PaddedVT)
3292  : DAG.getNode(ISD::CONCAT_VECTORS, DL, PaddedVT, MOps2);
3293 
3294  // Readjust mask for new input vector length.
3295  SmallVector<int, 8> MappedOps(PaddedMaskNumElts, -1);
3296  for (unsigned i = 0; i != MaskNumElts; ++i) {
3297  int Idx = Mask[i];
  // Indices into the second source move up by the amount of padding that
  // was appended to the first source.
3298  if (Idx >= (int)SrcNumElts)
3299  Idx -= SrcNumElts - PaddedMaskNumElts;
3300  MappedOps[i] = Idx;
3301  }
3302 
3303  SDValue Result = DAG.getVectorShuffle(PaddedVT, DL, Src1, Src2, MappedOps);
3304 
3305  // If the concatenated vector was padded, extract a subvector with the
3306  // correct number of elements.
3307  if (MaskNumElts != PaddedMaskNumElts)
3308  Result = DAG.getNode(
3309  ISD::EXTRACT_SUBVECTOR, DL, VT, Result,
3310  DAG.getConstant(0, DL, TLI.getVectorIdxTy(DAG.getDataLayout())));
3311 
3312  setValue(&I, Result);
3313  return;
3314  }
3315 
3316  if (SrcNumElts > MaskNumElts) {
3317  // Analyze the access pattern of the vector to see if we can extract
3318  // two subvectors and do the shuffle.
3319  int StartIdx[2] = { -1, -1 }; // StartIdx to extract from
3320  bool CanExtract = true;
3321  for (int Idx : Mask) {
3322  unsigned Input = 0;
3323  if (Idx < 0)
3324  continue;
3325 
  // Translate the index into (source number, index within that source).
3326  if (Idx >= (int)SrcNumElts) {
3327  Input = 1;
3328  Idx -= SrcNumElts;
3329  }
3330 
3331  // If all the indices come from the same MaskNumElts sized portion of
3332  // the sources we can use extract. Also make sure the extract wouldn't
3333  // extract past the end of the source.
3334  int NewStartIdx = alignDown(Idx, MaskNumElts);
3335  if (NewStartIdx + MaskNumElts > SrcNumElts ||
3336  (StartIdx[Input] >= 0 && StartIdx[Input] != NewStartIdx))
3337  CanExtract = false;
3338  // Make sure we always update StartIdx as we use it to track if all
3339  // elements are undef.
3340  StartIdx[Input] = NewStartIdx;
3341  }
3342 
3343  if (StartIdx[0] < 0 && StartIdx[1] < 0) {
3344  setValue(&I, DAG.getUNDEF(VT)); // Vectors are not used.
3345  return;
3346  }
3347  if (CanExtract) {
3348  // Extract appropriate subvector and generate a vector shuffle
3349  for (unsigned Input = 0; Input < 2; ++Input) {
3350  SDValue &Src = Input == 0 ? Src1 : Src2;
3351  if (StartIdx[Input] < 0)
3352  Src = DAG.getUNDEF(VT);
3353  else {
3354  Src = DAG.getNode(
3355  ISD::EXTRACT_SUBVECTOR, DL, VT, Src,
3356  DAG.getConstant(StartIdx[Input], DL,
3357  TLI.getVectorIdxTy(DAG.getDataLayout())));
3358  }
3359  }
3360 
3361  // Calculate new mask.
3362  SmallVector<int, 8> MappedOps(Mask.begin(), Mask.end());
3363  for (int &Idx : MappedOps) {
  // Second-source indices become relative to the extracted window and are
  // offset by MaskNumElts; first-source indices become relative to their
  // window; undef (negative) indices are left untouched.
3364  if (Idx >= (int)SrcNumElts)
3365  Idx -= SrcNumElts + StartIdx[1] - MaskNumElts;
3366  else if (Idx >= 0)
3367  Idx -= StartIdx[0];
3368  }
3369 
3370  setValue(&I, DAG.getVectorShuffle(VT, DL, Src1, Src2, MappedOps));
3371  return;
3372  }
3373  }
3374 
3375  // We can't use either concat vectors or extract subvectors so fall back to
3376  // replacing the shuffle with extract and build vector.
3377  // (One EXTRACT_VECTOR_ELT per mask element, combined by a build vector.)
3378  EVT EltVT = VT.getVectorElementType();
3379  EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
  // NOTE(review): the declaration of Ops is not visible in this listing (the
  // embedded numbering skips line 3380). It collects one SDValue per mask
  // element for getBuildVector below - confirm its exact type.
3381  for (int Idx : Mask) {
3382  SDValue Res;
3383 
3384  if (Idx < 0) {
3385  Res = DAG.getUNDEF(EltVT);
3386  } else {
  // Pick the source first, then make the index relative to that source.
3387  SDValue &Src = Idx < (int)SrcNumElts ? Src1 : Src2;
3388  if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts;
3389 
3390  Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL,
3391  EltVT, Src, DAG.getConstant(Idx, DL, IdxVT));
3392  }
3393 
3394  Ops.push_back(Res);
3395  }
3396 
3397  setValue(&I, DAG.getBuildVector(VT, DL, Ops));
3398 }
3399 
// Lower an insertvalue (instruction or constant expression) by rebuilding the
// whole aggregate as a MERGE_VALUES node. The aggregate is handled as a flat
// list of NumAggValues values: the NumValValues slots starting at LinearIndex
// are taken from operand 1 and every other slot is copied from operand 0. A
// slot whose source operand is an UndefValue becomes an UNDEF node of that
// slot's aggregate value type.
3400 void SelectionDAGBuilder::visitInsertValue(const User &I) {
3401  ArrayRef<unsigned> Indices;
3402  if (const InsertValueInst *IV = dyn_cast<InsertValueInst>(&I))
3403  Indices = IV->getIndices();
3404  else
3405  Indices = cast<ConstantExpr>(&I)->getIndices();
3406 
      // Op0 is the aggregate being updated, Op1 the value inserted into it.
3407  const Value *Op0 = I.getOperand(0);
3408  const Value *Op1 = I.getOperand(1);
3409  Type *AggTy = I.getType();
3410  Type *ValTy = Op1->getType();
3411  bool IntoUndef = isa<UndefValue>(Op0);
3412  bool FromUndef = isa<UndefValue>(Op1);
3413 
      // Position of the first inserted value within the flattened aggregate.
3414  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3415 
3416  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3417  SmallVector<EVT, 4> AggValueVTs;
3418  ComputeValueVTs(TLI, DAG.getDataLayout(), AggTy, AggValueVTs);
3419  SmallVector<EVT, 4> ValValueVTs;
3420  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3421 
3422  unsigned NumAggValues = AggValueVTs.size();
3423  unsigned NumValValues = ValValueVTs.size();
3424  SmallVector<SDValue, 4> Values(NumAggValues);
3425 
3426  // Ignore an insertvalue that produces an empty object
3427  if (!NumAggValues) {
3428  setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3429  return;
3430  }
3431 
3432  SDValue Agg = getValue(Op0);
      // i is shared by the three loops below and walks every aggregate slot once.
3433  unsigned i = 0;
3434  // Copy the beginning value(s) from the original aggregate.
3435  for (; i != LinearIndex; ++i)
3436  Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3437  SDValue(Agg.getNode(), Agg.getResNo() + i);
3438  // Copy values from the inserted value(s).
3439  if (NumValValues) {
3440  SDValue Val = getValue(Op1);
      // i - LinearIndex is the result index within the inserted value.
3441  for (; i != LinearIndex + NumValValues; ++i)
3442  Values[i] = FromUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3443  SDValue(Val.getNode(), Val.getResNo() + i - LinearIndex);
3444  }
3445  // Copy remaining value(s) from the original aggregate.
3446  for (; i != NumAggValues; ++i)
3447  Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) :
3448  SDValue(Agg.getNode(), Agg.getResNo() + i);
3449 
3450  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3451  DAG.getVTList(AggValueVTs), Values));
3452 }
3453 
// Lower an extractvalue (instruction or constant expression). The extracted
// value is the run of NumValValues consecutive results of the aggregate's node
// starting at LinearIndex, repackaged as a MERGE_VALUES node. When the
// aggregate operand is an UndefValue, each extracted slot becomes an UNDEF node
// of the corresponding result type instead.
3454 void SelectionDAGBuilder::visitExtractValue(const User &I) {
3455  ArrayRef<unsigned> Indices;
3456  if (const ExtractValueInst *EV = dyn_cast<ExtractValueInst>(&I))
3457  Indices = EV->getIndices();
3458  else
3459  Indices = cast<ConstantExpr>(&I)->getIndices();
3460 
3461  const Value *Op0 = I.getOperand(0);
3462  Type *AggTy = Op0->getType();
3463  Type *ValTy = I.getType();
3464  bool OutOfUndef = isa<UndefValue>(Op0);
3465 
      // Position of the first extracted value within the flattened aggregate.
3466  unsigned LinearIndex = ComputeLinearIndex(AggTy, Indices);
3467 
3468  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3469  SmallVector<EVT, 4> ValValueVTs;
3470  ComputeValueVTs(TLI, DAG.getDataLayout(), ValTy, ValValueVTs);
3471 
3472  unsigned NumValValues = ValValueVTs.size();
3473 
3474  // Ignore an extractvalue that produces an empty object
3475  if (!NumValValues) {
3476  setValue(&I, DAG.getUNDEF(MVT(MVT::Other)));
3477  return;
3478  }
3479 
3480  SmallVector<SDValue, 4> Values(NumValValues);
3481 
3482  SDValue Agg = getValue(Op0);
3483  // Copy out the selected value(s).
      // i indexes the aggregate's results; i - LinearIndex indexes the output.
3484  for (unsigned i = LinearIndex; i != LinearIndex + NumValValues; ++i)
3485  Values[i - LinearIndex] =
3486  OutOfUndef ?
3487  DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) :
3488  SDValue(Agg.getNode(), Agg.getResNo() + i);
3489 
3490  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(),
3491  DAG.getVTList(ValValueVTs), Values));
3492 }
3493 
// Lower a getelementptr into address arithmetic on the base pointer value N.
// Each GEP index contributes one term:
//  - a struct field index adds that field's byte offset from the struct layout
//    (field 0 adds nothing);
//  - any other index adds Idx * alloc-size of the indexed type; constant (or
//    constant-splat) indices are folded into a single constant, and a
//    power-of-two element size is emitted as a shift instead of a multiply.
// For a GEP whose result is a vector, a scalar base and scalar indices are
// splatted to VectorWidth elements first. The final N becomes the value of I.
3494 void SelectionDAGBuilder::visitGetElementPtr(const User &I) {
3495  Value *Op0 = I.getOperand(0);
3496  // Note that the pointer operand may be a vector of pointers. Take the scalar
3497  // element which holds a pointer.
3498  unsigned AS = Op0->getType()->getScalarType()->getPointerAddressSpace();
3499  SDValue N = getValue(Op0);
3500  SDLoc dl = getCurSDLoc();
3501 
3502  // Normalize Vector GEP - all scalar operands should be converted to the
3503  // splat vector.
      // VectorWidth is 0 for a scalar GEP, which also disables the splat paths.
3504  unsigned VectorWidth = I.getType()->isVectorTy() ?
3505  cast<VectorType>(I.getType())->getVectorNumElements() : 0;
3506 
3507  if (VectorWidth && !N.getValueType().isVector()) {
3508  LLVMContext &Context = *DAG.getContext();
3509  EVT VT = EVT::getVectorVT(Context, N.getValueType(), VectorWidth);
3510  N = DAG.getSplatBuildVector(VT, dl, N);
3511  }
3512 
3513  for (gep_type_iterator GTI = gep_type_begin(&I), E = gep_type_end(&I);
3514  GTI != E; ++GTI) {
3515  const Value *Idx = GTI.getOperand();
3516  if (StructType *StTy = GTI.getStructTypeOrNull()) {
3517  unsigned Field = cast<Constant>(Idx)->getUniqueInteger().getZExtValue();
3518  if (Field) {
3519  // N = N + Offset
      // NOTE(review): `DL` (distinct from the SDLoc `dl`) is not declared in
      // this function; presumably a DataLayout pointer member -- confirm.
3520  uint64_t Offset = DL->getStructLayout(StTy)->getElementOffset(Field);
3521 
3522  // In an inbounds GEP with an offset that is nonnegative even when
3523  // interpreted as signed, assume there is no unsigned overflow.
3524  SDNodeFlags Flags;
3525  if (int64_t(Offset) >= 0 && cast<GEPOperator>(I).isInBounds())
3526  Flags.setNoUnsignedWrap(true);
3527 
3528  N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N,
3529  DAG.getConstant(Offset, dl, N.getValueType()), Flags);
3530  }
3531  } else {
      // Offsets are computed at the index width of the pointer's address space.
3532  unsigned IdxSize = DAG.getDataLayout().getIndexSizeInBits(AS);
3533  MVT IdxTy = MVT::getIntegerVT(IdxSize);
3534  APInt ElementSize(IdxSize, DL->getTypeAllocSize(GTI.getIndexedType()));
3535 
3536  // If this is a scalar constant or a splat vector of constants,
3537  // handle it quickly.
3538  const auto *CI = dyn_cast<ConstantInt>(Idx);
3539  if (!CI && isa<ConstantDataVector>(Idx) &&
3540  cast<ConstantDataVector>(Idx)->getSplatValue())
3541  CI = cast<ConstantInt>(cast<ConstantDataVector>(Idx)->getSplatValue());
3542 
3543  if (CI) {
      // A zero index contributes nothing to the address.
3544  if (CI->isZero())
3545  continue;
3546  APInt Offs = ElementSize * CI->getValue().sextOrTrunc(IdxSize);
3547  LLVMContext &Context = *DAG.getContext();
3548  SDValue OffsVal = VectorWidth ?
3549  DAG.getConstant(Offs, dl, EVT::getVectorVT(Context, IdxTy, VectorWidth)) :
3550  DAG.getConstant(Offs, dl, IdxTy);
3551 
3552  // In an inbounds GEP with an offset that is nonnegative even when
3553  // interpreted as signed, assume there is no unsigned overflow.
3554  SDNodeFlags Flags;
3555  if (Offs.isNonNegative() && cast<GEPOperator>(I).isInBounds())
3556  Flags.setNoUnsignedWrap(true);
3557 
3558  N = DAG.getNode(ISD::ADD, dl, N.getValueType(), N, OffsVal, Flags);
3559  continue;
3560  }
3561 
3562  // N = N + Idx * ElementSize;
3563  SDValue IdxN = getValue(Idx);
3564 
3565  if (!IdxN.getValueType().isVector() && VectorWidth) {
      // NOTE(review): the local `Context` reference declared in the constant
      // branch above is out of scope here; `*Context` presumably dereferences
      // a pointer member of the builder -- confirm.
3566  EVT VT = EVT::getVectorVT(*Context, IdxN.getValueType(), VectorWidth);
3567  IdxN = DAG.getSplatBuildVector(VT, dl, IdxN);
3568  }
3569 
3570  // If the index is smaller or larger than intptr_t, truncate or extend
3571  // it.
3572  IdxN = DAG.getSExtOrTrunc(IdxN, dl, N.getValueType());
3573 
3574  // If this is a multiply by a power of two, turn it into a shl
3575  // immediately. This is a very common case.
      // An element size of 1 needs no scaling at all.
3576  if (ElementSize != 1) {
3577  if (ElementSize.isPowerOf2()) {
3578  unsigned Amt = ElementSize.logBase2();
3579  IdxN = DAG.getNode(ISD::SHL, dl,
3580  N.getValueType(), IdxN,
3581  DAG.getConstant(Amt, dl, IdxN.getValueType()));
3582  } else {
3583  SDValue Scale = DAG.getConstant(ElementSize, dl, IdxN.getValueType());
3584  IdxN = DAG.getNode(ISD::MUL, dl,
3585  N.getValueType(), IdxN, Scale);
3586  }
3587  }
3588 
      // Unlike the constant paths above, no wrap flags are set on this add.
3589  N = DAG.getNode(ISD::ADD, dl,
3590  N.getValueType(), N, IdxN);
3591  }
3592  }
3593 
3594  setValue(&I, N);
3595 }
3596 
// Lower an alloca that is not in FuncInfo.StaticAllocaMap to a
// DYNAMIC_STACKALLOC node. The byte size is ArraySize (zero-extended or
// truncated to the alloca address space's pointer type) times the allocated
// type's alloc size, rounded up to a multiple of StackAlign. The node's value
// result becomes the value of I and its chain result becomes the new DAG root.
3597 void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) {
3598  // If this is a fixed sized alloca in the entry block of the function,
3599  // allocate it statically on the stack.
3600  if (FuncInfo.StaticAllocaMap.count(&I))
3601  return; // getValue will auto-populate this.
3602 
3603  SDLoc dl = getCurSDLoc();
3604  Type *Ty = I.getAllocatedType();
3605  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3606  auto &DL = DAG.getDataLayout();
3607  uint64_t TySize = DL.getTypeAllocSize(Ty);
      // Use the larger of the type's preferred alignment and the alloca's own.
3608  unsigned Align =
3609  std::max((unsigned)DL.getPrefTypeAlignment(Ty), I.getAlignment());
3610 
3611  SDValue AllocSize = getValue(I.getArraySize());
3612 
3613  EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout(), DL.getAllocaAddrSpace());
3614  if (AllocSize.getValueType() != IntPtr)
3615  AllocSize = DAG.getZExtOrTrunc(AllocSize, dl, IntPtr);
3616 
      // Total bytes = element count * element alloc size.
3617  AllocSize = DAG.getNode(ISD::MUL, dl, IntPtr,
3618  AllocSize,
3619  DAG.getConstant(TySize, dl, IntPtr));
3620 
3621  // Handle alignment. If the requested alignment is less than or equal to
3622  // the stack alignment, ignore it (Align is reset to 0). Otherwise the
3623  // requested alignment is passed on as an operand of DYNAMIC_STACKALLOC.
3624  unsigned StackAlign =
      // NOTE(review): listing line 3625 (the initializer of StackAlign) is absent
      // from this copy -- restore it from upstream before building.
3626  if (Align <= StackAlign)
3627  Align = 0;
3628 
3629  // Round the size of the allocation up to the stack alignment size
3630  // by adding SA-1 to the size. This doesn't overflow because we're computing
3631  // an address inside an alloca.
3632  SDNodeFlags Flags;
3633  Flags.setNoUnsignedWrap(true);
3634  AllocSize = DAG.getNode(ISD::ADD, dl, AllocSize.getValueType(), AllocSize,
3635  DAG.getConstant(StackAlign - 1, dl, IntPtr), Flags);
3636 
3637  // Mask out the low bits for alignment purposes.
3638  AllocSize =
3639  DAG.getNode(ISD::AND, dl, AllocSize.getValueType(), AllocSize,
3640  DAG.getConstant(~(uint64_t)(StackAlign - 1), dl, IntPtr));
3641 
      // Operands: chain, size in bytes, alignment (0 when the stack's suffices).
3642  SDValue Ops[] = {getRoot(), AllocSize, DAG.getConstant(Align, dl, IntPtr)};
3643  SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other);
3644  SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, dl, VTs, Ops);
3645  setValue(&I, DSA);
3646  DAG.setRoot(DSA.getValue(1));
3647 
3648  assert(FuncInfo.MF->getFrameInfo().hasVarSizedObjects());
3649 }
3650 
// Lower a non-atomic load. Atomic loads are forwarded to visitAtomicLoad and,
// when the target supports swifterror, loads from a swifterror argument or
// alloca are forwarded to visitLoadFromSwiftError.
//
// The loaded type is split into ValueVTs/Offsets; one load node is emitted per
// part at Ptr + Offsets[i] and the parts are combined with MERGE_VALUES.
// Chain selection:
//  - volatile loads, or loads with more than MaxParallelChains parts, start
//    from getRoot();
//  - loads that alias analysis reports as constant memory start from the entry
//    node and their output chain is dropped;
//  - all other loads start from DAG.getRoot(), and their combined chain is
//    queued on PendingLoads (volatile loads set the DAG root instead).
3651 void SelectionDAGBuilder::visitLoad(const LoadInst &I) {
3652  if (I.isAtomic())
3653  return visitAtomicLoad(I);
3654 
3655  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3656  const Value *SV = I.getOperand(0);
3657  if (TLI.supportSwiftError()) {
3658  // Swifterror values can come from either a function parameter with
3659  // swifterror attribute or an alloca with swifterror attribute.
3660  if (const Argument *Arg = dyn_cast<Argument>(SV)) {
3661  if (Arg->hasSwiftErrorAttr())
3662  return visitLoadFromSwiftError(I);
3663  }
3664 
3665  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(SV)) {
3666  if (Alloca->isSwiftError())
3667  return visitLoadFromSwiftError(I);
3668  }
3669  }
3670 
3671  SDValue Ptr = getValue(SV);
3672 
3673  Type *Ty = I.getType();
3674 
      // Properties of the load that feed the memory-operand flags below.
3675  bool isVolatile = I.isVolatile();
3676  bool isNonTemporal = I.getMetadata(LLVMContext::MD_nontemporal) != nullptr;
3677  bool isInvariant = I.getMetadata(LLVMContext::MD_invariant_load) != nullptr;
3678  bool isDereferenceable = isDereferenceablePointer(SV, DAG.getDataLayout());
3679  unsigned Alignment = I.getAlignment();
3680 
3681  AAMDNodes AAInfo;
3682  I.getAAMetadata(AAInfo);
3683  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
3684 
      // NOTE(review): listing lines 3685-3686 are absent from this copy; ValueVTs
      // and Offsets are used below without a visible declaration -- restore
      // them from upstream.
3687  ComputeValueVTs(TLI, DAG.getDataLayout(), Ty, ValueVTs, &Offsets);
3688  unsigned NumValues = ValueVTs.size();
      // A load of an empty type produces no nodes and no value for I.
3689  if (NumValues == 0)
3690  return;
3691 
3692  SDValue Root;
3693  bool ConstantMemory = false;
3694  if (isVolatile || NumValues > MaxParallelChains)
3695  // Serialize volatile loads with other side effects.
3696  Root = getRoot();
3697  else if (AA && AA->pointsToConstantMemory(MemoryLocation(
3698  SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) {
3699  // Do not serialize (non-volatile) loads of constant memory with anything.
3700  Root = DAG.getEntryNode();
3701  ConstantMemory = true;
3702  } else {
3703  // Do not serialize non-volatile loads against each other.
3704  Root = DAG.getRoot();
3705  }
3706 
3707  SDLoc dl = getCurSDLoc();
3708 
      // Give the target a chance to adjust the chain ahead of a volatile load.
3709  if (isVolatile)
3710  Root = TLI.prepareVolatileOrAtomicLoad(Root, dl, DAG);
3711 
3712  // An aggregate load cannot wrap around the address space, so offsets to its
3713  // parts don't wrap either.
3714  SDNodeFlags Flags;
3715  Flags.setNoUnsignedWrap(true);
3716 
3717  SmallVector<SDValue, 4> Values(NumValues);
3718  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3719  EVT PtrVT = Ptr.getValueType();
      // ChainI counts loads issued since the last TokenFactor; i counts all parts.
3720  unsigned ChainI = 0;
3721  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3722  // Serializing loads here may result in excessive register pressure, and
3723  // TokenFactor places arbitrary choke points on the scheduler. SD scheduling
3724  // could recover a bit by hoisting nodes upward in the chain by recognizing
3725  // they are side-effect free or do not alias. The optimizer should really
3726  // avoid this case by converting large object/array copies to llvm.memcpy
3727  // (MaxParallelChains should always remain as failsafe).
3728  if (ChainI == MaxParallelChains) {
3729  assert(PendingLoads.empty() && "PendingLoads must be serialized first");
3730  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3731  makeArrayRef(Chains.data(), ChainI));
3732  Root = Chain;
3733  ChainI = 0;
3734  }
      // Address of part i.
3735  SDValue A = DAG.getNode(ISD::ADD, dl,
3736  PtrVT, Ptr,
3737  DAG.getConstant(Offsets[i], dl, PtrVT),
3738  Flags);
3739  auto MMOFlags = MachineMemOperand::MONone;
3740  if (isVolatile)
3741  MMOFlags |= MachineMemOperand::MOVolatile;
3742  if (isNonTemporal)
      // NOTE(review): listing line 3743 is absent; as shown, this `if` has no
      // statement of its own and would capture the following `if`. Presumably
      // a non-temporal MMO flag was set here -- confirm against upstream.
3744  if (isInvariant)
3745  MMOFlags |= MachineMemOperand::MOInvariant;
3746  if (isDereferenceable)
      // NOTE(review): listing line 3747 is absent; as shown, the target flags
      // below would only be added for dereferenceable pointers. Presumably a
      // dereferenceable MMO flag was set here -- confirm against upstream.
3748  MMOFlags |= TLI.getMMOFlags(I);
3749 
3750  SDValue L = DAG.getLoad(ValueVTs[i], dl, Root, A,
3751  MachinePointerInfo(SV, Offsets[i]), Alignment,
3752  MMOFlags, AAInfo, Ranges);
3753 
3754  Values[i] = L;
3755  Chains[ChainI] = L.getValue(1);
3756  }
3757 
      // Join the outstanding load chains; constant-memory loads are left unchained.
3758  if (!ConstantMemory) {
3759  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3760  makeArrayRef(Chains.data(), ChainI));
3761  if (isVolatile)
3762  DAG.setRoot(Chain);
3763  else
3764  PendingLoads.push_back(Chain);
3765  }
3766 
3767  setValue(&I, DAG.getNode(ISD::MERGE_VALUES, dl,
3768  DAG.getVTList(ValueVTs), Values));
3769 }
3770 
// Lower a store to a swifterror location. No memory store is emitted: the
// stored value is copied into a virtual register obtained from
// FuncInfo.getOrCreateSwiftErrorVRegDefAt, and the CopyToReg node becomes the
// new DAG root. When that call reports a newly created register, it is also
// recorded as the current swifterror register for the current block and the
// store's pointer operand.
//
// NOTE(review): listing lines 3772, 3775-3776 and 3778 are absent from this
// copy (the head of the first assert, the declarations of ValueVTs/Offsets,
// and the head of the ComputeValueVTs call, judging by the fragments that
// remain) -- restore them from upstream.
3771 void SelectionDAGBuilder::visitStoreToSwiftError(const StoreInst &I) {
3773  "call visitStoreToSwiftError when backend supports swifterror");
3774 
3777  const Value *SrcV = I.getOperand(0);
3779  SrcV->getType(), ValueVTs, &Offsets);
3780  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3781  "expect a single EVT for swifterror");
3782 
3783  SDValue Src = getValue(SrcV);
3784  // Create a virtual register, then update the virtual register.
3785  unsigned VReg; bool CreatedVReg;
3786  std::tie(VReg, CreatedVReg) = FuncInfo.getOrCreateSwiftErrorVRegDefAt(&I);
3787  // Chain, DL, Reg, N or Chain, DL, Reg, N, Glue
3788  // Chain can be getRoot or getControlRoot.
3789  SDValue CopyNode = DAG.getCopyToReg(getRoot(), getCurSDLoc(), VReg,
3790  SDValue(Src.getNode(), Src.getResNo()));
3791  DAG.setRoot(CopyNode);
3792  if (CreatedVReg)
3793  FuncInfo.setCurrentSwiftErrorVReg(FuncInfo.MBB, I.getOperand(1), VReg);
3794 }
3795 
// Lower a load from a swifterror location. No memory load is emitted: the value
// of I is a CopyFromReg of the virtual register returned (as .first) by
// FuncInfo.getOrCreateSwiftErrorVRegUseAt for this load, the current block and
// the pointer operand. The asserts require a non-volatile load without
// nontemporal metadata, not from constant memory, whose type maps to a single
// EVT at offset 0.
//
// NOTE(review): listing lines 3797, 3802 and 3813-3815 are absent from this
// copy (the head of the first assert, one clause of the second assert, and the
// ValueVTs/Offsets declarations plus the head of the ComputeValueVTs call,
// judging by the fragments that remain) -- restore them from upstream.
3796 void SelectionDAGBuilder::visitLoadFromSwiftError(const LoadInst &I) {
3798  "call visitLoadFromSwiftError when backend supports swifterror");
3799 
3800  assert(!I.isVolatile() &&
3801  I.getMetadata(LLVMContext::MD_nontemporal) == nullptr &&
3803  "Support volatile, non temporal, invariant for load_from_swift_error");
3804 
3805  const Value *SV = I.getOperand(0);
3806  Type *Ty = I.getType();
3807  AAMDNodes AAInfo;
3808  I.getAAMetadata(AAInfo);
3809  assert((!AA || !AA->pointsToConstantMemory(MemoryLocation(
3810  SV, DAG.getDataLayout().getTypeStoreSize(Ty), AAInfo))) &&
3811  "load_from_swift_error should not be constant memory");
3812 
3816  ValueVTs, &Offsets);
3817  assert(ValueVTs.size() == 1 && Offsets[0] == 0 &&
3818  "expect a single EVT for swifterror");
3819 
3820  // Chain, DL, Reg, VT, Glue or Chain, DL, Reg, VT
3821  SDValue L = DAG.getCopyFromReg(
3822  getRoot(), getCurSDLoc(),
3823  FuncInfo.getOrCreateSwiftErrorVRegUseAt(&I, FuncInfo.MBB, SV).first,
3824  ValueVTs[0]);
3825 
3826  setValue(&I, L);
3827 }
3828 
// Lower a non-atomic store. Atomic stores are forwarded to visitAtomicStore
// and, when the target supports swifterror, stores to a swifterror argument or
// alloca are forwarded to visitStoreToSwiftError.
//
// The stored type is split into ValueVTs/Offsets; result i of the source value
// is stored to Ptr + Offsets[i]. Store chains are joined with a TokenFactor
// every MaxParallelChains stores, and the final TokenFactor becomes the new
// DAG root.
3829 void SelectionDAGBuilder::visitStore(const StoreInst &I) {
3830  if (I.isAtomic())
3831  return visitAtomicStore(I);
3832 
3833  const Value *SrcV = I.getOperand(0);
3834  const Value *PtrV = I.getOperand(1);
3835 
3836  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
3837  if (TLI.supportSwiftError()) {
3838  // Swifterror values can come from either a function parameter with
3839  // swifterror attribute or an alloca with swifterror attribute.
3840  if (const Argument *Arg = dyn_cast<Argument>(PtrV)) {
3841  if (Arg->hasSwiftErrorAttr())
3842  return visitStoreToSwiftError(I);
3843  }
3844 
3845  if (const AllocaInst *Alloca = dyn_cast<AllocaInst>(PtrV)) {
3846  if (Alloca->isSwiftError())
3847  return visitStoreToSwiftError(I);
3848  }
3849  }
3850 
      // NOTE(review): listing lines 3851-3853 are absent from this copy (the
      // declarations of ValueVTs/Offsets and the head of the ComputeValueVTs
      // call, judging by the fragment below) -- restore them from upstream.
3854  SrcV->getType(), ValueVTs, &Offsets);
3855  unsigned NumValues = ValueVTs.size();
3856  if (NumValues == 0)
3857  return;
3858 
3859  // Get the lowered operands. Note that we do this after
3860  // checking if NumResults is zero, because with zero results
3861  // the operands won't have values in the map.
3862  SDValue Src = getValue(SrcV);
3863  SDValue Ptr = getValue(PtrV);
3864 
3865  SDValue Root = getRoot();
3866  SmallVector<SDValue, 4> Chains(std::min(MaxParallelChains, NumValues));
3867  SDLoc dl = getCurSDLoc();
3868  EVT PtrVT = Ptr.getValueType();
3869  unsigned Alignment = I.getAlignment();
3870  AAMDNodes AAInfo;
3871  I.getAAMetadata(AAInfo);
3872 
3873  auto MMOFlags = MachineMemOperand::MONone;
3874  if (I.isVolatile())
3875  MMOFlags |= MachineMemOperand::MOVolatile;
3876  if (I.getMetadata(LLVMContext::MD_nontemporal) != nullptr)
      // NOTE(review): listing line 3877 is absent; as shown, the target flags
      // below would only be added for nontemporal stores. Presumably a
      // non-temporal MMO flag was set here -- confirm against upstream.
3878  MMOFlags |= TLI.getMMOFlags(I);
3879 
3880  // An aggregate store cannot wrap around the address space, so offsets to its
3881  // parts don't wrap either.
3882  SDNodeFlags Flags;
3883  Flags.setNoUnsignedWrap(true);
3884 
      // ChainI counts stores issued since the last TokenFactor; i counts all parts.
3885  unsigned ChainI = 0;
3886  for (unsigned i = 0; i != NumValues; ++i, ++ChainI) {
3887  // See visitLoad comments.
3888  if (ChainI == MaxParallelChains) {
3889  SDValue Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3890  makeArrayRef(Chains.data(), ChainI));
3891  Root = Chain;
3892  ChainI = 0;
3893  }
      // Address of part i; the stored value is result i of the source node.
3894  SDValue Add = DAG.getNode(ISD::ADD, dl, PtrVT, Ptr,
3895  DAG.getConstant(Offsets[i], dl, PtrVT), Flags);
3896  SDValue St = DAG.getStore(
3897  Root, dl, SDValue(Src.getNode(), Src.getResNo() + i), Add,
3898  MachinePointerInfo(PtrV, Offsets[i]), Alignment, MMOFlags, AAInfo);
3899  Chains[ChainI] = St;
3900  }
3901 
3902  SDValue StoreNode = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
3903  makeArrayRef(Chains.data(), ChainI));
3904  DAG.setRoot(StoreNode);
3905 }
3906 
// Lower llvm.masked.store or, when IsCompressing is true,
// llvm.masked.compressstore to a masked-store node. The two intrinsics order
// their arguments differently, so the operands are fetched through one of two
// local lambdas. The compressing form carries no alignment argument; an
// alignment of 0 is replaced by the alignment of the stored value type. The
// store node becomes both the new DAG root and the value of I.
3907 void SelectionDAGBuilder::visitMaskedStore(const CallInst &I,
3908  bool IsCompressing) {
3909  SDLoc sdl = getCurSDLoc();
3910 
3911  auto getMaskedStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3912  unsigned& Alignment) {
3913  // llvm.masked.store.*(Src0, Ptr, alignment, Mask)
3914  Src0 = I.getArgOperand(0);
3915  Ptr = I.getArgOperand(1);
3916  Alignment = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3917  Mask = I.getArgOperand(3);
3918  };
3919  auto getCompressingStoreOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
3920  unsigned& Alignment) {
3921  // llvm.masked.compressstore.*(Src0, Ptr, Mask)
3922  Src0 = I.getArgOperand(0);
3923  Ptr = I.getArgOperand(1);
3924  Mask = I.getArgOperand(2);
      // No alignment operand; 0 selects the type's alignment further down.
3925  Alignment = 0;
3926  };
3927 
3928  Value *PtrOperand, *MaskOperand, *Src0Operand;
3929  unsigned Alignment;
3930  if (IsCompressing)
3931  getCompressingStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3932  else
3933  getMaskedStoreOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
3934 
3935  SDValue Ptr = getValue(PtrOperand);
3936  SDValue Src0 = getValue(Src0Operand);
3937  SDValue Mask = getValue(MaskOperand);
3938 
3939  EVT VT = Src0.getValueType();
3940  if (!Alignment)
3941  Alignment = DAG.getEVTAlignment(VT);
3942 
3943  AAMDNodes AAInfo;
3944  I.getAAMetadata(AAInfo);
3945 
3946  MachineMemOperand *MMO =
3947  DAG.getMachineFunction().
3948  getMachineMemOperand(MachinePointerInfo(PtrOperand),
      // NOTE(review): listing line 3949 is absent from this copy; the arguments
      // between the pointer info and Alignment are not shown -- restore them
      // from upstream.
3950  Alignment, AAInfo);
3951  SDValue StoreNode = DAG.getMaskedStore(getRoot(), sdl, Src0, Ptr, Mask, VT,
3952  MMO, false /* Truncating */,
3953  IsCompressing);
3954  DAG.setRoot(StoreNode);
3955  setValue(&I, StoreNode);
3956 }
3957 
3958 // Get a uniform base for the Gather/Scatter intrinsic.
3959 // The first argument of the Gather/Scatter intrinsic is a vector of pointers.
3960 // We try to represent it as a base pointer + vector of indices.
3961 // Usually, the vector of pointers comes from a 'getelementptr' instruction.
3962 // The first operand of the GEP may be a single pointer or a vector of pointers
3963 // Example:
3964 // %gep.ptr = getelementptr i32, <8 x i32*> %vptr, <8 x i32> %ind
3965 // or
3966 // %gep.ptr = getelementptr i32, i32* %ptr, <8 x i32> %ind
3967 // %res = call <8 x i32> @llvm.masked.gather.v8i32(<8 x i32*> %gep.ptr, ..
3968 //
3969 // When the first GEP operand is a single pointer - it is the uniform base we
3970 // are looking for. If first operand of the GEP is a splat vector - we
3971 // extract the splat value and use it as a uniform base.
3972 // In all other cases the function returns 'false'.
//
// The function also returns 'false' when any GEP index other than the last is
// not a constant zero, or when no value is found (via SDB->findValue) for the
// base pointer or the final index.
//
// On success: Base is the value of the scalar base pointer, Index is the value
// of the final GEP index (splatted to the GEP's vector width if it was scalar),
// and Scale is a target constant holding the alloc size of the GEP's result
// element type.
//
// Ptr is an in/out parameter. It is overwritten with the scalar base pointer
// as soon as one is identified, so it may already have been modified (or set
// to null by a failed getSplatValue) when the function returns 'false'; callers
// should pass a copy if they still need the original pointer.
3973 static bool getUniformBase(const Value* &Ptr, SDValue& Base, SDValue& Index,
3974  SDValue &Scale, SelectionDAGBuilder* SDB) {
3975  SelectionDAG& DAG = SDB->DAG;
3976  LLVMContext &Context = *DAG.getContext();
3977 
3978  assert(Ptr->getType()->isVectorTy() && "Uexpected pointer type");
      // NOTE(review): listing line 3979 is absent from this copy; GEP is used
      // below without a visible declaration (presumably a dyn_cast of Ptr to a
      // GEP instruction) -- restore it from upstream.
3980  if (!GEP)
3981  return false;
3982 
3983  const Value *GEPPtr = GEP->getPointerOperand();
3984  if (!GEPPtr->getType()->isVectorTy())
3985  Ptr = GEPPtr;
3986  else if (!(Ptr = getSplatValue(GEPPtr)))
3987  return false;
3988 
3989  unsigned FinalIndex = GEP->getNumOperands() - 1;
3990  Value *IndexVal = GEP->getOperand(FinalIndex);
3991 
3992  // Ensure all the other indices are 0.
      // Operand 0 is the pointer operand, so index operands start at 1.
3993  for (unsigned i = 1; i < FinalIndex; ++i) {
3994  auto *C = dyn_cast<ConstantInt>(GEP->getOperand(i));
3995  if (!C || !C->isZero())
3996  return false;
3997  }
3998 
3999  // The operands of the GEP may be defined in another basic block.
4000  // In this case we'll not find nodes for the operands.
4001  if (!SDB->findValue(Ptr) || !SDB->findValue(IndexVal))
4002  return false;
4003 
4004  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4005  const DataLayout &DL = DAG.getDataLayout();
4006  Scale = DAG.getTargetConstant(DL.getTypeAllocSize(GEP->getResultElementType()),
4007  SDB->getCurSDLoc(), TLI.getPointerTy(DL));
4008  Base = SDB->getValue(Ptr);
4009  Index = SDB->getValue(IndexVal);
4010 
      // A scalar index applies to every lane; splat it to the GEP's vector width.
4011  if (!Index.getValueType().isVector()) {
4012  unsigned GEPWidth = GEP->getType()->getVectorNumElements();
4013  EVT VT = EVT::getVectorVT(Context, Index.getValueType(), GEPWidth);
4014  Index = DAG.getSplatBuildVector(VT, SDLoc(Index), Index);
4015  }
4016  return true;
4017 }
4018 
// Lower llvm.masked.scatter to a masked-scatter node. If getUniformBase can
// express the pointer vector as base + index * scale, that decomposition is
// used and the memory operand refers to the base pointer. Otherwise the base
// is the constant 0, the index is the pointer vector itself, the scale is 1,
// and the memory operand has no IR pointer. The scatter node becomes both the
// new DAG root and the value of I.
4019 void SelectionDAGBuilder::visitMaskedScatter(const CallInst &I) {
4020  SDLoc sdl = getCurSDLoc();
4021 
4022  // llvm.masked.scatter.*(Src0, Ptrs, alignment, Mask)
4023  const Value *Ptr = I.getArgOperand(1);
4024  SDValue Src0 = getValue(I.getArgOperand(0));
4025  SDValue Mask = getValue(I.getArgOperand(3));
4026  EVT VT = Src0.getValueType();
4027  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(2)))->getZExtValue();
      // An alignment argument of 0 falls back to the value type's alignment.
4028  if (!Alignment)
4029  Alignment = DAG.getEVTAlignment(VT);
4030  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4031 
4032  AAMDNodes AAInfo;
4033  I.getAAMetadata(AAInfo);
4034 
4035  SDValue Base;
4036  SDValue Index;
4037  SDValue Scale;
      // getUniformBase may overwrite its pointer argument, so hand it a copy and
      // keep Ptr intact for the non-uniform fallback below.
4038  const Value *BasePtr = Ptr;
4039  bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
4040 
4041  const Value *MemOpBasePtr = UniformBase ? BasePtr : nullptr;
      // NOTE(review): listing line 4042 is absent from this copy; MMO is used
      // below without a visible declaration, and the call on the next line has
      // lost its head -- restore it from upstream.
4043  getMachineMemOperand(MachinePointerInfo(MemOpBasePtr),
4044  MachineMemOperand::MOStore, VT.getStoreSize(),
4045  Alignment, AAInfo);
4046  if (!UniformBase) {
4047  Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4048  Index = getValue(Ptr);
4049  Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4050  }
4051  SDValue Ops[] = { getRoot(), Src0, Mask, Base, Index, Scale };
4052  SDValue Scatter = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), VT, sdl,
4053  Ops, MMO);
4054  DAG.setRoot(Scatter);
4055  setValue(&I, Scatter);
4056 }
4057 
// Lower llvm.masked.load or, when IsExpanding is true, llvm.masked.expandload
// to a masked-load node. The two intrinsics order their arguments differently,
// so the operands are fetched through one of two local lambdas. The expanding
// form carries no alignment argument; an alignment of 0 is replaced by the
// alignment of the loaded value type. A load that alias analysis reports as
// reading constant memory is chained to the entry node and its output chain is
// dropped; otherwise it is chained to DAG.getRoot() and its output chain is
// queued on PendingLoads.
4058 void SelectionDAGBuilder::visitMaskedLoad(const CallInst &I, bool IsExpanding) {
4059  SDLoc sdl = getCurSDLoc();
4060 
4061  auto getMaskedLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
4062  unsigned& Alignment) {
4063  // @llvm.masked.load.*(Ptr, alignment, Mask, Src0)
4064  Ptr = I.getArgOperand(0);
4065  Alignment = cast<ConstantInt>(I.getArgOperand(1))->getZExtValue();
4066  Mask = I.getArgOperand(2);
4067  Src0 = I.getArgOperand(3);
4068  };
4069  auto getExpandingLoadOps = [&](Value* &Ptr, Value* &Mask, Value* &Src0,
4070  unsigned& Alignment) {
4071  // @llvm.masked.expandload.*(Ptr, Mask, Src0)
4072  Ptr = I.getArgOperand(0);
      // No alignment operand; 0 selects the type's alignment further down.
4073  Alignment = 0;
4074  Mask = I.getArgOperand(1);
4075  Src0 = I.getArgOperand(2);
4076  };
4077 
4078  Value *PtrOperand, *MaskOperand, *Src0Operand;
4079  unsigned Alignment;
4080  if (IsExpanding)
4081  getExpandingLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4082  else
4083  getMaskedLoadOps(PtrOperand, MaskOperand, Src0Operand, Alignment);
4084 
4085  SDValue Ptr = getValue(PtrOperand);
4086  SDValue Src0 = getValue(Src0Operand);
4087  SDValue Mask = getValue(MaskOperand);
4088 
      // The result type is taken from the pass-through operand Src0.
4089  EVT VT = Src0.getValueType();
4090  if (!Alignment)
4091  Alignment = DAG.getEVTAlignment(VT);
4092 
4093  AAMDNodes AAInfo;
4094  I.getAAMetadata(AAInfo);
4095  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4096 
4097  // Do not serialize masked loads of constant memory with anything.
      // Without alias analysis (AA is null) the load is always chained.
4098  bool AddToChain = !AA || !AA->pointsToConstantMemory(MemoryLocation(
4099  PtrOperand, DAG.getDataLayout().getTypeStoreSize(I.getType()), AAInfo));
4100  SDValue InChain = AddToChain ? DAG.getRoot() : DAG.getEntryNode();
4101 
4102  MachineMemOperand *MMO =
4103  DAG.getMachineFunction().
4104  getMachineMemOperand(MachinePointerInfo(PtrOperand),
      // NOTE(review): listing line 4105 is absent from this copy; the arguments
      // between the pointer info and Alignment are not shown -- restore them
      // from upstream.
4106  Alignment, AAInfo, Ranges);
4107 
4108  SDValue Load = DAG.getMaskedLoad(VT, sdl, InChain, Ptr, Mask, Src0, VT, MMO,
4109  ISD::NON_EXTLOAD, IsExpanding);
4110  if (AddToChain)
4111  PendingLoads.push_back(Load.getValue(1));
4112  setValue(&I, Load);
4113 }
4114 
// Lower llvm.masked.gather to a masked-gather node. If getUniformBase can
// express the pointer vector as base + index * scale, that decomposition is
// used and the memory operand refers to the base pointer. Otherwise the base
// is the constant 0, the index is the pointer vector itself, the scale is 1,
// and the memory operand has no IR pointer. A gather with a uniform base that
// alias analysis reports as constant memory is chained to the entry node and
// its output chain is dropped; every other gather is chained to DAG.getRoot()
// and its output chain is queued on PendingLoads.
4115 void SelectionDAGBuilder::visitMaskedGather(const CallInst &I) {
4116  SDLoc sdl = getCurSDLoc();
4117 
4118  // @llvm.masked.gather.*(Ptrs, alignment, Mask, Src0)
4119  const Value *Ptr = I.getArgOperand(0);
4120  SDValue Src0 = getValue(I.getArgOperand(3));
4121  SDValue Mask = getValue(I.getArgOperand(2));
4122 
4123  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4124  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4125  unsigned Alignment = (cast<ConstantInt>(I.getArgOperand(1)))->getZExtValue();
      // An alignment argument of 0 falls back to the value type's alignment.
4126  if (!Alignment)
4127  Alignment = DAG.getEVTAlignment(VT);
4128 
4129  AAMDNodes AAInfo;
4130  I.getAAMetadata(AAInfo);
4131  const MDNode *Ranges = I.getMetadata(LLVMContext::MD_range);
4132 
4133  SDValue Root = DAG.getRoot();
4134  SDValue Base;
4135  SDValue Index;
4136  SDValue Scale;
      // getUniformBase may overwrite its pointer argument, so hand it a copy and
      // keep Ptr intact for the non-uniform fallback below.
4137  const Value *BasePtr = Ptr;
4138  bool UniformBase = getUniformBase(BasePtr, Base, Index, Scale, this);
4139  bool ConstantMemory = false;
4140  if (UniformBase &&
4141  AA && AA->pointsToConstantMemory(MemoryLocation(
4142  BasePtr, DAG.getDataLayout().getTypeStoreSize(I.getType()),
4143  AAInfo))) {
4144  // Do not serialize (non-volatile) loads of constant memory with anything.
4145  Root = DAG.getEntryNode();
4146  ConstantMemory = true;
4147  }
4148 
4149  MachineMemOperand *MMO =
4150  DAG.getMachineFunction().
4151  getMachineMemOperand(MachinePointerInfo(UniformBase ? BasePtr : nullptr),
      // NOTE(review): listing line 4152 is absent from this copy; the arguments
      // between the pointer info and Alignment are not shown -- restore them
      // from upstream.
4153  Alignment, AAInfo, Ranges);
4154 
4155  if (!UniformBase) {
4156  Base = DAG.getConstant(0, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4157  Index = getValue(Ptr);
4158  Scale = DAG.getTargetConstant(1, sdl, TLI.getPointerTy(DAG.getDataLayout()));
4159  }
4160  SDValue Ops[] = { Root, Src0, Mask, Base, Index, Scale };
4161  SDValue Gather = DAG.getMaskedGather(DAG.getVTList(VT, MVT::Other), VT, sdl,
4162  Ops, MMO);
4163 
4164  SDValue OutChain = Gather.getValue(1);
4165  if (!ConstantMemory)
4166  PendingLoads.push_back(OutChain);
4167  setValue(&I, Gather);
4168 }
4169 
// Lower a cmpxchg instruction to an ATOMIC_CMP_SWAP_WITH_SUCCESS node. The node
// is created with three result types: the memory value type (taken from the
// compare operand), an i1, and the output chain. The node becomes the value of
// I and its third result (index 2) becomes the new DAG root.
4170 void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) {
4171  SDLoc dl = getCurSDLoc();
4172  AtomicOrdering SuccessOrder = I.getSuccessOrdering();
4173  AtomicOrdering FailureOrder = I.getFailureOrdering();
4174  SyncScope::ID SSID = I.getSyncScopeID();
4175 
4176  SDValue InChain = getRoot();
4177 
4178  MVT MemVT = getValue(I.getCompareOperand()).getSimpleValueType();
4179  SDVTList VTs = DAG.getVTList(MemVT, MVT::i1, MVT::Other);
4180  SDValue L = DAG.getAtomicCmpSwap(
4181  ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, dl, MemVT, VTs, InChain,
4182  getValue(I.getPointerOperand()), getValue(I.getCompareOperand()),
      // NOTE(review): listing line 4183 is absent from this copy; the arguments
      // between the compare operand and the alignment are not shown -- restore
      // them from upstream.
4184  /*Alignment=*/ 0, SuccessOrder, FailureOrder, SSID);
4185 
4186  SDValue OutChain = L.getValue(2);
4187 
4188  setValue(&I, L);
4189  DAG.setRoot(OutChain);
4190 }
4191 
// Lower an atomicrmw instruction to the ISD atomic node matching its operation
// (ATOMIC_SWAP for xchg, ATOMIC_LOAD_<op> for the others). An operation not
// listed in the switch hits llvm_unreachable. The node is built on getRoot()
// with the value operand's simple value type as memory type; it becomes the
// value of I and its second result (index 1) becomes the new DAG root.
4192 void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) {
4193  SDLoc dl = getCurSDLoc();
4194  ISD::NodeType NT;
4195  switch (I.getOperation()) {
4196  default: llvm_unreachable("Unknown atomicrmw operation");
4197  case AtomicRMWInst::Xchg: NT = ISD::ATOMIC_SWAP; break;
4198  case AtomicRMWInst::Add: NT = ISD::ATOMIC_LOAD_ADD; break;
4199  case AtomicRMWInst::Sub: NT = ISD::ATOMIC_LOAD_SUB; break;
4200  case AtomicRMWInst::And: NT = ISD::ATOMIC_LOAD_AND; break;
4201  case AtomicRMWInst::Nand: NT = ISD::ATOMIC_LOAD_NAND; break;
4202  case AtomicRMWInst::Or: NT = ISD::ATOMIC_LOAD_OR; break;
4203  case AtomicRMWInst::Xor: NT = ISD::ATOMIC_LOAD_XOR; break;
4204  case AtomicRMWInst::Max: NT = ISD::ATOMIC_LOAD_MAX; break;
4205  case AtomicRMWInst::Min: NT = ISD::ATOMIC_LOAD_MIN; break;
4206  case AtomicRMWInst::UMax: NT = ISD::ATOMIC_LOAD_UMAX; break;
4207  case AtomicRMWInst::UMin: NT = ISD::ATOMIC_LOAD_UMIN; break;
4208  }
4209  AtomicOrdering Order = I.getOrdering();
4210  SyncScope::ID SSID = I.getSyncScopeID();
4211 
4212  SDValue InChain = getRoot();
4213 
      // Arguments after the opcode and location: memory type, chain, pointer
      // value, operand value, IR pointer, alignment, ordering, sync scope.
4214  SDValue L =
4215  DAG.getAtomic(NT, dl,
4216  getValue(I.getValOperand()).getSimpleValueType(),
4217  InChain,
4218  getValue(I.getPointerOperand()),
4219  getValue(I.getValOperand()),
4220  I.getPointerOperand(),
4221  /* Alignment=*/ 0, Order, SSID);
4222 
4223  SDValue OutChain = L.getValue(1);
4224 
4225  setValue(&I, L);
4226  DAG.setRoot(OutChain);
4227 }
4228 
// Lower a fence instruction to an ATOMIC_FENCE node whose operands are the
// current root chain, the fence's ordering and its synchronization scope ID
// (both as constants of the target's fence operand type). The node becomes the
// new DAG root; no value is set for I.
4229 void SelectionDAGBuilder::visitFence(const FenceInst &I) {
4230  SDLoc dl = getCurSDLoc();
4231  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4232  SDValue Ops[3];
4233  Ops[0] = getRoot();
4234  Ops[1] = DAG.getConstant((unsigned)I.getOrdering(), dl,
4235  TLI.getFenceOperandTy(DAG.getDataLayout()));
4236  Ops[2] = DAG.getConstant(I.getSyncScopeID(), dl,
4237  TLI.getFenceOperandTy(DAG.getDataLayout()));
4238  DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops));
4239 }
4240 
// Lower an atomic load to an ATOMIC_LOAD node. Unless the target reports
// support for unaligned atomics, a load whose alignment is smaller than the
// store size of its value type is a fatal error. The memory operand uses the
// instruction's alignment, or the value type's alignment when the instruction
// reports 0. The node becomes the value of I and its second result (index 1)
// becomes the new DAG root.
4241 void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) {
4242  SDLoc dl = getCurSDLoc();
4243  AtomicOrdering Order = I.getOrdering();
4244  SyncScope::ID SSID = I.getSyncScopeID();
4245 
4246  SDValue InChain = getRoot();
4247 
4248  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4249  EVT VT = TLI.getValueType(DAG.getDataLayout(), I.getType());
4250 
4251  if (!TLI.supportsUnalignedAtomics() &&
4252  I.getAlignment() < VT.getStoreSize())
4253  report_fatal_error("Cannot generate unaligned atomic load");
4254 
4255  MachineMemOperand *MMO =
4256  DAG.getMachineFunction().
4257  getMachineMemOperand(MachinePointerInfo(I.getPointerOperand()),
      // NOTE(review): listing lines 4258-4259 are absent from this copy; the
      // arguments between the pointer info and the store size are not shown --
      // restore them from upstream.
4260  VT.getStoreSize(),
4261  I.getAlignment() ? I.getAlignment() :
4262  DAG.getEVTAlignment(VT),
4263  AAMDNodes(), nullptr, SSID, Order);
4264 
      // Give the target a chance to adjust the chain ahead of the atomic load.
4265  InChain = TLI.prepareVolatileOrAtomicLoad(InChain, dl, DAG);
4266  SDValue L =
4267  DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain,
4268  getValue(I.getPointerOperand()), MMO);
4269 
4270  SDValue OutChain = L.getValue(1);
4271 
4272  setValue(&I, L);
4273  DAG.setRoot(OutChain);
4274 }
4275 
// Lower an atomic store to an ATOMIC_STORE node built on the current root
// chain; the node becomes the new DAG root and no value is set for I. A store
// whose alignment is smaller than the store size of its value type is a fatal
// error.
//
// NOTE(review): unlike visitAtomicLoad, the alignment check here does not
// consult TLI.supportsUnalignedAtomics() -- confirm whether that asymmetry is
// intended.
4276 void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) {
4277  SDLoc dl = getCurSDLoc();
4278 
4279  AtomicOrdering Order = I.getOrdering();
4280  SyncScope::ID SSID = I.getSyncScopeID();
4281 
4282  SDValue InChain = getRoot();
4283 
4284  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4285  EVT VT =
      // NOTE(review): listing line 4286 (the initializer of VT) is absent from
      // this copy -- restore it from upstream.
4287 
4288  if (I.getAlignment() < VT.getStoreSize())
4289  report_fatal_error("Cannot generate unaligned atomic store");
4290 
4291  SDValue OutChain =
4292  DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT,
4293  InChain,
4294  getValue(I.getPointerOperand()),
4295  getValue(I.getValueOperand()),
      // NOTE(review): listing line 4296 is absent from this copy; the arguments
      // between the value operand and Order are not shown -- restore them
      // from upstream.
4297  Order, SSID);
4298 
4299  DAG.setRoot(OutChain);
4300 }
4301 
4302 /// visitTargetIntrinsic - Lower a call of a target intrinsic to an INTRINSIC
4303 /// node (INTRINSIC_WO_CHAIN, INTRINSIC_W_CHAIN or INTRINSIC_VOID), or to a
/// memory intrinsic node when the target's getTgtMemIntrinsic hook claims the
/// call. \p I is the call and \p Intrinsic is the intrinsic ID.
4304 void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I,
4305  unsigned Intrinsic) {
4306  // Ignore the callsite's attributes. A specific call site may be marked with
4307  // readnone, but the lowering code will expect the chain based on the
4308  // definition.
4309  const Function *F = I.getCalledFunction();
4310  bool HasChain = !F->doesNotAccessMemory();
4311  bool OnlyLoad = HasChain && F->onlyReadsMemory();
4312 
4313  // Build the operand list.
 // NOTE(review): the declaration of Ops (original line 4314) is missing from
 // this listing.
4315  if (HasChain) { // If this intrinsic has side-effects, chainify it.
4316  if (OnlyLoad) {
4317  // We don't need to serialize loads against other loads.
4318  Ops.push_back(DAG.getRoot());
4319  } else {
4320  Ops.push_back(getRoot());
4321  }
4322  }
4323 
4324  // Info is set by getTgtMemIntrinsic
4325  TargetLowering::IntrinsicInfo Info;
4326  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
4327  bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I,
4328  DAG.getMachineFunction(),
4329  Intrinsic);
4330 
4331  // Add the intrinsic ID as an integer operand if getTgtMemIntrinsic did not
 // claim the call, or if it did but kept one of the generic INTRINSIC_VOID /
 // INTRINSIC_W_CHAIN opcodes.
4332  if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID ||
4333  Info.opc == ISD::INTRINSIC_W_CHAIN)
4334  Ops.push_back(DAG.getTargetConstant(Intrinsic, getCurSDLoc(),
4335  TLI.getPointerTy(DAG.getDataLayout())));
4336 
4337  // Add all operands of the call to the operand list.
4338  for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) {
4339  SDValue Op = getValue(I.getArgOperand(i));
4340  Ops.push_back(Op);
4341  }
4342 
 // Compute the result types of the call.
 // NOTE(review): the declaration of ValueVTs (original line 4343) is missing
 // from this listing.
4344  ComputeValueVTs(TLI, DAG.getDataLayout(), I.getType(), ValueVTs);
4345 
 // A chained node produces an extra MVT::Other result, appended last.
4346  if (HasChain)
4347  ValueVTs.push_back(MVT::Other);
4348 
4349  SDVTList VTs = DAG.getVTList(ValueVTs);
4350 
4351  // Create the node.
4352  SDValue Result;
4353  if (IsTgtIntrinsic) {
4354  // This is a target intrinsic that touches memory
4355  Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs,
4356  Ops, Info.memVT,
4357  MachinePointerInfo(Info.ptrVal, Info.offset), Info.align,
4358  Info.flags, Info.size);
4359  } else if (!HasChain) {
4360  Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, Ops);
4361  } else if (!I.getType()->isVoidTy()) {
4362  Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, Ops);
4363  } else {
4364  Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, Ops);
4365  }
4366 
 // The chain is the node's last result. Read-only intrinsics are queued in
 // PendingLoads; anything else becomes the new root.
4367  if (HasChain) {
4368  SDValue Chain = Result.getValue(Result.getNode()->getNumValues()-1);
4369  if (OnlyLoad)
4370  PendingLoads.push_back(Chain);
4371  else
4372  DAG.setRoot(Chain);
4373  }
4374 
 // Record the call's value: vector results are bitcast to the value type of
 // the IR vector type, other results go through lowerRangeToAssertZExt.
4375  if (!I.getType()->isVoidTy()) {
4376  if (VectorType *PTy = dyn_cast<VectorType>(I.getType())) {
4377  EVT VT = TLI.getValueType(DAG.getDataLayout(), PTy);
4378  Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result);
4379  } else
4380  Result = lowerRangeToAssertZExt(DAG, I, Result);
4381 
4382  setValue(&I, Result);
4383  }
4384 }
4385 
4386 /// GetSignificand - Get the significand and build it into a floating-point
4387 /// number with exponent of 1:
4388 ///
4389 /// Op = (Op & 0x007fffff) | 0x3f800000;
4390 ///
4391 /// where Op is the i32 bit pattern of the floating point value. The result
/// is returned as an f32 node.
// NOTE(review): the signature line (original line 4392) is missing from this
// listing; callers in this file invoke it as GetSignificand(DAG, Op1, dl).
 // Keep only the low 23 bits of the pattern (mask 0x007fffff).
4393  SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4394  DAG.getConstant(0x007fffff, dl, MVT::i32));
 // OR in 0x3f800000, which is the bit pattern of 1.0f.
4395  SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
4396  DAG.getConstant(0x3f800000, dl, MVT::i32));
 // Reinterpret the integer bits as an f32.
4397  return DAG.getNode(ISD::BITCAST, dl, MVT::f32, t2);
4398 }
4399 
4400 /// GetExponent - Get the exponent:
4401 ///
4402 /// (float)(int)(((Op & 0x7f800000) >> 23) - 127);
4403 ///
4404 /// where Op is the i32 bit pattern of the floating point value. The result
/// is returned as an f32 node.
// NOTE(review): the first signature line (original line 4405) is missing from
// this listing; callers in this file invoke it as
// GetExponent(DAG, Op1, TLI, dl).
4406  const TargetLowering &TLI, const SDLoc &dl) {
 // Isolate the bits selected by the mask 0x7f800000.
4407  SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
4408  DAG.getConstant(0x7f800000, dl, MVT::i32));
 // Logical shift right by 23; the shift amount constant uses the target's
 // pointer type.
4409  SDValue t1 = DAG.getNode(
4410  ISD::SRL, dl, MVT::i32, t0,
4411  DAG.getConstant(23, dl, TLI.getPointerTy(DAG.getDataLayout())));
 // Subtract 127 from the shifted value.
4412  SDValue t2 = DAG.getNode(ISD::SUB, dl, MVT::i32, t1,
4413  DAG.getConstant(127, dl, MVT::i32));
 // Convert the signed integer to f32.
4414  return DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, t2);
4415 }
4416 
4417 /// getF32Constant - Get 32-bit floating point constant.
4418 static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt,
4419  const SDLoc &dl) {
4420  return DAG.getConstantFP(APFloat(APFloat::IEEEsingle(), APInt(32, Flt)), dl,
4421  MVT::f32);
4422 }
4423 
// Builds a limited-precision f32 approximation of 2^t0: the integer part of
// t0 is shifted into bit position 23 and added, in the integer domain, to the
// bits of a polynomial approximation of 2^(fractional part of t0). The
// polynomial is selected by LimitFloatPrecision.
// NOTE(review): the first line of this definition (original line 4424) is
// missing from this listing; callers in this file invoke it as
// getLimitedPrecisionExp2(t0, dl, DAG).
4425  SelectionDAG &DAG) {
4426  // TODO: What fast-math-flags should be set on the floating-point nodes?
4427 
4428  // IntegerPartOfX = (int32_t)t0;
4429  SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0);
4430 
4431  // FractionalPartOfX = t0 - (float)IntegerPartOfX;
4432  SDValue t1 = DAG.getNode(ISD::SINT_TO_FP, dl, MVT::f32, IntegerPartOfX);
4433  SDValue X = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0, t1);
4434 
4435  // IntegerPartOfX <<= 23;
 // NOTE(review): original line 4438 (the start of the shift-amount operand)
 // is missing from this listing.
4436  IntegerPartOfX = DAG.getNode(
4437  ISD::SHL, dl, MVT::i32, IntegerPartOfX,
4439  DAG.getDataLayout())));
4440 
 // Each branch evaluates its polynomial in Horner form, starting from the
 // highest-order coefficient.
4441  SDValue TwoToFractionalPartOfX;
4442  if (LimitFloatPrecision <= 6) {
4443  // For floating-point precision of 6:
4444  //
4445  // TwoToFractionalPartOfX =
4446  // 0.997535578f +
4447  // (0.735607626f + 0.252464424f * x) * x;
4448  //
4449  // error 0.0144103317, which is 6 bits
4450  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4451  getF32Constant(DAG, 0x3e814304, dl));
4452  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4453  getF32Constant(DAG, 0x3f3c50c8, dl));
4454  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4455  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4456  getF32Constant(DAG, 0x3f7f5e7e, dl));
4457  } else if (LimitFloatPrecision <= 12) {
4458  // For floating-point precision of 12:
4459  //
4460  // TwoToFractionalPartOfX =
4461  // 0.999892986f +
4462  // (0.696457318f +
4463  // (0.224338339f + 0.792043434e-1f * x) * x) * x;
4464  //
4465  // error 0.000107046256, which is 13 to 14 bits
4466  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4467  getF32Constant(DAG, 0x3da235e3, dl));
4468  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4469  getF32Constant(DAG, 0x3e65b8f3, dl));
4470  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4471  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4472  getF32Constant(DAG, 0x3f324b07, dl));
4473  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4474  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4475  getF32Constant(DAG, 0x3f7ff8fd, dl));
4476  } else { // LimitFloatPrecision <= 18
4477  // For floating-point precision of 18:
4478  //
4479  // TwoToFractionalPartOfX =
4480  // 0.999999982f +
4481  // (0.693148872f +
4482  // (0.240227044f +
4483  // (0.554906021e-1f +
4484  // (0.961591928e-2f +
4485  // (0.136028312e-2f + 0.157059148e-3f *x)*x)*x)*x)*x)*x;
4486  // error 2.47208000*10^(-7), which is better than 18 bits
4487  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4488  getF32Constant(DAG, 0x3924b03e, dl));
4489  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4490  getF32Constant(DAG, 0x3ab24b87, dl));
4491  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4492  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4493  getF32Constant(DAG, 0x3c1d8c17, dl));
4494  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4495  SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4496  getF32Constant(DAG, 0x3d634a1d, dl));
4497  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4498  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4499  getF32Constant(DAG, 0x3e75fe14, dl));
4500  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4501  SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10,
4502  getF32Constant(DAG, 0x3f317234, dl));
4503  SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X);
4504  TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12,
4505  getF32Constant(DAG, 0x3f800000, dl));
4506  }
4507 
4508  // Add the exponent into the result in integer domain.
4509  SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFractionalPartOfX);
4510  return DAG.getNode(ISD::BITCAST, dl, MVT::f32,
4511  DAG.getNode(ISD::ADD, dl, MVT::i32, t13, IntegerPartOfX));
4512 }
4513 
4514 /// expandExp - Lower an exp intrinsic. Handles the special sequences for
4515 /// limited-precision mode.
4516 static SDValue expandExp(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4517  const TargetLowering &TLI) {
4518  if (Op.getValueType() == MVT::f32 &&
4519  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4520 
4521  // Put the exponent in the right bit position for later addition to the
4522  // final result:
4523  //
4524  // #define LOG2OFe 1.4426950f
4525  // t0 = Op * LOG2OFe
4526 
4527  // TODO: What fast-math-flags should be set here?
4528  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op,
4529  getF32Constant(DAG, 0x3fb8aa3b, dl));
4530  return getLimitedPrecisionExp2(t0, dl, DAG);
4531  }
4532 
4533  // No special expansion.
4534  return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op);
4535 }
4536 
4537 /// expandLog - Lower a log intrinsic. Handles the special sequences for
4538 /// limited-precision mode.
4539 static SDValue expandLog(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4540  const TargetLowering &TLI) {
4541  // TODO: What fast-math-flags should be set on the floating-point nodes?
4542 
4543  if (Op.getValueType() == MVT::f32 &&
4544  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4545  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4546 
4547  // Scale the exponent by log(2) [0.69314718f].
4548  SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4549  SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4550  getF32Constant(DAG, 0x3f317218, dl));
4551 
4552  // Get the significand and build it into a floating-point number with
4553  // exponent of 1.
4554  SDValue X = GetSignificand(DAG, Op1, dl);
4555 
4556  SDValue LogOfMantissa;
4557  if (LimitFloatPrecision <= 6) {
4558  // For floating-point precision of 6:
4559  //
4560  // LogofMantissa =
4561  // -1.1609546f +
4562  // (1.4034025f - 0.23903021f * x) * x;
4563  //
4564  // error 0.0034276066, which is better than 8 bits
4565  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4566  getF32Constant(DAG, 0xbe74c456, dl));
4567  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4568  getF32Constant(DAG, 0x3fb3a2b1, dl));
4569  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4570  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4571  getF32Constant(DAG, 0x3f949a29, dl));
4572  } else if (LimitFloatPrecision <= 12) {
4573  // For floating-point precision of 12:
4574  //
4575  // LogOfMantissa =
4576  // -1.7417939f +
4577  // (2.8212026f +
4578  // (-1.4699568f +
4579  // (0.44717955f - 0.56570851e-1f * x) * x) * x) * x;
4580  //
4581  // error 0.000061011436, which is 14 bits
4582  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4583  getF32Constant(DAG, 0xbd67b6d6, dl));
4584  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4585  getF32Constant(DAG, 0x3ee4f4b8, dl));
4586  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4587  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4588  getF32Constant(DAG, 0x3fbc278b, dl));
4589  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4590  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4591  getF32Constant(DAG, 0x40348e95, dl));
4592  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4593  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4594  getF32Constant(DAG, 0x3fdef31a, dl));
4595  } else { // LimitFloatPrecision <= 18
4596  // For floating-point precision of 18:
4597  //
4598  // LogOfMantissa =
4599  // -2.1072184f +
4600  // (4.2372794f +
4601  // (-3.7029485f +
4602  // (2.2781945f +
4603  // (-0.87823314f +
4604  // (0.19073739f - 0.17809712e-1f * x) * x) * x) * x) * x)*x;
4605  //
4606  // error 0.0000023660568, which is better than 18 bits
4607  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4608  getF32Constant(DAG, 0xbc91e5ac, dl));
4609  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4610  getF32Constant(DAG, 0x3e4350aa, dl));
4611  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4612  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4613  getF32Constant(DAG, 0x3f60d3e3, dl));
4614  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4615  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4616  getF32Constant(DAG, 0x4011cdf0, dl));
4617  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4618  SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4619  getF32Constant(DAG, 0x406cfd1c, dl));
4620  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4621  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4622  getF32Constant(DAG, 0x408797cb, dl));
4623  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4624  LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4625  getF32Constant(DAG, 0x4006dcab, dl));
4626  }
4627 
4628  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa);
4629  }
4630 
4631  // No special expansion.
4632  return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op);
4633 }
4634 
4635 /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for
4636 /// limited-precision mode.
4637 static SDValue expandLog2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4638  const TargetLowering &TLI) {
4639  // TODO: What fast-math-flags should be set on the floating-point nodes?
4640 
4641  if (Op.getValueType() == MVT::f32 &&
4642  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4643  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4644 
4645  // Get the exponent.
4646  SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
4647 
4648  // Get the significand and build it into a floating-point number with
4649  // exponent of 1.
4650  SDValue X = GetSignificand(DAG, Op1, dl);
4651 
4652  // Different possible minimax approximations of significand in
4653  // floating-point for various degrees of accuracy over [1,2].
4654  SDValue Log2ofMantissa;
4655  if (LimitFloatPrecision <= 6) {
4656  // For floating-point precision of 6:
4657  //
4658  // Log2ofMantissa = -1.6749035f + (2.0246817f - .34484768f * x) * x;
4659  //
4660  // error 0.0049451742, which is more than 7 bits
4661  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4662  getF32Constant(DAG, 0xbeb08fe0, dl));
4663  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4664  getF32Constant(DAG, 0x40019463, dl));
4665  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4666  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4667  getF32Constant(DAG, 0x3fd6633d, dl));
4668  } else if (LimitFloatPrecision <= 12) {
4669  // For floating-point precision of 12:
4670  //
4671  // Log2ofMantissa =
4672  // -2.51285454f +
4673  // (4.07009056f +
4674  // (-2.12067489f +
4675  // (.645142248f - 0.816157886e-1f * x) * x) * x) * x;
4676  //
4677  // error 0.0000876136000, which is better than 13 bits
4678  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4679  getF32Constant(DAG, 0xbda7262e, dl));
4680  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4681  getF32Constant(DAG, 0x3f25280b, dl));
4682  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4683  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4684  getF32Constant(DAG, 0x4007b923, dl));
4685  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4686  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4687  getF32Constant(DAG, 0x40823e2f, dl));
4688  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4689  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4690  getF32Constant(DAG, 0x4020d29c, dl));
4691  } else { // LimitFloatPrecision <= 18
4692  // For floating-point precision of 18:
4693  //
4694  // Log2ofMantissa =
4695  // -3.0400495f +
4696  // (6.1129976f +
4697  // (-5.3420409f +
4698  // (3.2865683f +
4699  // (-1.2669343f +
4700  // (0.27515199f -
4701  // 0.25691327e-1f * x) * x) * x) * x) * x) * x;
4702  //
4703  // error 0.0000018516, which is better than 18 bits
4704  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4705  getF32Constant(DAG, 0xbcd2769e, dl));
4706  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4707  getF32Constant(DAG, 0x3e8ce0b9, dl));
4708  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4709  SDValue t3 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4710  getF32Constant(DAG, 0x3fa22ae7, dl));
4711  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4712  SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4,
4713  getF32Constant(DAG, 0x40525723, dl));
4714  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4715  SDValue t7 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6,
4716  getF32Constant(DAG, 0x40aaf200, dl));
4717  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4718  SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8,
4719  getF32Constant(DAG, 0x40c39dad, dl));
4720  SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X);
4721  Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10,
4722  getF32Constant(DAG, 0x4042902c, dl));
4723  }
4724 
4725  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa);
4726  }
4727 
4728  // No special expansion.
4729  return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op);
4730 }
4731 
4732 /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for
4733 /// limited-precision mode: for an f32 operand with LimitFloatPrecision in
/// (0, 18] the result is exponent * log10(2) plus a polynomial approximation
/// of log10 of the significand; otherwise a plain ISD::FLOG10 node is built.
// NOTE(review): the first signature line (original line 4734) is missing from
// this listing - confirm against the upstream file.
4735  const TargetLowering &TLI) {
4736  // TODO: What fast-math-flags should be set on the floating-point nodes?
4737 
4738  if (Op.getValueType() == MVT::f32 &&
4739  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
 // Work on the i32 bit pattern of the operand.
4740  SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op);
4741 
4742  // Scale the exponent by log10(2) [0.30102999f].
4743  SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
4744  SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
4745  getF32Constant(DAG, 0x3e9a209a, dl));
4746 
4747  // Get the significand and build it into a floating-point number with
4748  // exponent of 1.
4749  SDValue X = GetSignificand(DAG, Op1, dl);
4750 
 // Each branch evaluates its polynomial in Horner form, starting from the
 // highest-order coefficient.
4751  SDValue Log10ofMantissa;
4752  if (LimitFloatPrecision <= 6) {
4753  // For floating-point precision of 6:
4754  //
4755  // Log10ofMantissa =
4756  // -0.50419619f +
4757  // (0.60948995f - 0.10380950f * x) * x;
4758  //
4759  // error 0.0014886165, which is 6 bits
4760  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4761  getF32Constant(DAG, 0xbdd49a13, dl));
4762  SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0,
4763  getF32Constant(DAG, 0x3f1c0789, dl));
4764  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4765  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2,
4766  getF32Constant(DAG, 0x3f011300, dl));
4767  } else if (LimitFloatPrecision <= 12) {
4768  // For floating-point precision of 12:
4769  //
4770  // Log10ofMantissa =
4771  // -0.64831180f +
4772  // (0.91751397f +
4773  // (-0.31664806f + 0.47637168e-1f * x) * x) * x;
4774  //
4775  // error 0.00019228036, which is better than 12 bits
4776  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4777  getF32Constant(DAG, 0x3d431f31, dl));
4778  SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4779  getF32Constant(DAG, 0x3ea21fb2, dl));
4780  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4781  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4782  getF32Constant(DAG, 0x3f6ae232, dl));
4783  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4784  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4785  getF32Constant(DAG, 0x3f25f7c3, dl));
4786  } else { // LimitFloatPrecision <= 18
4787  // For floating-point precision of 18:
4788  //
4789  // Log10ofMantissa =
4790  // -0.84299375f +
4791  // (1.5327582f +
4792  // (-1.0688956f +
4793  // (0.49102474f +
4794  // (-0.12539807f + 0.13508273e-1f * x) * x) * x) * x) * x;
4795  //
4796  // error 0.0000037995730, which is better than 18 bits
4797  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, X,
4798  getF32Constant(DAG, 0x3c5d51ce, dl));
4799  SDValue t1 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t0,
4800  getF32Constant(DAG, 0x3e00685a, dl));
4801  SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X);
4802  SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2,
4803  getF32Constant(DAG, 0x3efb6798, dl));
4804  SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X);
4805  SDValue t5 = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4,
4806  getF32Constant(DAG, 0x3f88d192, dl));
4807  SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X);
4808  SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6,
4809  getF32Constant(DAG, 0x3fc4316c, dl));
4810  SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X);
4811  Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8,
4812  getF32Constant(DAG, 0x3f57ce70, dl));
4813  }
4814 
 // log10(Op) is approximated as the scaled exponent plus the mantissa term.
4815  return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa);
4816  }
4817 
4818  // No special expansion.
4819  return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op);
4820 }
4821 
4822 /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for
4823 /// limited-precision mode.
4824 static SDValue expandExp2(const SDLoc &dl, SDValue Op, SelectionDAG &DAG,
4825  const TargetLowering &TLI) {
4826  if (Op.getValueType() == MVT::f32 &&
4827  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18)
4828  return getLimitedPrecisionExp2(Op, dl, DAG);
4829 
4830  // No special expansion.
4831  return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op);
4832 }
4833 
4834 /// visitPow - Lower a pow intrinsic. Handles the special sequences for
4835 /// limited-precision mode with x == 10.0f.
4836 static SDValue expandPow(const SDLoc &dl, SDValue LHS, SDValue RHS,
4837  SelectionDAG &DAG, const TargetLowering &TLI) {
4838  bool IsExp10 = false;
4839  if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 &&
4840  LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) {
4841  if (ConstantFPSDNode *LHSC = dyn_cast<ConstantFPSDNode>(LHS)) {
4842  APFloat Ten(10.0f);
4843  IsExp10 = LHSC->isExactlyValue(Ten);
4844  }
4845  }
4846 
4847  // TODO: What fast-math-flags should be set on the FMUL node?
4848  if (IsExp10) {
4849  // Put the exponent in the right bit position for later addition to the
4850  // final result:
4851  //
4852  // #define LOG2OF10 3.3219281f
4853  // t0 = Op * LOG2OF10;
4854  SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS,
4855  getF32Constant(DAG, 0x40549a78, dl));
4856  return getLimitedPrecisionExp2(t0, dl, DAG);
4857  }
4858 
4859  // No special expansion.
4860  return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS);
4861 }
4862 
4863 /// ExpandPowI - Expand a llvm.powi intrinsic.
4864 static SDValue ExpandPowI(const SDLoc &DL, SDValue LHS, SDValue RHS,
4865  SelectionDAG &DAG) {
4866  // If RHS is a constant, we can expand this out to a multiplication tree,
4867  // otherwise we end up lowering to a call to __powidf2 (for example). When
4868  // optimizing for size, we only want to do this if the expansion would produce
4869  // a small number of multiplies, otherwise we do the full expansion.
4870  if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
4871  // Get the exponent as a positive value.
4872  unsigned Val = RHSC->getSExtValue();
4873  if ((int)Val < 0) Val = -Val;
4874 
4875  // powi(x, 0) -> 1.0
4876  if (Val == 0)
4877  return DAG.getConstantFP(1.0, DL, LHS.getValueType());
4878 
4879  const Function &F = DAG.getMachineFunction().getFunction();
4880  if (!F.optForSize() ||
4881  // If optimizing for size, don't insert too many multiplies.
4882  // This inserts up to 5 multiplies.
4883  countPopulation(Val) + Log2_32(Val) < 7) {
4884  // We use the simple binary decomposition method to generate the multiply
4885  // sequence. There are more optimal ways to do this (for example,
4886  // powi(x,15) generates one more multiply than it should), but this has
4887  // the benefit of being both really simple and much better than a libcall.
4888  SDValue Res; // Logically starts equal to 1.0
4889  SDValue CurSquare = LHS;
4890  // TODO: Intrinsics should have fast-math-flags that propagate to these
4891  // nodes.
4892  while (Val) {
4893  if (Val & 1) {
4894  if (Res.getNode())
4895  Res = DAG.getNode(ISD::FMUL, DL,Res.getValueType(), Res, CurSquare);
4896  else
4897  Res = CurSquare; // 1.0*CurSquare.
4898  }
4899 
4900  CurSquare = DAG.getNode(ISD::FMUL, DL, CurSquare.getValueType(),
4901  CurSquare, CurSquare);
4902  Val >>= 1;
4903  }
4904 
4905  // If the original was negative, invert the result, producing 1/(x*x*x).
4906  if (RHSC->getSExtValue() < 0)
4907  Res = DAG.getNode(ISD::FDIV, DL, LHS.getValueType(),
4908  DAG.getConstantFP(1.0, DL, LHS.getValueType()), Res);
4909  return Res;
4910  }
4911  }
4912 
4913  // Otherwise, expand to a libcall.
4914  return DAG.getNode(ISD::FPOWI, DL, LHS.getValueType(), LHS, RHS);
4915 }
4916 
4917 // getUnderlyingArgReg - Find underlying register used for a truncated or
4918 // bitcasted argument.
4919 static unsigned getUnderlyingArgReg(const SDValue &N) {
4920  switch (N.getOpcode()) {
4921  case ISD::CopyFromReg:
4922  return cast<RegisterSDNode>(N.getOperand(1))->getReg();
4923  case ISD::BITCAST:
4924  case ISD::AssertZext:
4925  case ISD::AssertSext:
4926  case ISD::TRUNCATE:
4927  return getUnderlyingArgReg(N.getOperand(0));
4928  default:
4929  return 0;
4930  }
4931 }
4932 
4933 /// If the DbgValueInst is a dbg_value of a function argument, create the
4934 /// corresponding DBG_VALUE machine instruction for it now. At the end of
4935 /// instruction selection, they will be inserted to the entry BB.
4936 bool SelectionDAGBuilder::EmitFuncArgumentDbgValue(
4937  const Value *V, DILocalVariable *Variable, DIExpression *Expr,
4938  DILocation *DL, bool IsDbgDeclare, const SDValue &N) {
4939  const Argument *Arg = dyn_cast<Argument>(V);
4940  if (!Arg)
4941  return false;
4942 
4943  MachineFunction &MF = DAG.getMachineFunction();
4944  const TargetInstrInfo *TII = DAG.getSubtarget().getInstrInfo();
4945 
4946  bool IsIndirect = false;
4948  // Some arguments' frame index is recorded during argument lowering.
4949  int FI = FuncInfo.getArgumentFrameIndex(Arg);
4950  if (FI != std::numeric_limits<int>::max())
4951  Op = MachineOperand::CreateFI(FI);
4952 
4953  if (!Op && N.getNode()) {
4954  unsigned Reg = getUnderlyingArgReg(N);
4955  if (Reg && TargetRegisterInfo::isVirtualRegister(Reg)) {
4956  MachineRegisterInfo &RegInfo = MF.getRegInfo();
4957  unsigned PR = RegInfo.getLiveInPhysReg(Reg);
4958  if (PR)
4959  Reg = PR;
4960  }
4961  if (Reg) {
4962  Op = MachineOperand::CreateReg(Reg, false);
4963  IsIndirect = IsDbgDeclare;
4964  }
4965  }
4966 
4967  if (!Op && N.getNode())
4968  // Check if frame index is available.
4969  if (LoadSDNode *LNode = dyn_cast<LoadSDNode>(N.getNode()))
4970  if (FrameIndexSDNode *FINode =
4971  dyn_cast<FrameIndexSDNode>(LNode->getBasePtr().getNode()))
4972  Op = MachineOperand::CreateFI(FINode->getIndex());
4973 
4974  if (!Op) {
4975  // Check if ValueMap has reg number.
4976  DenseMap<const Value *, unsigned>::iterator VMI = FuncInfo.ValueMap.find(V);
4977  if (VMI != FuncInfo.ValueMap.end()) {
4978  const auto &TLI = DAG.getTargetLoweringInfo();
4979  RegsForValue RFV(V->getContext(), TLI, DAG.getDataLayout(), VMI->second,
4980  V->getType(), getABIRegCopyCC(V));
4981  if (RFV.occupiesMultipleRegs()) {
4982  unsigned Offset = 0;
4983  for (auto RegAndSize : RFV.getRegsAndSizes()) {
4984  Op = MachineOperand::CreateReg(RegAndSize.first, false);
4985  auto FragmentExpr = DIExpression::createFragmentExpression(
4986  Expr, Offset, RegAndSize.second);
4987  if (!FragmentExpr)
4988  continue;
4989  FuncInfo.ArgDbgValues.push_back(
4990  BuildMI(MF, DL, TII->get(TargetOpcode::DBG_VALUE), IsDbgDeclare,
4991  Op->