LLVM  8.0.0svn
R600ISelLowering.cpp
Go to the documentation of this file.
1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// Custom DAG lowering for R600
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "R600ISelLowering.h"
16 #include "AMDGPUFrameLowering.h"
17 #include "AMDGPUSubtarget.h"
18 #include "R600Defines.h"
19 #include "R600FrameLowering.h"
20 #include "R600InstrInfo.h"
23 #include "Utils/AMDGPUBaseInfo.h"
24 #include "llvm/ADT/APFloat.h"
25 #include "llvm/ADT/APInt.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/IR/Constants.h"
40 #include "llvm/IR/DerivedTypes.h"
41 #include "llvm/Support/Casting.h"
42 #include "llvm/Support/Compiler.h"
45 #include <cassert>
46 #include <cstdint>
47 #include <iterator>
48 #include <utility>
49 #include <vector>
50 
51 using namespace llvm;
52 
53 #include "R600GenCallingConv.inc"
54 
56  const R600Subtarget &STI)
57  : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
58  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
59  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
60  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
61  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
62  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
63  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
64 
66 
67  // Legalize loads and stores to the private address space.
71 
72  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
73  // spaces, so it is custom lowered to handle those where it isn't.
74  for (MVT VT : MVT::integer_valuetypes()) {
78 
82 
86  }
87 
88  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
92 
96 
101 
104  // We need to include these since trunc STORES to PRIVATE need
105  // special handling to accommodate RMW
116 
117  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
120 
121  // Set condition code actions
134 
139 
142 
145 
149 
151 
156 
159 
166 
171 
172  // ADD, SUB overflow.
173  // TODO: turn these into Legal?
174  if (Subtarget->hasCARRY())
176 
177  if (Subtarget->hasBORROW())
179 
180  // Expand sign extension of vectors
181  if (!Subtarget->hasBFE())
183 
186 
187  if (!Subtarget->hasBFE())
191 
192  if (!Subtarget->hasBFE())
196 
200 
202 
204 
209 
214 
215  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
216  // to be Legal/Custom in order to avoid library calls.
220 
221  if (!Subtarget->hasFMA()) {
224  }
225 
226  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
227  // need it for R600.
228  if (!Subtarget->hasFP32Denormals())
230 
231  if (!Subtarget->hasBFI()) {
232  // fcopysign can be done in a single instruction with BFI.
235  }
236 
237  if (!Subtarget->hasBCNT(32))
239 
240  if (!Subtarget->hasBCNT(64))
242 
243  if (Subtarget->hasFFBH())
245 
246  if (Subtarget->hasFFBL())
248 
249  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
250  // need it for R600.
251  if (Subtarget->hasBFE())
252  setHasExtractBitsInsn(true);
253 
255 
256  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
257  for (MVT VT : ScalarIntVTs) {
262  }
263 
264  // LLVM will expand these to atomic_cmp_swap(0)
265  // and atomic_swap, respectively.
268 
269  // We need to custom lower some of the intrinsics
272 
274 
281 }
282 
283 static inline bool isEOP(MachineBasicBlock::iterator I) {
284  if (std::next(I) == I->getParent()->end())
285  return false;
286  return std::next(I)->getOpcode() == R600::RETURN;
287 }
288 
291  MachineBasicBlock *BB) const {
292  MachineFunction *MF = BB->getParent();
295  const R600InstrInfo *TII = Subtarget->getInstrInfo();
296 
297  switch (MI.getOpcode()) {
298  default:
299  // Replace LDS_*_RET instructions that don't have any uses with the
300  // equivalent LDS_*_NORET instruction.
301  if (TII->isLDSRetInstr(MI.getOpcode())) {
302  int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
303  assert(DstIdx != -1);
304  MachineInstrBuilder NewMI;
305  // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
306  // LDS_1A2D support and remove this special case.
307  if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
308  MI.getOpcode() == R600::LDS_CMPST_RET)
309  return BB;
310 
311  NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
312  TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
313  for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
314  NewMI.add(MI.getOperand(i));
315  }
316  } else {
318  }
319  break;
320 
321  case R600::FABS_R600: {
323  *BB, I, R600::MOV, MI.getOperand(0).getReg(),
324  MI.getOperand(1).getReg());
325  TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
326  break;
327  }
328 
329  case R600::FNEG_R600: {
331  *BB, I, R600::MOV, MI.getOperand(0).getReg(),
332  MI.getOperand(1).getReg());
333  TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
334  break;
335  }
336 
337  case R600::MASK_WRITE: {
338  unsigned maskedRegister = MI.getOperand(0).getReg();
340  MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
341  TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
342  break;
343  }
344 
345  case R600::MOV_IMM_F32:
346  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
347  .getFPImm()
348  ->getValueAPF()
349  .bitcastToAPInt()
350  .getZExtValue());
351  break;
352 
353  case R600::MOV_IMM_I32:
354  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
355  MI.getOperand(1).getImm());
356  break;
357 
358  case R600::MOV_IMM_GLOBAL_ADDR: {
359  //TODO: Perhaps combine this instruction with the next if possible
360  auto MIB = TII->buildDefaultInstruction(
361  *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
362  int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
363  //TODO: Ugh this is rather ugly
364  MIB->getOperand(Idx) = MI.getOperand(1);
365  break;
366  }
367 
368  case R600::CONST_COPY: {
370  *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
371  TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
372  MI.getOperand(1).getImm());
373  break;
374  }
375 
376  case R600::RAT_WRITE_CACHELESS_32_eg:
377  case R600::RAT_WRITE_CACHELESS_64_eg:
378  case R600::RAT_WRITE_CACHELESS_128_eg:
379  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
380  .add(MI.getOperand(0))
381  .add(MI.getOperand(1))
382  .addImm(isEOP(I)); // Set End of program bit
383  break;
384 
385  case R600::RAT_STORE_TYPED_eg:
386  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
387  .add(MI.getOperand(0))
388  .add(MI.getOperand(1))
389  .add(MI.getOperand(2))
390  .addImm(isEOP(I)); // Set End of program bit
391  break;
392 
393  case R600::BRANCH:
394  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
395  .add(MI.getOperand(0));
396  break;
397 
398  case R600::BRANCH_COND_f32: {
399  MachineInstr *NewMI =
400  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
401  R600::PREDICATE_BIT)
402  .add(MI.getOperand(1))
403  .addImm(R600::PRED_SETNE)
404  .addImm(0); // Flags
405  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
406  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
407  .add(MI.getOperand(0))
408  .addReg(R600::PREDICATE_BIT, RegState::Kill);
409  break;
410  }
411 
412  case R600::BRANCH_COND_i32: {
413  MachineInstr *NewMI =
414  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
415  R600::PREDICATE_BIT)
416  .add(MI.getOperand(1))
417  .addImm(R600::PRED_SETNE_INT)
418  .addImm(0); // Flags
419  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
420  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
421  .add(MI.getOperand(0))
422  .addReg(R600::PREDICATE_BIT, RegState::Kill);
423  break;
424  }
425 
426  case R600::EG_ExportSwz:
427  case R600::R600_ExportSwz: {
428  // Instruction is left unmodified if it's not the last one of its type
429  bool isLastInstructionOfItsType = true;
430  unsigned InstExportType = MI.getOperand(1).getImm();
431  for (MachineBasicBlock::iterator NextExportInst = std::next(I),
432  EndBlock = BB->end(); NextExportInst != EndBlock;
433  NextExportInst = std::next(NextExportInst)) {
434  if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
435  NextExportInst->getOpcode() == R600::R600_ExportSwz) {
436  unsigned CurrentInstExportType = NextExportInst->getOperand(1)
437  .getImm();
438  if (CurrentInstExportType == InstExportType) {
439  isLastInstructionOfItsType = false;
440  break;
441  }
442  }
443  }
444  bool EOP = isEOP(I);
445  if (!EOP && !isLastInstructionOfItsType)
446  return BB;
447  unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
448  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
449  .add(MI.getOperand(0))
450  .add(MI.getOperand(1))
451  .add(MI.getOperand(2))
452  .add(MI.getOperand(3))
453  .add(MI.getOperand(4))
454  .add(MI.getOperand(5))
455  .add(MI.getOperand(6))
456  .addImm(CfInst)
457  .addImm(EOP);
458  break;
459  }
460  case R600::RETURN: {
461  return BB;
462  }
463  }
464 
465  MI.eraseFromParent();
466  return BB;
467 }
468 
469 //===----------------------------------------------------------------------===//
470 // Custom DAG Lowering Operations
471 //===----------------------------------------------------------------------===//
472 
476  switch (Op.getOpcode()) {
477  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
478  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
479  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
480  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
481  case ISD::SRA_PARTS:
482  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
483  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
484  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
485  case ISD::FCOS:
486  case ISD::FSIN: return LowerTrig(Op, DAG);
487  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
488  case ISD::STORE: return LowerSTORE(Op, DAG);
489  case ISD::LOAD: {
490  SDValue Result = LowerLOAD(Op, DAG);
491  assert((!Result.getNode() ||
492  Result.getNode()->getNumValues() == 2) &&
493  "Load should return a value and a chain");
494  return Result;
495  }
496 
497  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
498  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
499  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
500  case ISD::INTRINSIC_VOID: {
501  SDValue Chain = Op.getOperand(0);
502  unsigned IntrinsicID =
503  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
504  switch (IntrinsicID) {
505  case Intrinsic::r600_store_swizzle: {
506  SDLoc DL(Op);
507  const SDValue Args[8] = {
508  Chain,
509  Op.getOperand(2), // Export Value
510  Op.getOperand(3), // ArrayBase
511  Op.getOperand(4), // Type
512  DAG.getConstant(0, DL, MVT::i32), // SWZ_X
513  DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
514  DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
515  DAG.getConstant(3, DL, MVT::i32) // SWZ_W
516  };
517  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
518  }
519 
520  // default for switch(IntrinsicID)
521  default: break;
522  }
523  // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
524  break;
525  }
527  unsigned IntrinsicID =
528  cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
529  EVT VT = Op.getValueType();
530  SDLoc DL(Op);
531  switch (IntrinsicID) {
532  case Intrinsic::r600_tex:
533  case Intrinsic::r600_texc: {
534  unsigned TextureOp;
535  switch (IntrinsicID) {
536  case Intrinsic::r600_tex:
537  TextureOp = 0;
538  break;
539  case Intrinsic::r600_texc:
540  TextureOp = 1;
541  break;
542  default:
543  llvm_unreachable("unhandled texture operation");
544  }
545 
546  SDValue TexArgs[19] = {
547  DAG.getConstant(TextureOp, DL, MVT::i32),
548  Op.getOperand(1),
549  DAG.getConstant(0, DL, MVT::i32),
550  DAG.getConstant(1, DL, MVT::i32),
551  DAG.getConstant(2, DL, MVT::i32),
552  DAG.getConstant(3, DL, MVT::i32),
553  Op.getOperand(2),
554  Op.getOperand(3),
555  Op.getOperand(4),
556  DAG.getConstant(0, DL, MVT::i32),
557  DAG.getConstant(1, DL, MVT::i32),
558  DAG.getConstant(2, DL, MVT::i32),
559  DAG.getConstant(3, DL, MVT::i32),
560  Op.getOperand(5),
561  Op.getOperand(6),
562  Op.getOperand(7),
563  Op.getOperand(8),
564  Op.getOperand(9),
565  Op.getOperand(10)
566  };
567  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
568  }
569  case Intrinsic::r600_dot4: {
570  SDValue Args[8] = {
572  DAG.getConstant(0, DL, MVT::i32)),
574  DAG.getConstant(0, DL, MVT::i32)),
576  DAG.getConstant(1, DL, MVT::i32)),
578  DAG.getConstant(1, DL, MVT::i32)),
580  DAG.getConstant(2, DL, MVT::i32)),
582  DAG.getConstant(2, DL, MVT::i32)),
584  DAG.getConstant(3, DL, MVT::i32)),
586  DAG.getConstant(3, DL, MVT::i32))
587  };
588  return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
589  }
590 
591  case Intrinsic::r600_implicitarg_ptr: {
594  return DAG.getConstant(ByteOffset, DL, PtrVT);
595  }
596  case Intrinsic::r600_read_ngroups_x:
597  return LowerImplicitParameter(DAG, VT, DL, 0);
598  case Intrinsic::r600_read_ngroups_y:
599  return LowerImplicitParameter(DAG, VT, DL, 1);
600  case Intrinsic::r600_read_ngroups_z:
601  return LowerImplicitParameter(DAG, VT, DL, 2);
602  case Intrinsic::r600_read_global_size_x:
603  return LowerImplicitParameter(DAG, VT, DL, 3);
604  case Intrinsic::r600_read_global_size_y:
605  return LowerImplicitParameter(DAG, VT, DL, 4);
606  case Intrinsic::r600_read_global_size_z:
607  return LowerImplicitParameter(DAG, VT, DL, 5);
608  case Intrinsic::r600_read_local_size_x:
609  return LowerImplicitParameter(DAG, VT, DL, 6);
610  case Intrinsic::r600_read_local_size_y:
611  return LowerImplicitParameter(DAG, VT, DL, 7);
612  case Intrinsic::r600_read_local_size_z:
613  return LowerImplicitParameter(DAG, VT, DL, 8);
614 
615  case Intrinsic::r600_read_tgid_x:
616  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
617  R600::T1_X, VT);
618  case Intrinsic::r600_read_tgid_y:
619  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
620  R600::T1_Y, VT);
621  case Intrinsic::r600_read_tgid_z:
622  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
623  R600::T1_Z, VT);
624  case Intrinsic::r600_read_tidig_x:
625  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
626  R600::T0_X, VT);
627  case Intrinsic::r600_read_tidig_y:
628  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
629  R600::T0_Y, VT);
630  case Intrinsic::r600_read_tidig_z:
631  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
632  R600::T0_Z, VT);
633 
634  case Intrinsic::r600_recipsqrt_ieee:
635  return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
636 
637  case Intrinsic::r600_recipsqrt_clamped:
638  return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
639  default:
640  return Op;
641  }
642 
643  // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
644  break;
645  }
646  } // end switch(Op.getOpcode())
647  return SDValue();
648 }
649 
652  SelectionDAG &DAG) const {
653  switch (N->getOpcode()) {
654  default:
656  return;
657  case ISD::FP_TO_UINT:
658  if (N->getValueType(0) == MVT::i1) {
659  Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
660  return;
661  }
662  // Since we don't care about out of bounds values we can use FP_TO_SINT for
663  // uints too. The DAGLegalizer code for uint considers some extra cases
664  // which are not necessary here.
666  case ISD::FP_TO_SINT: {
667  if (N->getValueType(0) == MVT::i1) {
668  Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
669  return;
670  }
671 
672  SDValue Result;
673  if (expandFP_TO_SINT(N, Result, DAG))
674  Results.push_back(Result);
675  return;
676  }
677  case ISD::SDIVREM: {
678  SDValue Op = SDValue(N, 1);
679  SDValue RES = LowerSDIVREM(Op, DAG);
680  Results.push_back(RES);
681  Results.push_back(RES.getValue(1));
682  break;
683  }
684  case ISD::UDIVREM: {
685  SDValue Op = SDValue(N, 0);
686  LowerUDIVREM64(Op, DAG, Results);
687  break;
688  }
689  }
690 }
691 
/// Rebuilds \p Vector as an AMDGPUISD::BUILD_VERTICAL_VECTOR node of the same
/// vector type. Each element is pulled out with its own EXTRACT_VECTOR_ELT
/// node, in ascending index order, and the extracted values become the
/// operands of the new node.
692 SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
693  SDValue Vector) const {
694  SDLoc DL(Vector);
695  EVT VecVT = Vector.getValueType();
696  EVT EltVT = VecVT.getVectorElementType();
698 
  // One EXTRACT_VECTOR_ELT per lane; the index constant uses the target's
  // vector-index type.
699  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
700  Args.push_back(DAG.getNode(
701  ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
702  DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
703  }
704 
705  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
706 }
707 
/// Custom lowering for EXTRACT_VECTOR_ELT. Operand 0 is the vector and
/// operand 1 the index. A constant index is one of the cases in which \p Op is
/// returned unchanged; otherwise the vector is first converted with
/// vectorToVerticalVector() and the extract is re-emitted on that result.
708 SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
709  SelectionDAG &DAG) const {
710  SDLoc DL(Op);
711  SDValue Vector = Op.getOperand(0);
712  SDValue Index = Op.getOperand(1);
713 
714  if (isa<ConstantSDNode>(Index) ||
716  return Op;
717 
718  Vector = vectorToVerticalVector(DAG, Vector);
719  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
720  Vector, Index);
721 }
722 
/// Custom lowering for INSERT_VECTOR_ELT. Operand 0 is the vector, operand 1
/// the value to insert and operand 2 the index. A constant index is one of
/// the cases in which \p Op is returned unchanged; otherwise the insert is
/// re-emitted on a vertical-vector form of the input and the result is itself
/// converted with vectorToVerticalVector().
723 SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
724  SelectionDAG &DAG) const {
725  SDLoc DL(Op);
726  SDValue Vector = Op.getOperand(0);
727  SDValue Value = Op.getOperand(1);
728  SDValue Index = Op.getOperand(2);
729 
730  if (isa<ConstantSDNode>(Index) ||
732  return Op;
733 
734  Vector = vectorToVerticalVector(DAG, Vector);
735  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
736  Vector, Value, Index);
737  return vectorToVerticalVector(DAG, Insert);
738 }
739 
/// Lowers a GlobalAddress node. One path delegates to
/// AMDGPUTargetLowering::LowerGlobalAddress(); the other builds a target
/// global address for the referenced global and wraps it in an
/// AMDGPUISD::CONST_DATA_PTR node whose type is the pointer type of
/// AMDGPUAS::CONSTANT_ADDRESS.
740 SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
741  SDValue Op,
742  SelectionDAG &DAG) const {
743  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
745  return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
746 
747  const DataLayout &DL = DAG.getDataLayout();
748  const GlobalValue *GV = GSD->getGlobal();
749  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
750 
751  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
752  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
753 }
754 
755 SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
756  // On hw >= R700, COS/SIN input must be between -1. and 1.
757  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
758  EVT VT = Op.getValueType();
759  SDValue Arg = Op.getOperand(0);
760  SDLoc DL(Op);
761 
762  // TODO: Should this propagate fast-math-flags?
763  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
764  DAG.getNode(ISD::FADD, DL, VT,
765  DAG.getNode(ISD::FMUL, DL, VT, Arg,
766  DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
767  DAG.getConstantFP(0.5, DL, MVT::f32)));
768  unsigned TrigNode;
769  switch (Op.getOpcode()) {
770  case ISD::FCOS:
771  TrigNode = AMDGPUISD::COS_HW;
772  break;
773  case ISD::FSIN:
774  TrigNode = AMDGPUISD::SIN_HW;
775  break;
776  default:
777  llvm_unreachable("Wrong trig opcode");
778  }
779  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
780  DAG.getNode(ISD::FADD, DL, VT, FractPart,
781  DAG.getConstantFP(-0.5, DL, MVT::f32)));
782  if (Gen >= AMDGPUSubtarget::R700)
783  return TrigVal;
784  // On R600 hw, COS/SIN input must be between -Pi and Pi.
785  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
786  DAG.getConstantFP(3.14159265359, DL, MVT::f32));
787 }
788 
789 SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
790  SDLoc DL(Op);
791  EVT VT = Op.getValueType();
792 
793  SDValue Lo = Op.getOperand(0);
794  SDValue Hi = Op.getOperand(1);
795  SDValue Shift = Op.getOperand(2);
796  SDValue Zero = DAG.getConstant(0, DL, VT);
797  SDValue One = DAG.getConstant(1, DL, VT);
798 
799  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
800  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
801  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
802  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
803 
804  // The dance around Width1 is necessary for 0 special case.
805  // Without it the CompShift might be 32, producing incorrect results in
806  // Overflow. So we do the shift in two steps, the alternative is to
807  // add a conditional to filter the special case.
808 
809  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
810  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
811 
812  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
813  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
814  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
815 
816  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
817  SDValue LoBig = Zero;
818 
819  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
820  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
821 
822  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
823 }
824 
825 SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
826  SDLoc DL(Op);
827  EVT VT = Op.getValueType();
828 
829  SDValue Lo = Op.getOperand(0);
830  SDValue Hi = Op.getOperand(1);
831  SDValue Shift = Op.getOperand(2);
832  SDValue Zero = DAG.getConstant(0, DL, VT);
833  SDValue One = DAG.getConstant(1, DL, VT);
834 
835  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
836 
837  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
838  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
839  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
840  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
841 
842  // The dance around Width1 is necessary for 0 special case.
843  // Without it the CompShift might be 32, producing incorrect results in
844  // Overflow. So we do the shift in two steps, the alternative is to
845  // add a conditional to filter the special case.
846 
847  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
848  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
849 
850  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
851  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
852  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
853 
854  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
855  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
856 
857  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
858  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
859 
860  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
861 }
862 
863 SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
864  unsigned mainop, unsigned ovf) const {
865  SDLoc DL(Op);
866  EVT VT = Op.getValueType();
867 
868  SDValue Lo = Op.getOperand(0);
869  SDValue Hi = Op.getOperand(1);
870 
871  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
872  // Extend sign.
873  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
874  DAG.getValueType(MVT::i1));
875 
876  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
877 
878  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
879 }
880 
881 SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
882  SDLoc DL(Op);
883  return DAG.getNode(
884  ISD::SETCC,
885  DL,
886  MVT::i1,
887  Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
888  DAG.getCondCode(ISD::SETEQ));
889 }
890 
891 SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
892  SDLoc DL(Op);
893  return DAG.getNode(
894  ISD::SETCC,
895  DL,
896  MVT::i1,
897  Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
898  DAG.getCondCode(ISD::SETEQ));
899 }
900 
/// Emits a load of type \p VT for an implicit parameter.
///
/// \p DwordOffset is the parameter's index in 32-bit words; it is scaled by 4
/// to obtain the byte offset, which is used directly as a constant i32
/// pointer. The load is chained on the DAG entry node.
901 SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
902  const SDLoc &DL,
903  unsigned DwordOffset) const {
904  unsigned ByteOffset = DwordOffset * 4;
905  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
907 
908  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
909  assert(isInt<16>(ByteOffset));
910 
911  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
912  DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
914 }
915 
916 bool R600TargetLowering::isZero(SDValue Op) const {
917  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
918  return Cst->isNullValue();
919  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
920  return CstFP->isZero();
921  } else {
922  return false;
923  }
924 }
925 
926 bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
927  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
928  return CFP->isExactlyValue(1.0);
929  }
930  return isAllOnesConstant(Op);
931 }
932 
933 bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
934  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
935  return CFP->getValueAPF().isZero();
936  }
937  return isNullConstant(Op);
938 }
939 
940 SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
941  SDLoc DL(Op);
942  EVT VT = Op.getValueType();
943 
944  SDValue LHS = Op.getOperand(0);
945  SDValue RHS = Op.getOperand(1);
946  SDValue True = Op.getOperand(2);
947  SDValue False = Op.getOperand(3);
948  SDValue CC = Op.getOperand(4);
949  SDValue Temp;
950 
951  if (VT == MVT::f32) {
952  DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
953  SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
954  if (MinMax)
955  return MinMax;
956  }
957 
958  // LHS and RHS are guaranteed to be the same value type
959  EVT CompareVT = LHS.getValueType();
960 
961  // Check if we can lower this to a native operation.
962 
963  // Try to lower to a SET* instruction:
964  //
965  // SET* can match the following patterns:
966  //
967  // select_cc f32, f32, -1, 0, cc_supported
968  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
969  // select_cc i32, i32, -1, 0, cc_supported
970  //
971 
972  // Move hardware True/False values to the correct operand.
973  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
974  ISD::CondCode InverseCC =
975  ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
976  if (isHWTrueValue(False) && isHWFalseValue(True)) {
977  if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
978  std::swap(False, True);
979  CC = DAG.getCondCode(InverseCC);
980  } else {
981  ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
982  if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
983  std::swap(False, True);
984  std::swap(LHS, RHS);
985  CC = DAG.getCondCode(SwapInvCC);
986  }
987  }
988  }
989 
990  if (isHWTrueValue(True) && isHWFalseValue(False) &&
991  (CompareVT == VT || VT == MVT::i32)) {
992  // This can be matched by a SET* instruction.
993  return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
994  }
995 
996  // Try to lower to a CND* instruction:
997  //
998  // CND* can match the following patterns:
999  //
1000  // select_cc f32, 0.0, f32, f32, cc_supported
1001  // select_cc f32, 0.0, i32, i32, cc_supported
1002  // select_cc i32, 0, f32, f32, cc_supported
1003  // select_cc i32, 0, i32, i32, cc_supported
1004  //
1005 
1006  // Try to move the zero value to the RHS
1007  if (isZero(LHS)) {
1008  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1009  // Try swapping the operands
1010  ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
1011  if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1012  std::swap(LHS, RHS);
1013  CC = DAG.getCondCode(CCSwapped);
1014  } else {
1015  // Try inverting the conditon and then swapping the operands
1016  ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
1017  CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
1018  if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
1019  std::swap(True, False);
1020  std::swap(LHS, RHS);
1021  CC = DAG.getCondCode(CCSwapped);
1022  }
1023  }
1024  }
1025  if (isZero(RHS)) {
1026  SDValue Cond = LHS;
1027  SDValue Zero = RHS;
1028  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
1029  if (CompareVT != VT) {
1030  // Bitcast True / False to the correct types. This will end up being
1031  // a nop, but it allows us to define only a single pattern in the
1032  // .TD files for each CND* instruction rather than having to have
1033  // one pattern for integer True/False and one for fp True/False
1034  True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
1035  False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
1036  }
1037 
1038  switch (CCOpcode) {
1039  case ISD::SETONE:
1040  case ISD::SETUNE:
1041  case ISD::SETNE:
1042  CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
1043  Temp = True;
1044  True = False;
1045  False = Temp;
1046  break;
1047  default:
1048  break;
1049  }
1050  SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
1051  Cond, Zero,
1052  True, False,
1053  DAG.getCondCode(CCOpcode));
1054  return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
1055  }
1056 
1057  // If we make it this for it means we have no native instructions to handle
1058  // this SELECT_CC, so we must lower it.
1059  SDValue HWTrue, HWFalse;
1060 
1061  if (CompareVT == MVT::f32) {
1062  HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
1063  HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
1064  } else if (CompareVT == MVT::i32) {
1065  HWTrue = DAG.getConstant(-1, DL, CompareVT);
1066  HWFalse = DAG.getConstant(0, DL, CompareVT);
1067  }
1068  else {
1069  llvm_unreachable("Unhandled value type in LowerSELECT_CC");
1070  }
1071 
1072  // Lower this unsupported SELECT_CC into a combination of two supported
1073  // SELECT_CC operations.
1074  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
1075 
1076  return DAG.getNode(ISD::SELECT_CC, DL, VT,
1077  Cond, HWFalse,
1078  True, False,
1079  DAG.getCondCode(ISD::SETNE));
1080 }
1081 
1082 /// LLVM generates byte-addressed pointers. For indirect addressing, we need to
1083 /// convert these pointers to a register index. Each register holds
1084 /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1085 /// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1086 /// for indirect addressing.
1087 SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1088  unsigned StackWidth,
1089  SelectionDAG &DAG) const {
1090  unsigned SRLPad;
1091  switch(StackWidth) {
1092  case 1:
1093  SRLPad = 2;
1094  break;
1095  case 2:
1096  SRLPad = 3;
1097  break;
1098  case 4:
1099  SRLPad = 4;
1100  break;
1101  default: llvm_unreachable("Invalid stack width");
1102  }
1103 
1104  SDLoc DL(Ptr);
1105  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1106  DAG.getConstant(SRLPad, DL, MVT::i32));
1107 }
1108 
1109 void R600TargetLowering::getStackAddress(unsigned StackWidth,
1110  unsigned ElemIdx,
1111  unsigned &Channel,
1112  unsigned &PtrIncr) const {
1113  switch (StackWidth) {
1114  default:
1115  case 1:
1116  Channel = 0;
1117  if (ElemIdx > 0) {
1118  PtrIncr = 1;
1119  } else {
1120  PtrIncr = 0;
1121  }
1122  break;
1123  case 2:
1124  Channel = ElemIdx % 2;
1125  if (ElemIdx == 2) {
1126  PtrIncr = 1;
1127  } else {
1128  PtrIncr = 0;
1129  }
1130  break;
1131  case 4:
1132  Channel = ElemIdx;
1133  PtrIncr = 0;
1134  break;
1135  }
1136 }
1137 
/// Lower a store whose memory type is i8 or i16 into a read-modify-write of
/// the 32-bit word that contains it.
///
/// The word at (address & ~3) is loaded, the destination bits are cleared with
/// an inverted, shifted mask, the new value is OR-ed in at bit offset
/// (address & 3) * 8, and the whole word is stored back.
///
/// \param Store The store to lower; its memory VT must be i8 or i16.
/// \param DAG   DAG in which the replacement nodes are created.
/// \returns The new dword store node.
///
/// NOTE(review): \c PtrInfo is used below but no declaration is visible in
/// this listing (the embedded numbering skips 1144 and 1177-1178) — confirm
/// against the complete file.
1138 SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1139  SelectionDAG &DAG) const {
1140  SDLoc DL(Store);
1141  //TODO: Who creates the i8 stores?
1142  assert(Store->isTruncatingStore()
1143  || Store->getValue().getValueType() == MVT::i8);
1145 
 // Select the bit mask matching the width of the memory type.
1146  SDValue Mask;
1147  if (Store->getMemoryVT() == MVT::i8) {
1148  assert(Store->getAlignment() >= 1);
1149  Mask = DAG.getConstant(0xff, DL, MVT::i32);
1150  } else if (Store->getMemoryVT() == MVT::i16) {
1151  assert(Store->getAlignment() >= 2);
1152  Mask = DAG.getConstant(0xffff, DL, MVT::i32);
1153  } else {
1154  llvm_unreachable("Unsupported private trunc store");
1155  }
1156 
 // A DUMMY_CHAIN as incoming chain marks this store as one element of an
 // expanded vector store; the real chain is the dummy's operand.
1157  SDValue OldChain = Store->getChain();
1158  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1159  // Skip dummy
1160  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1161  SDValue BasePtr = Store->getBasePtr();
1162  SDValue Offset = Store->getOffset();
1163  EVT MemVT = Store->getMemoryVT();
1164 
 // Fold a defined offset into the address.
1165  SDValue LoadPtr = BasePtr;
1166  if (!Offset.isUndef()) {
1167  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1168  }
1169 
1170  // Get dword location
1171  // TODO: this should be eliminated by the future SHR ptr, 2
1172  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1173  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1174 
1175  // Load dword
1176  // TODO: can we be smarter about machine pointer info?
1179  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1180 
 // Continue on the load's output chain so the store is ordered after it.
1181  Chain = Dst.getValue(1);
1182 
1183  // Get offset in dword
1184  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1185  DAG.getConstant(0x3, DL, MVT::i32));
1186 
1187  // Convert byte offset to bit shift
1188  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1189  DAG.getConstant(3, DL, MVT::i32));
1190 
1191  // TODO: Contrary to the name of the function,
1192  // it also handles sub i32 non-truncating stores (like i1)
1193  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1194  Store->getValue());
1195 
1196  // Mask the value to the right type
1197  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1198 
1199  // Shift the value in place
1200  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1201  MaskedValue, ShiftAmt);
1202 
1203  // Shift the mask in place
1204  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1205 
1206  // Invert the mask. NOTE: if we had native ROL instructions we could
1207  // use inverted mask
1208  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1209 
1210  // Cleanup the target bits
1211  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1212 
1213  // Add the new bits
1214  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1215 
1216  // Store dword
1217  // TODO: Can we be smarter about MachinePointerInfo?
1218  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
1219 
1220  // If we are part of expanded vector, make our neighbors depend on this store
1221  if (VectorTrunc) {
1222  // Make all other vector elements depend on this store
 // Every user of the old dummy chain is redirected to a new dummy chain
 // that hangs off the store just created.
1223  Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1224  DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1225  }
1226  return NewStore;
1227 }
1228 
/// Custom lowering of a store node, dispatched on address space and type.
///
/// - Vector stores to LOCAL or PRIVATE are scalarized; a truncating PRIVATE
///   vector store is first re-created on top of a DUMMY_CHAIN node.
/// - Stores aligned below the store size that
///   allowsMisalignedMemoryAccesses() rejects are expanded with
///   expandUnalignedStore().
/// - GLOBAL truncating stores are turned into a masked store built from a
///   v4i32 of {shifted value, 0, 0, shifted mask} and a dword address.
/// - GLOBAL stores of at least 32 bits whose pointer is not yet a DWORDADDR
///   are re-emitted with a DWORDADDR pointer.
/// - PRIVATE stores narrower than i32 go through lowerPrivateTruncStore();
///   other PRIVATE stores are tagged with DWORDADDR if not already tagged.
///
/// \returns The replacement node, or an empty SDValue when nothing is changed
///          here.
///
/// NOTE(review): the head of the call that consumes \c Args in the GLOBAL
/// truncating-store path is not visible in this listing (embedded line 1306
/// is absent) — confirm against the complete file.
1229 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1230  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1231  unsigned AS = StoreNode->getAddressSpace();
1232 
1233  SDValue Chain = StoreNode->getChain();
1234  SDValue Ptr = StoreNode->getBasePtr();
1235  SDValue Value = StoreNode->getValue();
1236 
1237  EVT VT = Value.getValueType();
1238  EVT MemVT = StoreNode->getMemoryVT();
1239  EVT PtrVT = Ptr.getValueType();
1240 
1241  SDLoc DL(Op);
1242 
1243  // Neither LOCAL nor PRIVATE can do vectors at the moment
1244  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
1245  VT.isVector()) {
1246  if ((AS == AMDGPUAS::PRIVATE_ADDRESS) &&
1247  StoreNode->isTruncatingStore()) {
1248  // Add an extra level of chain to isolate this vector
1249  SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1250  // TODO: can the chain be replaced without creating a new store?
1251  SDValue NewStore = DAG.getTruncStore(
1252  NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
1253  MemVT, StoreNode->getAlignment(),
1254  StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
1255  StoreNode = cast<StoreSDNode>(NewStore);
1256  }
1257 
1258  return scalarizeVectorStore(StoreNode, DAG);
1259  }
1260 
 // Expand stores that are under-aligned for their memory type unless the
 // target accepts the misaligned access.
1261  unsigned Align = StoreNode->getAlignment();
1262  if (Align < MemVT.getStoreSize() &&
1263  !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
1264  return expandUnalignedStore(StoreNode, DAG);
1265  }
1266 
 // Byte address divided by four, i.e. the dword address.
1267  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1268  DAG.getConstant(2, DL, PtrVT));
1269 
1270  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1271  // It is beneficial to create MSKOR here instead of combiner to avoid
1272  // artificial dependencies introduced by RMW
1273  if (StoreNode->isTruncatingStore()) {
1274  assert(VT.bitsLE(MVT::i32));
1275  SDValue MaskConstant;
1276  if (MemVT == MVT::i8) {
1277  MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1278  } else {
1279  assert(MemVT == MVT::i16);
1280  assert(StoreNode->getAlignment() >= 2);
1281  MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1282  }
1283 
 // Bit offset of the stored bytes inside their dword: (Ptr & 3) * 8.
1284  SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1285  DAG.getConstant(0x00000003, DL, PtrVT));
1286  SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1287  DAG.getConstant(3, DL, VT));
1288 
1289  // Put the mask in correct place
1290  SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1291 
1292  // Put the value bits in correct place
1293  SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1294  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1295 
1296  // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1297  // vector instead.
1298  SDValue Src[4] = {
1299  ShiftedValue,
1300  DAG.getConstant(0, DL, MVT::i32),
1301  DAG.getConstant(0, DL, MVT::i32),
1302  Mask
1303  };
1304  SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1305  SDValue Args[3] = { Chain, Input, DWordAddr };
1307  Op->getVTList(), Args, MemVT,
1308  StoreNode->getMemOperand());
1309  } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1310  // Convert pointer from byte address to dword address.
1311  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1312 
1313  if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1314  llvm_unreachable("Truncated and indexed stores not supported yet");
1315  } else {
1316  Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1317  }
1318  return Chain;
1319  }
1320  }
1321 
1322  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1323  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1324  return SDValue();
1325 
 // Private stores narrower than a dword need a read-modify-write sequence.
1326  if (MemVT.bitsLT(MVT::i32))
1327  return lowerPrivateTruncStore(StoreNode, DAG);
1328 
1329  // Standard i32+ store, tag it with DWORDADDR to note that the address
1330  // has been shifted
1331  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1332  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1333  return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1334  }
1335 
1336  // Tagged i32+ stores will be matched by patterns
1337  return SDValue();
1338 }
1339 
// Returns 512 + (kc_bank << 12) for the given address space, or -1 when the
// address space matches none of the cases.
// The visible returns step through 512 + 4096 * k for k = 0..15.
// NOTE(review): the line holding the function name and parameter list, and
// every case label in the switch, are not present in this listing (the
// embedded numbering skips them) — which address space maps to which k must
// be confirmed against the complete file.
1341 static int
1343  switch (AddressSpace) {
1345  return 512;
1347  return 512 + 4096;
1349  return 512 + 4096 * 2;
1351  return 512 + 4096 * 3;
1353  return 512 + 4096 * 4;
1355  return 512 + 4096 * 5;
1357  return 512 + 4096 * 6;
1359  return 512 + 4096 * 7;
1361  return 512 + 4096 * 8;
1363  return 512 + 4096 * 9;
1365  return 512 + 4096 * 10;
1367  return 512 + 4096 * 11;
1369  return 512 + 4096 * 12;
1371  return 512 + 4096 * 13;
1373  return 512 + 4096 * 14;
1375  return 512 + 4096 * 15;
1376  default:
 // No case matched.
1377  return -1;
1378  }
1379 }
1380 
/// Lower a sub-dword load by reading the enclosing 32-bit word and extracting
/// the requested bits.
///
/// The word at (address & ~3) is loaded, shifted right by (address & 3) * 8,
/// and the upper bits are filled by sign extension for SEXTLOAD or cleared
/// otherwise.
///
/// \param Op  The load node; its alignment must be at least the store size of
///            its memory type.
/// \param DAG DAG in which the replacement nodes are created.
/// \returns A merge of the extended value and the dword load's output chain.
///
/// NOTE(review): \c ExtType and \c PtrInfo are used below but no declaration
/// is visible in this listing (the embedded numbering skips 1385 and
/// 1405-1406) — confirm against the complete file.
1381 SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1382  SelectionDAG &DAG) const {
1383  SDLoc DL(Op);
1384  LoadSDNode *Load = cast<LoadSDNode>(Op);
1386  EVT MemVT = Load->getMemoryVT();
1387  assert(Load->getAlignment() >= MemVT.getStoreSize());
1388 
1389  SDValue BasePtr = Load->getBasePtr();
1390  SDValue Chain = Load->getChain();
1391  SDValue Offset = Load->getOffset();
1392 
 // Fold a defined offset into the address.
1393  SDValue LoadPtr = BasePtr;
1394  if (!Offset.isUndef()) {
1395  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1396  }
1397 
1398  // Get dword location
1399  // NOTE: this should be eliminated by the future SHR ptr, 2
1400  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1401  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1402 
1403  // Load dword
1404  // TODO: can we be smarter about machine pointer info?
1407  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1408 
1409  // Get offset within the register.
1410  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1411  LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1412 
1413  // Bit offset of target byte (byteIdx * 8).
1414  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1415  DAG.getConstant(3, DL, MVT::i32));
1416 
1417  // Shift to the right.
1418  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1419 
1420  // Eliminate the upper bits by setting them to ...
1421  EVT MemEltVT = MemVT.getScalarType();
1422 
1423  if (ExtType == ISD::SEXTLOAD) { // ... ones.
1424  SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1425  Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1426  } else { // ... or zeros.
1427  Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1428  }
1429 
 // Hand back both results of the original load: the value and the chain.
1430  SDValue Ops[] = {
1431  Ret,
1432  Read.getValue(1) // This should be our output chain
1433  };
1434 
1435  return DAG.getMergeValues(Ops, DL);
1436 }
1437 
/// Custom lowering of a load node, dispatched on address space, extension
/// kind and type.
///
/// - PRIVATE extending loads narrower than i32 go through
///   lowerPrivateExtLoad().
/// - Vector loads from LOCAL or PRIVATE are scalarized.
/// - Non-extending or zero-extending loads from an address space for which
///   ConstantAddressBlock() yields a block are lowered through
///   constBufferLoad() or a CONST_ADDRESS node.
/// - Remaining SEXTLOADs are rewritten as EXTLOAD + SIGN_EXTEND_INREG.
/// - PRIVATE loads whose pointer is not yet a DWORDADDR are re-emitted with a
///   shifted, DWORDADDR-tagged pointer.
///
/// \returns The replacement node, or an empty SDValue when nothing is changed
///          here.
///
/// NOTE(review): \c ExtType is used below without a visible declaration, and
/// the second operand of the subtraction feeding CONST_ADDRESS is cut off
/// (the embedded numbering skips 1442 and 1476) — confirm against the
/// complete file.
1438 SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1439  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1440  unsigned AS = LoadNode->getAddressSpace();
1441  EVT MemVT = LoadNode->getMemoryVT();
1443 
1444  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1445  ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1446  return lowerPrivateExtLoad(Op, DAG);
1447  }
1448 
1449  SDLoc DL(Op);
1450  EVT VT = Op.getValueType();
1451  SDValue Chain = LoadNode->getChain();
1452  SDValue Ptr = LoadNode->getBasePtr();
1453 
 // Vector loads from these two address spaces are split into scalar loads.
1454  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1455  LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1456  VT.isVector()) {
1457  return scalarizeVectorLoad(LoadNode, DAG);
1458  }
1459 
1460  // This is still used for explicit load from addrspace(8)
1461  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1462  if (ConstantBlock > -1 &&
1463  ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1464  (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1465  SDValue Result;
1466  if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1467  isa<ConstantSDNode>(Ptr)) {
1468  return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1469  } else {
1470  //TODO: Does this even work?
1471  // non-constant ptr can't be folded, keeps it as a v4f32 load
1472  Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
1473  DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1474  DAG.getConstant(4, DL, MVT::i32)),
1475  DAG.getConstant(LoadNode->getAddressSpace() -
1477  );
1478  }
1479 
 // A scalar load only needs the first lane of the v4i32 result.
1480  if (!VT.isVector()) {
1481  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1482  DAG.getConstant(0, DL, MVT::i32));
1483  }
1484 
1485  SDValue MergedValues[2] = {
1486  Result,
1487  Chain
1488  };
1489  return DAG.getMergeValues(MergedValues, DL);
1490  }
1491 
1492  // For most operations returning SDValue() will result in the node being
1493  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1494  // need to manually expand loads that may be legal in some address spaces and
1495  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1496  // compute shaders, since the data is sign extended when it is uploaded to the
1497  // buffer. However SEXT loads from other address spaces are not supported, so
1498  // we need to expand them here.
1499  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1500  EVT MemVT = LoadNode->getMemoryVT();
1501  assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1502  SDValue NewLoad = DAG.getExtLoad(
1503  ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1504  LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
1505  SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1506  DAG.getValueType(MemVT));
1507 
 // NOTE(review): the merged chain is the incoming Chain, not
 // NewLoad.getValue(1) — confirm that dropping the new load's output chain
 // is intended.
1508  SDValue MergedValues[2] = { Res, Chain };
1509  return DAG.getMergeValues(MergedValues, DL);
1510  }
1511 
1512  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1513  return SDValue();
1514  }
1515 
1516  // DWORDADDR ISD marks already shifted address
1517  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1518  assert(VT == MVT::i32);
1519  Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1520  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1521  return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1522  }
1523  return SDValue();
1524 }
1525 
1526 SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1527  SDValue Chain = Op.getOperand(0);
1528  SDValue Cond = Op.getOperand(1);
1529  SDValue Jump = Op.getOperand(2);
1530 
1531  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1532  Chain, Jump, Cond);
1533 }
1534 
1535 SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1536  SelectionDAG &DAG) const {
1537  MachineFunction &MF = DAG.getMachineFunction();
1538  const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1539 
1540  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1541 
1542  unsigned FrameIndex = FIN->getIndex();
1543  unsigned IgnoredFrameReg;
1544  unsigned Offset =
1545  TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1546  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
1547  Op.getValueType());
1548 }
1549 
// Selects the calling-convention assignment function for CC.
// In the visible text, C, Fast and Cold hit llvm_unreachable, one group of
// cases returns CC_R600, and anything else is a fatal error.
// NOTE(review): the first signature line and several case labels are not
// present in this listing (the embedded numbering skips 1550, 1553-1554 and
// 1559-1565) — which conventions return CC_R600 must be confirmed against the
// complete file.
1551  bool IsVarArg) const {
1552  switch (CC) {
1555  case CallingConv::C:
1556  case CallingConv::Fast:
1557  case CallingConv::Cold:
1558  llvm_unreachable("kernels should not be handled here");
1566  return CC_R600;
1567  default:
1568  report_fatal_error("Unsupported calling convention.");
1569  }
1570 }
1571 
1572 /// XXX Only kernel functions are supported, so we can assume for now that
1573 /// every function is a kernel function, but in the future we should use
1574 /// separate calling conventions for kernel and non-kernel functions.
///
/// Produces one value per entry of \p Ins and appends it to \p InVals. For
/// shader calling conventions each argument is copied out of a live-in
/// register; otherwise it is loaded from memory at the argument's location
/// offset. Returns \p Chain unchanged.
///
/// NOTE(review): the first signature line and the declarations of
/// \c ArgLocs, \c Ext and \c PtrTy are not present in this listing (the
/// embedded numbering skips 1575, 1579, 1583, 1608-1609, 1618 and 1641-1642)
/// — confirm against the complete file.
1576  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1577  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1578  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1580  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1581  *DAG.getContext());
1582  MachineFunction &MF = DAG.getMachineFunction();
1584 
 // Shaders use the table-driven assignment; everything else uses the
 // compute-specific analysis.
1585  if (AMDGPU::isShader(CallConv)) {
1586  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
1587  } else {
1588  analyzeFormalArgumentsCompute(CCInfo, Ins);
1589  }
1590 
1591  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1592  CCValAssign &VA = ArgLocs[i];
1593  const ISD::InputArg &In = Ins[i];
1594  EVT VT = In.VT;
1595  EVT MemVT = VA.getLocVT();
1596  if (!VT.isVector() && MemVT.isVector()) {
1597  // Get load source type if scalarized.
1598  MemVT = MemVT.getVectorElementType();
1599  }
1600 
 // Shader arguments arrive in registers: mark the register live-in and
 // copy from it.
1601  if (AMDGPU::isShader(CallConv)) {
1602  unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
1603  SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1604  InVals.push_back(Register);
1605  continue;
1606  }
1607 
1610 
1611  // i64 isn't a legal type, so the register type used ends up as i32, which
1612  // isn't expected here. It attempts to create this sextload, but it ends up
1613  // being invalid. Somehow this seems to work with i64 arguments, but breaks
1614  // for <1 x i64>.
1615 
1616  // The first 36 bytes of the input buffer contains information about
1617  // thread group and global sizes.
1619  if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1620  // FIXME: This should really check the extload type, but the handling of
1621  // extload vector parameters seems to be broken.
1622 
1623  // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1624  Ext = ISD::SEXTLOAD;
1625  }
1626 
1627  // Compute the offset from the value.
1628  // XXX - I think PartOffset should give you this, but it seems to give the
1629  // size of the register which isn't useful.
1630 
1631  unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
1632  unsigned PartOffset = VA.getLocMemOffset();
1633  unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);
1634 
 // The load address is the constant PartOffset; the pointer info records
 // the offset of this part relative to the start of its argument.
1635  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1636  SDValue Arg = DAG.getLoad(
1637  ISD::UNINDEXED, Ext, VT, DL, Chain,
1638  DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
1639  PtrInfo,
1640  MemVT, Alignment, MachineMemOperand::MONonTemporal |
1643 
1644  InVals.push_back(Arg);
1645  }
1646  return Chain;
1647 }
1648 
// Returns MVT::i32 for every non-vector VT.
// NOTE(review): the first signature line and the return used for vector
// types are not present in this listing (the embedded numbering skips 1649
// and 1653) — confirm against the complete file.
1650  EVT VT) const {
1651  if (!VT.isVector())
1652  return MVT::i32;
1654 }
1655 
// For LOCAL and PRIVATE address spaces, accepts only memory types of at most
// 32 bits; every other address space is accepted unconditionally.
// NOTE(review): the first signature line (declaring AS and MemVT) is not
// present in this listing (the embedded numbering skips 1656) — confirm
// against the complete file.
1657  const SelectionDAG &DAG) const {
1658  // Local and Private addresses do not handle vectors. Limit to i32
1659  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
1660  return (MemVT.getSizeInBits() <= 32);
1661  }
1662  return true;
1663 }
1664 
// Reports whether an access of type VT with alignment Align is allowed.
// Non-simple types, MVT::Other and types narrower than i32 are rejected.
// Otherwise the access is allowed only for types wider than i32 whose
// alignment is a multiple of 4.
// *IsFast, when provided, is false on the early rejections and true
// otherwise, including when the final result is false.
// NOTE(review): the first signature line (declaring VT) is not present in
// this listing (the embedded numbering skips 1665) — confirm against the
// complete file.
1666  unsigned AddrSpace,
1667  unsigned Align,
1668  bool *IsFast) const {
1669  if (IsFast)
1670  *IsFast = false;
1671 
1672  if (!VT.isSimple() || VT == MVT::Other)
1673  return false;
1674 
1675  if (VT.bitsLT(MVT::i32))
1676  return false;
1677 
1678  // TODO: This is a rough estimate.
1679  if (IsFast)
1680  *IsFast = true;
1681 
1682  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
1683 }
1684 
// Rebuilds a 4-element BUILD_VECTOR with redundant lanes replaced by undef,
// recording in RemapSwizzle the swizzle selector to use for each such lane:
//   7 for a lane that was already undef,
//   4 for a floating-point zero constant,
//   5 for a floating-point constant exactly equal to 1.0,
//   j for a lane that duplicates the earlier lane j.
// RemapSwizzle must be empty on entry.
// NOTE(review): the first signature line is not present in this listing (the
// embedded numbering skips 1685) — confirm against the complete file.
1686  SelectionDAG &DAG, SDValue VectorEntry,
1687  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1688  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1689  assert(RemapSwizzle.empty());
1690  SDValue NewBldVec[4] = {
1691  VectorEntry.getOperand(0),
1692  VectorEntry.getOperand(1),
1693  VectorEntry.getOperand(2),
1694  VectorEntry.getOperand(3)
1695  };
1696 
1697  for (unsigned i = 0; i < 4; i++) {
1698  if (NewBldVec[i].isUndef())
1699  // We mask write here to teach later passes that the ith element of this
1700  // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1701  // break false dependencies and additionally make assembly easier to read.
1702  RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1703  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1704  if (C->isZero()) {
1705  RemapSwizzle[i] = 4; // SEL_0
1706  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1707  } else if (C->isExactlyValue(1.0)) {
1708  RemapSwizzle[i] = 5; // SEL_1
1709  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1710  }
1711  }
1712 
 // Lanes that are undef by now need no duplicate search.
1713  if (NewBldVec[i].isUndef())
1714  continue;
 // Point a lane that repeats an earlier lane at the first occurrence.
1715  for (unsigned j = 0; j < i; j++) {
1716  if (NewBldVec[i] == NewBldVec[j]) {
1717  NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1718  RemapSwizzle[i] = j;
1719  break;
1720  }
1721  }
1722  }
1723 
1724  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1725  NewBldVec);
1726 }
1727 
// Rebuilds a 4-element BUILD_VECTOR after at most one lane swap and records
// the resulting lane permutation in RemapSwizzle (identity apart from the
// swapped pair). RemapSwizzle must be empty on entry.
//
// A lane that is an EXTRACT_VECTOR_ELT whose index equals its own position is
// marked unmovable. The first EXTRACT_VECTOR_ELT lane whose index does not
// name an unmovable position is swapped with the lane at that index.
//
// NOTE(review): the first signature line is not present in this listing (the
// embedded numbering skips 1728) — confirm against the complete file.
// NOTE(review): the result of dyn_cast<ConstantSDNode> is dereferenced
// without a null check, and Idx indexes 4-element arrays without a range
// check — presumably callers guarantee a constant index below 4; verify.
1729  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1730  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1731  assert(RemapSwizzle.empty());
1732  SDValue NewBldVec[4] = {
1733  VectorEntry.getOperand(0),
1734  VectorEntry.getOperand(1),
1735  VectorEntry.getOperand(2),
1736  VectorEntry.getOperand(3)
1737  };
1738  bool isUnmovable[4] = { false, false, false, false };
 // First pass: start from the identity mapping and pin lanes that already
 // sit at the position they extract from.
1739  for (unsigned i = 0; i < 4; i++) {
1740  RemapSwizzle[i] = i;
1741  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1742  unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1743  ->getZExtValue();
1744  if (i == Idx)
1745  isUnmovable[Idx] = true;
1746  }
1747  }
1748 
 // Second pass: perform a single swap, then stop.
1749  for (unsigned i = 0; i < 4; i++) {
1750  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1751  unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1752  ->getZExtValue();
1753  if (isUnmovable[Idx])
1754  continue;
1755  // Swap i and Idx
1756  std::swap(NewBldVec[Idx], NewBldVec[i]);
1757  std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1758  break;
1759  }
1760  }
1761 
1762  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1763  NewBldVec);
1764 }
1765 
1766 SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
1767  SelectionDAG &DAG,
1768  const SDLoc &DL) const {
1769  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1770  // Old -> New swizzle values
1771  DenseMap<unsigned, unsigned> SwizzleRemap;
1772 
1773  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1774  for (unsigned i = 0; i < 4; i++) {
1775  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1776  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1777  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1778  }
1779 
1780  SwizzleRemap.clear();
1781  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1782  for (unsigned i = 0; i < 4; i++) {
1783  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1784  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1785  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1786  }
1787 
1788  return BuildVector;
1789 }
1790 
1791 SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
1792  SelectionDAG &DAG) const {
1793  SDLoc DL(LoadNode);
1794  EVT VT = LoadNode->getValueType(0);
1795  SDValue Chain = LoadNode->getChain();
1796  SDValue Ptr = LoadNode->getBasePtr();
1797  assert (isa<ConstantSDNode>(Ptr));
1798 
1799  //TODO: Support smaller loads
1800  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1801  return SDValue();
1802 
1803  if (LoadNode->getAlignment() < 4)
1804  return SDValue();
1805 
1806  int ConstantBlock = ConstantAddressBlock(Block);
1807 
1808  SDValue Slots[4];
1809  for (unsigned i = 0; i < 4; i++) {
1810  // We want Const position encoded with the following formula :
1811  // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1812  // const_index is Ptr computed by llvm using an alignment of 16.
1813  // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1814  // then div by 4 at the ISel step
1815  SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1816  DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1817  Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1818  }
1819  EVT NewVT = MVT::v4i32;
1820  unsigned NumElements = 4;
1821  if (VT.isVector()) {
1822  NewVT = VT;
1823  NumElements = VT.getVectorNumElements();
1824  }
1825  SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
1826  if (!VT.isVector()) {
1827  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1828  DAG.getConstant(0, DL, MVT::i32));
1829  }
1830  SDValue MergedValues[2] = {
1831  Result,
1832  Chain
1833  };
1834  return DAG.getMergeValues(MergedValues, DL);
1835 }
1836 
1837 //===----------------------------------------------------------------------===//
1838 // Custom DAG Optimizations
1839 //===----------------------------------------------------------------------===//
1840 
// Target-specific DAG combines, dispatched on N->getOpcode():
//   FP_ROUND            - drops an intermediate f64 uint_to_fp.
//   FP_TO_SINT          - folds fneg(select_cc ..., hw-true, hw-false) into
//                         an integer select_cc producing -1 / 0.
//   INSERT_VECTOR_ELT   - folds an insert into a BUILD_VECTOR or undef vector.
//   EXTRACT_VECTOR_ELT  - folds an extract from a BUILD_VECTOR, also through
//                         a bitcast.
//   SELECT_CC           - collapses a select_cc fed by a matching select_cc.
//   R600_EXPORT / TEXTURE_FETCH - optimizes the swizzle of a BUILD_VECTOR
//                         operand.
//   LOAD                - lowers PARAM_I loads from a constant pointer via
//                         constBufferLoad().
// NOTE(review): the first signature line, the statement after the switch and
// several interior lines are not present in this listing (the embedded
// numbering skips 1841, 1900, 1911, 1950-1951, 1963, 1995 and 2068); the
// affected spots are marked below — confirm against the complete file.
1842  DAGCombinerInfo &DCI) const {
1843  SelectionDAG &DAG = DCI.DAG;
1844  SDLoc DL(N);
1845 
1846  switch (N->getOpcode()) {
1847  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1848  case ISD::FP_ROUND: {
1849  SDValue Arg = N->getOperand(0);
1850  if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1851  return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1852  Arg.getOperand(0));
1853  }
1854  break;
1855  }
1856 
1857  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1858  // (i32 select_cc f32, f32, -1, 0 cc)
1859  //
1860  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1861  // this to one of the SET*_DX10 instructions.
1862  case ISD::FP_TO_SINT: {
1863  SDValue FNeg = N->getOperand(0);
1864  if (FNeg.getOpcode() != ISD::FNEG) {
1865  return SDValue();
1866  }
1867  SDValue SelectCC = FNeg.getOperand(0);
1868  if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1869  SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1870  SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1871  !isHWTrueValue(SelectCC.getOperand(2)) ||
1872  !isHWFalseValue(SelectCC.getOperand(3))) {
1873  return SDValue();
1874  }
1875 
1876  return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1877  SelectCC.getOperand(0), // LHS
1878  SelectCC.getOperand(1), // RHS
1879  DAG.getConstant(-1, DL, MVT::i32), // True
1880  DAG.getConstant(0, DL, MVT::i32), // False
1881  SelectCC.getOperand(4)); // CC
1882 
 // Not reached: the statement above always returns.
1883  break;
1884  }
1885 
1886  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1887  // => build_vector elt0, ... , NewEltIdx, ... , eltN
1888  case ISD::INSERT_VECTOR_ELT: {
1889  SDValue InVec = N->getOperand(0);
1890  SDValue InVal = N->getOperand(1);
1891  SDValue EltNo = N->getOperand(2);
1892 
1893  // If the inserted element is an UNDEF, just use the input vector.
1894  if (InVal.isUndef())
1895  return InVec;
1896 
1897  EVT VT = InVec.getValueType();
1898 
1899  // If we can't generate a legal BUILD_VECTOR, exit
 // NOTE(review): the condition guarding this return is missing from the
 // listing.
1901  return SDValue();
1902 
1903  // Check that we know which element is being inserted
1904  if (!isa<ConstantSDNode>(EltNo))
1905  return SDValue();
1906  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1907 
1908  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1909  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1910  // vector elements.
 // NOTE(review): the declaration of Ops is missing from the listing.
1912  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1913  Ops.append(InVec.getNode()->op_begin(),
1914  InVec.getNode()->op_end());
1915  } else if (InVec.isUndef()) {
1916  unsigned NElts = VT.getVectorNumElements();
1917  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1918  } else {
1919  return SDValue();
1920  }
1921 
1922  // Insert the element
1923  if (Elt < Ops.size()) {
1924  // All the operands of BUILD_VECTOR must have the same type;
1925  // we enforce that here.
1926  EVT OpVT = Ops[0].getValueType();
1927  if (InVal.getValueType() != OpVT)
1928  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1929  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1930  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1931  Ops[Elt] = InVal;
1932  }
1933 
1934  // Return the new vector
1935  return DAG.getBuildVector(VT, DL, Ops);
1936  }
1937 
1938  // Extract_vec (Build_vector) generated by custom lowering
1939  // also needs to be customly combined
1940  case ISD::EXTRACT_VECTOR_ELT: {
1941  SDValue Arg = N->getOperand(0);
 // Extracting a constant lane of a BUILD_VECTOR yields that operand.
1942  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1943  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1944  unsigned Element = Const->getZExtValue();
1945  return Arg->getOperand(Element);
1946  }
1947  }
 // Same fold through a bitcast: bitcast the selected operand instead.
 // NOTE(review): the tail of this condition is missing from the listing.
1948  if (Arg.getOpcode() == ISD::BITCAST &&
1949  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1952  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1953  unsigned Element = Const->getZExtValue();
1954  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1955  Arg->getOperand(0).getOperand(Element));
1956  }
1957  }
1958  break;
1959  }
1960 
1961  case ISD::SELECT_CC: {
1962  // Try common optimizations
 // NOTE(review): the statement that computes and tests Ret is missing from
 // the listing.
1964  return Ret;
1965 
1966  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1967  // selectcc x, y, a, b, inv(cc)
1968  //
1969  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1970  // selectcc x, y, a, b, cc
1971  SDValue LHS = N->getOperand(0);
1972  if (LHS.getOpcode() != ISD::SELECT_CC) {
1973  return SDValue();
1974  }
1975 
1976  SDValue RHS = N->getOperand(1);
1977  SDValue True = N->getOperand(2);
1978  SDValue False = N->getOperand(3);
1979  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1980 
 // The inner select must produce the same true/false values as the outer
 // one, and the outer comparison must be against the false value.
1981  if (LHS.getOperand(2).getNode() != True.getNode() ||
1982  LHS.getOperand(3).getNode() != False.getNode() ||
1983  RHS.getNode() != False.getNode()) {
1984  return SDValue();
1985  }
1986 
1987  switch (NCC) {
1988  default: return SDValue();
1989  case ISD::SETNE: return LHS;
1990  case ISD::SETEQ: {
1991  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1992  LHSCC = ISD::getSetCCInverse(LHSCC,
1993  LHS.getOperand(0).getValueType().isInteger());
 // NOTE(review): the second half of this condition is missing from the
 // listing.
1994  if (DCI.isBeforeLegalizeOps() ||
1996  return DAG.getSelectCC(DL,
1997  LHS.getOperand(0),
1998  LHS.getOperand(1),
1999  LHS.getOperand(2),
2000  LHS.getOperand(3),
2001  LHSCC);
2002  break;
2003  }
2004  }
2005  return SDValue();
2006  }
2007 
2008  case AMDGPUISD::R600_EXPORT: {
2009  SDValue Arg = N->getOperand(1);
2010  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2011  break;
2012 
 // Operand 1 is filled in below; operands 4..7 are the swizzle selectors
 // that OptimizeSwizzle() updates in place.
2013  SDValue NewArgs[8] = {
2014  N->getOperand(0), // Chain
2015  SDValue(),
2016  N->getOperand(2), // ArrayBase
2017  N->getOperand(3), // Type
2018  N->getOperand(4), // SWZ_X
2019  N->getOperand(5), // SWZ_Y
2020  N->getOperand(6), // SWZ_Z
2021  N->getOperand(7) // SWZ_W
2022  };
2023  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
2024  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
2025  }
2026  case AMDGPUISD::TEXTURE_FETCH: {
2027  SDValue Arg = N->getOperand(1);
2028  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2029  break;
2030 
 // Copy all 19 operands; operands 2..5 are passed to OptimizeSwizzle() as
 // the swizzle selectors and updated in place.
2031  SDValue NewArgs[19] = {
2032  N->getOperand(0),
2033  N->getOperand(1),
2034  N->getOperand(2),
2035  N->getOperand(3),
2036  N->getOperand(4),
2037  N->getOperand(5),
2038  N->getOperand(6),
2039  N->getOperand(7),
2040  N->getOperand(8),
2041  N->getOperand(9),
2042  N->getOperand(10),
2043  N->getOperand(11),
2044  N->getOperand(12),
2045  N->getOperand(13),
2046  N->getOperand(14),
2047  N->getOperand(15),
2048  N->getOperand(16),
2049  N->getOperand(17),
2050  N->getOperand(18),
2051  };
2052  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2053  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
2054  }
2055 
2056  case ISD::LOAD: {
2057  LoadSDNode *LoadNode = cast<LoadSDNode>(N);
2058  SDValue Ptr = LoadNode->getBasePtr();
2059  if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
2060  isa<ConstantSDNode>(Ptr))
2061  return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
2062  break;
2063  }
2064 
2065  default: break;
2066  }
2067 
 // NOTE(review): the statement executed after the switch is missing from the
 // listing.
2069 }
2070 
2071 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
2072  SDValue &Src, SDValue &Neg, SDValue &Abs,
2073  SDValue &Sel, SDValue &Imm,
2074  SelectionDAG &DAG) const {
2075  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2076  if (!Src.isMachineOpcode())
2077  return false;
2078 
2079  switch (Src.getMachineOpcode()) {
2080  case R600::FNEG_R600:
2081  if (!Neg.getNode())
2082  return false;
2083  Src = Src.getOperand(0);
2084  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2085  return true;
2086  case R600::FABS_R600:
2087  if (!Abs.getNode())
2088  return false;
2089  Src = Src.getOperand(0);
2090  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2091  return true;
2092  case R600::CONST_COPY: {
2093  unsigned Opcode = ParentNode->getMachineOpcode();
2094  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2095 
2096  if (!Sel.getNode())
2097  return false;
2098 
2099  SDValue CstOffset = Src.getOperand(0);
2100  if (ParentNode->getValueType(0).isVector())
2101  return false;
2102 
2103  // Gather constants values
2104  int SrcIndices[] = {
2105  TII->getOperandIdx(Opcode, R600::OpName::src0),
2106  TII->getOperandIdx(Opcode, R600::OpName::src1),
2107  TII->getOperandIdx(Opcode, R600::OpName::src2),
2108  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2109  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2110  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2111  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2112  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2113  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2114  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2115  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2116  };
2117  std::vector<unsigned> Consts;
2118  for (int OtherSrcIdx : SrcIndices) {
2119  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2120  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2121  continue;
2122  if (HasDst) {
2123  OtherSrcIdx--;
2124  OtherSelIdx--;
2125  }
2126  if (RegisterSDNode *Reg =
2127  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2128  if (Reg->getReg() == R600::ALU_CONST) {
2129  ConstantSDNode *Cst
2130  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2131  Consts.push_back(Cst->getZExtValue());
2132  }
2133  }
2134  }
2135 
2136  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2137  Consts.push_back(Cst->getZExtValue());
2138  if (!TII->fitsConstReadLimitations(Consts)) {
2139  return false;
2140  }
2141 
2142  Sel = CstOffset;
2143  Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2144  return true;
2145  }
2146  case R600::MOV_IMM_GLOBAL_ADDR:
2147  // Check if the Imm slot is used. Taken from below.
2148  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2149  return false;
2150  Imm = Src.getOperand(0);
2151  Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2152  return true;
2153  case R600::MOV_IMM_I32:
2154  case R600::MOV_IMM_F32: {
2155  unsigned ImmReg = R600::ALU_LITERAL_X;
2156  uint64_t ImmValue = 0;
2157 
2158  if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2160  float FloatValue = FPC->getValueAPF().convertToFloat();
2161  if (FloatValue == 0.0) {
2162  ImmReg = R600::ZERO;
2163  } else if (FloatValue == 0.5) {
2164  ImmReg = R600::HALF;
2165  } else if (FloatValue == 1.0) {
2166  ImmReg = R600::ONE;
2167  } else {
2168  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2169  }
2170  } else {
2172  uint64_t Value = C->getZExtValue();
2173  if (Value == 0) {
2174  ImmReg = R600::ZERO;
2175  } else if (Value == 1) {
2176  ImmReg = R600::ONE_INT;
2177  } else {
2178  ImmValue = Value;
2179  }
2180  }
2181 
2182  // Check that we aren't already using an immediate.
2183  // XXX: It's possible for an instruction to have more than one
2184  // immediate operand, but this is not supported yet.
2185  if (ImmReg == R600::ALU_LITERAL_X) {
2186  if (!Imm.getNode())
2187  return false;
2189  assert(C);
2190  if (C->getZExtValue())
2191  return false;
2192  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2193  }
2194  Src = DAG.getRegister(ImmReg, MVT::i32);
2195  return true;
2196  }
2197  default:
2198  return false;
2199  }
2200 }
2201 
2202 /// Fold the instructions after selecting them
2203 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2204  SelectionDAG &DAG) const {
2205  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2206  if (!Node->isMachineOpcode())
2207  return Node;
2208 
2209  unsigned Opcode = Node->getMachineOpcode();
2210  SDValue FakeOp;
2211 
2212  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2213 
2214  if (Opcode == R600::DOT_4) {
2215  int OperandIdx[] = {
2216  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2217  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2218  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2219  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2220  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2221  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2222  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2223  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2224  };
2225  int NegIdx[] = {
2226  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2227  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2228  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2229  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2230  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2231  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2232  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2233  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2234  };
2235  int AbsIdx[] = {
2236  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2237  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2238  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2239  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2240  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2241  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2242  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2243  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2244  };
2245  for (unsigned i = 0; i < 8; i++) {
2246  if (OperandIdx[i] < 0)
2247  return Node;
2248  SDValue &Src = Ops[OperandIdx[i] - 1];
2249  SDValue &Neg = Ops[NegIdx[i] - 1];
2250  SDValue &Abs = Ops[AbsIdx[i] - 1];
2251  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2252  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2253  if (HasDst)
2254  SelIdx--;
2255  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2256  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2257  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2258  }
2259  } else if (Opcode == R600::REG_SEQUENCE) {
2260  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2261  SDValue &Src = Ops[i];
2262  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2263  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2264  }
2265  } else {
2266  if (!TII->hasInstrModifiers(Opcode))
2267  return Node;
2268  int OperandIdx[] = {
2269  TII->getOperandIdx(Opcode, R600::OpName::src0),
2270  TII->getOperandIdx(Opcode, R600::OpName::src1),
2271  TII->getOperandIdx(Opcode, R600::OpName::src2)
2272  };
2273  int NegIdx[] = {
2274  TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2275  TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2276  TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2277  };
2278  int AbsIdx[] = {
2279  TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2280  TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2281  -1
2282  };
2283  for (unsigned i = 0; i < 3; i++) {
2284  if (OperandIdx[i] < 0)
2285  return Node;
2286  SDValue &Src = Ops[OperandIdx[i] - 1];
2287  SDValue &Neg = Ops[NegIdx[i] - 1];
2288  SDValue FakeAbs;
2289  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2290  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2291  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2292  int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2293  if (HasDst) {
2294  SelIdx--;
2295  ImmIdx--;
2296  }
2297  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2298  SDValue &Imm = Ops[ImmIdx];
2299  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2300  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2301  }
2302  }
2303 
2304  return Node;
2305 }
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
uint64_t CallInst * C
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:541
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:508
static SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
const MachineInstrBuilder & add(const MachineOperand &MO) const
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:111
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
Address space for private memory.
Definition: AMDGPU.h:235
EVT getValueType() const
Return the ValueType of the referenced return value.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
const SDValue & getOffset() const
bool isUndef() const
const GlobalValue * getGlobal() const
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1557
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
AMDGPU specific subclass of TargetSubtarget.
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Address space for indirect addressable parameter memory (VTX1)
Definition: AMDGPU.h:242
LLVM_ATTRIBUTE_NORETURN void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:139
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:610
Compute iterated dominance frontiers using a linear time algorithm.
Definition: AllocatorList.h:24
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types...
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Interface definition for R600InstrInfo.
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:260
#define LLVM_FALLTHROUGH
Definition: Compiler.h:86
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
bool canMergeStoresTo(unsigned AS, EVT MemVT, const SelectionDAG &DAG) const override
Returns if it's reasonable to merge stores to MemVT size.
void addFlag(MachineInstr &MI, unsigned Operand, unsigned Flag) const
Add one of the MO_FLAG* flags to the specified Operand.
const SDValue & getBasePtr() const
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:223
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
unsigned getReg() const
getReg - Returns the register number.
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use...
Definition: CallingConv.h:221
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
const SDValue & getValue() const
SDVTList getVTList() const
unsigned Reg
static PointerType * getInt32PtrTy(LLVMContext &C, unsigned AS=0)
Definition: Type.cpp:228
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:253
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:617
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change...
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
float convertToFloat() const
Definition: APFloat.h:1098
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:230
const SDValue & getChain() const
Function Alias Analysis Results
unsigned getAlignment() const
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:306
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition: ValueTypes.h:141
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:407
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:208
bool isTruncatingStore() const
Return true if the op does a truncation before store.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:210
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:428
Pointer to the start of the shader's constant data.
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:451
#define MO_FLAG_ABS
Definition: R600Defines.h:19
Shift and rotation operations.
Definition: ISDOpcodes.h:382
const ConstantFP * getFPImm() const
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:412
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:201
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:42
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:192
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
op_iterator op_end() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
unsigned getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:304
The memory access is dereferenceable (i.e., doesn&#39;t trap).
int getSelIdx(unsigned Opcode, unsigned SrcIdx) const
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:409
void setImmOperand(MachineInstr &MI, unsigned Op, int64_t Imm) const
Helper function for setting instruction flag values.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:457
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:398
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Interface to describe a layout of a stack frame on an AMDGPU target.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
This file implements a class to represent arbitrary precision integral constant values and operations...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:216
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool hasInstrModifiers(unsigned Opcode) const
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:298
unsigned getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:292
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:395
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
#define MO_FLAG_NEG
Definition: R600Defines.h:18
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:201
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:865
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:919
op_iterator op_begin() const
MachineInstrBuilder buildDefaultInstruction(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opcode, unsigned DstReg, unsigned Src0Reg, unsigned Src1Reg=0) const
buildDefaultInstruction - This function returns a MachineInstr with all the instruction modifiers ini...
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:571
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
Class to represent pointers.
Definition: DerivedTypes.h:467
This class is used to represent ISD::STORE nodes.
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:494
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:303
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
const Value * getValue() const
Return the base address of the memory access.
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:229
const R600FrameLowering * getFrameLowering() const override
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:610
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool hasFP32Denormals() const
const SDValue & getBasePtr() const
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:235
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:166
unsigned const MachineRegisterInfo * MRI
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1378
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Machine Value Type.
Address space for constant memory (VTX2)
Definition: AMDGPU.h:233
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:69
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
Simple binary floating point operators.
Definition: ISDOpcodes.h:260
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:35
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
bool isMachineOpcode() const
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:273
static int ConstantAddressBlock(unsigned AddressSpace)
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
const SDValue & getOperand(unsigned Num) const
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:894
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE and DBG_LABEL instructions...
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:308
This file contains the declarations for the subclasses of Constant, which represent the different fla...
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:247
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:232
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
const SDValue & getOffset() const
This file declares a class to represent arbitrary precision floating point values and provide a varie...
const R600InstrInfo * getInstrInfo() const override
unsigned getMachineOpcode() const
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
int getOperandIdx(const MachineInstr &MI, unsigned Op) const
Get the index of Op in the MachineInstr.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
XXX Only kernel functions are supported, so we can assume for now that every function is a kernel fun...
The memory access is non-temporal.
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y)...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:388
Extended Value Type.
Definition: ValueTypes.h:34
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1392
size_t size() const
Definition: SmallVector.h:53
This class contains a discriminated union of information about pointers in memory operands...
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getNumOperands() const
Return the number of values used by this operation.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
bool fitsConstReadLimitations(const std::vector< MachineInstr *> &) const
An instruction group can only access 2 channel pair (either [XY] or [ZW]) from KCache bank on R700+...
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value...
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
unsigned getAddressSpace() const
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
const APFloat & getValueAPF() const
Definition: Constants.h:299
const R600RegisterInfo * getRegisterInfo() const override
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:198
static bool isUndef(ArrayRef< int > Mask)
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:195
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const
CCState - This class holds information needed while lowering arguments and return values...
SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:315
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:265
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:222
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:847
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:729
Address space for local memory.
Definition: AMDGPU.h:234
CCValAssign - Represent assignment of one arg/retval to a location.
AddressSpace
Definition: NVPTXBaseInfo.h:22
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:96
int getLDSNoRetOp(uint16_t Opcode)
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:604
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
An SDNode that represents everything that will be needed to construct a MachineInstr.
Promote Memory to Register
Definition: Mem2Reg.cpp:110
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
int64_t getImm() const
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:43
static mvt_range integer_valuetypes()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:941
EVT getMemoryVT() const
Return the type of the in-memory value.
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
bool isShader(CallingConv::ID cc)
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:392
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:241
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:443
bool use_empty(unsigned RegNo) const
use_empty - Return true if there are no instructions using the specified register.
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors...
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:288
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:394
bool isLDSRetInstr(unsigned Opcode) const
bool hasBCNT(unsigned Size) const
amdgpu Simplify well known AMD library false Value Value * Arg
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
unsigned getOrigArgIndex() const
Provides AMDGPU specific target descriptions.
Representation of each machine instruction.
Definition: MachineInstr.h:64
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:151
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:363
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:700
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
unsigned getLocMemOffset() const
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI)
R600 DAG Lowering interface definition.
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:458
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:574
#define I(x, y, z)
Definition: MD5.cpp:58
#define N
Flags getFlags() const
Return the raw flags of the source value.
The memory access always returns the same value (or traps).
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:323
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
unsigned getOpcode() const
SDValue getValue(unsigned R) const
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:93
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getCondCode(ISD::CondCode Cond)
const MachinePointerInfo & getPointerInfo() const
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:189
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LLVM Value Representation.
Definition: Value.h:73
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:278
SDValue getRegister(unsigned Reg, EVT VT)
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:977
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *IsFast) const override
Determine if the target supports unaligned memory accesses.
SDValue getValueType(EVT)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
static bool isEOP(MachineBasicBlock::iterator I)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone...
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:59
IRTranslator LLVM IR MI
const APFloat & getValueAPF() const
#define MO_FLAG_MASK
Definition: R600Defines.h:20
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations...
Definition: ISDOpcodes.h:282
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:415
APInt bitcastToAPInt() const
Definition: APFloat.h:1094
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:198
Conversion operators.
Definition: ISDOpcodes.h:437
const SDValue & getOperand(unsigned i) const
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:749
unsigned getLocReg() const
uint64_t getZExtValue() const
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:446
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:126
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:414
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override
Return the ValueType of the result of SETCC operations.
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:554
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:745
MVT getVectorIdxTy(const DataLayout &) const override
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
LLVMContext * getContext() const
Definition: SelectionDAG.h:404
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
MachineInstr * buildMovImm(MachineBasicBlock &BB, MachineBasicBlock::iterator I, unsigned DstReg, uint64_t Imm) const
#define MO_FLAG_PUSH
Definition: R600Defines.h:21
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:137
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:617
This class is used to represent ISD::LOAD nodes.
unsigned getStackWidth(const MachineFunction &MF) const
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.