R600ISelLowering.cpp (LLVM 7.0.0svn)
//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

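// The constructor below registers the register classes for the scalar and
// short-vector types R600 supports, then marks each operation the hardware
// cannot execute natively as Custom (lowered in LowerOperation further down)
// or Expand (legalized generically by LLVM).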
R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
                                       const R600Subtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
  // We need to include these since trunc STORES to PRIVATE need
  // special handling to accommodate RMW
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
  // (further truncating-store actions are missing from this listing)

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  // (a few setOperationAction lines are missing from this listing)

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);

  if (!Subtarget->hasFMA()) {
    setOperationAction(ISD::FMA, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f64, Expand);
  }

  // (a line is missing from this listing)

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  // LLVM will expand these to atomic_cmp_swap(0)
  // and atomic_swap, respectively.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setSchedulingPreference(Sched::Source);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
  setTargetDAGCombine(ISD::LOAD);
}

const R600Subtarget *R600TargetLowering::getSubtarget() const {
  return static_cast<const R600Subtarget *>(Subtarget);
}

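// An instruction may carry the End Of Program bit only when the instruction
// that follows it in the block is the RETURN pseudo, i.e. nothing else runs
// after it.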
static inline bool isEOP(MachineBasicBlock::iterator I) {
  if (std::next(I) == I->getParent()->end())
    return false;
  return std::next(I)->getOpcode() == AMDGPU::RETURN;
}

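// Expands R600 pseudo instructions (CLAMP_R600, FABS_R600, FNEG_R600,
// MOV_IMM_*, branches, exports and RAT stores) into real machine
// instructions once their position in the block is known.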
MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = MI;
  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();

  switch (MI.getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI.getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
          MI.getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode())));
      for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
        NewMI.add(MI.getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI.getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
                                                            .getFPImm()
                                                            ->getValueAPF()
                                                            .bitcastToAPInt()
                                                            .getZExtValue());
    break;

  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;

  case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
    //TODO: Perhaps combine this instruction with the next if possible
    auto MIB = TII->buildDefaultInstruction(
        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
    //TODO: Ugh this is rather ugly
    MIB->getOperand(Idx) = MI.getOperand(1);
    break;
  }

  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case AMDGPU::RAT_STORE_TYPED_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .add(MI.getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(AMDGPU::PRED_SETNE)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(AMDGPU::PRED_SETNE_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
                                             .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(5))
        .add(MI.getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    return BB;
  }
  }

  MI.eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::r600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::r600_tex:
    case AMDGPUIntrinsic::r600_texc: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::r600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::r600_texc:
        TextureOp = 1;
        break;
      default:
        llvm_unreachable("unhandled texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case AMDGPUIntrinsic::r600_dot4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_implicitarg_ptr: {
      MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
      uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
      return DAG.getConstant(ByteOffset, DL, PtrVT);
    }
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T0_Z, VT);

    case Intrinsic::r600_recipsqrt_ieee:
      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_recipsqrt_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
    default:
      return Op;
    }

    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
      return;
    }
    // Since we don't care about out of bounds values we can use FP_TO_SINT for
    // uints too. The DAGLegalizer code for uint considers some extra cases
    // which are not necessary here.
    LLVM_FALLTHROUGH;
  case ISD::FP_TO_SINT: {
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
      return;
    }

    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

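// Rebuilds a vector as a BUILD_VERTICAL_VECTOR, the layout required when an
// element is addressed with a dynamic (non-constant) index through indirect
// addressing.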
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();
  MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);

  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}

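// Worked example of the range reduction below: 0.15915494309 is 1/(2*Pi), so
// for x = 2*Pi the lowered form computes FRACT(1.0 + 0.5) - 0.5 = 0.0, and
// COS_HW(0.0) = 1.0 = cos(2*Pi). The argument is thus wrapped into
// [-0.5, 0.5], within the [-1, 1] range the hardware expects.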
SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= R600Subtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
                     DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}

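// Example for the SHL_PARTS expansion below: a 64-bit value {Lo, Hi} shifted
// left by 40 has Shift >= Width (32), so the selects pick
// HiBig = Lo << (40 - 32) and LoBig = 0; for shifts below 32 the Overflow
// term carries the Lo bits that cross over into Hi.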
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

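// UADDO/USUBO become the plain ADD/SUB paired with the hardware CARRY or
// BORROW node; the flag result is sign-extended from i1 so that "true" is
// the all-ones value R600 uses for booleans.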
SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

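// Implicit kernel parameters (ngroups, global size, local size) live at
// fixed dword offsets in the parameter buffer; e.g. dword offset 1, used for
// r600_read_ngroups_y above, is byte offset 4.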
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUASI.PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)));
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  return isAllOnesConstant(Op);
}

bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  return isNullConstant(Op);
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1,  0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1,  0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0,   f32, f32, cc_supported
  // select_cc i32, 0,   i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
      Cond, HWFalse,
      True, False,
      DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}

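// Maps a scalarized element index to a register channel plus a pointer
// increment for the given stack width; e.g. with StackWidth == 4, element 3
// stays in the same register (PtrIncr == 0) and uses channel 3.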
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

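// Sub-dword private stores are emulated with a read-modify-write sequence on
// the containing dword: load the dword, clear the target byte or halfword
// with an inverted shifted mask, OR in the shifted value, and store the
// dword back.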
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  //TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS);

  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue OldChain = Store->getChain();
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}

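// Store lowering dispatches on address space: vector and truncating private
// stores are scalarized or routed through the RMW path above, global
// truncating stores become a STORE_MSKOR memory intrinsic, and dword-sized
// stores are retagged with DWORDADDR once the address has been shifted.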
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
      VT.isVector()) {
    if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
         StoreNode->isTruncatingStore()) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      // TODO: can the chain be replaced without creating a new store?
      SDValue NewStore = DAG.getTruncStore(
          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
          MemVT, StoreNode->getAlignment(),
          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
      StoreNode = cast<StoreSDNode>(NewStore);
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  unsigned Align = StoreNode->getAlignment();
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of in the combiner to
    // avoid artificial dependencies introduced by RMW
    if (StoreNode->isTruncatingStore()) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlignment() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUASI.PRIVATE_ADDRESS)
    return SDValue();

  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}

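// Example: a load from the third constant buffer (kc_bank == 2) maps to
// block 512 + 4096 * 2 = 8704.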
// return (512 + (kc_bank << 12))
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

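// Sub-dword private extending loads read the whole containing dword, shift
// the addressed byte or halfword down to bit 0, then sign- or zero-extend it
// according to the original extension type.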
SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlignment() >= MemVT.getStoreSize());

  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  // Get offset within the register.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  SDValue Ops[] = {
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
      LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) &&
      VT.isVector()) {
    return scalarizeVectorLoad(LoadNode, DAG);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keep it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) {
    return SDValue();
  }

  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }
  return SDValue();
}

SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond  = Op.getOperand(1);
  SDValue Jump  = Op.getOperand(2);

  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
                     Chain, Jump, Cond);
}

SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();

  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);

  unsigned FrameIndex = FIN->getIndex();
  unsigned IgnoredFrameReg;
  unsigned Offset =
    TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
                         Op.getValueType());
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  if (AMDGPU::isShader(CallConv)) {
    CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
  } else {
    analyzeFormalArgumentsCompute(CCInfo, Ins);
  }

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (AMDGPU::isShader(CallConv)) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUASI.PARAM_I_ADDRESS);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(Offset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), PtrInfo,
        MemVT, /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
                                        MachineMemOperand::MODereferenceable |
                                        MachineMemOperand::MOInvariant);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    MFI->setABIArgOffset(Offset + MemVT.getStoreSize());
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
                                          const SelectionDAG &DAG) const {
  // Local and Private addresses do not handle vectors. Limit to i32
  if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) {
    return (MemVT.getSizeInBits() <= 32);
  }
  return true;
}

bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                        unsigned AddrSpace,
                                                        unsigned Align,
                                                        bool *IsFast) const {
  if (IsFast)
    *IsFast = false;

  if (!VT.isSimple() || VT == MVT::Other)
    return false;

  if (VT.bitsLT(MVT::i32))
    return false;

  // TODO: This is a rough estimate.
  if (IsFast)
    *IsFast = true;

  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}

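// Example for the swizzle compaction below: build_vector x, 1.0, x, undef
// becomes build_vector x, undef, undef, undef with swizzles
// {0, SEL_1, 0, SEL_MASK_WRITE}: constant 0.0/1.0 lanes use the dedicated
// selects and a repeated operand reuses the swizzle of its first occurrence.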
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

1700 
1702  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1703  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1704  assert(RemapSwizzle.empty());
1705  SDValue NewBldVec[4] = {
1706  VectorEntry.getOperand(0),
1707  VectorEntry.getOperand(1),
1708  VectorEntry.getOperand(2),
1709  VectorEntry.getOperand(3)
1710  };
1711  bool isUnmovable[4] = { false, false, false, false };
1712  for (unsigned i = 0; i < 4; i++) {
1713  RemapSwizzle[i] = i;
1714  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1715  unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1716  ->getZExtValue();
1717  if (i == Idx)
1718  isUnmovable[Idx] = true;
1719  }
1720  }
1721 
1722  for (unsigned i = 0; i < 4; i++) {
1723  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1724  unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1725  ->getZExtValue();
1726  if (isUnmovable[Idx])
1727  continue;
1728  // Swap i and Idx
1729  std::swap(NewBldVec[Idx], NewBldVec[i]);
1730  std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1731  break;
1732  }
1733  }
1734 
1735  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1736  NewBldVec);
1737 }
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
                                            SelectionDAG &DAG,
                                            const SDLoc &DL) const {
  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  return BuildVector;
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

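// Target combines that only pay off on R600, e.g. folding
// (fp_to_sint (fneg (select_cc ... 1.0, 0.0))) into a single integer
// select_cc so it can be matched by the SET*_DX10 instructions, which
// produce -1/0 integer results directly.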
SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getConstant(-1, DL, MVT::i32), // True
                       DAG.getConstant(0, DL, MVT::i32),  // False
                       SelectCC.getOperand(4)); // CC

    break;
  }

1813  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1814  // => build_vector elt0, ... , NewEltIdx, ... , eltN
1815  case ISD::INSERT_VECTOR_ELT: {
1816  SDValue InVec = N->getOperand(0);
1817  SDValue InVal = N->getOperand(1);
1818  SDValue EltNo = N->getOperand(2);
1819 
1820  // If the inserted element is an UNDEF, just use the input vector.
1821  if (InVal.isUndef())
1822  return InVec;
1823 
1824  EVT VT = InVec.getValueType();
1825 
1826  // If we can't generate a legal BUILD_VECTOR, exit
1828  return SDValue();
1829 
1830  // Check that we know which element is being inserted
1831  if (!isa<ConstantSDNode>(EltNo))
1832  return SDValue();
1833  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1834 
1835  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1836  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1837  // vector elements.
1838  SmallVector<SDValue, 8> Ops;
1839  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1840  Ops.append(InVec.getNode()->op_begin(),
1841  InVec.getNode()->op_end());
1842  } else if (InVec.isUndef()) {
1843  unsigned NElts = VT.getVectorNumElements();
1844  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1845  } else {
1846  return SDValue();
1847  }
1848 
1849  // Insert the element
1850  if (Elt < Ops.size()) {
1851  // All the operands of BUILD_VECTOR must have the same type;
1852  // we enforce that here.
1853  EVT OpVT = Ops[0].getValueType();
1854  if (InVal.getValueType() != OpVT)
1855  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1856  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1857  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1858  Ops[Elt] = InVal;
1859  }
1860 
1861  // Return the new vector
1862  return DAG.getBuildVector(VT, DL, Ops);
1863  }
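// e.g. (illustrative) inserting an i16 value into a build_vector whose
// operands are i32 any-extends the scalar first, since every BUILD_VECTOR
// operand must have the same type.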
1864 
1865  // Extract_vec (Build_vector) generated by custom lowering
1866  // also needs to be custom combined
1867  case ISD::EXTRACT_VECTOR_ELT: {
1868  SDValue Arg = N->getOperand(0);
1869  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1870  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1871  unsigned Element = Const->getZExtValue();
1872  return Arg->getOperand(Element);
1873  }
1874  }
1875  if (Arg.getOpcode() == ISD::BITCAST &&
1876  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1877  (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1878  Arg.getValueType().getVectorNumElements())) {
1879  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1880  unsigned Element = Const->getZExtValue();
1881  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1882  Arg->getOperand(0).getOperand(Element));
1883  }
1884  }
1885  break;
1886  }
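// e.g. extract_vector_elt (build_vector a, b, c, d), 2 -> c, and
// extract_vector_elt (bitcast (build_vector a, b, c, d)), 1 -> bitcast b,
// the latter only when the element counts match (checked above).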
1887 
1888  case ISD::SELECT_CC: {
1889  // Try common optimizations
1890  if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1891  return Ret;
1892 
1893  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1894  // selectcc x, y, a, b, inv(cc)
1895  //
1896  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1897  // selectcc x, y, a, b, cc
1898  SDValue LHS = N->getOperand(0);
1899  if (LHS.getOpcode() != ISD::SELECT_CC) {
1900  return SDValue();
1901  }
1902 
1903  SDValue RHS = N->getOperand(1);
1904  SDValue True = N->getOperand(2);
1905  SDValue False = N->getOperand(3);
1906  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1907 
1908  if (LHS.getOperand(2).getNode() != True.getNode() ||
1909  LHS.getOperand(3).getNode() != False.getNode() ||
1910  RHS.getNode() != False.getNode()) {
1911  return SDValue();
1912  }
1913 
1914  switch (NCC) {
1915  default: return SDValue();
1916  case ISD::SETNE: return LHS;
1917  case ISD::SETEQ: {
1918  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1919  LHSCC = ISD::getSetCCInverse(LHSCC,
1920  LHS.getOperand(0).getValueType().isInteger());
1921  if (DCI.isBeforeLegalizeOps() ||
1922  isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1923  return DAG.getSelectCC(DL,
1924  LHS.getOperand(0),
1925  LHS.getOperand(1),
1926  LHS.getOperand(2),
1927  LHS.getOperand(3),
1928  LHSCC);
1929  break;
1930  }
1931  }
1932  return SDValue();
1933  }
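// e.g. (illustrative) with cc = setogt the seteq form becomes
//   selectcc (selectcc x, y, a, b, setogt), b, a, b, seteq
//     -> selectcc x, y, a, b, setule      // setule = inv(setogt)
// provided the inverted condition code is legal or we are still before
// legalize-ops.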
1934 
1935  case AMDGPUISD::R600_EXPORT: {
1936  SDValue Arg = N->getOperand(1);
1937  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1938  break;
1939 
1940  SDValue NewArgs[8] = {
1941  N->getOperand(0), // Chain
1942  SDValue(),
1943  N->getOperand(2), // ArrayBase
1944  N->getOperand(3), // Type
1945  N->getOperand(4), // SWZ_X
1946  N->getOperand(5), // SWZ_Y
1947  N->getOperand(6), // SWZ_Z
1948  N->getOperand(7) // SWZ_W
1949  };
1950  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1951  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1952  }
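// NewArgs[1] receives the (possibly rebuilt) export vector and
// NewArgs[4..7] the remapped SWZ_* selectors, so the rewritten export
// still reads the same data.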
1953  case AMDGPUISD::TEXTURE_FETCH: {
1954  SDValue Arg = N->getOperand(1);
1955  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1956  break;
1957 
1958  SDValue NewArgs[19] = {
1959  N->getOperand(0),
1960  N->getOperand(1),
1961  N->getOperand(2),
1962  N->getOperand(3),
1963  N->getOperand(4),
1964  N->getOperand(5),
1965  N->getOperand(6),
1966  N->getOperand(7),
1967  N->getOperand(8),
1968  N->getOperand(9),
1969  N->getOperand(10),
1970  N->getOperand(11),
1971  N->getOperand(12),
1972  N->getOperand(13),
1973  N->getOperand(14),
1974  N->getOperand(15),
1975  N->getOperand(16),
1976  N->getOperand(17),
1977  N->getOperand(18),
1978  };
1979  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1980  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1981  }
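// For TEXTURE_FETCH the four per-channel source selects sit at operands
// 2..5, which is why OptimizeSwizzle is handed &NewArgs[2] here rather
// than &NewArgs[4].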
1982  default: break;
1983  }
1984 
1985  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1986 }
1987 
1988 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1989  SDValue &Src, SDValue &Neg, SDValue &Abs,
1990  SDValue &Sel, SDValue &Imm,
1991  SelectionDAG &DAG) const {
1992  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
1993  if (!Src.isMachineOpcode())
1994  return false;
1995 
1996  switch (Src.getMachineOpcode()) {
1997  case AMDGPU::FNEG_R600:
1998  if (!Neg.getNode())
1999  return false;
2000  Src = Src.getOperand(0);
2001  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2002  return true;
2003  case AMDGPU::FABS_R600:
2004  if (!Abs.getNode())
2005  return false;
2006  Src = Src.getOperand(0);
2007  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2008  return true;
2009  case AMDGPU::CONST_COPY: {
2010  unsigned Opcode = ParentNode->getMachineOpcode();
2011  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2012 
2013  if (!Sel.getNode())
2014  return false;
2015 
2016  SDValue CstOffset = Src.getOperand(0);
2017  if (ParentNode->getValueType(0).isVector())
2018  return false;
2019 
2020  // Gather constant values
2021  int SrcIndices[] = {
2022  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2023  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2024  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2025  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2026  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2027  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2028  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2029  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2030  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2031  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2032  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2033  };
2034  std::vector<unsigned> Consts;
2035  for (int OtherSrcIdx : SrcIndices) {
2036  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2037  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2038  continue;
2039  if (HasDst) {
2040  OtherSrcIdx--;
2041  OtherSelIdx--;
2042  }
2043  if (RegisterSDNode *Reg =
2044  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2045  if (Reg->getReg() == AMDGPU::ALU_CONST) {
2046  ConstantSDNode *Cst
2047  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2048  Consts.push_back(Cst->getZExtValue());
2049  }
2050  }
2051  }
2052 
2053  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2054  Consts.push_back(Cst->getZExtValue());
2055  if (!TII->fitsConstReadLimitations(Consts)) {
2056  return false;
2057  }
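// (fitsConstReadLimitations models the hardware limit that an instruction
// group can only access two channel pairs, [XY] or [ZW], of a KCache bank
// on R700+.)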
2058 
2059  Sel = CstOffset;
2060  Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2061  return true;
2062  }
2063  case AMDGPU::MOV_IMM_GLOBAL_ADDR:
2064  // Check if the Imm slot is already in use (same check as below).
2065  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2066  return false;
2067  Imm = Src.getOperand(0);
2068  Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
2069  return true;
2070  case AMDGPU::MOV_IMM_I32:
2071  case AMDGPU::MOV_IMM_F32: {
2072  unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2073  uint64_t ImmValue = 0;
2074 
2075  if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2076  ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2077  float FloatValue = FPC->getValueAPF().convertToFloat();
2078  if (FloatValue == 0.0) {
2079  ImmReg = AMDGPU::ZERO;
2080  } else if (FloatValue == 0.5) {
2081  ImmReg = AMDGPU::HALF;
2082  } else if (FloatValue == 1.0) {
2083  ImmReg = AMDGPU::ONE;
2084  } else {
2085  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2086  }
2087  } else {
2088  ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
2089  uint64_t Value = C->getZExtValue();
2090  if (Value == 0) {
2091  ImmReg = AMDGPU::ZERO;
2092  } else if (Value == 1) {
2093  ImmReg = AMDGPU::ONE_INT;
2094  } else {
2095  ImmValue = Value;
2096  }
2097  }
2098 
2099  // Check that we aren't already using an immediate.
2100  // XXX: It's possible for an instruction to have more than one
2101  // immediate operand, but this is not supported yet.
2102  if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2103  if (!Imm.getNode())
2104  return false;
2105  ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2106  assert(C);
2107  if (C->getZExtValue())
2108  return false;
2109  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2110  }
2111  Src = DAG.getRegister(ImmReg, MVT::i32);
2112  return true;
2113  }
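// e.g. (illustrative) a MOV_IMM_F32 of 0.0, 0.5 or 1.0 folds to the inline
// registers ZERO, HALF or ONE, while 2.5f keeps ALU_LITERAL_X and places
// 0x40200000 in the literal slot.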
2114  default:
2115  return false;
2116  }
2117 }
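// e.g. (illustrative) for MUL_IEEE dst, (FNEG_R600 a), b the fold drops
// the FNEG_R600 node and sets the src0_neg modifier instead, so the
// negation costs no extra ALU instruction.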
2118 
2119 /// \brief Fold the instructions after selecting them
2120 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2121  SelectionDAG &DAG) const {
2122  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
2123  if (!Node->isMachineOpcode())
2124  return Node;
2125 
2126  unsigned Opcode = Node->getMachineOpcode();
2127  SDValue FakeOp;
2128 
2129  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2130 
2131  if (Opcode == AMDGPU::DOT_4) {
2132  int OperandIdx[] = {
2133  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2134  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2135  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2136  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2137  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2138  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2139  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2140  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2141  };
2142  int NegIdx[] = {
2143  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2144  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2145  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2146  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2147  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2148  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2149  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2150  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2151  };
2152  int AbsIdx[] = {
2153  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2154  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2155  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2156  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2157  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2158  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2159  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2160  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2161  };
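// getOperandIdx returns indices into the MachineInstr operand list, which
// starts with the dst operand; Ops holds only the node's inputs, hence the
// "- 1" when indexing Ops below.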
2162  for (unsigned i = 0; i < 8; i++) {
2163  if (OperandIdx[i] < 0)
2164  return Node;
2165  SDValue &Src = Ops[OperandIdx[i] - 1];
2166  SDValue &Neg = Ops[NegIdx[i] - 1];
2167  SDValue &Abs = Ops[AbsIdx[i] - 1];
2168  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2169  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2170  if (HasDst)
2171  SelIdx--;
2172  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2173  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2174  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2175  }
2176  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2177  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2178  SDValue &Src = Ops[i];
2179  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2180  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2181  }
2182  } else if (Opcode == AMDGPU::CLAMP_R600) {
2183  SDValue Src = Node->getOperand(0);
2184  if (!Src.isMachineOpcode() ||
2185  !TII->hasInstrModifiers(Src.getMachineOpcode()))
2186  return Node;
2187  int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2188  AMDGPU::OpName::clamp);
2189  if (ClampIdx < 0)
2190  return Node;
2191  SDLoc DL(Node);
2192  std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
2193  Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2194  return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2195  Node->getVTList(), Ops);
2196  } else {
2197  if (!TII->hasInstrModifiers(Opcode))
2198  return Node;
2199  int OperandIdx[] = {
2200  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2201  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2202  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2203  };
2204  int NegIdx[] = {
2205  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2206  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2207  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2208  };
2209  int AbsIdx[] = {
2210  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2211  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2212  -1
2213  };
2214  for (unsigned i = 0; i < 3; i++) {
2215  if (OperandIdx[i] < 0)
2216  return Node;
2217  SDValue &Src = Ops[OperandIdx[i] - 1];
2218  SDValue &Neg = Ops[NegIdx[i] - 1];
2219  SDValue FakeAbs;
2220  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2221  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2222  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2223  int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2224  if (HasDst) {
2225  SelIdx--;
2226  ImmIdx--;
2227  }
2228  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2229  SDValue &Imm = Ops[ImmIdx];
2230  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2231  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2232  }
2233  }
2234 
2235  return Node;
2236 }
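// e.g. (illustrative) CLAMP_R600 (MUL_IEEE a, b) is folded above into a
// single MUL_IEEE a, b with its clamp modifier set to 1, removing the
// separate clamp node.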