//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
                                       const R600Subtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
  // We need to include these since trunc STORES to PRIVATE need
  // special handling to accommodate RMW
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);

  // Set condition code actions
  setCondCodeAction(ISD::SETO,   MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO,  MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT,  MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE,  MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  // LLVM will expand these to atomic_cmp_swap(0)
  // and atomic_swap, respectively.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setSchedulingPreference(Sched::Source);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
}

const R600Subtarget *R600TargetLowering::getSubtarget() const {
  return static_cast<const R600Subtarget *>(Subtarget);
}

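// Return true if the instruction immediately following \p I is the function's
// RETURN, i.e. \p I is the last real instruction of the program.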
static inline bool isEOP(MachineBasicBlock::iterator I) {
  if (std::next(I) == I->getParent()->end())
    return false;
  return std::next(I)->getOpcode() == AMDGPU::RETURN;
}

MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = MI;
  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();

  switch (MI.getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI.getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
          MI.getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode())));
      for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
        NewMI.add(MI.getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI.getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
                                                            .getFPImm()
                                                            ->getValueAPF()
                                                            .bitcastToAPInt()
                                                            .getZExtValue());
    break;

  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;

  case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
    // TODO: Perhaps combine this instruction with the next if possible
    auto MIB = TII->buildDefaultInstruction(
        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
    // TODO: Ugh this is rather ugly
    MIB->getOperand(Idx) = MI.getOperand(1);
    break;
  }

  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case AMDGPU::RAT_STORE_TYPED_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .add(MI.getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(AMDGPU::PRED_SETNE)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(AMDGPU::PRED_SETNE_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
                                             .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(5))
        .add(MI.getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    return BB;
  }
  }

  MI.eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op,
                                           SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::r600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::r600_tex:
    case AMDGPUIntrinsic::r600_texc: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::r600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::r600_texc:
        TextureOp = 1;
        break;
      default:
        llvm_unreachable("unhandled texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case AMDGPUIntrinsic::r600_dot4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_implicitarg_ptr: {
      MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUASI.PARAM_I_ADDRESS);
      uint32_t ByteOffset = getImplicitParameterOffset(MFI, FIRST_IMPLICIT);
      return DAG.getConstant(ByteOffset, DL, PtrVT);
    }
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegisterRaw(DAG, &AMDGPU::R600_TReg32RegClass,
                                     AMDGPU::T0_Z, VT);

    case Intrinsic::r600_recipsqrt_ieee:
      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_recipsqrt_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
    default:
      return Op;
    }

    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
      return;
    }
    // Since we don't care about out of bounds values we can use FP_TO_SINT for
    // uints too. The DAGLegalizer code for uint considers some extra cases
    // which are not necessary here.
    LLVM_FALLTHROUGH;
  case ISD::FP_TO_SINT: {
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
      return;
    }

    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

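// Re-emit \p Vector as an AMDGPUISD::BUILD_VERTICAL_VECTOR of its extracted
// elements. The callers below use this to lower EXTRACT/INSERT_VECTOR_ELT
// when the element index is not a compile-time constant.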
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  if (GSD->getAddressSpace() != AMDGPUASI.CONSTANT_ADDRESS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();
  MVT ConstPtrVT = getPointerTy(DL, AMDGPUASI.CONSTANT_ADDRESS);

  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
          DAG.getNode(ISD::FMUL, DL, VT, Arg,
              DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
          DAG.getConstantFP(0.5, DL, MVT::f32)));
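  // FRACT yields a value in [0, 1), so after the subtraction of 0.5 below the
  // hardware op sees an argument in [-0.5, 0.5), i.e. one normalized period.
  // (0.15915494309 is 1 / 2Pi.)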
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
          DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= R600Subtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
                     DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}

SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.
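  // (E.g. for Shift == 0: CompShift == 31, so the two SRLs below shift by 31
  // and then by 1, a total of 32 bits yielding 0, rather than a single
  // undefined shift by 32.)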

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}

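// Lower UADDO/USUBO to the plain ADD/SUB result plus a CARRY/BORROW node
// whose i1 overflow bit is sign-extended in place to the full register width.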
SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType *PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUASI.PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)));
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  return isAllOnesConstant(Op);
}

bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  return isNullConstant(Op);
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                                     Cond, Zero,
                                     True, False,
                                     DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  } else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
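  // One register "row" holds StackWidth 32-bit channels, i.e. StackWidth * 4
  // bytes, so the byte address becomes a register index via a right shift by
  // log2(StackWidth * 4): 2, 3 or 4 bits for widths 1, 2 and 4.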
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}

void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

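// Private-address truncating stores are emulated with a read-modify-write
// sequence on the containing dword: load the dword, clear the target bits
// with an inverted shifted mask, OR in the shifted value, and store the
// dword back.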
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  // TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);

  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue OldChain = Store->getChain();
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS) &&
      VT.isVector()) {
    if ((AS == AMDGPUASI.PRIVATE_ADDRESS) &&
        StoreNode->isTruncatingStore()) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      // TODO: can the chain be replaced without creating a new store?
      SDValue NewStore = DAG.getTruncStore(
          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
          MemVT, StoreNode->getAlignment(),
          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
      StoreNode = cast<StoreSDNode>(NewStore);
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  unsigned Align = StoreNode->getAlignment();
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUASI.GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
    if (StoreNode->isTruncatingStore()) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlignment() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUASI.PRIVATE_ADDRESS)
    return SDValue();

  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}

// Return the base constant-register index for the given constant buffer
// address space: 512 + (kc_bank << 12).
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

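// Sub-dword extending loads from private memory are emulated by loading the
// surrounding dword, shifting the addressed byte or halfword down to bit 0,
// and then sign- or zero-extending it according to the extension type.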
SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlignment() >= MemVT.getStoreSize());

  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUASI.PRIVATE_ADDRESS)));
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  // Get offset within the register.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  SDValue Ops[] = {
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUASI.PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if ((LoadNode->getAddressSpace() == AMDGPUASI.LOCAL_ADDRESS ||
       LoadNode->getAddressSpace() == AMDGPUASI.PRIVATE_ADDRESS) &&
      VT.isVector()) {
    return scalarizeVectorLoad(LoadNode, DAG);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
    } else {
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUASI.PRIVATE_ADDRESS) {
    return SDValue();
  }

  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }
  return SDValue();
}

SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);

  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
                     Chain, Jump, Cond);
}

SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = getSubtarget()->getFrameLowering();

  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);

  unsigned FrameIndex = FIN->getIndex();
  unsigned IgnoredFrameReg;
  unsigned Offset =
      TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
                         Op.getValueType());
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  SmallVector<ISD::InputArg, 8> LocalIns;

  if (AMDGPU::isShader(CallConv)) {
    CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
  } else {
    analyzeFormalArgumentsCompute(CCInfo, Ins);
  }

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (AMDGPU::isShader(CallConv)) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUASI.CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contain information about
    // thread group and global sizes.
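    // (Nine 32-bit values: the ngroups, global-size and local-size components
    // per dimension, i.e. the dword offsets 0-8 that LowerImplicitParameter
    // reads above.)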
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(Offset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), PtrInfo,
        MemVT, /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
                                        MachineMemOperand::MODereferenceable |
                                        MachineMemOperand::MOInvariant);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    MFI->setABIArgOffset(Offset + MemVT.getStoreSize());
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
                                          const SelectionDAG &DAG) const {
  // Local and Private addresses do not handle vectors. Limit to i32
  if ((AS == AMDGPUASI.LOCAL_ADDRESS || AS == AMDGPUASI.PRIVATE_ADDRESS)) {
    return (MemVT.getSizeInBits() <= 32);
  }
  return true;
}

bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                        unsigned AddrSpace,
                                                        unsigned Align,
                                                        bool *IsFast) const {
  if (IsFast)
    *IsFast = false;

  if (!VT.isSimple() || VT == MVT::Other)
    return false;

  if (VT.bitsLT(MVT::i32))
    return false;

  // TODO: This is a rough estimate.
  if (IsFast)
    *IsFast = true;

  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}

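// Fold BUILD_VECTOR lanes that are undef, constant 0.0/1.0, or duplicates of
// an earlier lane into the corresponding swizzle selects (SEL_MASK_WRITE,
// SEL_0, SEL_1, or the earlier lane's index), recording each rewrite in
// \p RemapSwizzle.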
static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

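// Try to move an EXTRACT_VECTOR_ELT lane of the BUILD_VECTOR into the lane it
// was originally extracted from, unless that destination lane is already
// pinned in place, and record the resulting permutation in \p RemapSwizzle.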
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
                         ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
                         ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
                                            SelectionDAG &DAG,
                                            const SDLoc &DL) const {
  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  return BuildVector;
}

1759 //===----------------------------------------------------------------------===//
1760 // Custom DAG Optimizations
1761 //===----------------------------------------------------------------------===//
1762 
1764  DAGCombinerInfo &DCI) const {
1765  SelectionDAG &DAG = DCI.DAG;
1766  SDLoc DL(N);
1767 
1768  switch (N->getOpcode()) {
1769  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1770  case ISD::FP_ROUND: {
1771  SDValue Arg = N->getOperand(0);
1772  if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1773  return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1774  Arg.getOperand(0));
1775  }
1776  break;
1777  }
1778 
1779  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1780  // (i32 select_cc f32, f32, -1, 0 cc)
1781  //
1782  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1783  // this to one of the SET*_DX10 instructions.
1784  case ISD::FP_TO_SINT: {
1785  SDValue FNeg = N->getOperand(0);
1786  if (FNeg.getOpcode() != ISD::FNEG) {
1787  return SDValue();
1788  }
1789  SDValue SelectCC = FNeg.getOperand(0);
1790  if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1791  SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1792  SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1793  !isHWTrueValue(SelectCC.getOperand(2)) ||
1794  !isHWFalseValue(SelectCC.getOperand(3))) {
1795  return SDValue();
1796  }
1797 
1798  return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1799  SelectCC.getOperand(0), // LHS
1800  SelectCC.getOperand(1), // RHS
1801  DAG.getConstant(-1, DL, MVT::i32), // True
1802  DAG.getConstant(0, DL, MVT::i32), // False
1803  SelectCC.getOperand(4)); // CC
1804 
1805  break;
1806  }
1807 
1808  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1809  // => build_vector elt0, ... , NewEltIdx, ... , eltN
1810  case ISD::INSERT_VECTOR_ELT: {
1811  SDValue InVec = N->getOperand(0);
1812  SDValue InVal = N->getOperand(1);
1813  SDValue EltNo = N->getOperand(2);
1814 
1815  // If the inserted element is an UNDEF, just use the input vector.
1816  if (InVal.isUndef())
1817  return InVec;
1818 
1819  EVT VT = InVec.getValueType();
1820 
1821  // If we can't generate a legal BUILD_VECTOR, exit
1823  return SDValue();
1824 
1825  // Check that we know which element is being inserted
1826  if (!isa<ConstantSDNode>(EltNo))
1827  return SDValue();
1828  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1829 
1830  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1831  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1832  // vector elements.
1833  SmallVector<SDValue, 8> Ops;
1834  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1835  Ops.append(InVec.getNode()->op_begin(),
1836  InVec.getNode()->op_end());
1837  } else if (InVec.isUndef()) {
1838  unsigned NElts = VT.getVectorNumElements();
1839  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1840  } else {
1841  return SDValue();
1842  }
1843 
1844  // Insert the element
1845  if (Elt < Ops.size()) {
1846  // All the operands of BUILD_VECTOR must have the same type;
1847  // we enforce that here.
1848  EVT OpVT = Ops[0].getValueType();
1849  if (InVal.getValueType() != OpVT)
1850  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1851  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1852  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1853  Ops[Elt] = InVal;
1854  }
1855 
1856  // Return the new vector
1857  return DAG.getBuildVector(VT, DL, Ops);
1858  }
1859 
1860  // Extract_vec (Build_vector) generated by custom lowering
1861  // also needs to be custom combined.
1862  case ISD::EXTRACT_VECTOR_ELT: {
1863  SDValue Arg = N->getOperand(0);
1864  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1865  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1866  unsigned Element = Const->getZExtValue();
1867  return Arg->getOperand(Element);
1868  }
1869  }
1870  if (Arg.getOpcode() == ISD::BITCAST &&
1871  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1872  (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1873  Arg.getValueType().getVectorNumElements())) {
1874  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1875  unsigned Element = Const->getZExtValue();
1876  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1877  Arg->getOperand(0).getOperand(Element));
1878  }
1879  }
1880  break;
1881  }
1882 
1883  case ISD::SELECT_CC: {
1884  // Try common optimizations
1885  if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1886  return Ret;
1887 
1888  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1889  // selectcc x, y, a, b, inv(cc)
1890  //
1891  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1892  // selectcc x, y, a, b, cc
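  // (Reasoning, added: the inner selectcc evaluates to either 'a' or 'b', and
  // comparing that result to 'b' with seteq succeeds exactly when the inner
  // condition was false, ignoring the degenerate a == b case where both forms
  // agree anyway; so the outer node is the inner one with an inverted
  // condition, and with setne it is simply the inner node.)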
1893  SDValue LHS = N->getOperand(0);
1894  if (LHS.getOpcode() != ISD::SELECT_CC) {
1895  return SDValue();
1896  }
1897 
1898  SDValue RHS = N->getOperand(1);
1899  SDValue True = N->getOperand(2);
1900  SDValue False = N->getOperand(3);
1901  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1902 
1903  if (LHS.getOperand(2).getNode() != True.getNode() ||
1904  LHS.getOperand(3).getNode() != False.getNode() ||
1905  RHS.getNode() != False.getNode()) {
1906  return SDValue();
1907  }
1908 
1909  switch (NCC) {
1910  default: return SDValue();
1911  case ISD::SETNE: return LHS;
1912  case ISD::SETEQ: {
1913  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1914  LHSCC = ISD::getSetCCInverse(LHSCC,
1915  LHS.getOperand(0).getValueType().isInteger());
1916  if (DCI.isBeforeLegalizeOps() ||
1917  isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1918  return DAG.getSelectCC(DL,
1919  LHS.getOperand(0),
1920  LHS.getOperand(1),
1921  LHS.getOperand(2),
1922  LHS.getOperand(3),
1923  LHSCC);
1924  break;
1925  }
1926  }
1927  return SDValue();
1928  }
1929 
1930  case AMDGPUISD::R600_EXPORT: {
1931  SDValue Arg = N->getOperand(1);
1932  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1933  break;
1934 
1935  SDValue NewArgs[8] = {
1936  N->getOperand(0), // Chain
1937  SDValue(),
1938  N->getOperand(2), // ArrayBase
1939  N->getOperand(3), // Type
1940  N->getOperand(4), // SWZ_X
1941  N->getOperand(5), // SWZ_Y
1942  N->getOperand(6), // SWZ_Z
1943  N->getOperand(7) // SWZ_W
1944  };
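  // NewArgs[1] is deliberately left empty above: OptimizeSwizzle returns the
  // (possibly compacted) vector for that slot and rewrites SWZ_X..SWZ_W in
  // NewArgs[4..7] in place through its Swz array parameter.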
1945  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1946  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1947  }
1948  case AMDGPUISD::TEXTURE_FETCH: {
1949  SDValue Arg = N->getOperand(1);
1950  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1951  break;
1952 
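  // The fetch is rebuilt the same way: operand 1 is the coordinate
  // BUILD_VECTOR, and the operands starting at index 2 are handed to
  // OptimizeSwizzle below as the four swizzle selects (&NewArgs[2]); this
  // reading of the operand layout is inferred, not documented here.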
1953  SDValue NewArgs[19] = {
1954  N->getOperand(0),
1955  N->getOperand(1),
1956  N->getOperand(2),
1957  N->getOperand(3),
1958  N->getOperand(4),
1959  N->getOperand(5),
1960  N->getOperand(6),
1961  N->getOperand(7),
1962  N->getOperand(8),
1963  N->getOperand(9),
1964  N->getOperand(10),
1965  N->getOperand(11),
1966  N->getOperand(12),
1967  N->getOperand(13),
1968  N->getOperand(14),
1969  N->getOperand(15),
1970  N->getOperand(16),
1971  N->getOperand(17),
1972  N->getOperand(18),
1973  };
1974  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1975  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1976  }
1977  default: break;
1978  }
1979 
1980  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1981 }
1982 
1983 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1984  SDValue &Src, SDValue &Neg, SDValue &Abs,
1985  SDValue &Sel, SDValue &Imm,
1986  SelectionDAG &DAG) const {
1987  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
1988  if (!Src.isMachineOpcode())
1989  return false;
1990 
1991  switch (Src.getMachineOpcode()) {
1992  case AMDGPU::FNEG_R600:
1993  if (!Neg.getNode())
1994  return false;
1995  Src = Src.getOperand(0);
1996  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1997  return true;
1998  case AMDGPU::FABS_R600:
1999  if (!Abs.getNode())
2000  return false;
2001  Src = Src.getOperand(0);
2002  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2003  return true;
2004  case AMDGPU::CONST_COPY: {
2005  unsigned Opcode = ParentNode->getMachineOpcode();
2006  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2007 
2008  if (!Sel.getNode())
2009  return false;
2010 
2011  SDValue CstOffset = Src.getOperand(0);
2012  if (ParentNode->getValueType(0).isVector())
2013  return false;
2014 
2015  // Gather constant values.
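  // (Added note: all const sources already read by the parent instruction are
  // collected first, because fitsConstReadLimitations below must check the
  // whole set, existing constants plus the new one, against the kcache read
  // limits of a single ALU group.)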
2016  int SrcIndices[] = {
2017  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2018  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2019  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2020  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2021  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2022  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2023  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2024  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2025  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2026  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2027  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2028  };
2029  std::vector<unsigned> Consts;
2030  for (int OtherSrcIdx : SrcIndices) {
2031  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2032  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2033  continue;
2034  if (HasDst) {
2035  OtherSrcIdx--;
2036  OtherSelIdx--;
2037  }
2038  if (RegisterSDNode *Reg =
2039  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2040  if (Reg->getReg() == AMDGPU::ALU_CONST) {
2041  ConstantSDNode *Cst
2042  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2043  Consts.push_back(Cst->getZExtValue());
2044  }
2045  }
2046  }
2047 
2048  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2049  Consts.push_back(Cst->getZExtValue());
2050  if (!TII->fitsConstReadLimitations(Consts)) {
2051  return false;
2052  }
2053 
2054  Sel = CstOffset;
2055  Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2056  return true;
2057  }
2058  case AMDGPU::MOV_IMM_GLOBAL_ADDR:
2059  // Bail out if the literal (Imm) slot is already in use; this mirrors the
2060  // check in the MOV_IMM_I32/MOV_IMM_F32 cases below.
2060  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2061  return false;
2062  Imm = Src.getOperand(0);
2063  Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
2064  return true;
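  // Example of the fold below (added illustration): a MOV_IMM_F32 of 0.5 is
  // replaced by a read of the dedicated HALF register, while a value with no
  // inline-constant register, say 1.5f, keeps ALU_LITERAL_X and parks its bit
  // pattern in the instruction's literal slot.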
2065  case AMDGPU::MOV_IMM_I32:
2066  case AMDGPU::MOV_IMM_F32: {
2067  unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2068  uint64_t ImmValue = 0;
2069 
2070  if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2071  ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2072  float FloatValue = FPC->getValueAPF().convertToFloat();
2073  if (FloatValue == 0.0) {
2074  ImmReg = AMDGPU::ZERO;
2075  } else if (FloatValue == 0.5) {
2076  ImmReg = AMDGPU::HALF;
2077  } else if (FloatValue == 1.0) {
2078  ImmReg = AMDGPU::ONE;
2079  } else {
2080  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2081  }
2082  } else {
2083  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2084  uint64_t Value = C->getZExtValue();
2085  if (Value == 0) {
2086  ImmReg = AMDGPU::ZERO;
2087  } else if (Value == 1) {
2088  ImmReg = AMDGPU::ONE_INT;
2089  } else {
2090  ImmValue = Value;
2091  }
2092  }
2093 
2094  // Check that we aren't already using an immediate.
2095  // XXX: It's possible for an instruction to have more than one
2096  // immediate operand, but this is not supported yet.
2097  if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2098  if (!Imm.getNode())
2099  return false;
2100  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2101  assert(C);
2102  if (C->getZExtValue())
2103  return false;
2104  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2105  }
2106  Src = DAG.getRegister(ImmReg, MVT::i32);
2107  return true;
2108  }
2109  default:
2110  return false;
2111  }
2112 }
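// Summary (added): FoldOperand returns true only when it rewrote Src in
// place, folding FNEG/FABS into the neg/abs modifier operands, a CONST_COPY
// into the kcache Sel operand, or a MOV_IMM into an inline constant register
// or the literal slot; PostISelFolding below re-emits the machine node
// whenever it fires.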
2113 
2114 /// \brief Fold source operands (modifiers, constants, literals) into instructions after selection.
2115 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2116  SelectionDAG &DAG) const {
2117  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
2118  if (!Node->isMachineOpcode())
2119  return Node;
2120 
2121  unsigned Opcode = Node->getMachineOpcode();
2122  SDValue FakeOp;
2123 
2124  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2125 
2126  if (Opcode == AMDGPU::DOT_4) {
2127  int OperandIdx[] = {
2128  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2129  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2130  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2131  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2132  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2133  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2134  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2135  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2136  };
2137  int NegIdx[] = {
2138  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2139  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2140  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2141  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2142  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2143  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2144  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2145  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2146  };
2147  int AbsIdx[] = {
2148  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2149  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2150  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2151  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2152  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2153  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2154  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2155  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2156  };
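  // Note (added): getOperandIdx returns MachineInstr operand indices, which
  // count the dst operand first, while the SDNode operand list in Ops holds
  // no results; hence the "- 1" below (DOT_4 presumably always defines a
  // dst) and the SelIdx decrement when HasDst is set.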
2157  for (unsigned i = 0; i < 8; i++) {
2158  if (OperandIdx[i] < 0)
2159  return Node;
2160  SDValue &Src = Ops[OperandIdx[i] - 1];
2161  SDValue &Neg = Ops[NegIdx[i] - 1];
2162  SDValue &Abs = Ops[AbsIdx[i] - 1];
2163  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2164  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2165  if (HasDst)
2166  SelIdx--;
2167  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2168  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2169  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2170  }
2171  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2172  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2173  SDValue &Src = Ops[i];
2174  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2175  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2176  }
2177  } else if (Opcode == AMDGPU::CLAMP_R600) {
2178  SDValue Src = Node->getOperand(0);
2179  if (!Src.isMachineOpcode() ||
2180  !TII->hasInstrModifiers(Src.getMachineOpcode()))
2181  return Node;
2182  int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2183  AMDGPU::OpName::clamp);
2184  if (ClampIdx < 0)
2185  return Node;
2186  SDLoc DL(Node);
2187  std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
2188  Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2189  return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2190  Node->getVTList(), Ops);
2191  } else {
2192  if (!TII->hasInstrModifiers(Opcode))
2193  return Node;
2194  int OperandIdx[] = {
2195  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2196  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2197  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2198  };
2199  int NegIdx[] = {
2200  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2201  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2202  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2203  };
2204  int AbsIdx[] = {
2205  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2206  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2207  -1
2208  };
2209  for (unsigned i = 0; i < 3; i++) {
2210  if (OperandIdx[i] < 0)
2211  return Node;
2212  SDValue &Src = Ops[OperandIdx[i] - 1];
2213  SDValue &Neg = Ops[NegIdx[i] - 1];
2214  SDValue FakeAbs;
2215  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2216  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2217  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2218  int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2219  if (HasDst) {
2220  SelIdx--;
2221  ImmIdx--;
2222  }
2223  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2224  SDValue &Imm = Ops[ImmIdx];
2225  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2226  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2227  }
2228  }
2229 
2230  return Node;
2231 }