//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
/// \file
/// \brief Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUIntrinsicInfo.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Function.h"

using namespace llvm;

R600TargetLowering::R600TargetLowering(TargetMachine &TM,
                                       const AMDGPUSubtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);

  computeRegisterProperties(STI.getRegisterInfo());

  // Set condition code actions

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);

  // Legalize loads and stores to the private address space.

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);
  }

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

}

MachineBasicBlock * R600TargetLowering::EmitInstrWithCustomInserter(
    MachineInstr * MI, MachineBasicBlock * BB) const {
  MachineFunction * MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = *MI;
  const R600InstrInfo *TII =
      static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());

  switch (MI->getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI->getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI->getOpcode(), AMDGPU::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI->getOperand(DstIdx).getReg()) ||
          MI->getOpcode() == AMDGPU::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(AMDGPU::getLDSNoRetOp(MI->getOpcode())));
      for (unsigned i = 1, e = MI->getNumOperands(); i < e; ++i) {
        NewMI.addOperand(MI->getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;
  case AMDGPU::CLAMP_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_CLAMP);
    break;
  }

  case AMDGPU::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case AMDGPU::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, I,
                                                       AMDGPU::MOV,
                                                       MI->getOperand(0).getReg(),
                                                       MI->getOperand(1).getReg());
    TII->addFlag(NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case AMDGPU::MASK_WRITE: {
    unsigned maskedRegister = MI->getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case AMDGPU::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getFPImm()->getValueAPF()
                         .bitcastToAPInt().getZExtValue());
    break;
  case AMDGPU::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI->getOperand(0).getReg(),
                     MI->getOperand(1).getImm());
    break;
  case AMDGPU::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(*BB, MI, AMDGPU::MOV,
        MI->getOperand(0).getReg(), AMDGPU::ALU_CONST);
    TII->setImmOperand(NewMI, AMDGPU::OpName::src0_sel,
                       MI->getOperand(1).getImm());
    break;
  }

  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
  case AMDGPU::RAT_WRITE_CACHELESS_128_eg: {
    unsigned EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(EOP); // Set End of program bit
    break;
  }

  case AMDGPU::TXD: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

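    // SrcX..SrcW select which channel of the coordinate feeds each texture
    // component, and CTX..CTW are the per-component coordinate-type bits.
    // The defaults are adjusted per texture type below: RECT targets clear
    // the X/Y coordinate-type bits (unnormalized coordinates) and shadow
    // lookups source the W component from Z.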
    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::TXD_SHADOW: {
    unsigned T0 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    unsigned T1 = MRI.createVirtualRegister(&AMDGPU::R600_Reg128RegClass);
    MachineOperand &RID = MI->getOperand(4);
    MachineOperand &SID = MI->getOperand(5);
    unsigned TextureId = MI->getOperand(6).getImm();
    unsigned SrcX = 0, SrcY = 1, SrcZ = 2, SrcW = 3;
    unsigned CTX = 1, CTY = 1, CTZ = 1, CTW = 1;

    switch (TextureId) {
    case 5: // Rect
      CTX = CTY = 0;
      break;
    case 6: // Shadow1D
      SrcW = SrcZ;
      break;
    case 7: // Shadow2D
      SrcW = SrcZ;
      break;
    case 8: // ShadowRect
      CTX = CTY = 0;
      SrcW = SrcZ;
      break;
    case 9: // 1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 10: // 2DArray
      CTZ = 0;
      break;
    case 11: // Shadow1DArray
      SrcZ = SrcY;
      CTZ = 0;
      break;
    case 12: // Shadow2DArray
      CTZ = 0;
      break;
    }

    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_H), T0)
        .addOperand(MI->getOperand(3))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SET_GRADIENTS_V), T1)
        .addOperand(MI->getOperand(2))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::TEX_SAMPLE_C_G))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addImm(SrcX)
        .addImm(SrcY)
        .addImm(SrcZ)
        .addImm(SrcW)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(0)
        .addImm(1)
        .addImm(2)
        .addImm(3)
        .addOperand(RID)
        .addOperand(SID)
        .addImm(CTX)
        .addImm(CTY)
        .addImm(CTZ)
        .addImm(CTW)
        .addReg(T0, RegState::Implicit)
        .addReg(T1, RegState::Implicit);
    break;
  }

  case AMDGPU::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
        .addOperand(MI->getOperand(0));
    break;

  case AMDGPU::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
                AMDGPU::PREDICATE_BIT)
            .addOperand(MI->getOperand(1))
            .addImm(OPCODE_IS_NOT_ZERO_INT)
            .addImm(0); // Flags
    TII->addFlag(NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
        .addOperand(MI->getOperand(0))
        .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case AMDGPU::EG_ExportSwz:
  case AMDGPU::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type.
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI->getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
          NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
                                             .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = (std::next(I)->getOpcode() == AMDGPU::RETURN) ? 1 : 0;
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
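    // 84 and 40 are the CF_INST encodings of EXPORT_DONE on the Evergreen and
    // R600 families, respectively.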
    unsigned CfInst = (MI->getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI->getOpcode()))
        .addOperand(MI->getOperand(0))
        .addOperand(MI->getOperand(1))
        .addOperand(MI->getOperand(2))
        .addOperand(MI->getOperand(3))
        .addOperand(MI->getOperand(4))
        .addOperand(MI->getOperand(5))
        .addOperand(MI->getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case AMDGPU::RETURN: {
    // RETURN instructions must have the live-out registers as implicit uses,
    // otherwise they appear dead.
    R600MachineFunctionInfo *MFI = MF->getInfo<R600MachineFunctionInfo>();
    MachineInstrBuilder MIB(*MF, MI);
    for (unsigned i = 0, e = MFI->LiveOuts.size(); i != e; ++i)
      MIB.addReg(MFI->LiveOuts[i], RegState::Implicit);
    return BB;
  }
  }

  MI->eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case AMDGPUIntrinsic::AMDGPU_store_output: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(3))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MFI->LiveOuts.push_back(Reg);
      return DAG.getCopyToReg(Chain, SDLoc(Op), Reg, Op.getOperand(2));
    }
    case AMDGPUIntrinsic::R600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch(IntrinsicID) {
    default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
    case AMDGPUIntrinsic::R600_load_input: {
      int64_t RegIndex = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      unsigned Reg = AMDGPU::R600_TReg32RegClass.getRegister(RegIndex);
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      MRI.addLiveIn(Reg);
      return DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), Reg, VT);
    }

    case AMDGPUIntrinsic::R600_interp_input: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      int ijb = cast<ConstantSDNode>(Op.getOperand(2))->getSExtValue();
      MachineSDNode *interp;
      if (ijb < 0) {
        const R600InstrInfo *TII =
            static_cast<const R600InstrInfo *>(Subtarget->getInstrInfo());
        interp = DAG.getMachineNode(AMDGPU::INTERP_VEC_LOAD, DL,
            MVT::v4f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32));
        return DAG.getTargetExtractSubreg(
            TII->getRegisterInfo().getSubRegFromChannel(slot % 4),
            DL, MVT::f32, SDValue(interp, 0));
      }
      MachineFunction &MF = DAG.getMachineFunction();
      MachineRegisterInfo &MRI = MF.getRegInfo();
      unsigned RegisterI = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb);
      unsigned RegisterJ = AMDGPU::R600_TReg32RegClass.getRegister(2 * ijb + 1);
      MRI.addLiveIn(RegisterI);
      MRI.addLiveIn(RegisterJ);
      SDValue RegisterINode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterI, MVT::f32);
      SDValue RegisterJNode = DAG.getCopyFromReg(DAG.getEntryNode(),
          SDLoc(DAG.getEntryNode()), RegisterJ, MVT::f32);

      if (slot % 4 < 2)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot / 4, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      return SDValue(interp, slot % 2);
    }
    case AMDGPUIntrinsic::R600_interp_xy:
    case AMDGPUIntrinsic::R600_interp_zw: {
      int slot = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
      MachineSDNode *interp;
      SDValue RegisterINode = Op.getOperand(2);
      SDValue RegisterJNode = Op.getOperand(3);

      if (IntrinsicID == AMDGPUIntrinsic::R600_interp_xy)
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_XY, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      else
        interp = DAG.getMachineNode(AMDGPU::INTERP_PAIR_ZW, DL,
            MVT::f32, MVT::f32, DAG.getTargetConstant(slot, DL, MVT::i32),
            RegisterJNode, RegisterINode);
      return DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v2f32,
          SDValue(interp, 0), SDValue(interp, 1));
    }
    case AMDGPUIntrinsic::R600_tex:
    case AMDGPUIntrinsic::R600_texc:
    case AMDGPUIntrinsic::R600_txl:
    case AMDGPUIntrinsic::R600_txlc:
    case AMDGPUIntrinsic::R600_txb:
    case AMDGPUIntrinsic::R600_txbc:
    case AMDGPUIntrinsic::R600_txf:
    case AMDGPUIntrinsic::R600_txq:
    case AMDGPUIntrinsic::R600_ddx:
    case AMDGPUIntrinsic::R600_ddy:
    case AMDGPUIntrinsic::R600_ldptr: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case AMDGPUIntrinsic::R600_tex:
        TextureOp = 0;
        break;
      case AMDGPUIntrinsic::R600_texc:
        TextureOp = 1;
        break;
      case AMDGPUIntrinsic::R600_txl:
        TextureOp = 2;
        break;
      case AMDGPUIntrinsic::R600_txlc:
        TextureOp = 3;
        break;
      case AMDGPUIntrinsic::R600_txb:
        TextureOp = 4;
        break;
      case AMDGPUIntrinsic::R600_txbc:
        TextureOp = 5;
        break;
      case AMDGPUIntrinsic::R600_txf:
        TextureOp = 6;
        break;
      case AMDGPUIntrinsic::R600_txq:
        TextureOp = 7;
        break;
      case AMDGPUIntrinsic::R600_ddx:
        TextureOp = 8;
        break;
      case AMDGPUIntrinsic::R600_ddy:
        TextureOp = 9;
        break;
      case AMDGPUIntrinsic::R600_ldptr:
        TextureOp = 10;
        break;
      default:
        llvm_unreachable("Unknown texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case AMDGPUIntrinsic::AMDGPU_dp4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::AMDGPU_read_workdim: {
      uint32_t ByteOffset = getImplicitParameterOffset(MFI, GRID_DIM);
      return LowerImplicitParameter(DAG, VT, DL, ByteOffset / 4);
    }

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
                                  AMDGPU::T0_Z, VT);
    case Intrinsic::AMDGPU_rsq:
      // XXX - I'm assuming SI's RSQ_LEGACY matches R600's behavior.
      return DAG.getNode(AMDGPUISD::RSQ_LEGACY, DL, VT, Op.getOperand(1));

    case AMDGPUIntrinsic::AMDGPU_fract:
    case AMDGPUIntrinsic::AMDIL_fraction: // Legacy name.
      return DAG.getNode(AMDGPUISD::FRACT, DL, VT, Op.getOperand(1));
    }
    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(LowerFPTOUINT(N->getOperand(0), DAG));
      return;
    }
    // Fall-through. Since we don't care about out of bounds values
    // we can use FP_TO_SINT for uints too. The DAGLegalizer code for uint
    // considers some extra cases which are not necessary here.
  case ISD::FP_TO_SINT: {
    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

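// Turn a BUILD_VECTOR into a BUILD_VERTICAL_VECTOR. A "vertical" vector keeps
// all of its elements in the same channel of consecutive registers, which is
// the layout the REGISTER_LOAD/REGISTER_STORE indirect addressing below
// requires when the element index is not a compile-time constant.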
SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {

  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements();
       i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {

  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);
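  // 0.15915494309 ~= 1/(2*pi). The argument is scaled to whole turns, biased
  // by 0.5 and passed through FRACT, so after subtracting 0.5 again the value
  // fed to the TRIG node lies in [-0.5, 0.5).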
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
          DAG.getNode(ISD::FMUL, DL, VT, Arg,
              DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
          DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
          DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
      DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}

SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps; the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps; the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

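// Lower UADDO/USUBO: 'mainop' is the plain ISD::ADD/ISD::SUB and 'ovf' the
// matching AMDGPUISD::CARRY/BORROW node, as passed in from LowerOperation.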
SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign: the flag lives in bit 0, so sign-extending from i1 turns it
  // into the 0 / -1 overflow result.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

SDValue R600TargetLowering::LowerFPTOUINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(0.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETNE)
      );
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   SDLoc DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::CONSTANT_BUFFER_0);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)),
                     false, false, false, 0);
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                                     Cond, Zero,
                                     True, False,
                                     DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  } else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue,
                             HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}

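/// Map the \p ElemIdx-th element of a value spread across \p StackWidth
/// channels to the register channel it occupies and the pointer increment
/// needed to step to the next register.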
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Ptr = Op.getOperand(2);

  SDValue Result = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Result.getNode()) {
    return Result;
  }

  if (StoreNode->getAddressSpace() == AMDGPUAS::GLOBAL_ADDRESS) {
    if (StoreNode->isTruncatingStore()) {
      EVT VT = Value.getValueType();
      assert(VT.bitsLE(MVT::i32));
      EVT MemVT = StoreNode->getMemoryVT();
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }
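      // Emulate the narrow store with a dword-wide read-modify-write:
      // STORE_MSKOR receives the value and the mask, both shifted into the
      // byte position being written within the containing dword.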
      SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, VT, Ptr,
                                      DAG.getConstant(2, DL, MVT::i32));
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, Ptr.getValueType(), Ptr,
                                      DAG.getConstant(0x00000003, DL, VT));
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue Shift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                  DAG.getConstant(3, DL, VT));
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, Shift);
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, Shift);
      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getNode(ISD::BUILD_VECTOR, DL, MVT::v4i32, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR &&
               Value.getValueType().bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, Ptr.getValueType(),
                        DAG.getNode(ISD::SRL, DL, Ptr.getValueType(),
                                    Ptr, DAG.getConstant(2, DL, MVT::i32)));

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  EVT ValueVT = Value.getValueType();

  if (StoreNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
  if (Ret.getNode()) {
    return Ret;
  }
  // Lowering for indirect addressing

  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (ValueVT.isVector()) {
    unsigned NumElemVT = ValueVT.getVectorNumElements();
    EVT ElemVT = ValueVT.getVectorElementType();
    SmallVector<SDValue, 4> Stores(NumElemVT);

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in store");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      SDValue Elem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ElemVT,
                                 Value, DAG.getConstant(i, DL, MVT::i32));

      Stores[i] = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
                              Chain, Elem, Ptr,
                              DAG.getTargetConstant(Channel, DL, MVT::i32));
    }
    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Stores);
  } else {
    if (ValueVT == MVT::i8) {
      Value = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, Value);
    }
    Chain = DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other, Chain, Value,
                        Ptr, DAG.getTargetConstant(0, DL, MVT::i32)); // Channel
  }

  return Chain;
}

// Return 512 + (kc_bank << 12)
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
{
  EVT VT = Op.getValueType();
  SDLoc DL(Op);
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  SDValue Chain = Op.getOperand(0);
  SDValue Ptr = Op.getOperand(1);
  SDValue LoweredLoad;

  if (SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG))
    return Ret;

  // Lower constant address space loads of global variables.
  if (LoadNode->getAddressSpace() == AMDGPUAS::CONSTANT_ADDRESS &&
      isa<GlobalVariable>(GetUnderlyingObject(
          LoadNode->getMemOperand()->getValue(), DAG.getDataLayout()))) {

    SDValue Ptr = DAG.getZExtOrTrunc(
        LoadNode->getBasePtr(), DL,
        getPointerTy(DAG.getDataLayout(), AMDGPUAS::CONSTANT_ADDRESS));
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(2, DL, MVT::i32));
    return DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op->getVTList(),
                       LoadNode->getChain(), Ptr,
                       DAG.getTargetConstant(0, DL, MVT::i32),
                       Op.getOperand(2));
  }

  if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
    SDValue MergedValues[2] = {
      ScalarizeVectorLoad(Op, DAG),
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
        isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      SDValue Slots[4];
      for (unsigned i = 0; i < 4; i++) {
        // We want Const position encoded with the following formula :
        // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
        // const_index is Ptr computed by llvm using an alignment of 16.
        // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
        // then div by 4 at the ISel step
        SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
            DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
        Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
      }
      EVT NewVT = MVT::v4i32;
      unsigned NumElements = 4;
      if (VT.isVector()) {
        NewVT = VT;
        NumElements = VT.getVectorNumElements();
      }
      Result = DAG.getNode(ISD::BUILD_VECTOR, DL, NewVT,
                           makeArrayRef(Slots, NumElements));
    } else {
      // A non-constant ptr can't be folded; keep it as a v4f32 load.
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(ISD::EXTLOAD, DL, VT, Chain, Ptr,
                                     LoadNode->getPointerInfo(), MemVT,
                                     LoadNode->isVolatile(),
                                     LoadNode->isNonTemporal(),
                                     LoadNode->isInvariant(),
                                     LoadNode->getAlignment());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // Lowering for indirect addressing
  const MachineFunction &MF = DAG.getMachineFunction();
  const AMDGPUFrameLowering *TFL =
      static_cast<const AMDGPUFrameLowering *>(Subtarget->getFrameLowering());
  unsigned StackWidth = TFL->getStackWidth(MF);

  Ptr = stackPtrToRegIndex(Ptr, StackWidth, DAG);

  if (VT.isVector()) {
    unsigned NumElemVT = VT.getVectorNumElements();
    EVT ElemVT = VT.getVectorElementType();
    SDValue Loads[4];

    assert(NumElemVT >= StackWidth && "Stack width cannot be greater than "
                                      "vector width in load");

    for (unsigned i = 0; i < NumElemVT; ++i) {
      unsigned Channel, PtrIncr;
      getStackAddress(StackWidth, i, Channel, PtrIncr);
      Ptr = DAG.getNode(ISD::ADD, DL, MVT::i32, Ptr,
                        DAG.getConstant(PtrIncr, DL, MVT::i32));
      Loads[i] = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, ElemVT,
                             Chain, Ptr,
                             DAG.getTargetConstant(Channel, DL, MVT::i32),
                             Op.getOperand(2));
    }
    for (unsigned i = NumElemVT; i < 4; ++i) {
      Loads[i] = DAG.getUNDEF(ElemVT);
    }
    EVT TargetVT = EVT::getVectorVT(*DAG.getContext(), ElemVT, 4);
    LoweredLoad = DAG.getNode(ISD::BUILD_VECTOR, DL, TargetVT, Loads);
  } else {
    LoweredLoad = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, VT,
                              Chain, Ptr,
                              DAG.getTargetConstant(0, DL, MVT::i32), // Channel
                              Op.getOperand(2));
  }

  SDValue Ops[2] = {
    LoweredLoad,
    Chain
  };

  return DAG.getMergeValues(Ops, DL);
}

SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);

  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
                     Chain, Jump, Cond);
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
                                      SDValue Chain,
                                      CallingConv::ID CallConv,
                                      bool isVarArg,
                                      const SmallVectorImpl<ISD::InputArg> &Ins,
                                      SDLoc DL, SelectionDAG &DAG,
                                      SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

  SmallVector<ISD::InputArg, 8> LocalIns;

  getOriginalFunctionArgs(DAG, MF.getFunction(), Ins, LocalIns);

  AnalyzeFormalArguments(CCInfo, LocalIns);

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (MFI->getShaderType() != ShaderType::COMPUTE) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::CONSTANT_BUFFER_0);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contain information about
    // thread group and global sizes.
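    // (That is nine 32-bit values: ngroups, global size and local size for
    // each of x/y/z; the same nine dwords LowerImplicitParameter reads at
    // dword offsets 0-8.)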
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Offset = 36 + VA.getLocMemOffset();

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(ISD::UNINDEXED, Ext, VT, DL, Chain,
                              DAG.getConstant(Offset, DL, MVT::i32),
                              DAG.getUNDEF(MVT::i32),
                              PtrInfo,
                              MemVT, false, true, true, 4);

    // 4 is the preferred alignment for the CONSTANT memory space.
    InVals.push_back(Arg);
    MFI->ABIArgOffset = Offset + MemVT.getStoreSize();
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
   if (!VT.isVector())
     return MVT::i32;
   return VT.changeVectorElementTypeToInteger();
}

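// Fold BUILD_VECTOR operands that can be expressed as a swizzle select into
// the swizzle instead: undef elements become SEL_MASK_WRITE, constant 0.0 and
// 1.0 become SEL_0/SEL_1, and a duplicate element is remapped to its first
// occurrence. RemapSwizzle records the old -> new select for each lane.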
static SDValue CompactSwizzlableVector(
  SelectionDAG &DAG, SDValue VectorEntry,
  DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128-bit register usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].getOpcode() == ISD::UNDEF)
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}

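// If a BUILD_VECTOR operand is an EXTRACT_VECTOR_ELT, try to swap it into the
// lane it is extracted from so that lane's swizzle becomes the identity;
// RemapSwizzle records the resulting lane permutation.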
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
  assert(RemapSwizzle.empty());
  SDValue NewBldVec[4] = {
    VectorEntry.getOperand(0),
    VectorEntry.getOperand(1),
    VectorEntry.getOperand(2),
    VectorEntry.getOperand(3)
  };
  bool isUnmovable[4] = { false, false, false, false };
  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(VectorEntry),
                     VectorEntry.getValueType(), NewBldVec);
}

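// Canonicalize BuildVector with the two helpers above and rewrite the four
// swizzle selects in Swz to match the reshuffled vector.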
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector,
                                            SDValue Swz[4], SelectionDAG &DAG,
                                            SDLoc DL) const {
  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  return BuildVector;
}


//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

1823  DAGCombinerInfo &DCI) const {
1824  SelectionDAG &DAG = DCI.DAG;
1825 
1826  switch (N->getOpcode()) {
1827  default: return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1828  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1829  case ISD::FP_ROUND: {
1830  SDValue Arg = N->getOperand(0);
1831  if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1832  return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), N->getValueType(0),
1833  Arg.getOperand(0));
1834  }
1835  break;
1836  }
1837 
1838  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1839  // (i32 select_cc f32, f32, -1, 0 cc)
1840  //
1841  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1842  // this to one of the SET*_DX10 instructions.
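 // Editor's note (illustrative): e.g. the DAG built for a GLSL-style
 // "-(float)(a < b)" result converted to int matches this shape; the
 // rewrite below produces the -1/0 integer form that the SET*_DX10
 // instructions return natively.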
1843  case ISD::FP_TO_SINT: {
1844  SDValue FNeg = N->getOperand(0);
1845  if (FNeg.getOpcode() != ISD::FNEG) {
1846  return SDValue();
1847  }
1848  SDValue SelectCC = FNeg.getOperand(0);
1849  if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1850  SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1851  SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1852  !isHWTrueValue(SelectCC.getOperand(2)) ||
1853  !isHWFalseValue(SelectCC.getOperand(3))) {
1854  return SDValue();
1855  }
1856 
1857  SDLoc dl(N);
1858  return DAG.getNode(ISD::SELECT_CC, dl, N->getValueType(0),
1859  SelectCC.getOperand(0), // LHS
1860  SelectCC.getOperand(1), // RHS
1861  DAG.getConstant(-1, dl, MVT::i32), // True
1862  DAG.getConstant(0, dl, MVT::i32), // False
1863  SelectCC.getOperand(4)); // CC
1864 
1865  break;
1866  }
1867 
1868  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1869  // => build_vector elt0, ... , NewEltIdx, ... , eltN
1870  case ISD::INSERT_VECTOR_ELT: {
1871  SDValue InVec = N->getOperand(0);
1872  SDValue InVal = N->getOperand(1);
1873  SDValue EltNo = N->getOperand(2);
1874  SDLoc dl(N);
1875 
1876  // If the inserted element is an UNDEF, just use the input vector.
1877  if (InVal.getOpcode() == ISD::UNDEF)
1878  return InVec;
1879 
1880  EVT VT = InVec.getValueType();
1881 
1882  // If we can't generate a legal BUILD_VECTOR, exit
1883  if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1884  return SDValue();
1885 
1886  // Check that we know which element is being inserted
1887  if (!isa<ConstantSDNode>(EltNo))
1888  return SDValue();
1889  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1890 
1891  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1892  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1893  // vector elements.
1894  SmallVector<SDValue, 8> Ops;
1895  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1896  Ops.append(InVec.getNode()->op_begin(),
1897  InVec.getNode()->op_end());
1898  } else if (InVec.getOpcode() == ISD::UNDEF) {
1899  unsigned NElts = VT.getVectorNumElements();
1900  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1901  } else {
1902  return SDValue();
1903  }
1904 
1905  // Insert the element
1906  if (Elt < Ops.size()) {
1907  // All the operands of BUILD_VECTOR must have the same type;
1908  // we enforce that here.
1909  EVT OpVT = Ops[0].getValueType();
1910  if (InVal.getValueType() != OpVT)
1911  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1912  DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
1913  DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
1914  Ops[Elt] = InVal;
1915  }
1916 
1917  // Return the new vector
1918  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
1919  }
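 // Editor's note (illustrative): the extend/truncate above keeps every
 // BUILD_VECTOR operand the same width, e.g. inserting an i16 value into a
 // BUILD_VECTOR whose operands are i32 any-extends the value to i32 before
 // it is placed into Ops[Elt].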
1920 
1921  // An EXTRACT_VECTOR_ELT of a BUILD_VECTOR generated by custom lowering
1922  // also needs to be combined here.
1923  case ISD::EXTRACT_VECTOR_ELT: {
1924  SDValue Arg = N->getOperand(0);
1925  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1926  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1927  unsigned Element = Const->getZExtValue();
1928  return Arg->getOperand(Element);
1929  }
1930  }
1931  if (Arg.getOpcode() == ISD::BITCAST &&
1932  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR) {
1933  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1934  unsigned Element = Const->getZExtValue();
1935  return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getVTList(),
1936  Arg->getOperand(0).getOperand(Element));
1937  }
1938  }
1939  }
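 // NOTE: no break above; EXTRACT_VECTOR_ELT falls through into the
 // SELECT_CC combine below.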
1940 
1941  case ISD::SELECT_CC: {
1942  // Try common optimizations
1943  SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1944  if (Ret.getNode())
1945  return Ret;
1946 
1947  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1948  // selectcc x, y, a, b, inv(cc)
1949  //
1950  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1951  // selectcc x, y, a, b, cc
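 // Editor's note: reading off the operands, when the outer condition is
 // seteq the outer node yields a exactly when the inner selectcc produced
 // b, i.e. when cc was false, hence inv(cc); for setne the two selects
 // agree and the inner node can be returned directly.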
1952  SDValue LHS = N->getOperand(0);
1953  if (LHS.getOpcode() != ISD::SELECT_CC) {
1954  return SDValue();
1955  }
1956 
1957  SDValue RHS = N->getOperand(1);
1958  SDValue True = N->getOperand(2);
1959  SDValue False = N->getOperand(3);
1960  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1961 
1962  if (LHS.getOperand(2).getNode() != True.getNode() ||
1963  LHS.getOperand(3).getNode() != False.getNode() ||
1964  RHS.getNode() != False.getNode()) {
1965  return SDValue();
1966  }
1967 
1968  switch (NCC) {
1969  default: return SDValue();
1970  case ISD::SETNE: return LHS;
1971  case ISD::SETEQ: {
1972  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1973  LHSCC = ISD::getSetCCInverse(LHSCC,
1974  LHS.getOperand(0).getValueType().isInteger());
1975  if (DCI.isBeforeLegalizeOps() ||
1976  isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1977  return DAG.getSelectCC(SDLoc(N),
1978  LHS.getOperand(0),
1979  LHS.getOperand(1),
1980  LHS.getOperand(2),
1981  LHS.getOperand(3),
1982  LHSCC);
1983  break;
1984  }
1985  }
1986  return SDValue();
1987  }
1988 
1989  case AMDGPUISD::EXPORT: {
1990  SDValue Arg = N->getOperand(1);
1991  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1992  break;
1993 
1994  SDValue NewArgs[8] = {
1995  N->getOperand(0), // Chain
1996  SDValue(),
1997  N->getOperand(2), // ArrayBase
1998  N->getOperand(3), // Type
1999  N->getOperand(4), // SWZ_X
2000  N->getOperand(5), // SWZ_Y
2001  N->getOperand(6), // SWZ_Z
2002  N->getOperand(7) // SWZ_W
2003  };
2004  SDLoc DL(N);
2005  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
2006  return DAG.getNode(AMDGPUISD::EXPORT, DL, N->getVTList(), NewArgs);
2007  }
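 // Editor's note: for EXPORT the four swizzle operands SWZ_X..SWZ_W sit at
 // operand indices 4-7, which is why OptimizeSwizzle is handed &NewArgs[4]
 // above; TEXTURE_FETCH below hands it &NewArgs[2], treating operands 2-5
 // as its four swizzle selects.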
2008  case AMDGPUISD::TEXTURE_FETCH: {
2009  SDValue Arg = N->getOperand(1);
2010  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2011  break;
2012 
2013  SDValue NewArgs[19] = {
2014  N->getOperand(0),
2015  N->getOperand(1),
2016  N->getOperand(2),
2017  N->getOperand(3),
2018  N->getOperand(4),
2019  N->getOperand(5),
2020  N->getOperand(6),
2021  N->getOperand(7),
2022  N->getOperand(8),
2023  N->getOperand(9),
2024  N->getOperand(10),
2025  N->getOperand(11),
2026  N->getOperand(12),
2027  N->getOperand(13),
2028  N->getOperand(14),
2029  N->getOperand(15),
2030  N->getOperand(16),
2031  N->getOperand(17),
2032  N->getOperand(18),
2033  };
2034  SDLoc DL(N);
2035  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2036  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
2037  }
2038  }
2039 
2040  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
2041 }
2042 
2043 static bool
2044 FoldOperand(SDNode *ParentNode, unsigned SrcIdx, SDValue &Src, SDValue &Neg,
2045  SDValue &Abs, SDValue &Sel, SDValue &Imm, SelectionDAG &DAG) {
2046  const R600InstrInfo *TII =
2047  static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
2048  if (!Src.isMachineOpcode())
2049  return false;
2050  switch (Src.getMachineOpcode()) {
2051  case AMDGPU::FNEG_R600:
2052  if (!Neg.getNode())
2053  return false;
2054  Src = Src.getOperand(0);
2055  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2056  return true;
2057  case AMDGPU::FABS_R600:
2058  if (!Abs.getNode())
2059  return false;
2060  Src = Src.getOperand(0);
2061  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2062  return true;
2063  case AMDGPU::CONST_COPY: {
2064  unsigned Opcode = ParentNode->getMachineOpcode();
2065  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2066 
2067  if (!Sel.getNode())
2068  return false;
2069 
2070  SDValue CstOffset = Src.getOperand(0);
2071  if (ParentNode->getValueType(0).isVector())
2072  return false;
2073 
2074  // Gather constant values
2075  int SrcIndices[] = {
2076  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2077  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2078  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
2079  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2080  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2081  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2082  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2083  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2084  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2085  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2086  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2087  };
2088  std::vector<unsigned> Consts;
2089  for (int OtherSrcIdx : SrcIndices) {
2090  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2091  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2092  continue;
2093  if (HasDst) {
2094  OtherSrcIdx--;
2095  OtherSelIdx--;
2096  }
2097  if (RegisterSDNode *Reg =
2098  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2099  if (Reg->getReg() == AMDGPU::ALU_CONST) {
2100  ConstantSDNode *Cst
2101  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2102  Consts.push_back(Cst->getZExtValue());
2103  }
2104  }
2105  }
2106 
2107  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2108  Consts.push_back(Cst->getZExtValue());
2109  if (!TII->fitsConstReadLimitations(Consts)) {
2110  return false;
2111  }
2112 
2113  Sel = CstOffset;
2114  Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2115  return true;
2116  }
2117  case AMDGPU::MOV_IMM_I32:
2118  case AMDGPU::MOV_IMM_F32: {
2119  unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2120  uint64_t ImmValue = 0;
2121 
2122 
2123  if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2124  ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2125  float FloatValue = FPC->getValueAPF().convertToFloat();
2126  if (FloatValue == 0.0) {
2127  ImmReg = AMDGPU::ZERO;
2128  } else if (FloatValue == 0.5) {
2129  ImmReg = AMDGPU::HALF;
2130  } else if (FloatValue == 1.0) {
2131  ImmReg = AMDGPU::ONE;
2132  } else {
2133  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2134  }
2135  } else {
2136  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2137  uint64_t Value = C->getZExtValue();
2138  if (Value == 0) {
2139  ImmReg = AMDGPU::ZERO;
2140  } else if (Value == 1) {
2141  ImmReg = AMDGPU::ONE_INT;
2142  } else {
2143  ImmValue = Value;
2144  }
2145  }
2146 
2147  // Check that we aren't already using an immediate.
2148  // XXX: It's possible for an instruction to have more than one
2149  // immediate operand, but this is not supported yet.
2150  if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2151  if (!Imm.getNode())
2152  return false;
2153  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2154  assert(C);
2155  if (C->getZExtValue())
2156  return false;
2157  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2158  }
2159  Src = DAG.getRegister(ImmReg, MVT::i32);
2160  return true;
2161  }
2162  default:
2163  return false;
2164  }
2165 }
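// Editor's note (illustrative): FoldOperand rewrites one source operand of
// an already-selected node in place. E.g. a source fed by FNEG_R600 is
// replaced by its input with the neg modifier constant set to 1, and a
// MOV_IMM_F32 of 0.5 is replaced by a read of the inline constant register
// AMDGPU::HALF.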
2166 
2167 
2168 /// \brief Fold the instructions after selecting them
2169 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2170  SelectionDAG &DAG) const {
2171  const R600InstrInfo *TII =
2172  static_cast<const R600InstrInfo *>(DAG.getSubtarget().getInstrInfo());
2173  if (!Node->isMachineOpcode())
2174  return Node;
2175  unsigned Opcode = Node->getMachineOpcode();
2176  SDValue FakeOp;
2177 
2178  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2179 
2180  if (Opcode == AMDGPU::DOT_4) {
2181  int OperandIdx[] = {
2182  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2183  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2184  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2185  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2186  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2187  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2188  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2189  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2190  };
2191  int NegIdx[] = {
2192  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2193  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2194  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2195  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2196  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2197  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2198  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2199  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2200  };
2201  int AbsIdx[] = {
2202  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2203  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2204  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2205  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2206  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2207  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2208  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2209  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2210  };
2211  for (unsigned i = 0; i < 8; i++) {
2212  if (OperandIdx[i] < 0)
2213  return Node;
2214  SDValue &Src = Ops[OperandIdx[i] - 1];
2215  SDValue &Neg = Ops[NegIdx[i] - 1];
2216  SDValue &Abs = Ops[AbsIdx[i] - 1];
2217  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2218  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2219  if (HasDst)
2220  SelIdx--;
2221  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2222  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2223  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2224  }
2225  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2226  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2227  SDValue &Src = Ops[i];
2228  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2229  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2230  }
2231  } else if (Opcode == AMDGPU::CLAMP_R600) {
2232  SDValue Src = Node->getOperand(0);
2233  if (!Src.isMachineOpcode() ||
2234  !TII->hasInstrModifiers(Src.getMachineOpcode()))
2235  return Node;
2236  int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2237  AMDGPU::OpName::clamp);
2238  if (ClampIdx < 0)
2239  return Node;
2240  SDLoc DL(Node);
2241  std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
2242  Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2243  return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2244  Node->getVTList(), Ops);
2245  } else {
2246  if (!TII->hasInstrModifiers(Opcode))
2247  return Node;
2248  int OperandIdx[] = {
2249  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2250  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2251  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2252  };
2253  int NegIdx[] = {
2254  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2255  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2256  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2257  };
2258  int AbsIdx[] = {
2259  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2260  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2261  -1
2262  };
2263  for (unsigned i = 0; i < 3; i++) {
2264  if (OperandIdx[i] < 0)
2265  return Node;
2266  SDValue &Src = Ops[OperandIdx[i] - 1];
2267  SDValue &Neg = Ops[NegIdx[i] - 1];
2268  SDValue FakeAbs;
2269  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2270  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2271  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2272  int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2273  if (HasDst) {
2274  SelIdx--;
2275  ImmIdx--;
2276  }
2277  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2278  SDValue &Imm = Ops[ImmIdx];
2279  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2280  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2281  }
2282  }
2283 
2284  return Node;
2285 }
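// Editor's note (illustrative): each branch of PostISelFolding rebuilds the
// machine node with getMachineNode as soon as a single operand folds; for
// DOT_4 every one of the eight source channels carries its own neg/abs
// modifier operand, so FoldOperand is retried channel by channel.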