LLVM  4.0.0
R600ISelLowering.cpp
Go to the documentation of this file.
1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 /// \file
11 /// \brief Custom DAG lowering for R600
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "R600ISelLowering.h"
16 #include "AMDGPUFrameLowering.h"
17 #include "AMDGPUIntrinsicInfo.h"
18 #include "AMDGPUSubtarget.h"
19 #include "R600Defines.h"
20 #include "R600FrameLowering.h"
21 #include "R600InstrInfo.h"
23 #include "Utils/AMDGPUBaseInfo.h"
24 #include "llvm/ADT/APFloat.h"
25 #include "llvm/ADT/APInt.h"
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/DenseMap.h"
28 #include "llvm/ADT/SmallVector.h"
40 #include "llvm/IR/Constants.h"
41 #include "llvm/IR/DerivedTypes.h"
42 #include "llvm/Support/Casting.h"
43 #include "llvm/Support/Compiler.h"
45 #include <cassert>
46 #include <cstdint>
47 #include <iterator>
48 #include <utility>
49 #include <vector>
50 
51 using namespace llvm;
52 
54  const R600Subtarget &STI)
55  : AMDGPUTargetLowering(TM, STI), Gen(STI.getGeneration()) {
56  addRegisterClass(MVT::f32, &AMDGPU::R600_Reg32RegClass);
57  addRegisterClass(MVT::i32, &AMDGPU::R600_Reg32RegClass);
58  addRegisterClass(MVT::v2f32, &AMDGPU::R600_Reg64RegClass);
59  addRegisterClass(MVT::v2i32, &AMDGPU::R600_Reg64RegClass);
60  addRegisterClass(MVT::v4f32, &AMDGPU::R600_Reg128RegClass);
61  addRegisterClass(MVT::v4i32, &AMDGPU::R600_Reg128RegClass);
62 
64 
65  // Legalize loads and stores to the private address space.
69 
70  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
71  // spaces, so it is custom lowered to handle those where it isn't.
72  for (MVT VT : MVT::integer_valuetypes()) {
76 
80 
84  }
85 
86  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
90 
94 
99 
102  // We need to include these since trunc STORES to PRIVATE need
103  // special handling to accommodate RMW
114 
115  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
118 
119  // Set condition code actions
132 
137 
140 
143 
147 
149 
152 
159 
164 
165  // ADD, SUB overflow.
166  // TODO: turn these into Legal?
167  if (Subtarget->hasCARRY())
169 
170  if (Subtarget->hasBORROW())
172 
173  // Expand sign extension of vectors
174  if (!Subtarget->hasBFE())
176 
179 
180  if (!Subtarget->hasBFE())
184 
185  if (!Subtarget->hasBFE())
189 
193 
195 
197 
202 
207 
208  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
209  // to be Legal/Custom in order to avoid library calls.
213 
215 
216  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
217  for (MVT VT : ScalarIntVTs) {
222  }
223 
225 
232 }
233 
235  return static_cast<const R600Subtarget *>(Subtarget);
236 }
237 
238 static inline bool isEOP(MachineBasicBlock::iterator I) {
239  if (std::next(I) == I->getParent()->end())
240  return false;
241  return std::next(I)->getOpcode() == AMDGPU::RETURN;
242 }
243 
246  MachineBasicBlock *BB) const {
247  MachineFunction *MF = BB->getParent();
251 
252  switch (MI.getOpcode()) {
253  default:
254  // Replace LDS_*_RET instruction that don't have any uses with the
255  // equivalent LDS_*_NORET instruction.
256  if (TII->isLDSRetInstr(MI.getOpcode())) {
257  int DstIdx = TII->getOperandIdx(MI.getOpcode(), AMDGPU::OpName::dst);
258  assert(DstIdx != -1);
259  MachineInstrBuilder NewMI;
260  // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
261  // LDS_1A2D support and remove this special case.
262  if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
263  MI.getOpcode() == AMDGPU::LDS_CMPST_RET)
264  return BB;
265 
266  NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
267  TII->get(AMDGPU::getLDSNoRetOp(MI.getOpcode())));
268  for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
269  NewMI.addOperand(MI.getOperand(i));
270  }
271  } else {
273  }
274  break;
275  case AMDGPU::CLAMP_R600: {
277  *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
278  MI.getOperand(1).getReg());
279  TII->addFlag(*NewMI, 0, MO_FLAG_CLAMP);
280  break;
281  }
282 
283  case AMDGPU::FABS_R600: {
285  *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
286  MI.getOperand(1).getReg());
287  TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
288  break;
289  }
290 
291  case AMDGPU::FNEG_R600: {
293  *BB, I, AMDGPU::MOV, MI.getOperand(0).getReg(),
294  MI.getOperand(1).getReg());
295  TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
296  break;
297  }
298 
299  case AMDGPU::MASK_WRITE: {
300  unsigned maskedRegister = MI.getOperand(0).getReg();
302  MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
303  TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
304  break;
305  }
306 
307  case AMDGPU::MOV_IMM_F32:
308  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
309  .getFPImm()
310  ->getValueAPF()
311  .bitcastToAPInt()
312  .getZExtValue());
313  break;
314 
315  case AMDGPU::MOV_IMM_I32:
316  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
317  MI.getOperand(1).getImm());
318  break;
319 
320  case AMDGPU::MOV_IMM_GLOBAL_ADDR: {
321  //TODO: Perhaps combine this instruction with the next if possible
322  auto MIB = TII->buildDefaultInstruction(
323  *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_LITERAL_X);
324  int Idx = TII->getOperandIdx(*MIB, AMDGPU::OpName::literal);
325  //TODO: Ugh this is rather ugly
326  MIB->getOperand(Idx) = MI.getOperand(1);
327  break;
328  }
329 
330  case AMDGPU::CONST_COPY: {
332  *BB, MI, AMDGPU::MOV, MI.getOperand(0).getReg(), AMDGPU::ALU_CONST);
333  TII->setImmOperand(*NewMI, AMDGPU::OpName::src0_sel,
334  MI.getOperand(1).getImm());
335  break;
336  }
337 
338  case AMDGPU::RAT_WRITE_CACHELESS_32_eg:
339  case AMDGPU::RAT_WRITE_CACHELESS_64_eg:
340  case AMDGPU::RAT_WRITE_CACHELESS_128_eg:
341  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
342  .addOperand(MI.getOperand(0))
343  .addOperand(MI.getOperand(1))
344  .addImm(isEOP(I)); // Set End of program bit
345  break;
346 
347  case AMDGPU::RAT_STORE_TYPED_eg:
348  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
349  .addOperand(MI.getOperand(0))
350  .addOperand(MI.getOperand(1))
351  .addOperand(MI.getOperand(2))
352  .addImm(isEOP(I)); // Set End of program bit
353  break;
354 
355  case AMDGPU::BRANCH:
356  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP))
357  .addOperand(MI.getOperand(0));
358  break;
359 
360  case AMDGPU::BRANCH_COND_f32: {
361  MachineInstr *NewMI =
362  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
363  AMDGPU::PREDICATE_BIT)
364  .addOperand(MI.getOperand(1))
365  .addImm(AMDGPU::PRED_SETNE)
366  .addImm(0); // Flags
367  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
368  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
369  .addOperand(MI.getOperand(0))
370  .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
371  break;
372  }
373 
374  case AMDGPU::BRANCH_COND_i32: {
375  MachineInstr *NewMI =
376  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::PRED_X),
377  AMDGPU::PREDICATE_BIT)
378  .addOperand(MI.getOperand(1))
379  .addImm(AMDGPU::PRED_SETNE_INT)
380  .addImm(0); // Flags
381  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
382  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(AMDGPU::JUMP_COND))
383  .addOperand(MI.getOperand(0))
384  .addReg(AMDGPU::PREDICATE_BIT, RegState::Kill);
385  break;
386  }
387 
388  case AMDGPU::EG_ExportSwz:
389  case AMDGPU::R600_ExportSwz: {
390  // Instruction is left unmodified if its not the last one of its type
391  bool isLastInstructionOfItsType = true;
392  unsigned InstExportType = MI.getOperand(1).getImm();
393  for (MachineBasicBlock::iterator NextExportInst = std::next(I),
394  EndBlock = BB->end(); NextExportInst != EndBlock;
395  NextExportInst = std::next(NextExportInst)) {
396  if (NextExportInst->getOpcode() == AMDGPU::EG_ExportSwz ||
397  NextExportInst->getOpcode() == AMDGPU::R600_ExportSwz) {
398  unsigned CurrentInstExportType = NextExportInst->getOperand(1)
399  .getImm();
400  if (CurrentInstExportType == InstExportType) {
401  isLastInstructionOfItsType = false;
402  break;
403  }
404  }
405  }
406  bool EOP = isEOP(I);
407  if (!EOP && !isLastInstructionOfItsType)
408  return BB;
409  unsigned CfInst = (MI.getOpcode() == AMDGPU::EG_ExportSwz) ? 84 : 40;
410  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
411  .addOperand(MI.getOperand(0))
412  .addOperand(MI.getOperand(1))
413  .addOperand(MI.getOperand(2))
414  .addOperand(MI.getOperand(3))
415  .addOperand(MI.getOperand(4))
416  .addOperand(MI.getOperand(5))
417  .addOperand(MI.getOperand(6))
418  .addImm(CfInst)
419  .addImm(EOP);
420  break;
421  }
422  case AMDGPU::RETURN: {
423  return BB;
424  }
425  }
426 
427  MI.eraseFromParent();
428  return BB;
429 }
430 
431 //===----------------------------------------------------------------------===//
432 // Custom DAG Lowering Operations
433 //===----------------------------------------------------------------------===//
434 
438  switch (Op.getOpcode()) {
439  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
440  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
441  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
442  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
443  case ISD::SRA_PARTS:
444  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
445  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
446  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
447  case ISD::FCOS:
448  case ISD::FSIN: return LowerTrig(Op, DAG);
449  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
450  case ISD::STORE: return LowerSTORE(Op, DAG);
451  case ISD::LOAD: {
452  SDValue Result = LowerLOAD(Op, DAG);
453  assert((!Result.getNode() ||
454  Result.getNode()->getNumValues() == 2) &&
455  "Load should return a value and a chain");
456  return Result;
457  }
458 
459  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
460  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
461  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
462  case ISD::INTRINSIC_VOID: {
463  SDValue Chain = Op.getOperand(0);
464  unsigned IntrinsicID =
465  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
466  switch (IntrinsicID) {
467  case AMDGPUIntrinsic::r600_store_swizzle: {
468  SDLoc DL(Op);
469  const SDValue Args[8] = {
470  Chain,
471  Op.getOperand(2), // Export Value
472  Op.getOperand(3), // ArrayBase
473  Op.getOperand(4), // Type
474  DAG.getConstant(0, DL, MVT::i32), // SWZ_X
475  DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
476  DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
477  DAG.getConstant(3, DL, MVT::i32) // SWZ_W
478  };
479  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
480  }
481 
482  // default for switch(IntrinsicID)
483  default: break;
484  }
485  // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
486  break;
487  }
489  unsigned IntrinsicID =
490  cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
491  EVT VT = Op.getValueType();
492  SDLoc DL(Op);
493  switch(IntrinsicID) {
494  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
495  case AMDGPUIntrinsic::r600_tex:
496  case AMDGPUIntrinsic::r600_texc: {
497  unsigned TextureOp;
498  switch (IntrinsicID) {
499  case AMDGPUIntrinsic::r600_tex:
500  TextureOp = 0;
501  break;
502  case AMDGPUIntrinsic::r600_texc:
503  TextureOp = 1;
504  break;
505  default:
506  llvm_unreachable("unhandled texture operation");
507  }
508 
509  SDValue TexArgs[19] = {
510  DAG.getConstant(TextureOp, DL, MVT::i32),
511  Op.getOperand(1),
512  DAG.getConstant(0, DL, MVT::i32),
513  DAG.getConstant(1, DL, MVT::i32),
514  DAG.getConstant(2, DL, MVT::i32),
515  DAG.getConstant(3, DL, MVT::i32),
516  Op.getOperand(2),
517  Op.getOperand(3),
518  Op.getOperand(4),
519  DAG.getConstant(0, DL, MVT::i32),
520  DAG.getConstant(1, DL, MVT::i32),
521  DAG.getConstant(2, DL, MVT::i32),
522  DAG.getConstant(3, DL, MVT::i32),
523  Op.getOperand(5),
524  Op.getOperand(6),
525  Op.getOperand(7),
526  Op.getOperand(8),
527  Op.getOperand(9),
528  Op.getOperand(10)
529  };
530  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
531  }
532  case AMDGPUIntrinsic::r600_dot4: {
533  SDValue Args[8] = {
535  DAG.getConstant(0, DL, MVT::i32)),
537  DAG.getConstant(0, DL, MVT::i32)),
539  DAG.getConstant(1, DL, MVT::i32)),
541  DAG.getConstant(1, DL, MVT::i32)),
543  DAG.getConstant(2, DL, MVT::i32)),
545  DAG.getConstant(2, DL, MVT::i32)),
547  DAG.getConstant(3, DL, MVT::i32)),
549  DAG.getConstant(3, DL, MVT::i32))
550  };
551  return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
552  }
553 
554  case Intrinsic::r600_implicitarg_ptr: {
557  return DAG.getConstant(ByteOffset, DL, PtrVT);
558  }
559  case Intrinsic::r600_read_ngroups_x:
560  return LowerImplicitParameter(DAG, VT, DL, 0);
561  case Intrinsic::r600_read_ngroups_y:
562  return LowerImplicitParameter(DAG, VT, DL, 1);
563  case Intrinsic::r600_read_ngroups_z:
564  return LowerImplicitParameter(DAG, VT, DL, 2);
565  case Intrinsic::r600_read_global_size_x:
566  return LowerImplicitParameter(DAG, VT, DL, 3);
567  case Intrinsic::r600_read_global_size_y:
568  return LowerImplicitParameter(DAG, VT, DL, 4);
569  case Intrinsic::r600_read_global_size_z:
570  return LowerImplicitParameter(DAG, VT, DL, 5);
571  case Intrinsic::r600_read_local_size_x:
572  return LowerImplicitParameter(DAG, VT, DL, 6);
573  case Intrinsic::r600_read_local_size_y:
574  return LowerImplicitParameter(DAG, VT, DL, 7);
575  case Intrinsic::r600_read_local_size_z:
576  return LowerImplicitParameter(DAG, VT, DL, 8);
577 
578  case Intrinsic::r600_read_tgid_x:
579  return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
580  AMDGPU::T1_X, VT);
581  case Intrinsic::r600_read_tgid_y:
582  return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
583  AMDGPU::T1_Y, VT);
584  case Intrinsic::r600_read_tgid_z:
585  return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
586  AMDGPU::T1_Z, VT);
587  case Intrinsic::r600_read_tidig_x:
588  return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
589  AMDGPU::T0_X, VT);
590  case Intrinsic::r600_read_tidig_y:
591  return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
592  AMDGPU::T0_Y, VT);
593  case Intrinsic::r600_read_tidig_z:
594  return CreateLiveInRegister(DAG, &AMDGPU::R600_TReg32RegClass,
595  AMDGPU::T0_Z, VT);
596 
597  case Intrinsic::r600_recipsqrt_ieee:
598  return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
599 
600  case Intrinsic::r600_recipsqrt_clamped:
601  return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
602  }
603 
604  // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
605  break;
606  }
607  } // end switch(Op.getOpcode())
608  return SDValue();
609 }
610 
613  SelectionDAG &DAG) const {
614  switch (N->getOpcode()) {
615  default:
617  return;
618  case ISD::FP_TO_UINT:
619  if (N->getValueType(0) == MVT::i1) {
620  Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
621  return;
622  }
623  // Since we don't care about out of bounds values we can use FP_TO_SINT for
624  // uints too. The DAGLegalizer code for uint considers some extra cases
625  // which are not necessary here.
627  case ISD::FP_TO_SINT: {
628  if (N->getValueType(0) == MVT::i1) {
629  Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
630  return;
631  }
632 
633  SDValue Result;
634  if (expandFP_TO_SINT(N, Result, DAG))
635  Results.push_back(Result);
636  return;
637  }
638  case ISD::SDIVREM: {
639  SDValue Op = SDValue(N, 1);
640  SDValue RES = LowerSDIVREM(Op, DAG);
641  Results.push_back(RES);
642  Results.push_back(RES.getValue(1));
643  break;
644  }
645  case ISD::UDIVREM: {
646  SDValue Op = SDValue(N, 0);
647  LowerUDIVREM64(Op, DAG, Results);
648  break;
649  }
650  }
651 }
652 
653 SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
654  SDValue Vector) const {
655  SDLoc DL(Vector);
656  EVT VecVT = Vector.getValueType();
657  EVT EltVT = VecVT.getVectorElementType();
659 
660  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
661  Args.push_back(DAG.getNode(
662  ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
663  DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
664  }
665 
666  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
667 }
668 
669 SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
670  SelectionDAG &DAG) const {
671  SDLoc DL(Op);
672  SDValue Vector = Op.getOperand(0);
673  SDValue Index = Op.getOperand(1);
674 
675  if (isa<ConstantSDNode>(Index) ||
677  return Op;
678 
679  Vector = vectorToVerticalVector(DAG, Vector);
680  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
681  Vector, Index);
682 }
683 
684 SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
685  SelectionDAG &DAG) const {
686  SDLoc DL(Op);
687  SDValue Vector = Op.getOperand(0);
688  SDValue Value = Op.getOperand(1);
689  SDValue Index = Op.getOperand(2);
690 
691  if (isa<ConstantSDNode>(Index) ||
693  return Op;
694 
695  Vector = vectorToVerticalVector(DAG, Vector);
696  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
697  Vector, Value, Index);
698  return vectorToVerticalVector(DAG, Insert);
699 }
700 
701 SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
702  SDValue Op,
703  SelectionDAG &DAG) const {
704  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
706  return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
707 
708  const DataLayout &DL = DAG.getDataLayout();
709  const GlobalValue *GV = GSD->getGlobal();
710  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
711 
712  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
713  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
714 }
715 
716 SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
717  // On hw >= R700, COS/SIN input must be between -1. and 1.
718  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
719  EVT VT = Op.getValueType();
720  SDValue Arg = Op.getOperand(0);
721  SDLoc DL(Op);
722 
723  // TODO: Should this propagate fast-math-flags?
724  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
725  DAG.getNode(ISD::FADD, DL, VT,
726  DAG.getNode(ISD::FMUL, DL, VT, Arg,
727  DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
728  DAG.getConstantFP(0.5, DL, MVT::f32)));
729  unsigned TrigNode;
730  switch (Op.getOpcode()) {
731  case ISD::FCOS:
732  TrigNode = AMDGPUISD::COS_HW;
733  break;
734  case ISD::FSIN:
735  TrigNode = AMDGPUISD::SIN_HW;
736  break;
737  default:
738  llvm_unreachable("Wrong trig opcode");
739  }
740  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
741  DAG.getNode(ISD::FADD, DL, VT, FractPart,
742  DAG.getConstantFP(-0.5, DL, MVT::f32)));
743  if (Gen >= R600Subtarget::R700)
744  return TrigVal;
745  // On R600 hw, COS/SIN input must be between -Pi and Pi.
746  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
747  DAG.getConstantFP(3.14159265359, DL, MVT::f32));
748 }
749 
750 SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
751  SDLoc DL(Op);
752  EVT VT = Op.getValueType();
753 
754  SDValue Lo = Op.getOperand(0);
755  SDValue Hi = Op.getOperand(1);
756  SDValue Shift = Op.getOperand(2);
757  SDValue Zero = DAG.getConstant(0, DL, VT);
758  SDValue One = DAG.getConstant(1, DL, VT);
759 
760  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
761  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
762  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
763  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
764 
765  // The dance around Width1 is necessary for 0 special case.
766  // Without it the CompShift might be 32, producing incorrect results in
767  // Overflow. So we do the shift in two steps, the alternative is to
768  // add a conditional to filter the special case.
769 
770  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
771  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);
772 
773  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
774  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
775  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);
776 
777  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
778  SDValue LoBig = Zero;
779 
780  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
781  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
782 
783  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
784 }
785 
786 SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
787  SDLoc DL(Op);
788  EVT VT = Op.getValueType();
789 
790  SDValue Lo = Op.getOperand(0);
791  SDValue Hi = Op.getOperand(1);
792  SDValue Shift = Op.getOperand(2);
793  SDValue Zero = DAG.getConstant(0, DL, VT);
794  SDValue One = DAG.getConstant(1, DL, VT);
795 
796  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;
797 
798  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
799  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
800  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
801  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);
802 
803  // The dance around Width1 is necessary for 0 special case.
804  // Without it the CompShift might be 32, producing incorrect results in
805  // Overflow. So we do the shift in two steps, the alternative is to
806  // add a conditional to filter the special case.
807 
808  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
809  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);
810 
811  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
812  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
813  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);
814 
815  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
816  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;
817 
818  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
819  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);
820 
821  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
822 }
823 
824 SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
825  unsigned mainop, unsigned ovf) const {
826  SDLoc DL(Op);
827  EVT VT = Op.getValueType();
828 
829  SDValue Lo = Op.getOperand(0);
830  SDValue Hi = Op.getOperand(1);
831 
832  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
833  // Extend sign.
834  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
835  DAG.getValueType(MVT::i1));
836 
837  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
838 
839  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
840 }
841 
842 SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
843  SDLoc DL(Op);
844  return DAG.getNode(
845  ISD::SETCC,
846  DL,
847  MVT::i1,
848  Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
849  DAG.getCondCode(ISD::SETEQ));
850 }
851 
852 SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
853  SDLoc DL(Op);
854  return DAG.getNode(
855  ISD::SETCC,
856  DL,
857  MVT::i1,
858  Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
859  DAG.getCondCode(ISD::SETEQ));
860 }
861 
862 SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
863  const SDLoc &DL,
864  unsigned DwordOffset) const {
865  unsigned ByteOffset = DwordOffset * 4;
866  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
868 
869  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
870  assert(isInt<16>(ByteOffset));
871 
872  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
873  DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
875 }
876 
877 bool R600TargetLowering::isZero(SDValue Op) const {
878  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
879  return Cst->isNullValue();
880  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
881  return CstFP->isZero();
882  } else {
883  return false;
884  }
885 }
886 
887 bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
888  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
889  return CFP->isExactlyValue(1.0);
890  }
891  return isAllOnesConstant(Op);
892 }
893 
894 bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
895  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
896  return CFP->getValueAPF().isZero();
897  }
898  return isNullConstant(Op);
899 }
900 
901 SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
902  SDLoc DL(Op);
903  EVT VT = Op.getValueType();
904 
905  SDValue LHS = Op.getOperand(0);
906  SDValue RHS = Op.getOperand(1);
907  SDValue True = Op.getOperand(2);
908  SDValue False = Op.getOperand(3);
909  SDValue CC = Op.getOperand(4);
910  SDValue Temp;
911 
912  if (VT == MVT::f32) {
913  DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
914  SDValue MinMax = CombineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
915  if (MinMax)
916  return MinMax;
917  }
918 
919  // LHS and RHS are guaranteed to be the same value type
920  EVT CompareVT = LHS.getValueType();
921 
922  // Check if we can lower this to a native operation.
923 
924  // Try to lower to a SET* instruction:
925  //
926  // SET* can match the following patterns:
927  //
928  // select_cc f32, f32, -1, 0, cc_supported
929  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
930  // select_cc i32, i32, -1, 0, cc_supported
931  //
932 
933  // Move hardware True/False values to the correct operand.
934  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
935  ISD::CondCode InverseCC =
936  ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
937  if (isHWTrueValue(False) && isHWFalseValue(True)) {
938  if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
939  std::swap(False, True);
940  CC = DAG.getCondCode(InverseCC);
941  } else {
942  ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
943  if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
944  std::swap(False, True);
945  std::swap(LHS, RHS);
946  CC = DAG.getCondCode(SwapInvCC);
947  }
948  }
949  }
950 
951  if (isHWTrueValue(True) && isHWFalseValue(False) &&
952  (CompareVT == VT || VT == MVT::i32)) {
953  // This can be matched by a SET* instruction.
954  return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
955  }
956 
957  // Try to lower to a CND* instruction:
958  //
959  // CND* can match the following patterns:
960  //
961  // select_cc f32, 0.0, f32, f32, cc_supported
962  // select_cc f32, 0.0, i32, i32, cc_supported
963  // select_cc i32, 0, f32, f32, cc_supported
964  // select_cc i32, 0, i32, i32, cc_supported
965  //
966 
967  // Try to move the zero value to the RHS
968  if (isZero(LHS)) {
969  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
970  // Try swapping the operands
971  ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
972  if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
973  std::swap(LHS, RHS);
974  CC = DAG.getCondCode(CCSwapped);
975  } else {
976  // Try inverting the conditon and then swapping the operands
977  ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
978  CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
979  if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
980  std::swap(True, False);
981  std::swap(LHS, RHS);
982  CC = DAG.getCondCode(CCSwapped);
983  }
984  }
985  }
986  if (isZero(RHS)) {
987  SDValue Cond = LHS;
988  SDValue Zero = RHS;
989  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
990  if (CompareVT != VT) {
991  // Bitcast True / False to the correct types. This will end up being
992  // a nop, but it allows us to define only a single pattern in the
993  // .TD files for each CND* instruction rather than having to have
994  // one pattern for integer True/False and one for fp True/False
995  True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
996  False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
997  }
998 
999  switch (CCOpcode) {
1000  case ISD::SETONE:
1001  case ISD::SETUNE:
1002  case ISD::SETNE:
1003  CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
1004  Temp = True;
1005  True = False;
1006  False = Temp;
1007  break;
1008  default:
1009  break;
1010  }
1011  SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
1012  Cond, Zero,
1013  True, False,
1014  DAG.getCondCode(CCOpcode));
1015  return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
1016  }
1017 
1018  // If we make it this for it means we have no native instructions to handle
1019  // this SELECT_CC, so we must lower it.
1020  SDValue HWTrue, HWFalse;
1021 
1022  if (CompareVT == MVT::f32) {
1023  HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
1024  HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
1025  } else if (CompareVT == MVT::i32) {
1026  HWTrue = DAG.getConstant(-1, DL, CompareVT);
1027  HWFalse = DAG.getConstant(0, DL, CompareVT);
1028  }
1029  else {
1030  llvm_unreachable("Unhandled value type in LowerSELECT_CC");
1031  }
1032 
1033  // Lower this unsupported SELECT_CC into a combination of two supported
1034  // SELECT_CC operations.
1035  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
1036 
1037  return DAG.getNode(ISD::SELECT_CC, DL, VT,
1038  Cond, HWFalse,
1039  True, False,
1040  DAG.getCondCode(ISD::SETNE));
1041 }
1042 
1043 /// LLVM generates byte-addressed pointers. For indirect addressing, we need to
1044 /// convert these pointers to a register index. Each register holds
1045 /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
1046 /// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
1047 /// for indirect addressing.
1048 SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1049  unsigned StackWidth,
1050  SelectionDAG &DAG) const {
1051  unsigned SRLPad;
1052  switch(StackWidth) {
1053  case 1:
1054  SRLPad = 2;
1055  break;
1056  case 2:
1057  SRLPad = 3;
1058  break;
1059  case 4:
1060  SRLPad = 4;
1061  break;
1062  default: llvm_unreachable("Invalid stack width");
1063  }
1064 
1065  SDLoc DL(Ptr);
1066  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1067  DAG.getConstant(SRLPad, DL, MVT::i32));
1068 }
1069 
1070 void R600TargetLowering::getStackAddress(unsigned StackWidth,
1071  unsigned ElemIdx,
1072  unsigned &Channel,
1073  unsigned &PtrIncr) const {
1074  switch (StackWidth) {
1075  default:
1076  case 1:
1077  Channel = 0;
1078  if (ElemIdx > 0) {
1079  PtrIncr = 1;
1080  } else {
1081  PtrIncr = 0;
1082  }
1083  break;
1084  case 2:
1085  Channel = ElemIdx % 2;
1086  if (ElemIdx == 2) {
1087  PtrIncr = 1;
1088  } else {
1089  PtrIncr = 0;
1090  }
1091  break;
1092  case 4:
1093  Channel = ElemIdx;
1094  PtrIncr = 0;
1095  break;
1096  }
1097 }
1098 
1099 SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1100  SelectionDAG &DAG) const {
1101  SDLoc DL(Store);
1102  //TODO: Who creates the i8 stores?
1103  assert(Store->isTruncatingStore()
1104  || Store->getValue().getValueType() == MVT::i8);
1106 
1107  SDValue Mask;
1108  if (Store->getMemoryVT() == MVT::i8) {
1109  assert(Store->getAlignment() >= 1);
1110  Mask = DAG.getConstant(0xff, DL, MVT::i32);
1111  } else if (Store->getMemoryVT() == MVT::i16) {
1112  assert(Store->getAlignment() >= 2);
1113  Mask = DAG.getConstant(0xffff, DL, MVT::i32);;
1114  } else {
1115  llvm_unreachable("Unsupported private trunc store");
1116  }
1117 
1118  SDValue OldChain = Store->getChain();
1119  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1120  // Skip dummy
1121  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1122  SDValue BasePtr = Store->getBasePtr();
1123  SDValue Offset = Store->getOffset();
1124  EVT MemVT = Store->getMemoryVT();
1125 
1126  SDValue LoadPtr = BasePtr;
1127  if (!Offset.isUndef()) {
1128  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1129  }
1130 
1131  // Get dword location
1132  // TODO: this should be eliminated by the future SHR ptr, 2
1133  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1134  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1135 
1136  // Load dword
1137  // TODO: can we be smarter about machine pointer info?
1138  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo());
1139 
1140  Chain = Dst.getValue(1);
1141 
1142  // Get offset in dword
1143  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1144  DAG.getConstant(0x3, DL, MVT::i32));
1145 
1146  // Convert byte offset to bit shift
1147  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1148  DAG.getConstant(3, DL, MVT::i32));
1149 
1150  // TODO: Contrary to the name of the functiom,
1151  // it also handles sub i32 non-truncating stores (like i1)
1152  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1153  Store->getValue());
1154 
1155  // Mask the value to the right type
1156  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1157 
1158  // Shift the value in place
1159  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1160  MaskedValue, ShiftAmt);
1161 
1162  // Shift the mask in place
1163  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1164 
1165  // Invert the mask. NOTE: if we had native ROL instructions we could
1166  // use inverted mask
1167  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1168 
1169  // Cleanup the target bits
1170  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1171 
1172  // Add the new bits
1173  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1174 
1175  // Store dword
1176  // TODO: Can we be smarter about MachinePointerInfo?
1177  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, MachinePointerInfo());
1178 
1179  // If we are part of expanded vector, make our neighbors depend on this store
1180  if (VectorTrunc) {
1181  // Make all other vector elements depend on this store
1182  Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1183  DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1184  }
1185  return NewStore;
1186 }
1187 
1188 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1189  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1190  unsigned AS = StoreNode->getAddressSpace();
1191 
1192  SDValue Chain = StoreNode->getChain();
1193  SDValue Ptr = StoreNode->getBasePtr();
1194  SDValue Value = StoreNode->getValue();
1195 
1196  EVT VT = Value.getValueType();
1197  EVT MemVT = StoreNode->getMemoryVT();
1198  EVT PtrVT = Ptr.getValueType();
1199 
1200  SDLoc DL(Op);
1201 
1202  // Neither LOCAL nor PRIVATE can do vectors at the moment
1203  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
1204  VT.isVector()) {
1205  if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && StoreNode->isTruncatingStore()) {
1206  // Add an extra level of chain to isolate this vector
1207  SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1208  // TODO: can the chain be replaced without creating a new store?
1209  SDValue NewStore = DAG.getTruncStore(
1210  NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
1211  MemVT, StoreNode->getAlignment(),
1212  StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
1213  StoreNode = cast<StoreSDNode>(NewStore);
1214  }
1215 
1216  return scalarizeVectorStore(StoreNode, DAG);
1217  }
1218 
1219  unsigned Align = StoreNode->getAlignment();
1220  if (Align < MemVT.getStoreSize() &&
1221  !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
1222  return expandUnalignedStore(StoreNode, DAG);
1223  }
1224 
1225  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1226  DAG.getConstant(2, DL, PtrVT));
1227 
1228  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1229  // It is beneficial to create MSKOR here instead of combiner to avoid
1230  // artificial dependencies introduced by RMW
1231  if (StoreNode->isTruncatingStore()) {
1232  assert(VT.bitsLE(MVT::i32));
1233  SDValue MaskConstant;
1234  if (MemVT == MVT::i8) {
1235  MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1236  } else {
1237  assert(MemVT == MVT::i16);
1238  assert(StoreNode->getAlignment() >= 2);
1239  MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1240  }
1241 
1242  SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1243  DAG.getConstant(0x00000003, DL, PtrVT));
1244  SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1245  DAG.getConstant(3, DL, VT));
1246 
1247  // Put the mask in correct place
1248  SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1249 
1250  // Put the value bits in correct place
1251  SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1252  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1253 
1254  // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1255  // vector instead.
1256  SDValue Src[4] = {
1257  ShiftedValue,
1258  DAG.getConstant(0, DL, MVT::i32),
1259  DAG.getConstant(0, DL, MVT::i32),
1260  Mask
1261  };
1262  SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1263  SDValue Args[3] = { Chain, Input, DWordAddr };
1265  Op->getVTList(), Args, MemVT,
1266  StoreNode->getMemOperand());
1267  } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1268  // Convert pointer from byte address to dword address.
1269  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1270 
1271  if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
1272  llvm_unreachable("Truncated and indexed stores not supported yet");
1273  } else {
1274  Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1275  }
1276  return Chain;
1277  }
1278  }
1279 
1280  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1281  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1282  return SDValue();
1283 
1284  if (MemVT.bitsLT(MVT::i32))
1285  return lowerPrivateTruncStore(StoreNode, DAG);
1286 
1287  // Standard i32+ store, tag it with DWORDADDR to note that the address
1288  // has been shifted
1289  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1290  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1291  return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1292  }
1293 
1294  // Tagged i32+ stores will be matched by patterns
1295  return SDValue();
1296 }
1297 
1298 // return (512 + (kc_bank << 12)
1299 static int
1301  switch (AddressSpace) {
1303  return 512;
1305  return 512 + 4096;
1307  return 512 + 4096 * 2;
1309  return 512 + 4096 * 3;
1311  return 512 + 4096 * 4;
1313  return 512 + 4096 * 5;
1315  return 512 + 4096 * 6;
1317  return 512 + 4096 * 7;
1319  return 512 + 4096 * 8;
1321  return 512 + 4096 * 9;
1323  return 512 + 4096 * 10;
1325  return 512 + 4096 * 11;
1327  return 512 + 4096 * 12;
1329  return 512 + 4096 * 13;
1331  return 512 + 4096 * 14;
1333  return 512 + 4096 * 15;
1334  default:
1335  return -1;
1336  }
1337 }
1338 
1339 SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1340  SelectionDAG &DAG) const {
1341  SDLoc DL(Op);
1342  LoadSDNode *Load = cast<LoadSDNode>(Op);
1343  ISD::LoadExtType ExtType = Load->getExtensionType();
1344  EVT MemVT = Load->getMemoryVT();
1345  assert(Load->getAlignment() >= MemVT.getStoreSize());
1346 
1347  SDValue BasePtr = Load->getBasePtr();
1348  SDValue Chain = Load->getChain();
1349  SDValue Offset = Load->getOffset();
1350 
1351  SDValue LoadPtr = BasePtr;
1352  if (!Offset.isUndef()) {
1353  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1354  }
1355 
1356  // Get dword location
1357  // NOTE: this should be eliminated by the future SHR ptr, 2
1358  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1359  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1360 
1361  // Load dword
1362  // TODO: can we be smarter about machine pointer info?
1363  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, MachinePointerInfo());
1364 
1365  // Get offset within the register.
1366  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1367  LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1368 
1369  // Bit offset of target byte (byteIdx * 8).
1370  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1371  DAG.getConstant(3, DL, MVT::i32));
1372 
1373  // Shift to the right.
1374  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1375 
1376  // Eliminate the upper bits by setting them to ...
1377  EVT MemEltVT = MemVT.getScalarType();
1378 
1379  if (ExtType == ISD::SEXTLOAD) { // ... ones.
1380  SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1381  Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1382  } else { // ... or zeros.
1383  Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1384  }
1385 
1386  SDValue Ops[] = {
1387  Ret,
1388  Read.getValue(1) // This should be our output chain
1389  };
1390 
1391  return DAG.getMergeValues(Ops, DL);
1392 }
1393 
1394 SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1395  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1396  unsigned AS = LoadNode->getAddressSpace();
1397  EVT MemVT = LoadNode->getMemoryVT();
1398  ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1399 
1400  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1401  ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1402  return lowerPrivateExtLoad(Op, DAG);
1403  }
1404 
1405  SDLoc DL(Op);
1406  EVT VT = Op.getValueType();
1407  SDValue Chain = LoadNode->getChain();
1408  SDValue Ptr = LoadNode->getBasePtr();
1409 
1410  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1411  LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1412  VT.isVector()) {
1413  return scalarizeVectorLoad(LoadNode, DAG);
1414  }
1415 
1416  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1417  if (ConstantBlock > -1 &&
1418  ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1419  (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1420  SDValue Result;
1421  if (isa<ConstantExpr>(LoadNode->getMemOperand()->getValue()) ||
1422  isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1423  isa<ConstantSDNode>(Ptr)) {
1424  SDValue Slots[4];
1425  for (unsigned i = 0; i < 4; i++) {
1426  // We want Const position encoded with the following formula :
1427  // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1428  // const_index is Ptr computed by llvm using an alignment of 16.
1429  // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1430  // then div by 4 at the ISel step
1431  SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1432  DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1433  Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1434  }
1435  EVT NewVT = MVT::v4i32;
1436  unsigned NumElements = 4;
1437  if (VT.isVector()) {
1438  NewVT = VT;
1439  NumElements = VT.getVectorNumElements();
1440  }
1441  Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
1442  } else {
1443  // non-constant ptr can't be folded, keeps it as a v4f32 load
1444  Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
1445  DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1446  DAG.getConstant(4, DL, MVT::i32)),
1447  DAG.getConstant(LoadNode->getAddressSpace() -
1449  );
1450  }
1451 
1452  if (!VT.isVector()) {
1453  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1454  DAG.getConstant(0, DL, MVT::i32));
1455  }
1456 
1457  SDValue MergedValues[2] = {
1458  Result,
1459  Chain
1460  };
1461  return DAG.getMergeValues(MergedValues, DL);
1462  }
1463 
1464  // For most operations returning SDValue() will result in the node being
1465  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1466  // need to manually expand loads that may be legal in some address spaces and
1467  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1468  // compute shaders, since the data is sign extended when it is uploaded to the
1469  // buffer. However SEXT loads from other address spaces are not supported, so
1470  // we need to expand them here.
1471  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1472  EVT MemVT = LoadNode->getMemoryVT();
1473  assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1474  SDValue NewLoad = DAG.getExtLoad(
1475  ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1476  LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
1477  SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1478  DAG.getValueType(MemVT));
1479 
1480  SDValue MergedValues[2] = { Res, Chain };
1481  return DAG.getMergeValues(MergedValues, DL);
1482  }
1483 
1484  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1485  return SDValue();
1486  }
1487 
1488  // DWORDADDR ISD marks already shifted address
1489  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1490  assert(VT == MVT::i32);
1491  Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1492  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1493  return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1494  }
1495  return SDValue();
1496 }
1497 
1498 SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1499  SDValue Chain = Op.getOperand(0);
1500  SDValue Cond = Op.getOperand(1);
1501  SDValue Jump = Op.getOperand(2);
1502 
1503  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1504  Chain, Jump, Cond);
1505 }
1506 
1507 SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1508  SelectionDAG &DAG) const {
1509  MachineFunction &MF = DAG.getMachineFunction();
1511 
1512  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1513 
1514  unsigned FrameIndex = FIN->getIndex();
1515  unsigned IgnoredFrameReg;
1516  unsigned Offset =
1517  TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1518  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
1519  Op.getValueType());
1520 }
1521 
1522 /// XXX Only kernel functions are supported, so we can assume for now that
1523 /// every function is a kernel function, but in the future we should use
1524 /// separate calling conventions for kernel and non-kernel functions.
1526  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1527  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1528  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1530  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1531  *DAG.getContext());
1532  MachineFunction &MF = DAG.getMachineFunction();
1533  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
1534 
1536 
1537  if (AMDGPU::isShader(CallConv)) {
1538  AnalyzeFormalArguments(CCInfo, Ins);
1539  } else {
1540  analyzeFormalArgumentsCompute(CCInfo, Ins);
1541  }
1542 
1543  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1544  CCValAssign &VA = ArgLocs[i];
1545  const ISD::InputArg &In = Ins[i];
1546  EVT VT = In.VT;
1547  EVT MemVT = VA.getLocVT();
1548  if (!VT.isVector() && MemVT.isVector()) {
1549  // Get load source type if scalarized.
1550  MemVT = MemVT.getVectorElementType();
1551  }
1552 
1553  if (AMDGPU::isShader(CallConv)) {
1554  unsigned Reg = MF.addLiveIn(VA.getLocReg(), &AMDGPU::R600_Reg128RegClass);
1555  SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1556  InVals.push_back(Register);
1557  continue;
1558  }
1559 
1562 
1563  // i64 isn't a legal type, so the register type used ends up as i32, which
1564  // isn't expected here. It attempts to create this sextload, but it ends up
1565  // being invalid. Somehow this seems to work with i64 arguments, but breaks
1566  // for <1 x i64>.
1567 
1568  // The first 36 bytes of the input buffer contains information about
1569  // thread group and global sizes.
1571  if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1572  // FIXME: This should really check the extload type, but the handling of
1573  // extload vector parameters seems to be broken.
1574 
1575  // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1576  Ext = ISD::SEXTLOAD;
1577  }
1578 
1579  // Compute the offset from the value.
1580  // XXX - I think PartOffset should give you this, but it seems to give the
1581  // size of the register which isn't useful.
1582 
1583  unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
1584  unsigned PartOffset = VA.getLocMemOffset();
1585  unsigned Offset = Subtarget->getExplicitKernelArgOffset(MF) + VA.getLocMemOffset();
1586 
1587  MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
1588  SDValue Arg = DAG.getLoad(
1589  ISD::UNINDEXED, Ext, VT, DL, Chain,
1590  DAG.getConstant(Offset, DL, MVT::i32), DAG.getUNDEF(MVT::i32), PtrInfo,
1591  MemVT, /* Alignment = */ 4, MachineMemOperand::MONonTemporal |
1594 
1595  // 4 is the preferred alignment for the CONSTANT memory space.
1596  InVals.push_back(Arg);
1597  MFI->setABIArgOffset(Offset + MemVT.getStoreSize());
1598  }
1599  return Chain;
1600 }
1601 
1603  EVT VT) const {
1604  if (!VT.isVector())
1605  return MVT::i32;
1607 }
1608 
1610  unsigned AddrSpace,
1611  unsigned Align,
1612  bool *IsFast) const {
1613  if (IsFast)
1614  *IsFast = false;
1615 
1616  if (!VT.isSimple() || VT == MVT::Other)
1617  return false;
1618 
1619  if (VT.bitsLT(MVT::i32))
1620  return false;
1621 
1622  // TODO: This is a rough estimate.
1623  if (IsFast)
1624  *IsFast = true;
1625 
1626  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
1627 }
1628 
1630  SelectionDAG &DAG, SDValue VectorEntry,
1631  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1632  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1633  assert(RemapSwizzle.empty());
1634  SDValue NewBldVec[4] = {
1635  VectorEntry.getOperand(0),
1636  VectorEntry.getOperand(1),
1637  VectorEntry.getOperand(2),
1638  VectorEntry.getOperand(3)
1639  };
1640 
1641  for (unsigned i = 0; i < 4; i++) {
1642  if (NewBldVec[i].isUndef())
1643  // We mask write here to teach later passes that the ith element of this
1644  // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1645  // break false dependencies and additionnaly make assembly easier to read.
1646  RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1647  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1648  if (C->isZero()) {
1649  RemapSwizzle[i] = 4; // SEL_0
1650  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1651  } else if (C->isExactlyValue(1.0)) {
1652  RemapSwizzle[i] = 5; // SEL_1
1653  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1654  }
1655  }
1656 
1657  if (NewBldVec[i].isUndef())
1658  continue;
1659  for (unsigned j = 0; j < i; j++) {
1660  if (NewBldVec[i] == NewBldVec[j]) {
1661  NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1662  RemapSwizzle[i] = j;
1663  break;
1664  }
1665  }
1666  }
1667 
1668  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1669  NewBldVec);
1670 }
1671 
1673  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1674  assert(VectorEntry.getOpcode() == ISD::BUILD_VECTOR);
1675  assert(RemapSwizzle.empty());
1676  SDValue NewBldVec[4] = {
1677  VectorEntry.getOperand(0),
1678  VectorEntry.getOperand(1),
1679  VectorEntry.getOperand(2),
1680  VectorEntry.getOperand(3)
1681  };
1682  bool isUnmovable[4] = { false, false, false, false };
1683  for (unsigned i = 0; i < 4; i++) {
1684  RemapSwizzle[i] = i;
1685  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1686  unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1687  ->getZExtValue();
1688  if (i == Idx)
1689  isUnmovable[Idx] = true;
1690  }
1691  }
1692 
1693  for (unsigned i = 0; i < 4; i++) {
1694  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1695  unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1696  ->getZExtValue();
1697  if (isUnmovable[Idx])
1698  continue;
1699  // Swap i and Idx
1700  std::swap(NewBldVec[Idx], NewBldVec[i]);
1701  std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1702  break;
1703  }
1704  }
1705 
1706  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1707  NewBldVec);
1708 }
1709 
1710 SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
1711  SelectionDAG &DAG,
1712  const SDLoc &DL) const {
1713  assert(BuildVector.getOpcode() == ISD::BUILD_VECTOR);
1714  // Old -> New swizzle values
1715  DenseMap<unsigned, unsigned> SwizzleRemap;
1716 
1717  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1718  for (unsigned i = 0; i < 4; i++) {
1719  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1720  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1721  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1722  }
1723 
1724  SwizzleRemap.clear();
1725  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1726  for (unsigned i = 0; i < 4; i++) {
1727  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1728  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1729  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1730  }
1731 
1732  return BuildVector;
1733 }
1734 
1735 //===----------------------------------------------------------------------===//
1736 // Custom DAG Optimizations
1737 //===----------------------------------------------------------------------===//
1738 
1740  DAGCombinerInfo &DCI) const {
1741  SelectionDAG &DAG = DCI.DAG;
1742  SDLoc DL(N);
1743 
1744  switch (N->getOpcode()) {
1745  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1746  case ISD::FP_ROUND: {
1747  SDValue Arg = N->getOperand(0);
1748  if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1749  return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1750  Arg.getOperand(0));
1751  }
1752  break;
1753  }
1754 
1755  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1756  // (i32 select_cc f32, f32, -1, 0 cc)
1757  //
1758  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1759  // this to one of the SET*_DX10 instructions.
1760  case ISD::FP_TO_SINT: {
1761  SDValue FNeg = N->getOperand(0);
1762  if (FNeg.getOpcode() != ISD::FNEG) {
1763  return SDValue();
1764  }
1765  SDValue SelectCC = FNeg.getOperand(0);
1766  if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1767  SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1768  SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1769  !isHWTrueValue(SelectCC.getOperand(2)) ||
1770  !isHWFalseValue(SelectCC.getOperand(3))) {
1771  return SDValue();
1772  }
1773 
1774  return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1775  SelectCC.getOperand(0), // LHS
1776  SelectCC.getOperand(1), // RHS
1777  DAG.getConstant(-1, DL, MVT::i32), // True
1778  DAG.getConstant(0, DL, MVT::i32), // False
1779  SelectCC.getOperand(4)); // CC
1780 
1781  break;
1782  }
1783 
1784  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1785  // => build_vector elt0, ... , NewEltIdx, ... , eltN
1786  case ISD::INSERT_VECTOR_ELT: {
1787  SDValue InVec = N->getOperand(0);
1788  SDValue InVal = N->getOperand(1);
1789  SDValue EltNo = N->getOperand(2);
1790 
1791  // If the inserted element is an UNDEF, just use the input vector.
1792  if (InVal.isUndef())
1793  return InVec;
1794 
1795  EVT VT = InVec.getValueType();
1796 
1797  // If we can't generate a legal BUILD_VECTOR, exit
1799  return SDValue();
1800 
1801  // Check that we know which element is being inserted
1802  if (!isa<ConstantSDNode>(EltNo))
1803  return SDValue();
1804  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1805 
1806  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1807  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1808  // vector elements.
1810  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1811  Ops.append(InVec.getNode()->op_begin(),
1812  InVec.getNode()->op_end());
1813  } else if (InVec.isUndef()) {
1814  unsigned NElts = VT.getVectorNumElements();
1815  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1816  } else {
1817  return SDValue();
1818  }
1819 
1820  // Insert the element
1821  if (Elt < Ops.size()) {
1822  // All the operands of BUILD_VECTOR must have the same type;
1823  // we enforce that here.
1824  EVT OpVT = Ops[0].getValueType();
1825  if (InVal.getValueType() != OpVT)
1826  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1827  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1828  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1829  Ops[Elt] = InVal;
1830  }
1831 
1832  // Return the new vector
1833  return DAG.getBuildVector(VT, DL, Ops);
1834  }
1835 
1836  // Extract_vec (Build_vector) generated by custom lowering
1837  // also needs to be customly combined
1838  case ISD::EXTRACT_VECTOR_ELT: {
1839  SDValue Arg = N->getOperand(0);
1840  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1841  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1842  unsigned Element = Const->getZExtValue();
1843  return Arg->getOperand(Element);
1844  }
1845  }
1846  if (Arg.getOpcode() == ISD::BITCAST &&
1847  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1850  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1851  unsigned Element = Const->getZExtValue();
1852  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1853  Arg->getOperand(0).getOperand(Element));
1854  }
1855  }
1856  break;
1857  }
1858 
1859  case ISD::SELECT_CC: {
1860  // Try common optimizations
1862  return Ret;
1863 
1864  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1865  // selectcc x, y, a, b, inv(cc)
1866  //
1867  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1868  // selectcc x, y, a, b, cc
1869  SDValue LHS = N->getOperand(0);
1870  if (LHS.getOpcode() != ISD::SELECT_CC) {
1871  return SDValue();
1872  }
1873 
1874  SDValue RHS = N->getOperand(1);
1875  SDValue True = N->getOperand(2);
1876  SDValue False = N->getOperand(3);
1877  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1878 
1879  if (LHS.getOperand(2).getNode() != True.getNode() ||
1880  LHS.getOperand(3).getNode() != False.getNode() ||
1881  RHS.getNode() != False.getNode()) {
1882  return SDValue();
1883  }
1884 
1885  switch (NCC) {
1886  default: return SDValue();
1887  case ISD::SETNE: return LHS;
1888  case ISD::SETEQ: {
1889  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1890  LHSCC = ISD::getSetCCInverse(LHSCC,
1891  LHS.getOperand(0).getValueType().isInteger());
1892  if (DCI.isBeforeLegalizeOps() ||
1894  return DAG.getSelectCC(DL,
1895  LHS.getOperand(0),
1896  LHS.getOperand(1),
1897  LHS.getOperand(2),
1898  LHS.getOperand(3),
1899  LHSCC);
1900  break;
1901  }
1902  }
1903  return SDValue();
1904  }
1905 
1906  case AMDGPUISD::R600_EXPORT: {
1907  SDValue Arg = N->getOperand(1);
1908  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1909  break;
1910 
1911  SDValue NewArgs[8] = {
1912  N->getOperand(0), // Chain
1913  SDValue(),
1914  N->getOperand(2), // ArrayBase
1915  N->getOperand(3), // Type
1916  N->getOperand(4), // SWZ_X
1917  N->getOperand(5), // SWZ_Y
1918  N->getOperand(6), // SWZ_Z
1919  N->getOperand(7) // SWZ_W
1920  };
1921  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1922  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1923  }
1924  case AMDGPUISD::TEXTURE_FETCH: {
1925  SDValue Arg = N->getOperand(1);
1926  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1927  break;
1928 
1929  SDValue NewArgs[19] = {
1930  N->getOperand(0),
1931  N->getOperand(1),
1932  N->getOperand(2),
1933  N->getOperand(3),
1934  N->getOperand(4),
1935  N->getOperand(5),
1936  N->getOperand(6),
1937  N->getOperand(7),
1938  N->getOperand(8),
1939  N->getOperand(9),
1940  N->getOperand(10),
1941  N->getOperand(11),
1942  N->getOperand(12),
1943  N->getOperand(13),
1944  N->getOperand(14),
1945  N->getOperand(15),
1946  N->getOperand(16),
1947  N->getOperand(17),
1948  N->getOperand(18),
1949  };
1950  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1951  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1952  }
1953  default: break;
1954  }
1955 
1957 }
1958 
1959 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1960  SDValue &Src, SDValue &Neg, SDValue &Abs,
1961  SDValue &Sel, SDValue &Imm,
1962  SelectionDAG &DAG) const {
1964  if (!Src.isMachineOpcode())
1965  return false;
1966 
1967  switch (Src.getMachineOpcode()) {
1968  case AMDGPU::FNEG_R600:
1969  if (!Neg.getNode())
1970  return false;
1971  Src = Src.getOperand(0);
1972  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1973  return true;
1974  case AMDGPU::FABS_R600:
1975  if (!Abs.getNode())
1976  return false;
1977  Src = Src.getOperand(0);
1978  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1979  return true;
1980  case AMDGPU::CONST_COPY: {
1981  unsigned Opcode = ParentNode->getMachineOpcode();
1982  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
1983 
1984  if (!Sel.getNode())
1985  return false;
1986 
1987  SDValue CstOffset = Src.getOperand(0);
1988  if (ParentNode->getValueType(0).isVector())
1989  return false;
1990 
1991  // Gather constants values
1992  int SrcIndices[] = {
1993  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
1994  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
1995  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2),
1996  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
1997  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
1998  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
1999  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2000  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2001  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2002  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2003  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2004  };
2005  std::vector<unsigned> Consts;
2006  for (int OtherSrcIdx : SrcIndices) {
2007  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2008  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2009  continue;
2010  if (HasDst) {
2011  OtherSrcIdx--;
2012  OtherSelIdx--;
2013  }
2014  if (RegisterSDNode *Reg =
2015  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2016  if (Reg->getReg() == AMDGPU::ALU_CONST) {
2017  ConstantSDNode *Cst
2018  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2019  Consts.push_back(Cst->getZExtValue());
2020  }
2021  }
2022  }
2023 
2024  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2025  Consts.push_back(Cst->getZExtValue());
2026  if (!TII->fitsConstReadLimitations(Consts)) {
2027  return false;
2028  }
2029 
2030  Sel = CstOffset;
2031  Src = DAG.getRegister(AMDGPU::ALU_CONST, MVT::f32);
2032  return true;
2033  }
2034  case AMDGPU::MOV_IMM_GLOBAL_ADDR:
2035  // Check if the Imm slot is used. Taken from below.
2036  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2037  return false;
2038  Imm = Src.getOperand(0);
2039  Src = DAG.getRegister(AMDGPU::ALU_LITERAL_X, MVT::i32);
2040  return true;
2041  case AMDGPU::MOV_IMM_I32:
2042  case AMDGPU::MOV_IMM_F32: {
2043  unsigned ImmReg = AMDGPU::ALU_LITERAL_X;
2044  uint64_t ImmValue = 0;
2045 
2046  if (Src.getMachineOpcode() == AMDGPU::MOV_IMM_F32) {
2048  float FloatValue = FPC->getValueAPF().convertToFloat();
2049  if (FloatValue == 0.0) {
2050  ImmReg = AMDGPU::ZERO;
2051  } else if (FloatValue == 0.5) {
2052  ImmReg = AMDGPU::HALF;
2053  } else if (FloatValue == 1.0) {
2054  ImmReg = AMDGPU::ONE;
2055  } else {
2056  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2057  }
2058  } else {
2060  uint64_t Value = C->getZExtValue();
2061  if (Value == 0) {
2062  ImmReg = AMDGPU::ZERO;
2063  } else if (Value == 1) {
2064  ImmReg = AMDGPU::ONE_INT;
2065  } else {
2066  ImmValue = Value;
2067  }
2068  }
2069 
2070  // Check that we aren't already using an immediate.
2071  // XXX: It's possible for an instruction to have more than one
2072  // immediate operand, but this is not supported yet.
2073  if (ImmReg == AMDGPU::ALU_LITERAL_X) {
2074  if (!Imm.getNode())
2075  return false;
2077  assert(C);
2078  if (C->getZExtValue())
2079  return false;
2080  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2081  }
2082  Src = DAG.getRegister(ImmReg, MVT::i32);
2083  return true;
2084  }
2085  default:
2086  return false;
2087  }
2088 }
2089 
2090 /// \brief Fold the instructions after selecting them
2091 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2092  SelectionDAG &DAG) const {
2093  const R600InstrInfo *TII = getSubtarget()->getInstrInfo();
2094  if (!Node->isMachineOpcode())
2095  return Node;
2096 
2097  unsigned Opcode = Node->getMachineOpcode();
2098  SDValue FakeOp;
2099 
2100  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2101 
2102  if (Opcode == AMDGPU::DOT_4) {
2103  int OperandIdx[] = {
2104  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_X),
2105  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Y),
2106  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_Z),
2107  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_W),
2108  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_X),
2109  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Y),
2110  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_Z),
2111  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_W)
2112  };
2113  int NegIdx[] = {
2114  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_X),
2115  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Y),
2116  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_Z),
2117  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg_W),
2118  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_X),
2119  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Y),
2120  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_Z),
2121  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg_W)
2122  };
2123  int AbsIdx[] = {
2124  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_X),
2125  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Y),
2126  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_Z),
2127  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs_W),
2128  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_X),
2129  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Y),
2130  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_Z),
2131  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs_W)
2132  };
2133  for (unsigned i = 0; i < 8; i++) {
2134  if (OperandIdx[i] < 0)
2135  return Node;
2136  SDValue &Src = Ops[OperandIdx[i] - 1];
2137  SDValue &Neg = Ops[NegIdx[i] - 1];
2138  SDValue &Abs = Ops[AbsIdx[i] - 1];
2139  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2140  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2141  if (HasDst)
2142  SelIdx--;
2143  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2144  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2145  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2146  }
2147  } else if (Opcode == AMDGPU::REG_SEQUENCE) {
2148  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2149  SDValue &Src = Ops[i];
2150  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2151  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2152  }
2153  } else if (Opcode == AMDGPU::CLAMP_R600) {
2154  SDValue Src = Node->getOperand(0);
2155  if (!Src.isMachineOpcode() ||
2156  !TII->hasInstrModifiers(Src.getMachineOpcode()))
2157  return Node;
2158  int ClampIdx = TII->getOperandIdx(Src.getMachineOpcode(),
2159  AMDGPU::OpName::clamp);
2160  if (ClampIdx < 0)
2161  return Node;
2162  SDLoc DL(Node);
2163  std::vector<SDValue> Ops(Src->op_begin(), Src->op_end());
2164  Ops[ClampIdx - 1] = DAG.getTargetConstant(1, DL, MVT::i32);
2165  return DAG.getMachineNode(Src.getMachineOpcode(), DL,
2166  Node->getVTList(), Ops);
2167  } else {
2168  if (!TII->hasInstrModifiers(Opcode))
2169  return Node;
2170  int OperandIdx[] = {
2171  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0),
2172  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1),
2173  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2)
2174  };
2175  int NegIdx[] = {
2176  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_neg),
2177  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_neg),
2178  TII->getOperandIdx(Opcode, AMDGPU::OpName::src2_neg)
2179  };
2180  int AbsIdx[] = {
2181  TII->getOperandIdx(Opcode, AMDGPU::OpName::src0_abs),
2182  TII->getOperandIdx(Opcode, AMDGPU::OpName::src1_abs),
2183  -1
2184  };
2185  for (unsigned i = 0; i < 3; i++) {
2186  if (OperandIdx[i] < 0)
2187  return Node;
2188  SDValue &Src = Ops[OperandIdx[i] - 1];
2189  SDValue &Neg = Ops[NegIdx[i] - 1];
2190  SDValue FakeAbs;
2191  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2192  bool HasDst = TII->getOperandIdx(Opcode, AMDGPU::OpName::dst) > -1;
2193  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2194  int ImmIdx = TII->getOperandIdx(Opcode, AMDGPU::OpName::literal);
2195  if (HasDst) {
2196  SelIdx--;
2197  ImmIdx--;
2198  }
2199  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2200  SDValue &Imm = Ops[ImmIdx];
2201  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2202  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2203  }
2204  }
2205 
2206  return Node;
2207 }
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:500
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:467
static SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
A parsed version of the target data layout string in and methods for querying it. ...
Definition: DataLayout.h:102
SDValue getValue(unsigned R) const
void addFlag(MachineInstr &MI, unsigned Operand, unsigned Flag) const
Add one of the MO_FLAG* flags to the specified Operand.
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Flags getFlags() const
Return the raw flags of the source value,.
LLVMContext * getContext() const
Definition: SelectionDAG.h:333
AMDGPU specific subclass of TargetSubtarget.
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1309
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
size_t i
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:572
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Interface definition for R600InstrInfo.
const ConstantFP * getFPImm() const
void AnalyzeFormalArguments(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
float convertToFloat() const
Definition: APFloat.h:1014
SDVTList getVTList() const
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:219
static bool isVirtualRegister(unsigned Reg)
Return true if the specified register number is in the virtual register namespace.
const GlobalValue * getGlobal() const
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space...
Definition: Type.cpp:655
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Function Alias Analysis Results
Type * getTypeForEVT(LLVMContext &Context) const
getTypeForEVT - This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:204
unsigned getNumOperands() const
Return the number of values used by this operation.
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:271
const SDValue & getOperand(unsigned Num) const
Address space for local memory.
Definition: AMDGPU.h:141
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT TVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
const SDValue & getBasePtr() const
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:369
bool bitsLT(EVT VT) const
bitsLT - Return true if this has less bits than VT.
Definition: ValueTypes.h:212
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
static MCDisassembler::DecodeStatus addOperand(MCInst &Inst, const MCOperand &Opnd)
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:209
SDValue CombineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors...
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations...
Definition: ISDOpcodes.h:388
bool isVector() const
isVector - Return true if this is a vector value type.
Definition: ValueTypes.h:133
Pointer to the start of the shader's constant data.
const HexagonInstrInfo * TII
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:440
#define MO_FLAG_ABS
Definition: R600Defines.h:19
Shift and rotation operations.
Definition: ISDOpcodes.h:344
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s), MachineInstr opcode, and operands.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:327
int getLDSNoRetOp(uint16_t Opcode)
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:32
void eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
virtual SDValue CreateLiveInRegister(SelectionDAG &DAG, const TargetRegisterClass *RC, unsigned Reg, EVT VT) const
Helper function that adds Reg to the LiveIn list of the DAG's MachineFunction.
unsigned getAddressSpace() const
Reg
All possible values of the reg field in the ModR/M byte.
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
APInt bitcastToAPInt() const
Definition: APFloat.h:1012
The memory access is dereferenceable (i.e., doesn't trap).
EVT getScalarType() const
getScalarType - If this is a vector type, return the element type, otherwise return this...
Definition: ValueTypes.h:233
MachineInstr * buildMovImm(MachineBasicBlock &BB, MachineBasicBlock::iterator I, unsigned DstReg, uint64_t Imm) const
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
unsigned getMachineOpcode() const
bool bitsGE(EVT VT) const
bitsGE - Return true if this has no less bits than VT.
Definition: ValueTypes.h:206
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Interface to describe a layout of a stack frame on an AMDGPU target.
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
bool isInteger() const
isInteger - Return true if this is an integer, or a vector integer type.
Definition: ValueTypes.h:123
This file implements a class to represent arbitrary precision integral constant values and operations...
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
unsigned getNumOperands() const
Access to explicit operands of the instruction.
Definition: MachineInstr.h:277
bool hasInstrModifiers(unsigned Opcode) const
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
EVT getVectorElementType() const
getVectorElementType - Given a vector type, return the type of each element.
Definition: ValueTypes.h:239
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
unsigned getLocReg() const
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose...
const AMDGPUSubtarget * Subtarget
unsigned getNumValues() const
Return the number of values defined/returned by this operator.
#define MO_FLAG_NEG
Definition: R600Defines.h:18
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:200
const SDValue & getBasePtr() const
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:737
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out...
Definition: ISDOpcodes.h:842
EVT getValueType(const DataLayout &DL, Type *Ty, bool AllowUnknown=false) const
Return the EVT corresponding to this LLVM type.
EVT getMemoryVT() const
Return the type of the in-memory value.
int64_t getImm() const
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:487
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:151
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:328
bool bitsLE(EVT VT) const
bitsLE - Return true if this has no more bits than VT.
Definition: ValueTypes.h:218
Class to represent pointers.
Definition: DerivedTypes.h:443
This class is used to represent ISD::STORE nodes.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:273
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:453
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a vector with the specified, possibly variable...
Definition: ISDOpcodes.h:274
const R600FrameLowering * getFrameLowering() const override
SDNode * getNode() const
get the SDNode which holds the desired result
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
unsigned getScalarSizeInBits() const
Definition: ValueTypes.h:262
unsigned getStoreSize() const
getStoreSize - Return the number of bytes overwritten by a store of the specified value type...
Definition: ValueTypes.h:268
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:166
void setImmOperand(MachineInstr &MI, unsigned Op, int64_t Imm) const
Helper function for setting instruction flag values.
unsigned const MachineRegisterInfo * MRI
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1323
MVT - Machine Value Type.
bool isShader(CallingConv::ID cc)
const SDValue & getOperand(unsigned i) const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:48
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type...
Address space for constant memory (VTX2)
Definition: AMDGPU.h:140
Simple binary floating point operators.
Definition: ISDOpcodes.h:246
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align=0, bool Vol=false, bool ReadMem=true, bool WriteMem=true, unsigned Size=0)
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
MVT getLocVT() const
static int ConstantAddressBlock(unsigned AddressSpace)
Address space for private memory.
Definition: AMDGPU.h:138
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:818
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL...
Definition: ISDOpcodes.h:279
This file contains the declarations for the subclasses of Constant, which represent the different fla...
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:279
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:228
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
This file declares a class to represent arbitrary precision floating point values and provide a varie...
uint32_t Offset
const R600InstrInfo * getInstrInfo() const override
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
unsigned getOpcode() const
int getOperandIdx(const MachineInstr &MI, unsigned Op) const
Get the index of Op in the MachineInstr.
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
XXX Only kernel functions are supported, so we can assume for now that every function is a kernel fun...
The memory access is non-temporal.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y)...
const SDValue & getValue() const
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:392
EVT - Extended Value Type.
Definition: ValueTypes.h:31
static UndefValue * get(Type *T)
Static factory methods - Return an 'undef' object of the specified type.
Definition: Constants.cpp:1337
const APFloat & getValueAPF() const
This class contains a discriminated union of information about pointers in memory operands...
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
#define MO_FLAG_CLAMP
Definition: R600Defines.h:17
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, unsigned Alignment=0, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands...
const MachinePointerInfo & getPointerInfo() const
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any DBG_VALUE instructions.
const SDValue & getOffset() const
bool bitsGT(EVT VT) const
bitsGT - Return true if this has more bits than VT.
Definition: ValueTypes.h:200
const R600RegisterInfo * getRegisterInfo() const override
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
CCState - This class holds information needed while lowering arguments and return values...
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:285
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:166
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small...
Definition: SmallVector.h:843
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:639
CCValAssign - Represent assignment of one arg/retval to a location.
AddressSpace
Definition: NVPTXBaseInfo.h:22
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:566
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
An SDNode that represents everything that will be needed to construct a MachineInstr.
const SDValue & getChain() const
Promote Memory to Register
Definition: Mem2Reg.cpp:100
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const R600Subtarget * getSubtarget() const
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
bool isMachineOpcode() const
unsigned getStackWidth(const MachineFunction &MF) const
Represents one node in the SelectionDAG.
CondCode getSetCCInverse(CondCode Operation, bool isInteger)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
static GCRegistry::Add< ShadowStackGC > C("shadow-stack","Very portable GC for uncooperative code generators")
static mvt_range integer_valuetypes()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:586
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
const Value * getValue() const
Return the base address of the memory access.
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:354
Interface for the AMDGPU Implementation of the Intrinsic Info class.
op_iterator op_begin() const
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:403
int getSelIdx(unsigned Opcode, unsigned SrcIdx) const
unsigned getAddressSpace() const
Return the address space for the associated pointer.
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
Representation of each machine instruction.
Definition: MachineInstr.h:52
MachineInstrBuilder buildDefaultInstruction(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned Opcode, unsigned DstReg, unsigned Src0Reg, unsigned Src1Reg=0) const
buildDefaultInstruction - This function returns a MachineInstr with all the instruction modifiers ini...
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:333
bool isUndef() const
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:610
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI)
R600 DAG Lowering interface definition.
Address space for indirect addressible parameter memory (VTX1)
Definition: AMDGPU.h:145
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:418
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:536
unsigned getSizeInBits() const
getSizeInBits - Return the size of the specified value type in bits.
Definition: ValueTypes.h:256
unsigned getOrigArgIndex() const
#define I(x, y, z)
Definition: MD5.cpp:54
#define N
LLVM_ATTRIBUTE_ALWAYS_INLINE size_type size() const
Definition: SmallVector.h:135
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
The memory access always returns the same value (or traps).
op_iterator op_end() const
unsigned getExplicitKernelArgOffset(const MachineFunction &MF) const
Returns the offset in bytes from the start of the input buffer of the first explicit kernel argument...
const SDValue & getOffset() const
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types...
LLVM_NODISCARD std::enable_if<!is_simple_type< Y >::value, typename cast_retty< X, const Y >::ret_type >::type dyn_cast(const Y &Val)
Definition: Casting.h:287
static volatile int Zero
EVT getValueType() const
Return the ValueType of the referenced return value.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
MachineInstr * getVRegDef(unsigned Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
SDValue getCondCode(ISD::CondCode Cond)
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:80
const APFloat & getValueAPF() const
Definition: Constants.h:300
void setABIArgOffset(unsigned NewOffset)
unsigned getReg() const
getReg - Returns the register number.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
bool isSimple() const
isSimple - Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:107
MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
LLVM Value Representation.
Definition: Value.h:71
SDValue getRegister(unsigned Reg, EVT VT)
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
bool isTruncatingStore() const
Return true if the op does a truncation before store.
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:830
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:239
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, unsigned Align, bool *IsFast) const override
Determine if the target supports unaligned memory accesses.
SDValue getValueType(EVT)
std::underlying_type< E >::type Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:81
static bool isEOP(MachineBasicBlock::iterator I)
const MachineInstrBuilder & addOperand(const MachineOperand &MO) const
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.Val alone...
Primary interface to the complete machine description for the target machine.
IRTranslator LLVM IR MI
#define MO_FLAG_MASK
Definition: R600Defines.h:20
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:377
uint32_t getImplicitParameterOffset(const AMDGPUMachineFunction *MFI, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:197
int getFrameIndexReference(const MachineFunction &MF, int FI, unsigned &FrameReg) const override
unsigned getLocMemOffset() const
Conversion operators.
Definition: ISDOpcodes.h:397
bool fitsConstReadLimitations(const std::vector< MachineInstr * > &) const
An instruction group can only access 2 channel pair (either [XY] or [ZW]) from KCache bank on R700+...
int * Ptr
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:381
bool isLDSRetInstr(unsigned Opcode) const
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT SrcTy)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value...
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:406
unsigned getAlignment() const
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation...
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override
Return the ValueType of the result of SETCC operations.
FNEG, FABS, FSQRT, FSIN, FCOS, FPOWI, FPOW, FLOG, FLOG2, FLOG10, FEXP, FEXP2, FCEIL, FTRUNC, FRINT, FNEARBYINT, FROUND, FFLOOR - Perform various unary floating point operations.
Definition: ISDOpcodes.h:516
MVT getVectorIdxTy(const DataLayout &) const override
Returns the type to be used for the index operand of: ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT...
EVT changeVectorElementTypeToInteger() const
changeVectorElementTypeToInteger - Return a vector with the same number of elements as this vector...
Definition: ValueTypes.h:80
MVT getSimpleVT() const
getSimpleVT - Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:226
#define MO_FLAG_PUSH
Definition: R600Defines.h:21
bool isMachineOpcode() const
Test if this node has a post-isel opcode, directly corresponding to a MachineInstr opcode...
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
bool use_empty(unsigned RegNo) const
use_empty - Return true if there are no instructions using the specified register.
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned char TargetFlags=0)
Definition: SelectionDAG.h:529
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:139
uint64_t getZExtValue() const
unsigned getVectorNumElements() const
getVectorNumElements - Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:248
This class is used to represent ISD::LOAD nodes.