//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#include "R600GenCallingConv.inc"

R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
                                       const R600Subtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);

  computeRegisterProperties(Subtarget->getRegisterInfo());

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
  // We need to include these since trunc STORES to PRIVATE need
  // special handling to accommodate RMW
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::FCEIL, MVT::f64, Custom);
  setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
  setOperationAction(ISD::FRINT, MVT::f64, Custom);
  setOperationAction(ISD::FFLOOR, MVT::f64, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);

  if (!Subtarget->hasFMA()) {
    setOperationAction(ISD::FMA, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f64, Expand);
  }

  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
  // need it for R600.
  if (!Subtarget->hasFP32Denormals())
    setOperationAction(ISD::FMAD, MVT::f32, Legal);

  if (!Subtarget->hasBFI()) {
    // fcopysign can be done in a single instruction with BFI.
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  }

  if (!Subtarget->hasBCNT(32))
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);

  if (!Subtarget->hasBCNT(64))
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);

  if (Subtarget->hasFFBH())
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);

  if (Subtarget->hasFFBL())
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);

  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
  // need it for R600.
  if (Subtarget->hasBFE())
    setHasExtractBitsInsn(true);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  // LLVM will expand these to atomic_cmp_swap(0)
  // and atomic_swap, respectively.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setSchedulingPreference(Sched::Source);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
  setTargetDAGCombine(ISD::LOAD);
}

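// For example, in a block that ends "RAT_WRITE_CACHELESS_32_eg ...; RETURN"
// the RAT write is directly followed by RETURN, so isEOP(I) is true and the
// custom inserter below can fold the "end of program" bit into it.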
static inline bool isEOP(MachineBasicBlock::iterator I) {
  if (std::next(I) == I->getParent()->end())
    return false;
  return std::next(I)->getOpcode() == R600::RETURN;
}

MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = MI;
  const R600InstrInfo *TII = Subtarget->getInstrInfo();

  switch (MI.getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI.getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
          MI.getOpcode() == R600::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
      for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
        NewMI.add(MI.getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;

  case R600::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case R600::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case R600::MASK_WRITE: {
    unsigned maskedRegister = MI.getOperand(0).getReg();
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case R600::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
                                                            .getFPImm()
                                                            ->getValueAPF()
                                                            .bitcastToAPInt()
                                                            .getZExtValue());
    break;

  case R600::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;

  case R600::MOV_IMM_GLOBAL_ADDR: {
    // TODO: Perhaps combine this instruction with the next if possible
    auto MIB = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
    // TODO: Ugh this is rather ugly
    MIB->getOperand(Idx) = MI.getOperand(1);
    break;
  }

  case R600::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
    TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

  case R600::RAT_WRITE_CACHELESS_32_eg:
  case R600::RAT_WRITE_CACHELESS_64_eg:
  case R600::RAT_WRITE_CACHELESS_128_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::RAT_STORE_TYPED_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
        .add(MI.getOperand(0));
    break;

  case R600::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::EG_ExportSwz:
  case R600::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
          NextExportInst->getOpcode() == R600::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(5))
        .add(MI.getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case R600::RETURN: {
    return BB;
  }
  }

  MI.eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case Intrinsic::r600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    case Intrinsic::r600_tex:
    case Intrinsic::r600_texc: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case Intrinsic::r600_tex:
        TextureOp = 0;
        break;
      case Intrinsic::r600_texc:
        TextureOp = 1;
        break;
      default:
        llvm_unreachable("unhandled texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case Intrinsic::r600_dot4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
            DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
            DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_implicitarg_ptr: {
      MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
      uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
      return DAG.getConstant(ByteOffset, DL, PtrVT);
    }
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Z, VT);

    case Intrinsic::r600_recipsqrt_ieee:
      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_recipsqrt_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
    default:
      return Op;
    }

    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
      return;
    }
    // Since we don't care about out of bounds values we can use FP_TO_SINT for
    // uints too. The DAGLegalizer code for uint considers some extra cases
    // which are not necessary here.
    LLVM_FALLTHROUGH;
  case ISD::FP_TO_SINT: {
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
      return;
    }

    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();
  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);

  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
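  // For instance (a sketch with rounded values): x = 5*Pi/2 gives
  // x * (1/2Pi) + 0.5 = 1.75, FRACT(1.75) = 0.75, and 0.75 - 0.5 = 0.25,
  // i.e. a quarter turn, which has the same sine and cosine as 5*Pi/2.
  // The 0.15915494309 constant below is 1/(2*Pi).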
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
                     DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}

SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the Shift = 0 special case.
  // Without it, CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.
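  // Concretely: with VT = i32 and Shift = 0, CompShift = 31, so
  // (Lo >> 31) >> 1 is 0 as required, while a single shift by
  // CompShift + 1 = 32 would be undefined. For Shift = 40 the BigShift
  // path wins instead: Hi = Lo << 8 and Lo = 0, chosen by the SETULT
  // selects below.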

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the Shift = 0 special case.
  // Without it, CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.
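  // E.g. for SRA_PARTS with Shift = 40: BigShift = 8, so Lo = Hi >> 8
  // (arithmetic) and Hi = Hi >> 31, which replicates the sign bit, as
  // chosen by the SETULT selects below.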

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

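// Both i1 helpers below lean on the comment in ReplaceNodeResults: out of
// bounds values don't matter, so the only in-range f32 inputs for an i1
// result are 0.0 and 1.0 (or -1.0 in the signed case), and a single SETEQ
// against the nonzero value is enough.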
SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType *PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)));
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

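// Note the hardware's canonical booleans used below: "true" is all ones
// (-1) for integer compares and 1.0f for float compares, which is what the
// SET* / CND* pattern matching in LowerSELECT_CC relies on.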
bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  return isAllOnesConstant(Op);
}

bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  return isNullConstant(Op);
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                                     Cond, Zero,
                                     True, False,
                                     DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue,
                             HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
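/// For example, with StackWidth 1 only the X channel is used, so each
/// 4-byte slot occupies a full register and a byte pointer becomes a
/// register index via >> 2; with StackWidth 2 two slots share a register,
/// hence >> 3; with StackWidth 4 all four channels are used, hence >> 4.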
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}

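// For example, with StackWidth 2 the even element of a pair lands in
// channel 0 and the odd element in channel 1 of the same register; only
// ElemIdx == 2 bumps PtrIncr to advance to the next register below.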
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

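// lowerPrivateTruncStore emits a read-modify-write sequence. As a sketch,
// a truncating i8 store of 0xAB to private byte address 6 loads the dword
// at address 4, clears byte 2 with the inverted shifted mask
// ~(0xff << 16), ORs in (0xAB << 16), and writes the dword back.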
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  // TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);

  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue OldChain = Store->getChain();
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  const bool TruncatingStore = StoreNode->isTruncatingStore();

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
       TruncatingStore) &&
      VT.isVector()) {
    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      // TODO: can the chain be replaced without creating a new store?
      SDValue NewStore = DAG.getTruncStore(
          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
          MemVT, StoreNode->getAlignment(),
          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
      StoreNode = cast<StoreSDNode>(NewStore);
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  unsigned Align = StoreNode->getAlignment();
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
    if (TruncatingStore) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlignment() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isIndexed()) {
        llvm_unreachable("Indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}

// return 512 + (kc_bank << 12)
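// e.g. CONSTANT_BUFFER_1 maps to 512 + 4096 and CONSTANT_BUFFER_15 to
// 512 + 4096 * 15, i.e. one 4096-slot window per kc_bank.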
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlignment() >= MemVT.getStoreSize());

  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  // Get offset within the register.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  SDValue Ops[] = {
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      VT.isVector()) {
    return scalarizeVectorLoad(LoadNode, DAG);
  }

  // This is still used for explicit load from addrspace(8)
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
    } else {
      // TODO: Does this even work?
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }
  return SDValue();
}

SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);

  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
                     Chain, Jump, Cond);
}

SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = Subtarget->getFrameLowering();

  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);

  unsigned FrameIndex = FIN->getIndex();
  unsigned IgnoredFrameReg;
  unsigned Offset =
      TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
                         Op.getValueType());
}

CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                  bool IsVarArg) const {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    llvm_unreachable("kernels should not be handled here");
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_LS:
    return CC_R600;
  default:
    report_fatal_error("Unsupported calling convention.");
  }
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();

  if (AMDGPU::isShader(CallConv)) {
    CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
  } else {
    analyzeFormalArgumentsCompute(CCInfo, Ins);
  }

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (AMDGPU::isShader(CallConv)) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
        PtrInfo,
        MemVT, Alignment, MachineMemOperand::MONonTemporal |
                              MachineMemOperand::MODereferenceable |
                              MachineMemOperand::MOInvariant);

    InVals.push_back(Arg);
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &Ctx,
                                           EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
                                          const SelectionDAG &DAG) const {
  // Local and Private addresses do not handle vectors. Limit to i32
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
    return (MemVT.getSizeInBits() <= 32);
  }
  return true;
}

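// A rough illustration of the policy below: an i16 access reports false
// (scalars narrower than 32 bits take the RMW paths above), while a
// 4-byte-aligned v2i32 access reports true and is considered fast.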
bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                        unsigned AddrSpace,
                                                        unsigned Align,
                                                        bool *IsFast) const {
  if (IsFast)
    *IsFast = false;

  if (!VT.isSimple() || VT == MVT::Other)
    return false;

  if (VT.bitsLT(MVT::i32))
    return false;

  // TODO: This is a rough estimate.
  if (IsFast)
    *IsFast = true;

  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}

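// CompactSwizzlableVector turns constant 0.0/1.0 lanes and duplicated
// lanes of a build_vector into swizzle selects. E.g. for
// build_vector (a, 0.0, a, 1.0) the remap becomes {1 -> SEL_0, 2 -> 0,
// 3 -> SEL_1} and lanes 1-3 of the rebuilt vector are undef.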
static SDValue CompactSwizzlableVector(
    SelectionDAG &DAG, SDValue VectorEntry,
    DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

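// ReorganizeVector moves a lane that is an extract_vector_elt back into
// the lane it was extracted from, recording the permutation in
// RemapSwizzle: e.g. build_vector (V.y, V.x, ...) becomes
// build_vector (V.x, V.y, ...) with the first two swizzles swapped.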
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  bool isUnmovable[4] = {false, false, false, false};
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
                                            SelectionDAG &DAG,
                                            const SDLoc &DL) const {
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  return BuildVector;
}

SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
                                            SelectionDAG &DAG) const {
  SDLoc DL(LoadNode);
  EVT VT = LoadNode->getValueType(0);
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();
  assert(isa<ConstantSDNode>(Ptr));

  // TODO: Support smaller loads
  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 ||
      !ISD::isNON_EXTLoad(LoadNode))
    return SDValue();

  if (LoadNode->getAlignment() < 4)
    return SDValue();

  int ConstantBlock = ConstantAddressBlock(Block);

  SDValue Slots[4];
  for (unsigned i = 0; i < 4; i++) {
    // We want Const position encoded with the following formula :
    // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
    // const_index is Ptr computed by llvm using an alignment of 16.
    // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
    // then div by 4 at the ISel step
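    // For kc_bank 0 and chan 0, for instance, this adds
    // ConstantBlock * 16 = 512 * 16 = 8192 = ((512 << 2) * 4) to the
    // pointer, and dividing by 4 at ISel recovers the encoded 512 << 2.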
    SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
        DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
    Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
  }
  EVT NewVT = MVT::v4i32;
  unsigned NumElements = 4;
  if (VT.isVector()) {
    NewVT = VT;
    NumElements = VT.getVectorNumElements();
  }
  SDValue Result = DAG.getBuildVector(NewVT, DL,
                                      makeArrayRef(Slots, NumElements));
  if (!VT.isVector()) {
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                         DAG.getConstant(0, DL, MVT::i32));
  }
  SDValue MergedValues[2] = {
    Result,
    Chain
  };
  return DAG.getMergeValues(MergedValues, DL);
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

1845  DAGCombinerInfo &DCI) const {
1846  SelectionDAG &DAG = DCI.DAG;
1847  SDLoc DL(N);
1848 
1849  switch (N->getOpcode()) {
1850  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1851  case ISD::FP_ROUND: {
1852  SDValue Arg = N->getOperand(0);
1853  if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1854  return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1855  Arg.getOperand(0));
1856  }
1857  break;
1858  }
1859 
1860  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1861  // (i32 select_cc f32, f32, -1, 0 cc)
1862  //
1863  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1864  // this to one of the SET*_DX10 instructions.
1865  case ISD::FP_TO_SINT: {
1866  SDValue FNeg = N->getOperand(0);
1867  if (FNeg.getOpcode() != ISD::FNEG) {
1868  return SDValue();
1869  }
1870  SDValue SelectCC = FNeg.getOperand(0);
1871  if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1872  SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1873  SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1874  !isHWTrueValue(SelectCC.getOperand(2)) ||
1875  !isHWFalseValue(SelectCC.getOperand(3))) {
1876  return SDValue();
1877  }
1878 
1879  return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1880  SelectCC.getOperand(0), // LHS
1881  SelectCC.getOperand(1), // RHS
1882  DAG.getConstant(-1, DL, MVT::i32), // True
1883  DAG.getConstant(0, DL, MVT::i32), // False
1884  SelectCC.getOperand(4)); // CC
1885 
1886  break;
1887  }
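  // Illustrative fold (hypothetical operands): fp_to_sint (fneg
  // (select_cc %a, %b, 1.0, 0.0, setolt)) becomes
  // select_cc %a, %b, -1, 0, setolt, matching the 0 / -1 result
  // convention of the SET*_DX10 instructions.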
1888 
1889  // insert_vector_elt (build_vector elt0, ..., eltN), NewElt, idx
1890  // => build_vector elt0, ..., NewElt, ..., eltN
1891  case ISD::INSERT_VECTOR_ELT: {
1892  SDValue InVec = N->getOperand(0);
1893  SDValue InVal = N->getOperand(1);
1894  SDValue EltNo = N->getOperand(2);
1895 
1896  // If the inserted element is an UNDEF, just use the input vector.
1897  if (InVal.isUndef())
1898  return InVec;
1899 
1900  EVT VT = InVec.getValueType();
1901 
1902  // If we can't generate a legal BUILD_VECTOR, exit
 1903  if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
 1904  return SDValue();
1905 
1906  // Check that we know which element is being inserted
1907  if (!isa<ConstantSDNode>(EltNo))
1908  return SDValue();
1909  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1910 
1911  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1912  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1913  // vector elements.
 1914  SmallVector<SDValue, 8> Ops;
 1915  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1916  Ops.append(InVec.getNode()->op_begin(),
1917  InVec.getNode()->op_end());
1918  } else if (InVec.isUndef()) {
1919  unsigned NElts = VT.getVectorNumElements();
1920  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1921  } else {
1922  return SDValue();
1923  }
1924 
1925  // Insert the element
1926  if (Elt < Ops.size()) {
1927  // All the operands of BUILD_VECTOR must have the same type;
1928  // we enforce that here.
1929  EVT OpVT = Ops[0].getValueType();
1930  if (InVal.getValueType() != OpVT)
1931  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1932  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1933  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1934  Ops[Elt] = InVal;
1935  }
1936 
1937  // Return the new vector
1938  return DAG.getBuildVector(VT, DL, Ops);
1939  }
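  // e.g. insert_vector_elt (build_vector a, b, c, d), x, 2 is rebuilt as
  // build_vector a, b, x, d, with x any-extended or truncated first when
  // its type differs from the other operands.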
1940 
1941  // Extract_vec (Build_vector) generated by custom lowering
1942  // also needs to be custom combined here
1943  case ISD::EXTRACT_VECTOR_ELT: {
1944  SDValue Arg = N->getOperand(0);
1945  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1946  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1947  unsigned Element = Const->getZExtValue();
1948  return Arg->getOperand(Element);
1949  }
1950  }
1951  if (Arg.getOpcode() == ISD::BITCAST &&
1952  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
 1953  (Arg.getOperand(0).getValueType().getVectorNumElements() ==
 1954  Arg.getValueType().getVectorNumElements())) {
 1955  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1956  unsigned Element = Const->getZExtValue();
1957  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1958  Arg->getOperand(0).getOperand(Element));
1959  }
1960  }
1961  break;
1962  }
1963 
1964  case ISD::SELECT_CC: {
1965  // Try common optimizations
 1966  if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
 1967  return Ret;
1968 
1969  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1970  // selectcc x, y, a, b, inv(cc)
1971  //
1972  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1973  // selectcc x, y, a, b, cc
1974  SDValue LHS = N->getOperand(0);
1975  if (LHS.getOpcode() != ISD::SELECT_CC) {
1976  return SDValue();
1977  }
1978 
1979  SDValue RHS = N->getOperand(1);
1980  SDValue True = N->getOperand(2);
1981  SDValue False = N->getOperand(3);
1982  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1983 
1984  if (LHS.getOperand(2).getNode() != True.getNode() ||
1985  LHS.getOperand(3).getNode() != False.getNode() ||
1986  RHS.getNode() != False.getNode()) {
1987  return SDValue();
1988  }
1989 
1990  switch (NCC) {
1991  default: return SDValue();
1992  case ISD::SETNE: return LHS;
1993  case ISD::SETEQ: {
1994  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1995  LHSCC = ISD::getSetCCInverse(LHSCC,
1996  LHS.getOperand(0).getValueType().isInteger());
1997  if (DCI.isBeforeLegalizeOps() ||
 1998  isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
 1999  return DAG.getSelectCC(DL,
2000  LHS.getOperand(0),
2001  LHS.getOperand(1),
2002  LHS.getOperand(2),
2003  LHS.getOperand(3),
2004  LHSCC);
2005  break;
2006  }
2007  }
2008  return SDValue();
2009  }
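  // Worked example: selectcc (selectcc x, y, a, b, setlt), b, a, b, seteq
  // asks whether the inner select produced b, i.e. whether the inner
  // condition was false, so the whole expression folds to
  // selectcc x, y, a, b, setge.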
2010 
2011  case AMDGPUISD::R600_EXPORT: {
2012  SDValue Arg = N->getOperand(1);
2013  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2014  break;
2015 
2016  SDValue NewArgs[8] = {
2017  N->getOperand(0), // Chain
2018  SDValue(),
2019  N->getOperand(2), // ArrayBase
2020  N->getOperand(3), // Type
2021  N->getOperand(4), // SWZ_X
2022  N->getOperand(5), // SWZ_Y
2023  N->getOperand(6), // SWZ_Z
2024  N->getOperand(7) // SWZ_W
2025  };
2026  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
2027  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
2028  }
2029  case AMDGPUISD::TEXTURE_FETCH: {
2030  SDValue Arg = N->getOperand(1);
2031  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2032  break;
2033 
2034  SDValue NewArgs[19] = {
2035  N->getOperand(0),
2036  N->getOperand(1),
2037  N->getOperand(2),
2038  N->getOperand(3),
2039  N->getOperand(4),
2040  N->getOperand(5),
2041  N->getOperand(6),
2042  N->getOperand(7),
2043  N->getOperand(8),
2044  N->getOperand(9),
2045  N->getOperand(10),
2046  N->getOperand(11),
2047  N->getOperand(12),
2048  N->getOperand(13),
2049  N->getOperand(14),
2050  N->getOperand(15),
2051  N->getOperand(16),
2052  N->getOperand(17),
2053  N->getOperand(18),
2054  };
2055  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2056  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
2057  }
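  // OptimizeSwizzle reads four swizzle slots, so &NewArgs[2] lets it
  // rewrite operands 2..5 in place; the remaining operands of the
  // TEXTURE_FETCH node are forwarded unchanged.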
2058 
2059  case ISD::LOAD: {
2060  LoadSDNode *LoadNode = cast<LoadSDNode>(N);
2061  SDValue Ptr = LoadNode->getBasePtr();
2062  if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
2063  isa<ConstantSDNode>(Ptr))
2064  return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
2065  break;
2066  }
2067 
2068  default: break;
2069  }
2070 
 2071  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
 2072 }
2073 
2074 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
2075  SDValue &Src, SDValue &Neg, SDValue &Abs,
2076  SDValue &Sel, SDValue &Imm,
2077  SelectionDAG &DAG) const {
2078  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2079  if (!Src.isMachineOpcode())
2080  return false;
2081 
2082  switch (Src.getMachineOpcode()) {
2083  case R600::FNEG_R600:
2084  if (!Neg.getNode())
2085  return false;
2086  Src = Src.getOperand(0);
2087  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2088  return true;
2089  case R600::FABS_R600:
2090  if (!Abs.getNode())
2091  return false;
2092  Src = Src.getOperand(0);
2093  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2094  return true;
2095  case R600::CONST_COPY: {
2096  unsigned Opcode = ParentNode->getMachineOpcode();
2097  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2098 
2099  if (!Sel.getNode())
2100  return false;
2101 
2102  SDValue CstOffset = Src.getOperand(0);
2103  if (ParentNode->getValueType(0).isVector())
2104  return false;
2105 
 2106  // Gather constant values
2107  int SrcIndices[] = {
2108  TII->getOperandIdx(Opcode, R600::OpName::src0),
2109  TII->getOperandIdx(Opcode, R600::OpName::src1),
2110  TII->getOperandIdx(Opcode, R600::OpName::src2),
2111  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2112  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2113  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2114  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2115  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2116  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2117  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2118  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2119  };
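  // Each operand slot of the parent already reading ALU_CONST
  // contributes its sel value; together with the new CstOffset they
  // must satisfy the KCache constant-read limits checked below.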
2120  std::vector<unsigned> Consts;
2121  for (int OtherSrcIdx : SrcIndices) {
2122  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2123  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2124  continue;
2125  if (HasDst) {
2126  OtherSrcIdx--;
2127  OtherSelIdx--;
2128  }
2129  if (RegisterSDNode *Reg =
2130  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2131  if (Reg->getReg() == R600::ALU_CONST) {
2132  ConstantSDNode *Cst
2133  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2134  Consts.push_back(Cst->getZExtValue());
2135  }
2136  }
2137  }
2138 
2139  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2140  Consts.push_back(Cst->getZExtValue());
2141  if (!TII->fitsConstReadLimitations(Consts)) {
2142  return false;
2143  }
2144 
2145  Sel = CstOffset;
2146  Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2147  return true;
2148  }
2149  case R600::MOV_IMM_GLOBAL_ADDR:
 2150  // Check if the Imm slot is already used; same check as in the MOV_IMM cases below.
2151  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2152  return false;
2153  Imm = Src.getOperand(0);
2154  Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2155  return true;
2156  case R600::MOV_IMM_I32:
2157  case R600::MOV_IMM_F32: {
2158  unsigned ImmReg = R600::ALU_LITERAL_X;
2159  uint64_t ImmValue = 0;
2160 
2161  if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
 2162  ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
 2163  float FloatValue = FPC->getValueAPF().convertToFloat();
2164  if (FloatValue == 0.0) {
2165  ImmReg = R600::ZERO;
2166  } else if (FloatValue == 0.5) {
2167  ImmReg = R600::HALF;
2168  } else if (FloatValue == 1.0) {
2169  ImmReg = R600::ONE;
2170  } else {
2171  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2172  }
2173  } else {
 2174  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
 2175  uint64_t Value = C->getZExtValue();
2176  if (Value == 0) {
2177  ImmReg = R600::ZERO;
2178  } else if (Value == 1) {
2179  ImmReg = R600::ONE_INT;
2180  } else {
2181  ImmValue = Value;
2182  }
2183  }
2184 
2185  // Check that we aren't already using an immediate.
2186  // XXX: It's possible for an instruction to have more than one
2187  // immediate operand, but this is not supported yet.
2188  if (ImmReg == R600::ALU_LITERAL_X) {
2189  if (!Imm.getNode())
2190  return false;
 2191  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
 2192  assert(C);
2193  if (C->getZExtValue())
2194  return false;
2195  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2196  }
2197  Src = DAG.getRegister(ImmReg, MVT::i32);
2198  return true;
2199  }
2200  default:
2201  return false;
2202  }
2203 }
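// Contract summary: on success, FoldOperand has rewritten the caller's
// Src/Neg/Abs/Sel/Imm references in place, so the caller must rebuild
// the machine node from its updated operand list, as PostISelFolding
// below does after each successful fold.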
2204 
2205 /// Fold the instructions after selecting them
2206 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2207  SelectionDAG &DAG) const {
2208  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2209  if (!Node->isMachineOpcode())
2210  return Node;
2211 
2212  unsigned Opcode = Node->getMachineOpcode();
2213  SDValue FakeOp;
2214 
2215  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2216 
2217  if (Opcode == R600::DOT_4) {
2218  int OperandIdx[] = {
2219  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2220  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2221  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2222  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2223  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2224  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2225  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2226  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2227  };
2228  int NegIdx[] = {
2229  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2230  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2231  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2232  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2233  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2234  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2235  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2236  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2237  };
2238  int AbsIdx[] = {
2239  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2240  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2241  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2242  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2243  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2244  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2245  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2246  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2247  };
2248  for (unsigned i = 0; i < 8; i++) {
2249  if (OperandIdx[i] < 0)
2250  return Node;
2251  SDValue &Src = Ops[OperandIdx[i] - 1];
2252  SDValue &Neg = Ops[NegIdx[i] - 1];
2253  SDValue &Abs = Ops[AbsIdx[i] - 1];
2254  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2255  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2256  if (HasDst)
2257  SelIdx--;
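  // getOperandIdx counts MachineInstr operands, which include the def;
  // the SDNode operand vector Ops does not, hence the "- 1" on the
  // source indices above and this decrement when the opcode has a dst.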
2258  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2259  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2260  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2261  }
2262  } else if (Opcode == R600::REG_SEQUENCE) {
2263  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2264  SDValue &Src = Ops[i];
2265  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2266  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2267  }
2268  } else {
2269  if (!TII->hasInstrModifiers(Opcode))
2270  return Node;
2271  int OperandIdx[] = {
2272  TII->getOperandIdx(Opcode, R600::OpName::src0),
2273  TII->getOperandIdx(Opcode, R600::OpName::src1),
2274  TII->getOperandIdx(Opcode, R600::OpName::src2)
2275  };
2276  int NegIdx[] = {
2277  TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2278  TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2279  TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2280  };
2281  int AbsIdx[] = {
2282  TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2283  TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2284  -1
2285  };
2286  for (unsigned i = 0; i < 3; i++) {
2287  if (OperandIdx[i] < 0)
2288  return Node;
2289  SDValue &Src = Ops[OperandIdx[i] - 1];
2290  SDValue &Neg = Ops[NegIdx[i] - 1];
2291  SDValue FakeAbs;
2292  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2293  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2294  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2295  int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2296  if (HasDst) {
2297  SelIdx--;
2298  ImmIdx--;
2299  }
2300  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2301  SDValue &Imm = Ops[ImmIdx];
2302  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2303  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2304  }
2305  }
2306 
2307  return Node;
2308 }