//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#include "R600GenCallingConv.inc"

R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
                                       const R600Subtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);

  computeRegisterProperties(Subtarget->getRegisterInfo());

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
  // We need to include these since trunc STORES to PRIVATE need
  // special handling to accommodate RMW
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::FCEIL, MVT::f64, Custom);
  setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
  setOperationAction(ISD::FRINT, MVT::f64, Custom);
  setOperationAction(ISD::FFLOOR, MVT::f64, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);

  if (!Subtarget->hasFMA()) {
    setOperationAction(ISD::FMA, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f64, Expand);
  }

  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
  // need it for R600.
  if (!Subtarget->hasFP32Denormals())
    setOperationAction(ISD::FMAD, MVT::f32, Legal);

  if (!Subtarget->hasBFI()) {
    // fcopysign can be done in a single instruction with BFI.
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  }

  if (!Subtarget->hasBCNT(32))
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);

  if (!Subtarget->hasBCNT(64))
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);

  if (Subtarget->hasFFBH())
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);

  if (Subtarget->hasFFBL())
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);

  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
  // need it for R600.
  if (Subtarget->hasBFE())
    setHasExtractBitsInsn(true);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  // LLVM will expand these to atomic_cmp_swap(0)
  // and atomic_swap, respectively.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setSchedulingPreference(Sched::Source);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
  setTargetDAGCombine(ISD::LOAD);
}

static inline bool isEOP(MachineBasicBlock::iterator I) {
  if (std::next(I) == I->getParent()->end())
    return false;
  return std::next(I)->getOpcode() == R600::RETURN;
}

MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = MI;
  const R600InstrInfo *TII = Subtarget->getInstrInfo();

  switch (MI.getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI.getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
          MI.getOpcode() == R600::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
      for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
        NewMI.add(MI.getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;

  case R600::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case R600::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case R600::MASK_WRITE: {
    Register maskedRegister = MI.getOperand(0).getReg();
    assert(Register::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case R600::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
                                                            .getFPImm()
                                                            ->getValueAPF()
                                                            .bitcastToAPInt()
                                                            .getZExtValue());
    break;

  case R600::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;

  case R600::MOV_IMM_GLOBAL_ADDR: {
    // TODO: Perhaps combine this instruction with the next if possible
    auto MIB = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
    // TODO: Ugh this is rather ugly
    MIB->getOperand(Idx) = MI.getOperand(1);
    break;
  }

  case R600::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
    TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

  case R600::RAT_WRITE_CACHELESS_32_eg:
  case R600::RAT_WRITE_CACHELESS_64_eg:
  case R600::RAT_WRITE_CACHELESS_128_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::RAT_STORE_TYPED_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
        .add(MI.getOperand(0));
    break;

  case R600::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::EG_ExportSwz:
  case R600::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
          NextExportInst->getOpcode() == R600::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
                                             .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(5))
        .add(MI.getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case R600::RETURN: {
    return BB;
  }
  }

  MI.eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case Intrinsic::r600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32)  // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    case Intrinsic::r600_tex:
    case Intrinsic::r600_texc: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case Intrinsic::r600_tex:
        TextureOp = 0;
        break;
      case Intrinsic::r600_texc:
        TextureOp = 1;
        break;
      default:
        llvm_unreachable("unhandled texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case Intrinsic::r600_dot4: {
      SDValue Args[8] = {
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(0, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(1, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(2, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                    DAG.getConstant(3, DL, MVT::i32)),
        DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                    DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_implicitarg_ptr: {
      MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
      uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
      return DAG.getConstant(ByteOffset, DL, PtrVT);
    }
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Z, VT);

    case Intrinsic::r600_recipsqrt_ieee:
      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_recipsqrt_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
    default:
      return Op;
    }

    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
      return;
    }
    // Since we don't care about out of bounds values we can use FP_TO_SINT for
    // uints too. The DAGLegalizer code for uint considers some extra cases
    // which are not necessary here.
    LLVM_FALLTHROUGH;
  case ISD::FP_TO_SINT: {
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
      return;
    }

    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();
  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);

  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
      DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}

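// Annotation added for this listing (not part of the upstream source):
// 0.15915494309 is 1/(2*pi), so FRACT(x/(2*pi) + 0.5) - 0.5 recenters the
// angle into [-0.5, 0.5] "turns" for the hardware TRIG input. For example,
// with x = 5*pi/2: x/(2*pi) + 0.5 = 1.75, FRACT(1.75) = 0.75, and
// 0.75 - 0.5 = 0.25 turns, the same angle as pi/2 radians.
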
SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the Shift == 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

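// Annotation added for this listing, tracing the lowering above for i32
// halves {Lo, Hi} and Shift = 4 (a "small" shift): CompShift = 27, so
//   Overflow = (Lo >> 27) >> 1 == Lo >> 28   (the four bits shifted out of Lo)
//   Hi = (Hi << 4) | Overflow,  Lo = Lo << 4.
// For Shift = 36 the SETULT selects pick the "big" results instead:
//   Hi = Lo << (36 - 32),  Lo = 0.
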
SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the Shift == 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT,VT), Lo, Hi);
}

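// Annotation added for this listing: the mirror image of LowerSHLParts. For
// Shift = 4, Overflow = (Hi << 27) << 1 moves the low four bits of Hi to the
// top of Lo, so Lo = (Lo >> 4) | Overflow and Hi = Hi >> 4 (arithmetic shift
// for SRA_PARTS). For Shift >= 32, Lo = Hi >> (Shift - 32) and Hi becomes
// zero (SRL) or the sign fill Hi >> 31 (SRA).
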
SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

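// Annotation added for this listing: AMDGPUISD::CARRY/BORROW compute the
// carry/borrow bit of the i32 add/sub. The SIGN_EXTEND_INREG from i1 widens
// that bit into the 0 / -1 encoding R600 uses for booleans, e.g.
// UADDO(0xFFFFFFFF, 1) yields Res = 0 and an overflow value of -1.
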
SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

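// Annotation added for this listing: these helpers back the i1 cases in
// ReplaceNodeResults above. An in-range float can only convert to a true i1
// if it is exactly the boolean constant, so fp_to_uint reduces to
// setcc(x == 1.0f) and fp_to_sint to setcc(x == -1.0f); out-of-range inputs
// are undefined for FP_TO_UINT/FP_TO_SINT anyway.
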
SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                           AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)));
}

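// Annotation added for this listing: implicit kernel parameters live in
// PARAM_I_ADDRESS at dword granularity, so e.g. r600_read_local_size_x
// (DwordOffset 6 in LowerOperation above) loads from byte offset 6 * 4 = 24.
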
bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  return isAllOnesConstant(Op);
}

bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  return isNullConstant(Op);
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1,  0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1,  0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0,   f32, f32, cc_supported
  // select_cc i32, 0,   i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                                     Cond, Zero,
                                     True, False,
                                     DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32-bit sub-registers), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}

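// Annotation added for this listing: with StackWidth == 1 only one 32-bit
// channel per register is addressable, so byte address 8 maps to register
// index 8 >> 2 == 2; with StackWidth == 2 a register covers 8 bytes (>> 3),
// and with StackWidth == 4 the full 16-byte register is used (>> 4).
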
void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

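// Annotation added for this listing: for StackWidth == 2 the mapping above
// gives ElemIdx 0 -> (Channel 0, PtrIncr 0), 1 -> (Channel 1, PtrIncr 0) and
// 2 -> (Channel 0, PtrIncr 1), i.e. the third element starts a new register.
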
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  // TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);

  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue OldChain = Store->getChain();
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}

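// Annotation added for this listing, tracing the RMW sequence above for an
// i8 store of value V to private byte address 6: Ptr = 4, ByteIdx = 2,
// ShiftAmt = 16; the loaded dword is ANDed with ~(0xff << 16) to clear bits
// [23:16] and then ORed with (V & 0xff) << 16 before being stored back.
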
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  const bool TruncatingStore = StoreNode->isTruncatingStore();

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
       TruncatingStore) &&
      VT.isVector()) {
    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      // TODO: can the chain be replaced without creating a new store?
      SDValue NewStore = DAG.getTruncStore(
          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
          MemVT, StoreNode->getAlignment(),
          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
      StoreNode = cast<StoreSDNode>(NewStore);
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  unsigned Align = StoreNode->getAlignment();
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(
          MemVT, AS, Align, StoreNode->getMemOperand()->getFlags(), nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
    if (TruncatingStore) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlignment() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isIndexed()) {
        llvm_unreachable("Indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}

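// Annotation added for this listing: AMDGPUISD::DWORDADDR tags a pointer that
// has already been converted from a byte address to a dword index (byte
// address 12 becomes dword address 3); stores already carrying the tag fall
// through to the selection patterns via the final return SDValue().
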
// return (512 + (kc_bank << 12))
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

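// Annotation added for this listing: each constant buffer (kc_bank) gets a
// 4096-dword slot starting at 512, matching 512 + (kc_bank << 12); e.g.
// CONSTANT_BUFFER_0 -> 512 and CONSTANT_BUFFER_2 -> 512 + 8192 = 8704.
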
SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlignment() >= MemVT.getStoreSize());

  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  // Get offset within the register.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  SDValue Ops[] = {
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      VT.isVector()) {
    return scalarizeVectorLoad(LoadNode, DAG);
  }

  // This is still used for explicit load from addrspace(8)
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
    } else {
      // TODO: Does this even work?
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
                           DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                                       DAG.getConstant(4, DL, MVT::i32)),
                           DAG.getConstant(LoadNode->getAddressSpace() -
                                           AMDGPUAS::CONSTANT_BUFFER_0, DL,
                                           MVT::i32)
                           );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }
  return SDValue();
}

SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond  = Op.getOperand(1);
  SDValue Jump  = Op.getOperand(2);

  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
                     Chain, Jump, Cond);
}

SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = Subtarget->getFrameLowering();

  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);

  unsigned FrameIndex = FIN->getIndex();
  unsigned IgnoredFrameReg;
  unsigned Offset =
      TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
                         Op.getValueType());
}

CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                  bool IsVarArg) const {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    llvm_unreachable("kernels should not be handled here");
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_LS:
    return CC_R600;
  default:
    report_fatal_error("Unsupported calling convention.");
  }
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();
  SmallVector<ISD::InputArg, 8> LocalIns;

  if (AMDGPU::isShader(CallConv)) {
    CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
  } else {
    analyzeFormalArgumentsCompute(CCInfo, Ins);
  }

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (AMDGPU::isShader(CallConv)) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contain information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
        PtrInfo,
        MemVT, Alignment, MachineMemOperand::MONonTemporal |
                              MachineMemOperand::MODereferenceable |
                              MachineMemOperand::MOInvariant);

    InVals.push_back(Arg);
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
                                          const SelectionDAG &DAG) const {
  // Local and Private addresses do not handle vectors. Limit to i32
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
    return (MemVT.getSizeInBits() <= 32);
  }
  return true;
}

bool R600TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, unsigned Align, MachineMemOperand::Flags Flags,
    bool *IsFast) const {
  if (IsFast)
    *IsFast = false;

  if (!VT.isSimple() || VT == MVT::Other)
    return false;

  if (VT.bitsLT(MVT::i32))
    return false;

  // TODO: This is a rough estimate.
  if (IsFast)
    *IsFast = true;

  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}

static SDValue
CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry,
                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;
    // Fix spurious warning with gcc 7.3 -O3
    //    warning: array subscript is above array bounds [-Warray-bounds]
    //    if (NewBldVec[i] == NewBldVec[j]) {
    //        ~~~~~~~~~~~^
    if (i >= 4)
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

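// Annotation added for this listing: for build_vector(x, 0.0f, 1.0f, x) the
// loop above yields the remap {1 -> 4 (SEL_0), 2 -> 5 (SEL_1), 3 -> 0}; the
// constants and the duplicate are encoded purely in the swizzle, and only
// element 0 still occupies a real channel.
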
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  bool isUnmovable[4] = {false, false, false, false};
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
                         ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
                         ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

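// Annotation added for this listing: if, say, element 2 of the incoming
// build_vector is extract_vector_elt(V, 0) and lane 0 is not already pinned
// by isUnmovable, the swap above moves that element into lane 0 so the final
// swizzle can read it from its original source channel.
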
SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
                                            SelectionDAG &DAG,
                                            const SDLoc &DL) const {
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  return BuildVector;
}

SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
                                            SelectionDAG &DAG) const {
  SDLoc DL(LoadNode);
  EVT VT = LoadNode->getValueType(0);
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();
  assert(isa<ConstantSDNode>(Ptr));

  // TODO: Support smaller loads
  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 ||
      !ISD::isNON_EXTLoad(LoadNode))
    return SDValue();

  if (LoadNode->getAlignment() < 4)
    return SDValue();

  int ConstantBlock = ConstantAddressBlock(Block);

  SDValue Slots[4];
  for (unsigned i = 0; i < 4; i++) {
    // We want Const position encoded with the following formula:
    // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
    // const_index is Ptr computed by llvm using an alignment of 16.
    // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
    // then div by 4 at the ISel step
    SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
        DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
    Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
  }
  EVT NewVT = MVT::v4i32;
  unsigned NumElements = 4;
  if (VT.isVector()) {
    NewVT = VT;
    NumElements = VT.getVectorNumElements();
  }
  SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
  if (!VT.isVector()) {
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                         DAG.getConstant(0, DL, MVT::i32));
  }
  SDValue MergedValues[2] = {
    Result,
    Chain
  };
  return DAG.getMergeValues(MergedValues, DL);
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

1851  DAGCombinerInfo &DCI) const {
1852  SelectionDAG &DAG = DCI.DAG;
1853  SDLoc DL(N);
1854 
1855  switch (N->getOpcode()) {
1856  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1857  case ISD::FP_ROUND: {
1858  SDValue Arg = N->getOperand(0);
1859  if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1860  return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1861  Arg.getOperand(0));
1862  }
1863  break;
1864  }
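// Editorial example: IR such as
//   %d = uitofp i32 %a to double
//   %f = fptrunc double %d to float
// selects to (f32 fp_round (f64 uint_to_fp %a)) and is combined here into a
// single (f32 uint_to_fp %a), avoiding the f64 intermediate.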
1865 
1866  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1867  // (i32 select_cc f32, f32, -1, 0 cc)
1868  //
1869  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1870  // this to one of the SET*_DX10 instructions.
1871  case ISD::FP_TO_SINT: {
1872  SDValue FNeg = N->getOperand(0);
1873  if (FNeg.getOpcode() != ISD::FNEG) {
1874  return SDValue();
1875  }
1876  SDValue SelectCC = FNeg.getOperand(0);
1877  if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1878  SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1879  SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1880  !isHWTrueValue(SelectCC.getOperand(2)) ||
1881  !isHWFalseValue(SelectCC.getOperand(3))) {
1882  return SDValue();
1883  }
1884 
1885  return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1886  SelectCC.getOperand(0), // LHS
1887  SelectCC.getOperand(1), // RHS
1888  DAG.getConstant(-1, DL, MVT::i32), // True
1889  DAG.getConstant(0, DL, MVT::i32), // False
1890  SelectCC.getOperand(4)); // CC
1891 
1892 
1893  }
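// Editorial sketch of the fold above: a negated boolean-to-float conversion
//   (i32 fp_to_sint (fneg (f32 select_cc a, b, 1.0, 0.0, cc)))
// becomes
//   (i32 select_cc a, b, -1, 0, cc)
// since fp_to_sint(fneg(1.0)) == -1 and fp_to_sint(fneg(0.0)) == 0, which
// matches the SET*_DX10 instructions directly.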
1894 
1895  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1896  // => build_vector elt0, ... , NewEltIdx, ... , eltN
1897  case ISD::INSERT_VECTOR_ELT: {
1898  SDValue InVec = N->getOperand(0);
1899  SDValue InVal = N->getOperand(1);
1900  SDValue EltNo = N->getOperand(2);
1901 
1902  // If the inserted element is an UNDEF, just use the input vector.
1903  if (InVal.isUndef())
1904  return InVec;
1905 
1906  EVT VT = InVec.getValueType();
1907 
1908  // If we can't generate a legal BUILD_VECTOR, exit
1909  if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1910  return SDValue();
1911 
1912  // Check that we know which element is being inserted
1913  if (!isa<ConstantSDNode>(EltNo))
1914  return SDValue();
1915  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1916 
1917  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1918  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1919  // vector elements.
1920  SmallVector<SDValue, 8> Ops;
1921  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1922  Ops.append(InVec.getNode()->op_begin(),
1923  InVec.getNode()->op_end());
1924  } else if (InVec.isUndef()) {
1925  unsigned NElts = VT.getVectorNumElements();
1926  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1927  } else {
1928  return SDValue();
1929  }
1930 
1931  // Insert the element
1932  if (Elt < Ops.size()) {
1933  // All the operands of BUILD_VECTOR must have the same type;
1934  // we enforce that here.
1935  EVT OpVT = Ops[0].getValueType();
1936  if (InVal.getValueType() != OpVT)
1937  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1938  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1939  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1940  Ops[Elt] = InVal;
1941  }
1942 
1943  // Return the new vector
1944  return DAG.getBuildVector(VT, DL, Ops);
1945  }
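// Editorial example: inserting NewElt at index 2 of build_vector
// (e0, e1, e2, e3) yields build_vector (e0, e1, NewElt, e3). Inserting into
// an undef vector first materializes a build_vector of undefs and then
// overwrites the requested lane; InVal is any-extended or truncated above so
// all build_vector operands share one type.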
1946 
1947  // extract_vector_elt (build_vector) nodes generated by custom lowering
1948  // also need to be custom combined here.
1949  case ISD::EXTRACT_VECTOR_ELT: {
1950  SDValue Arg = N->getOperand(0);
1951  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1952  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1953  unsigned Element = Const->getZExtValue();
1954  return Arg->getOperand(Element);
1955  }
1956  }
1957  if (Arg.getOpcode() == ISD::BITCAST &&
1958  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1959  (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1960  Arg.getValueType().getVectorNumElements())) {
1961  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1962  unsigned Element = Const->getZExtValue();
1963  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1964  Arg->getOperand(0).getOperand(Element));
1965  }
1966  }
1967  break;
1968  }
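// Editorial example: (extract_vector_elt (build_vector e0, e1, e2, e3), 2)
// folds directly to e2. In the bitcast form, the extracted source element is
// bitcast to the result type instead, which is only safe because both vector
// types have the same element count (checked above).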
1969 
1970  case ISD::SELECT_CC: {
1971  // Try common optimizations
1972  if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1973  return Ret;
1974 
1975  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1976  // selectcc x, y, a, b, inv(cc)
1977  //
1978  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1979  // selectcc x, y, a, b, cc
1980  SDValue LHS = N->getOperand(0);
1981  if (LHS.getOpcode() != ISD::SELECT_CC) {
1982  return SDValue();
1983  }
1984 
1985  SDValue RHS = N->getOperand(1);
1986  SDValue True = N->getOperand(2);
1987  SDValue False = N->getOperand(3);
1988  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1989 
1990  if (LHS.getOperand(2).getNode() != True.getNode() ||
1991  LHS.getOperand(3).getNode() != False.getNode() ||
1992  RHS.getNode() != False.getNode()) {
1993  return SDValue();
1994  }
1995 
1996  switch (NCC) {
1997  default: return SDValue();
1998  case ISD::SETNE: return LHS;
1999  case ISD::SETEQ: {
2000  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
2001  LHSCC = ISD::getSetCCInverse(LHSCC,
2002  LHS.getOperand(0).getValueType().isInteger());
2003  if (DCI.isBeforeLegalizeOps() ||
2004  isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
2005  return DAG.getSelectCC(DL,
2006  LHS.getOperand(0),
2007  LHS.getOperand(1),
2008  LHS.getOperand(2),
2009  LHS.getOperand(3),
2010  LHSCC);
2011  break;
2012  }
2013  }
2014  return SDValue();
2015  }
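// Editorial walk-through, with L = (selectcc x, y, a, b, setlt):
//   (selectcc L, b, a, b, setne) -> L
//   (selectcc L, b, a, b, seteq) -> (selectcc x, y, a, b, setge)
// i.e. the seteq case re-emits the inner node with the inverted condition
// code (setge here, via ISD::getSetCCInverse), provided that condition code
// is legal or we are still before legalize.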
2016 
2017  case AMDGPUISD::R600_EXPORT: {
2018  SDValue Arg = N->getOperand(1);
2019  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2020  break;
2021 
2022  SDValue NewArgs[8] = {
2023  N->getOperand(0), // Chain
2024  SDValue(),
2025  N->getOperand(2), // ArrayBase
2026  N->getOperand(3), // Type
2027  N->getOperand(4), // SWZ_X
2028  N->getOperand(5), // SWZ_Y
2029  N->getOperand(6), // SWZ_Z
2030  N->getOperand(7) // SWZ_W
2031  };
2032  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
2033  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
2034  }
2035  case AMDGPUISD::TEXTURE_FETCH: {
2036  SDValue Arg = N->getOperand(1);
2037  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2038  break;
2039 
2040  SDValue NewArgs[19] = {
2041  N->getOperand(0),
2042  N->getOperand(1),
2043  N->getOperand(2),
2044  N->getOperand(3),
2045  N->getOperand(4),
2046  N->getOperand(5),
2047  N->getOperand(6),
2048  N->getOperand(7),
2049  N->getOperand(8),
2050  N->getOperand(9),
2051  N->getOperand(10),
2052  N->getOperand(11),
2053  N->getOperand(12),
2054  N->getOperand(13),
2055  N->getOperand(14),
2056  N->getOperand(15),
2057  N->getOperand(16),
2058  N->getOperand(17),
2059  N->getOperand(18),
2060  };
2061  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2062  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
2063  }
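// Editorial note: both node kinds keep their swizzle selectors as trailing
// scalar operands (SWZ_X..SWZ_W at operands 4..7 for exports, operands 2..5
// for texture fetches), so OptimizeSwizzle can rewrite those constants in
// place while canonicalizing the build_vector they index into.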
2064 
2065  case ISD::LOAD: {
2066  LoadSDNode *LoadNode = cast<LoadSDNode>(N);
2067  SDValue Ptr = LoadNode->getBasePtr();
2068  if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
2069  isa<ConstantSDNode>(Ptr))
2070  return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
2071  break;
2072  }
2073 
2074  default: break;
2075  }
2076 
2077  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
2078 }
2079 
2080 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
2081  SDValue &Src, SDValue &Neg, SDValue &Abs,
2082  SDValue &Sel, SDValue &Imm,
2083  SelectionDAG &DAG) const {
2084  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2085  if (!Src.isMachineOpcode())
2086  return false;
2087 
2088  switch (Src.getMachineOpcode()) {
2089  case R600::FNEG_R600:
2090  if (!Neg.getNode())
2091  return false;
2092  Src = Src.getOperand(0);
2093  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2094  return true;
2095  case R600::FABS_R600:
2096  if (!Abs.getNode())
2097  return false;
2098  Src = Src.getOperand(0);
2099  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2100  return true;
2101  case R600::CONST_COPY: {
2102  unsigned Opcode = ParentNode->getMachineOpcode();
2103  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2104 
2105  if (!Sel.getNode())
2106  return false;
2107 
2108  SDValue CstOffset = Src.getOperand(0);
2109  if (ParentNode->getValueType(0).isVector())
2110  return false;
2111 
2112  // Gather constant values.
2113  int SrcIndices[] = {
2114  TII->getOperandIdx(Opcode, R600::OpName::src0),
2115  TII->getOperandIdx(Opcode, R600::OpName::src1),
2116  TII->getOperandIdx(Opcode, R600::OpName::src2),
2117  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2118  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2119  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2120  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2121  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2122  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2123  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2124  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2125  };
2126  std::vector<unsigned> Consts;
2127  for (int OtherSrcIdx : SrcIndices) {
2128  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2129  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2130  continue;
2131  if (HasDst) {
2132  OtherSrcIdx--;
2133  OtherSelIdx--;
2134  }
2135  if (RegisterSDNode *Reg =
2136  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2137  if (Reg->getReg() == R600::ALU_CONST) {
2138  ConstantSDNode *Cst
2139  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2140  Consts.push_back(Cst->getZExtValue());
2141  }
2142  }
2143  }
2144 
2145  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2146  Consts.push_back(Cst->getZExtValue());
2147  if (!TII->fitsConstReadLimitations(Consts)) {
2148  return false;
2149  }
2150 
2151  Sel = CstOffset;
2152  Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2153  return true;
2154  }
2155  case R600::MOV_IMM_GLOBAL_ADDR:
2156  // Check if the Imm slot is already used, as in the MOV_IMM_* case below.
2157  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2158  return false;
2159  Imm = Src.getOperand(0);
2160  Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2161  return true;
2162  case R600::MOV_IMM_I32:
2163  case R600::MOV_IMM_F32: {
2164  unsigned ImmReg = R600::ALU_LITERAL_X;
2165  uint64_t ImmValue = 0;
2166 
2167  if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2168  ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2169  float FloatValue = FPC->getValueAPF().convertToFloat();
2170  if (FloatValue == 0.0) {
2171  ImmReg = R600::ZERO;
2172  } else if (FloatValue == 0.5) {
2173  ImmReg = R600::HALF;
2174  } else if (FloatValue == 1.0) {
2175  ImmReg = R600::ONE;
2176  } else {
2177  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2178  }
2179  } else {
2180  ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
2181  uint64_t Value = C->getZExtValue();
2182  if (Value == 0) {
2183  ImmReg = R600::ZERO;
2184  } else if (Value == 1) {
2185  ImmReg = R600::ONE_INT;
2186  } else {
2187  ImmValue = Value;
2188  }
2189  }
2190 
2191  // Check that we aren't already using an immediate.
2192  // XXX: It's possible for an instruction to have more than one
2193  // immediate operand, but this is not supported yet.
2194  if (ImmReg == R600::ALU_LITERAL_X) {
2195  if (!Imm.getNode())
2196  return false;
2197  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2198  assert(C);
2199  if (C->getZExtValue())
2200  return false;
2201  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2202  }
2203  Src = DAG.getRegister(ImmReg, MVT::i32);
2204  return true;
2205  }
2206  default:
2207  return false;
2208  }
2209 }
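// Editorial sketch: for an ALU node whose src0 comes from FNEG_R600, the
// FNEG case above replaces Src with the FNEG input and sets the matching
// src0_neg operand to 1, absorbing the negation into the instruction's
// source modifier instead of leaving it as a separate ALU operation.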
2210 
2211 /// Fold the instructions after selecting them
2212 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2213  SelectionDAG &DAG) const {
2214  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2215  if (!Node->isMachineOpcode())
2216  return Node;
2217 
2218  unsigned Opcode = Node->getMachineOpcode();
2219  SDValue FakeOp;
2220 
2221  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2222 
2223  if (Opcode == R600::DOT_4) {
2224  int OperandIdx[] = {
2225  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2226  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2227  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2228  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2229  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2230  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2231  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2232  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2233  };
2234  int NegIdx[] = {
2235  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2236  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2237  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2238  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2239  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2240  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2241  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2242  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2243  };
2244  int AbsIdx[] = {
2245  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2246  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2247  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2248  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2249  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2250  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2251  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2252  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2253  };
2254  for (unsigned i = 0; i < 8; i++) {
2255  if (OperandIdx[i] < 0)
2256  return Node;
2257  SDValue &Src = Ops[OperandIdx[i] - 1];
2258  SDValue &Neg = Ops[NegIdx[i] - 1];
2259  SDValue &Abs = Ops[AbsIdx[i] - 1];
2260  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2261  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2262  if (HasDst)
2263  SelIdx--;
2264  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2265  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2266  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2267  }
2268  } else if (Opcode == R600::REG_SEQUENCE) {
2269  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2270  SDValue &Src = Ops[i];
2271  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2272  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2273  }
2274  } else {
2275  if (!TII->hasInstrModifiers(Opcode))
2276  return Node;
2277  int OperandIdx[] = {
2278  TII->getOperandIdx(Opcode, R600::OpName::src0),
2279  TII->getOperandIdx(Opcode, R600::OpName::src1),
2280  TII->getOperandIdx(Opcode, R600::OpName::src2)
2281  };
2282  int NegIdx[] = {
2283  TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2284  TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2285  TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2286  };
2287  int AbsIdx[] = {
2288  TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2289  TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2290  -1
2291  };
2292  for (unsigned i = 0; i < 3; i++) {
2293  if (OperandIdx[i] < 0)
2294  return Node;
2295  SDValue &Src = Ops[OperandIdx[i] - 1];
2296  SDValue &Neg = Ops[NegIdx[i] - 1];
2297  SDValue FakeAbs;
2298  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2299  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2300  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2301  int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2302  if (HasDst) {
2303  SelIdx--;
2304  ImmIdx--;
2305  }
2306  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2307  SDValue &Imm = Ops[ImmIdx];
2308  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2309  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2310  }
2311  }
2312 
2313  return Node;
2314 }
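// Editorial note: TII->getOperandIdx returns indices into the MachineInstr
// operand list, which includes the dst def, while the SDNode operand vector
// Ops does not; hence the "- 1" offsets (and the SelIdx--/ImmIdx--
// adjustments) whenever a dst operand is present.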