//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPUFrameLowering.h"
#include "AMDGPUSubtarget.h"
#include "R600Defines.h"
#include "R600FrameLowering.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600RegisterInfo.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/ADT/APFloat.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#include "R600GenCallingConv.inc"

R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
                                       const R600Subtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);

  computeRegisterProperties(Subtarget->getRegisterInfo());

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, MVT::i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v2i32, Custom);
  setOperationAction(ISD::LOAD, MVT::v4i32, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i16, Custom);

    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i8, Custom);
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i16, Custom);
  }

  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
  setLoadExtAction(ISD::EXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v2i32, MVT::v2i1, Expand);

  setLoadExtAction(ISD::EXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::SEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);
  setLoadExtAction(ISD::ZEXTLOAD, MVT::v4i32, MVT::v4i1, Expand);

  setOperationAction(ISD::STORE, MVT::i8, Custom);
  setOperationAction(ISD::STORE, MVT::i32, Custom);
  setOperationAction(ISD::STORE, MVT::v2i32, Custom);
  setOperationAction(ISD::STORE, MVT::v4i32, Custom);

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
  // We need to include these since trunc STORES to PRIVATE need
  // special handling to accommodate RMW
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);

  // Set condition code actions
  setCondCodeAction(ISD::SETO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUO, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETOLE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETONE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUEQ, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGE, MVT::f32, Expand);
  setCondCodeAction(ISD::SETUGT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::f32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::f32, Expand);

  setCondCodeAction(ISD::SETLE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETLT, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULE, MVT::i32, Expand);
  setCondCodeAction(ISD::SETULT, MVT::i32, Expand);

  setOperationAction(ISD::FCOS, MVT::f32, Custom);
  setOperationAction(ISD::FSIN, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, MVT::v4i32, Expand);
  setOperationAction(ISD::SETCC, MVT::v2i32, Expand);

  setOperationAction(ISD::BR_CC, MVT::i32, Expand);
  setOperationAction(ISD::BR_CC, MVT::f32, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction(ISD::FCEIL, MVT::f64, Custom);
  setOperationAction(ISD::FTRUNC, MVT::f64, Custom);
  setOperationAction(ISD::FRINT, MVT::f64, Custom);
  setOperationAction(ISD::FFLOOR, MVT::f64, Custom);

  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);

  setOperationAction(ISD::SETCC, MVT::i32, Expand);
  setOperationAction(ISD::SETCC, MVT::f32, Expand);
  setOperationAction(ISD::FP_TO_UINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i1, Custom);
  setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
  setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);

  setOperationAction(ISD::SELECT, MVT::i32, Expand);
  setOperationAction(ISD::SELECT, MVT::f32, Expand);
  setOperationAction(ISD::SELECT, MVT::v2i32, Expand);
  setOperationAction(ISD::SELECT, MVT::v4i32, Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i1, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i1, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i8, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i16, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v2i32, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::v4i32, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v4f32, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v2f32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i32, Custom);
  setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4f32, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction(ISD::SHL_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRA_PARTS, MVT::i32, Custom);
  setOperationAction(ISD::SRL_PARTS, MVT::i32, Custom);

  if (!Subtarget->hasFMA()) {
    setOperationAction(ISD::FMA, MVT::f32, Expand);
    setOperationAction(ISD::FMA, MVT::f64, Expand);
  }

  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
  // need it for R600.
  if (!Subtarget->hasFP32Denormals())
    setOperationAction(ISD::FMAD, MVT::f32, Legal);

  if (!Subtarget->hasBFI()) {
    // fcopysign can be done in a single instruction with BFI.
    setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
    setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
  }

  if (!Subtarget->hasBCNT(32))
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);

  if (!Subtarget->hasBCNT(64))
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);

  if (Subtarget->hasFFBH())
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);

  if (Subtarget->hasFFBL())
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);

  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
  // need it for R600.
  if (Subtarget->hasBFE())
    setHasExtractBitsInsn(true);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs) {
    setOperationAction(ISD::ADDC, VT, Expand);
    setOperationAction(ISD::SUBC, VT, Expand);
    setOperationAction(ISD::ADDE, VT, Expand);
    setOperationAction(ISD::SUBE, VT, Expand);
  }

  // LLVM will expand these to atomic_cmp_swap(0)
  // and atomic_swap, respectively.
  setOperationAction(ISD::ATOMIC_LOAD, MVT::i32, Expand);
  setOperationAction(ISD::ATOMIC_STORE, MVT::i32, Expand);

  // We need to custom lower some of the intrinsics
  setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);

  setSchedulingPreference(Sched::Source);

  setTargetDAGCombine(ISD::FP_ROUND);
  setTargetDAGCombine(ISD::FP_TO_SINT);
  setTargetDAGCombine(ISD::EXTRACT_VECTOR_ELT);
  setTargetDAGCombine(ISD::SELECT_CC);
  setTargetDAGCombine(ISD::INSERT_VECTOR_ELT);
  setTargetDAGCombine(ISD::LOAD);
}

static inline bool isEOP(MachineBasicBlock::iterator I) {
  if (std::next(I) == I->getParent()->end())
    return false;
  return std::next(I)->getOpcode() == R600::RETURN;
}

MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = MI;
  const R600InstrInfo *TII = Subtarget->getInstrInfo();

  switch (MI.getOpcode()) {
  default:
    // Replace LDS_*_RET instructions that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI.getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      // LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
          MI.getOpcode() == R600::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
      for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
        NewMI.add(MI.getOperand(i));
      }
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;

  case R600::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case R600::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case R600::MASK_WRITE: {
    unsigned maskedRegister = MI.getOperand(0).getReg();
    assert(TargetRegisterInfo::isVirtualRegister(maskedRegister));
    MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case R600::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
        .getFPImm()
        ->getValueAPF()
        .bitcastToAPInt()
        .getZExtValue());
    break;

  case R600::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;

  case R600::MOV_IMM_GLOBAL_ADDR: {
    //TODO: Perhaps combine this instruction with the next if possible
    auto MIB = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
    //TODO: Ugh this is rather ugly
    MIB->getOperand(Idx) = MI.getOperand(1);
    break;
  }

  case R600::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
    TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

  case R600::RAT_WRITE_CACHELESS_32_eg:
  case R600::RAT_WRITE_CACHELESS_64_eg:
  case R600::RAT_WRITE_CACHELESS_128_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::RAT_STORE_TYPED_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
        .add(MI.getOperand(0));
    break;

  case R600::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::EG_ExportSwz:
  case R600::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
          NextExportInst->getOpcode() == R600::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(5))
        .add(MI.getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case R600::RETURN: {
    return BB;
  }
  }

  MI.eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS: return LowerSHLParts(Op, DAG);
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerSRXParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
    switch (IntrinsicID) {
    case Intrinsic::r600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID =
        cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    case Intrinsic::r600_tex:
    case Intrinsic::r600_texc: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case Intrinsic::r600_tex:
        TextureOp = 0;
        break;
      case Intrinsic::r600_texc:
        TextureOp = 1;
        break;
      default:
        llvm_unreachable("unhandled texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case Intrinsic::r600_dot4: {
      SDValue Args[8] = {
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(0, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(0, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(1, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(1, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(2, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(2, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
          DAG.getConstant(3, DL, MVT::i32)),
      DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
          DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_implicitarg_ptr: {
      MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
      uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
      return DAG.getConstant(ByteOffset, DL, PtrVT);
    }
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Z, VT);

    case Intrinsic::r600_recipsqrt_ieee:
      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_recipsqrt_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
    default:
      return Op;
    }

    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
      return;
    }
    // Since we don't care about out of bounds values we can use FP_TO_SINT for
    // uints too. The DAGLegalizer code for uint considers some extra cases
    // which are not necessary here.
    LLVM_FALLTHROUGH;
  case ISD::FP_TO_SINT: {
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
      return;
    }

    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
    Args.push_back(DAG.getNode(
        ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
        DAG.getConstant(i, DL, getVectorIdxTy(DAG.getDataLayout()))));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}
706 
707 SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
708  SelectionDAG &DAG) const {
709  SDLoc DL(Op);
710  SDValue Vector = Op.getOperand(0);
711  SDValue Index = Op.getOperand(1);
712 
713  if (isa<ConstantSDNode>(Index) ||
715  return Op;
716 
717  Vector = vectorToVerticalVector(DAG, Vector);
718  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
719  Vector, Index);
720 }

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();
  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);

  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
        DAG.getNode(ISD::FMUL, DL, VT, Arg,
          DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
        DAG.getConstantFP(0.5, DL, MVT::f32)));
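  // Illustrative numbers: 0.15915494309 is 1/(2*pi), so for Arg = pi this
  // computes FRACT(0.5 + 0.5) = 0.0, and the -0.5 below maps the result into
  // the [-0.5, 0.5] period that the hardware COS_HW/SIN_HW units expect.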
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
        DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
      DAG.getConstantFP(3.14159265359, DL, MVT::f32));
}

SDValue R600TargetLowering::LowerSHLParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.
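  // For example, with Shift = 0: CompShift = 31, so SRL(Lo, 31) followed by
  // SRL(..., 1) yields 0 overflow bits, whereas a single SRL(Lo, 32) would be
  // undefined for a 32-bit type.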

  SDValue Overflow = DAG.getNode(ISD::SRL, DL, VT, Lo, CompShift);
  Overflow = DAG.getNode(ISD::SRL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(ISD::SHL, DL, VT, Hi, Shift);
  HiSmall = DAG.getNode(ISD::OR, DL, VT, HiSmall, Overflow);
  SDValue LoSmall = DAG.getNode(ISD::SHL, DL, VT, Lo, Shift);

  SDValue HiBig = DAG.getNode(ISD::SHL, DL, VT, Lo, BigShift);
  SDValue LoBig = Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerSRXParts(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);
  SDValue Shift = Op.getOperand(2);
  SDValue Zero = DAG.getConstant(0, DL, VT);
  SDValue One  = DAG.getConstant(1, DL, VT);

  const bool SRA = Op.getOpcode() == ISD::SRA_PARTS;

  SDValue Width  = DAG.getConstant(VT.getSizeInBits(), DL, VT);
  SDValue Width1 = DAG.getConstant(VT.getSizeInBits() - 1, DL, VT);
  SDValue BigShift  = DAG.getNode(ISD::SUB, DL, VT, Shift, Width);
  SDValue CompShift = DAG.getNode(ISD::SUB, DL, VT, Width1, Shift);

  // The dance around Width1 is necessary for the 0 special case.
  // Without it the CompShift might be 32, producing incorrect results in
  // Overflow. So we do the shift in two steps, the alternative is to
  // add a conditional to filter the special case.
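  // Same two-step trick as in LowerSHLParts: with Shift = 0, SHL(Hi, 31)
  // followed by SHL(..., 1) contributes no bits to the low word, avoiding an
  // undefined 32-bit shift by 32.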

  SDValue Overflow = DAG.getNode(ISD::SHL, DL, VT, Hi, CompShift);
  Overflow = DAG.getNode(ISD::SHL, DL, VT, Overflow, One);

  SDValue HiSmall = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, Shift);
  SDValue LoSmall = DAG.getNode(ISD::SRL, DL, VT, Lo, Shift);
  LoSmall = DAG.getNode(ISD::OR, DL, VT, LoSmall, Overflow);

  SDValue LoBig = DAG.getNode(SRA ? ISD::SRA : ISD::SRL, DL, VT, Hi, BigShift);
  SDValue HiBig = SRA ? DAG.getNode(ISD::SRA, DL, VT, Hi, Width1) : Zero;

  Hi = DAG.getSelectCC(DL, Shift, Width, HiSmall, HiBig, ISD::SETULT);
  Lo = DAG.getSelectCC(DL, Shift, Width, LoSmall, LoBig, ISD::SETULT);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Lo, Hi);
}

SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));
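  // The CARRY/BORROW node delivers the overflow flag in bit 0; the
  // SIGN_EXTEND_INREG from i1 replicates that bit, turning a set flag into
  // all-ones, the boolean encoding this target uses.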

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}

SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
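  // e.g. r600_read_local_size_x is lowered above with DwordOffset 6, i.e. a
  // load from byte offset 24 in the implicit kernel-parameter buffer.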
  PointerType *PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)));
}

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
    return Cst->isNullValue();
  } else if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CstFP->isZero();
  } else {
    return false;
  }
}

bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  return isAllOnesConstant(Op);
}

bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  return isNullConstant(Op);
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //
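  // For instance, (select_cc a, b, -1, 0, setgt) with i32 operands can be
  // emitted as a single SETGT_INT, and the f32 forms map onto the
  // SET*/SET*_DX10 ALU instructions.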

  // Move hardware True/False values to the correct operand.
  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
  ISD::CondCode InverseCC =
      ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT.isInteger());
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT == MVT::i32);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
        Cond, Zero,
        True, False,
        DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getConstant(-1, DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  }
  else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue,
                             HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch(StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }
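  // Illustrative: with StackWidth 1 each slot uses one 4-byte channel, so a
  // byte pointer becomes an index via >> 2; with StackWidth 2 a slot spans
  // two channels (8 bytes, >> 3); with StackWidth 4 it spans the whole
  // 16-byte register (>> 4).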

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}

void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}

SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  //TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);

  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlignment() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlignment() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }
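  // The sequence below is a read-modify-write on the containing dword: e.g.
  // an i8 store to byte offset 3 loads the dword, shifts value and mask left
  // by 24 bits, clears the old byte with AND ~(0xff << 24), ORs the new bits
  // in, and stores the dword back.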

  SDValue OldChain = Store->getChain();
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) &&
      VT.isVector()) {
    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) &&
        StoreNode->isTruncatingStore()) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      // TODO: can the chain be replaced without creating a new store?
      SDValue NewStore = DAG.getTruncStore(
          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
          MemVT, StoreNode->getAlignment(),
          StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
      StoreNode = cast<StoreSDNode>(NewStore);
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  unsigned Align = StoreNode->getAlignment();
  if (Align < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(MemVT, AS, Align, nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
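    // MSKOR (mask-or) writes (old & ~mask) | value in a single memory
    // operation, so the byte/halfword merge happens in the memory unit rather
    // than as an explicit load-modify-store sequence.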
    if (StoreNode->isTruncatingStore()) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlignment() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isTruncatingStore() || StoreNode->isIndexed()) {
        llvm_unreachable("Truncated and indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}

// return (512 + (kc_bank << 12))
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
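// For example, an address in CONSTANT_BUFFER_1 resolves to kc_bank 1 above:
// 512 + (1 << 12) = 4608.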

SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlignment() >= MemVT.getStoreSize());

  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(UndefValue::get(
      Type::getInt32PtrTy(*DAG.getContext(), AMDGPUAS::PRIVATE_ADDRESS)));
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  // Get offset within the register.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  SDValue Ops[] = {
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
}

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      VT.isVector()) {
    return scalarizeVectorLoad(LoadNode, DAG);
  }

  // This is still used for explicit load from addrspace(8)
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
    } else {
      //TODO: Does this even work?
      // non-constant ptr can't be folded, keeps it as a v4f32 load
      Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
          DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
              DAG.getConstant(4, DL, MVT::i32)),
          DAG.getConstant(LoadNode->getAddressSpace() -
                          AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
          );
    }

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    EVT MemVT = LoadNode->getMemoryVT();
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }
  return SDValue();
}

SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond  = Op.getOperand(1);
  SDValue Jump  = Op.getOperand(2);

  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
                     Chain, Jump, Cond);
}

SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = Subtarget->getFrameLowering();

  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);

  unsigned FrameIndex = FIN->getIndex();
  unsigned IgnoredFrameReg;
  unsigned Offset =
      TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), SDLoc(Op),
                         Op.getValueType());
}

CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                  bool IsVarArg) const {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    llvm_unreachable("kernels should not be handled here");
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_LS:
    return CC_R600;
  default:
    report_fatal_error("Unsupported calling convention.");
  }
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();

  if (AMDGPU::isShader(CallConv)) {
    CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
  } else {
    analyzeFormalArgumentsCompute(CCInfo, Ins);
  }

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (AMDGPU::isShader(CallConv)) {
      unsigned Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    PointerType *PtrTy = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
                                          AMDGPUAS::PARAM_I_ADDRESS);

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contain information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned ValBase = ArgLocs[In.getOrigArgIndex()].getLocMemOffset();
    unsigned PartOffset = VA.getLocMemOffset();
    unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);

    MachinePointerInfo PtrInfo(UndefValue::get(PtrTy), PartOffset - ValBase);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
        PtrInfo,
        MemVT, Alignment, MachineMemOperand::MONonTemporal |
                              MachineMemOperand::MODereferenceable |
                              MachineMemOperand::MOInvariant);

    InVals.push_back(Arg);
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
                                          const SelectionDAG &DAG) const {
  // Local and Private addresses do not handle vectors. Limit to i32
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
    return (MemVT.getSizeInBits() <= 32);
  }
  return true;
}

bool R600TargetLowering::allowsMisalignedMemoryAccesses(EVT VT,
                                                        unsigned AddrSpace,
                                                        unsigned Align,
                                                        bool *IsFast) const {
  if (IsFast)
    *IsFast = false;

  if (!VT.isSimple() || VT == MVT::Other)
    return false;

  if (VT.bitsLT(MVT::i32))
    return false;

  // TODO: This is a rough estimate.
  if (IsFast)
    *IsFast = true;

  return VT.bitsGT(MVT::i32) && Align % 4 == 0;
}

static SDValue CompactSwizzlableVector(
    SelectionDAG &DAG, SDValue VectorEntry,
    DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }
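  // At this point a vector like (x, undef, 0.0f, 1.0f) has become
  // (x, undef, undef, undef) with RemapSwizzle = {1->7 (mask write),
  // 2->4 (SEL_0), 3->5 (SEL_1)}, so only live lanes occupy real channels.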

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  bool isUnmovable[4] = {false, false, false, false};
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = dyn_cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
          ->getZExtValue();
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
                                            SelectionDAG &DAG,
                                            const SDLoc &DL) const {
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
    if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
  }

  return BuildVector;
}

SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
                                            SelectionDAG &DAG) const {
  SDLoc DL(LoadNode);
  EVT VT = LoadNode->getValueType(0);
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();
  assert(isa<ConstantSDNode>(Ptr));

  //TODO: Support smaller loads
  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 ||
      !ISD::isNON_EXTLoad(LoadNode))
    return SDValue();

  if (LoadNode->getAlignment() < 4)
    return SDValue();

  int ConstantBlock = ConstantAddressBlock(Block);

  SDValue Slots[4];
  for (unsigned i = 0; i < 4; i++) {
    // We want Const position encoded with the following formula :
    // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
    // const_index is Ptr computed by llvm using an alignment of 16.
    // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
    // then div by 4 at the ISel step
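    // e.g. kc_bank 0, const_index 1 (Ptr = 16), chan 2 (i = 2):
    // 16 + 4*2 + 512*16 = 8216, and 8216 / 4 = 2054 = ((512 + 1) << 2) + 2.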
    SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
        DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
    Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
  }
  EVT NewVT = MVT::v4i32;
  unsigned NumElements = 4;
  if (VT.isVector()) {
    NewVT = VT;
    NumElements = VT.getVectorNumElements();
  }
  SDValue Result = DAG.getBuildVector(NewVT, DL,
                                      makeArrayRef(Slots, NumElements));
  if (!VT.isVector()) {
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                         DAG.getConstant(0, DL, MVT::i32));
  }
  SDValue MergedValues[2] = {
    Result,
    Chain
  };
  return DAG.getMergeValues(MergedValues, DL);
}

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

1843  DAGCombinerInfo &DCI) const {
1844  SelectionDAG &DAG = DCI.DAG;
1845  SDLoc DL(N);
1846 
1847  switch (N->getOpcode()) {
1848  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1849  case ISD::FP_ROUND: {
1850  SDValue Arg = N->getOperand(0);
1851  if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1852  return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1853  Arg.getOperand(0));
1854  }
1855  break;
1856  }
1857 
1858  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1859  // (i32 select_cc f32, f32, -1, 0 cc)
1860  //
1861  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1862  // this to one of the SET*_DX10 instructions.
1863  case ISD::FP_TO_SINT: {
1864  SDValue FNeg = N->getOperand(0);
1865  if (FNeg.getOpcode() != ISD::FNEG) {
1866  return SDValue();
1867  }
1868  SDValue SelectCC = FNeg.getOperand(0);
1869  if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1870  SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1871  SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1872  !isHWTrueValue(SelectCC.getOperand(2)) ||
1873  !isHWFalseValue(SelectCC.getOperand(3))) {
1874  return SDValue();
1875  }
1876 
1877  return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1878  SelectCC.getOperand(0), // LHS
1879  SelectCC.getOperand(1), // RHS
1880  DAG.getConstant(-1, DL, MVT::i32), // True
1881  DAG.getConstant(0, DL, MVT::i32), // False
1882  SelectCC.getOperand(4)); // CC
1883 
1884  break;
1885  }
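// Concretely (hypothetical operands):
// (i32 fp_to_sint (fneg (select_cc a, b, 1.0, 0.0, setgt))) becomes
// (i32 select_cc a, b, -1, 0, setgt), matching the all-ones / zero
// integer result a SETGT_DX10 instruction produces.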
1886 
1887  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1888  // => build_vector elt0, ... , NewEltIdx, ... , eltN
1889  case ISD::INSERT_VECTOR_ELT: {
1890  SDValue InVec = N->getOperand(0);
1891  SDValue InVal = N->getOperand(1);
1892  SDValue EltNo = N->getOperand(2);
1893 
1894  // If the inserted element is an UNDEF, just use the input vector.
1895  if (InVal.isUndef())
1896  return InVec;
1897 
1898  EVT VT = InVec.getValueType();
1899 
1900  // If we can't generate a legal BUILD_VECTOR, exit
1901  if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1902  return SDValue();
1903 
1904  // Check that we know which element is being inserted
1905  if (!isa<ConstantSDNode>(EltNo))
1906  return SDValue();
1907  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1908 
1909  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1910  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1911  // vector elements.
1912  SmallVector<SDValue, 8> Ops;
1913  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1914  Ops.append(InVec.getNode()->op_begin(),
1915  InVec.getNode()->op_end());
1916  } else if (InVec.isUndef()) {
1917  unsigned NElts = VT.getVectorNumElements();
1918  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1919  } else {
1920  return SDValue();
1921  }
1922 
1923  // Insert the element
1924  if (Elt < Ops.size()) {
1925  // All the operands of BUILD_VECTOR must have the same type;
1926  // we enforce that here.
1927  EVT OpVT = Ops[0].getValueType();
1928  if (InVal.getValueType() != OpVT)
1929  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1930  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1931  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1932  Ops[Elt] = InVal;
1933  }
1934 
1935  // Return the new vector
1936  return DAG.getBuildVector(VT, DL, Ops);
1937  }
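// Illustrative sketch: inserting v at constant index 2 into
// (build_vector a, b, c, d) yields (build_vector a, b, v, d), so later
// combines and instruction selection see a plain BUILD_VECTOR rather
// than an INSERT_VECTOR_ELT chain.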
1938 
1939  // Extract_vec (Build_vector) generated by custom lowering
1940  // also needs to be custom combined
1941  case ISD::EXTRACT_VECTOR_ELT: {
1942  SDValue Arg = N->getOperand(0);
1943  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1944  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1945  unsigned Element = Const->getZExtValue();
1946  return Arg->getOperand(Element);
1947  }
1948  }
1949  if (Arg.getOpcode() == ISD::BITCAST &&
1950  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1951  (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1952  Arg.getValueType().getVectorNumElements())) {
1953  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1954  unsigned Element = Const->getZExtValue();
1955  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1956  Arg->getOperand(0).getOperand(Element));
1957  }
1958  }
1959  break;
1960  }
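// e.g. (extract_vector_elt (build_vector a, b, c, d), 1) folds to b;
// in the BITCAST form the element is taken from the pre-bitcast
// build_vector and bitcast as a scalar, which is only safe when both
// vector types have the same element count.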
1961 
1962  case ISD::SELECT_CC: {
1963  // Try common optimizations
1964  if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1965  return Ret;
1966 
1967  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1968  // selectcc x, y, a, b, inv(cc)
1969  //
1970  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1971  // selectcc x, y, a, b, cc
1972  SDValue LHS = N->getOperand(0);
1973  if (LHS.getOpcode() != ISD::SELECT_CC) {
1974  return SDValue();
1975  }
1976 
1977  SDValue RHS = N->getOperand(1);
1978  SDValue True = N->getOperand(2);
1979  SDValue False = N->getOperand(3);
1980  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1981 
1982  if (LHS.getOperand(2).getNode() != True.getNode() ||
1983  LHS.getOperand(3).getNode() != False.getNode() ||
1984  RHS.getNode() != False.getNode()) {
1985  return SDValue();
1986  }
1987 
1988  switch (NCC) {
1989  default: return SDValue();
1990  case ISD::SETNE: return LHS;
1991  case ISD::SETEQ: {
1992  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1993  LHSCC = ISD::getSetCCInverse(LHSCC,
1994  LHS.getOperand(0).getValueType().isInteger());
1995  if (DCI.isBeforeLegalizeOps() ||
1996  isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1997  return DAG.getSelectCC(DL,
1998  LHS.getOperand(0),
1999  LHS.getOperand(1),
2000  LHS.getOperand(2),
2001  LHS.getOperand(3),
2002  LHSCC);
2003  break;
2004  }
2005  }
2006  return SDValue();
2007  }
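// Illustrative sketch (hypothetical operands): with
// S = (selectcc x, y, a, b, setlt), the node
// (selectcc S, b, a, b, setne) folds to S itself, while the seteq form
// becomes (selectcc x, y, a, b, setge), i.e. S with its condition
// inverted.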
2008 
2009  case AMDGPUISD::R600_EXPORT: {
2010  SDValue Arg = N->getOperand(1);
2011  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2012  break;
2013 
2014  SDValue NewArgs[8] = {
2015  N->getOperand(0), // Chain
2016  SDValue(),
2017  N->getOperand(2), // ArrayBase
2018  N->getOperand(3), // Type
2019  N->getOperand(4), // SWZ_X
2020  N->getOperand(5), // SWZ_Y
2021  N->getOperand(6), // SWZ_Z
2022  N->getOperand(7) // SWZ_W
2023  };
2024  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
2025  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
2026  }
2027  case AMDGPUISD::TEXTURE_FETCH: {
2028  SDValue Arg = N->getOperand(1);
2029  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
2030  break;
2031 
2032  SDValue NewArgs[19] = {
2033  N->getOperand(0),
2034  N->getOperand(1),
2035  N->getOperand(2),
2036  N->getOperand(3),
2037  N->getOperand(4),
2038  N->getOperand(5),
2039  N->getOperand(6),
2040  N->getOperand(7),
2041  N->getOperand(8),
2042  N->getOperand(9),
2043  N->getOperand(10),
2044  N->getOperand(11),
2045  N->getOperand(12),
2046  N->getOperand(13),
2047  N->getOperand(14),
2048  N->getOperand(15),
2049  N->getOperand(16),
2050  N->getOperand(17),
2051  N->getOperand(18),
2052  };
2053  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
2054  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
2055  }
2056 
2057  case ISD::LOAD: {
2058  LoadSDNode *LoadNode = cast<LoadSDNode>(N);
2059  SDValue Ptr = LoadNode->getBasePtr();
2060  if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
2061  isa<ConstantSDNode>(Ptr))
2062  return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
2063  break;
2064  }
2065 
2066  default: break;
2067  }
2068 
2069  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
2070 }
2071 
2072 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
2073  SDValue &Src, SDValue &Neg, SDValue &Abs,
2074  SDValue &Sel, SDValue &Imm,
2075  SelectionDAG &DAG) const {
2076  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2077  if (!Src.isMachineOpcode())
2078  return false;
2079 
2080  switch (Src.getMachineOpcode()) {
2081  case R600::FNEG_R600:
2082  if (!Neg.getNode())
2083  return false;
2084  Src = Src.getOperand(0);
2085  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2086  return true;
2087  case R600::FABS_R600:
2088  if (!Abs.getNode())
2089  return false;
2090  Src = Src.getOperand(0);
2091  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2092  return true;
2093  case R600::CONST_COPY: {
2094  unsigned Opcode = ParentNode->getMachineOpcode();
2095  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2096 
2097  if (!Sel.getNode())
2098  return false;
2099 
2100  SDValue CstOffset = Src.getOperand(0);
2101  if (ParentNode->getValueType(0).isVector())
2102  return false;
2103 
2104  // Gather constant values
2105  int SrcIndices[] = {
2106  TII->getOperandIdx(Opcode, R600::OpName::src0),
2107  TII->getOperandIdx(Opcode, R600::OpName::src1),
2108  TII->getOperandIdx(Opcode, R600::OpName::src2),
2109  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2110  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2111  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2112  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2113  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2114  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2115  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2116  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2117  };
2118  std::vector<unsigned> Consts;
2119  for (int OtherSrcIdx : SrcIndices) {
2120  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2121  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2122  continue;
2123  if (HasDst) {
2124  OtherSrcIdx--;
2125  OtherSelIdx--;
2126  }
2127  if (RegisterSDNode *Reg =
2128  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2129  if (Reg->getReg() == R600::ALU_CONST) {
2130  ConstantSDNode *Cst
2131  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2132  Consts.push_back(Cst->getZExtValue());
2133  }
2134  }
2135  }
2136 
2137  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2138  Consts.push_back(Cst->getZExtValue());
2139  if (!TII->fitsConstReadLimitations(Consts)) {
2140  return false;
2141  }
2142 
2143  Sel = CstOffset;
2144  Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2145  return true;
2146  }
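// The index walk above gathers every constant-buffer sel the parent
// instruction already reads; fitsConstReadLimitations then rejects the
// fold when adding this constant would exceed what a single ALU group
// may fetch from the KCache (two channel pairs per bank on R700+).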
2147  case R600::MOV_IMM_GLOBAL_ADDR:
2148  // Check if the Imm slot is used. Taken from below.
2149  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2150  return false;
2151  Imm = Src.getOperand(0);
2152  Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2153  return true;
2154  case R600::MOV_IMM_I32:
2155  case R600::MOV_IMM_F32: {
2156  unsigned ImmReg = R600::ALU_LITERAL_X;
2157  uint64_t ImmValue = 0;
2158 
2159  if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2160  ConstantFPSDNode *FPC = dyn_cast<ConstantFPSDNode>(Src.getOperand(0));
2161  float FloatValue = FPC->getValueAPF().convertToFloat();
2162  if (FloatValue == 0.0) {
2163  ImmReg = R600::ZERO;
2164  } else if (FloatValue == 0.5) {
2165  ImmReg = R600::HALF;
2166  } else if (FloatValue == 1.0) {
2167  ImmReg = R600::ONE;
2168  } else {
2169  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2170  }
2171  } else {
2172  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Src.getOperand(0));
2173  uint64_t Value = C->getZExtValue();
2174  if (Value == 0) {
2175  ImmReg = R600::ZERO;
2176  } else if (Value == 1) {
2177  ImmReg = R600::ONE_INT;
2178  } else {
2179  ImmValue = Value;
2180  }
2181  }
2182 
2183  // Check that we aren't already using an immediate.
2184  // XXX: It's possible for an instruction to have more than one
2185  // immediate operand, but this is not supported yet.
2186  if (ImmReg == R600::ALU_LITERAL_X) {
2187  if (!Imm.getNode())
2188  return false;
2189  ConstantSDNode *C = dyn_cast<ConstantSDNode>(Imm);
2190  assert(C);
2191  if (C->getZExtValue())
2192  return false;
2193  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2194  }
2195  Src = DAG.getRegister(ImmReg, MVT::i32);
2196  return true;
2197  }
2198  default:
2199  return false;
2200  }
2201 }
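// Illustrative sketch (hypothetical instruction): given
// (MUL_IEEE (FNEG_R600 x), y), FoldOperand rewrites the src0 operand
// to x and sets the src0_neg modifier to 1, absorbing the negation
// into the multiply instead of emitting a separate ALU instruction.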
2202 
2203 /// Fold the instructions after selecting them
2204 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2205  SelectionDAG &DAG) const {
2206  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2207  if (!Node->isMachineOpcode())
2208  return Node;
2209 
2210  unsigned Opcode = Node->getMachineOpcode();
2211  SDValue FakeOp;
2212 
2213  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2214 
2215  if (Opcode == R600::DOT_4) {
2216  int OperandIdx[] = {
2217  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2218  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2219  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2220  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2221  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2222  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2223  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2224  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2225  };
2226  int NegIdx[] = {
2227  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2228  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2229  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2230  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2231  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2232  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2233  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2234  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2235  };
2236  int AbsIdx[] = {
2237  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2238  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2239  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2240  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2241  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2242  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2243  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2244  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2245  };
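// Each of DOT_4's eight per-channel sources carries its own neg, abs
// and sel slot, so a folded FNEG_R600 on, say, src0_Y flips only
// src0_neg_Y; the loop below attempts the fold channel by channel and
// rebuilds the node on the first success.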
2246  for (unsigned i = 0; i < 8; i++) {
2247  if (OperandIdx[i] < 0)
2248  return Node;
2249  SDValue &Src = Ops[OperandIdx[i] - 1];
2250  SDValue &Neg = Ops[NegIdx[i] - 1];
2251  SDValue &Abs = Ops[AbsIdx[i] - 1];
2252  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2253  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2254  if (HasDst)
2255  SelIdx--;
2256  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2257  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2258  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2259  }
2260  } else if (Opcode == R600::REG_SEQUENCE) {
2261  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2262  SDValue &Src = Ops[i];
2263  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2264  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2265  }
2266  } else {
2267  if (!TII->hasInstrModifiers(Opcode))
2268  return Node;
2269  int OperandIdx[] = {
2270  TII->getOperandIdx(Opcode, R600::OpName::src0),
2271  TII->getOperandIdx(Opcode, R600::OpName::src1),
2272  TII->getOperandIdx(Opcode, R600::OpName::src2)
2273  };
2274  int NegIdx[] = {
2275  TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2276  TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2277  TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2278  };
2279  int AbsIdx[] = {
2280  TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2281  TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2282  -1
2283  };
2284  for (unsigned i = 0; i < 3; i++) {
2285  if (OperandIdx[i] < 0)
2286  return Node;
2287  SDValue &Src = Ops[OperandIdx[i] - 1];
2288  SDValue &Neg = Ops[NegIdx[i] - 1];
2289  SDValue FakeAbs;
2290  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2291  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2292  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2293  int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2294  if (HasDst) {
2295  SelIdx--;
2296  ImmIdx--;
2297  }
2298  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2299  SDValue &Imm = Ops[ImmIdx];
2300  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2301  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2302  }
2303  }
2304 
2305  return Node;
2306 }