LLVM  14.0.0git
R600ISelLowering.cpp
Go to the documentation of this file.
1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Custom DAG lowering for R600
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "R600ISelLowering.h"
15 #include "AMDGPU.h"
17 #include "R600Defines.h"
18 #include "R600InstrInfo.h"
20 #include "R600Subtarget.h"
22 #include "llvm/IR/IntrinsicsAMDGPU.h"
23 #include "llvm/IR/IntrinsicsR600.h"
24 
25 using namespace llvm;
26 
27 #include "R600GenCallingConv.inc"
28 
30  const R600Subtarget &STI)
31  : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
32  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
33  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
34  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
35  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
36  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
37  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
38 
41 
43 
44  // Legalize loads and stores to the private address space.
48 
49  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
50  // spaces, so it is custom lowered to handle those where it isn't.
51  for (MVT VT : MVT::integer_valuetypes()) {
55 
59 
63  }
64 
65  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
69 
73 
78 
81  // We need to include these since trunc STORES to PRIVATE need
82  // special handling to accommodate RMW
93 
94  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
97 
98  // Set condition code actions
111 
116 
119 
122 
126 
128 
133 
136 
143 
148 
149  // ADD, SUB overflow.
150  // TODO: turn these into Legal?
151  if (Subtarget->hasCARRY())
153 
154  if (Subtarget->hasBORROW())
156 
157  // Expand sign extension of vectors
158  if (!Subtarget->hasBFE())
160 
163 
164  if (!Subtarget->hasBFE())
168 
169  if (!Subtarget->hasBFE())
173 
177 
179 
181 
186 
191 
192  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
193  // to be Legal/Custom in order to avoid library calls.
197 
198  if (!Subtarget->hasFMA()) {
201  }
202 
203  // FIXME: May need no denormals check
205 
206  if (!Subtarget->hasBFI()) {
207  // fcopysign can be done in a single instruction with BFI.
210  }
211 
212  if (!Subtarget->hasBCNT(32))
214 
215  if (!Subtarget->hasBCNT(64))
217 
218  if (Subtarget->hasFFBH())
220 
221  if (Subtarget->hasFFBL())
223 
224  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
225  // need it for R600.
226  if (Subtarget->hasBFE())
227  setHasExtractBitsInsn(true);
228 
230 
231  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
232  for (MVT VT : ScalarIntVTs) {
237  }
238 
239  // LLVM will expand these to atomic_cmp_swap(0)
240  // and atomic_swap, respectively.
243 
244  // We need to custom lower some of the intrinsics
247 
249 
256 }
257 
258 static inline bool isEOP(MachineBasicBlock::iterator I) {
259  if (std::next(I) == I->getParent()->end())
260  return false;
261  return std::next(I)->getOpcode() == R600::RETURN;
262 }
263 
266  MachineBasicBlock *BB) const {
267  MachineFunction *MF = BB->getParent();
270  const R600InstrInfo *TII = Subtarget->getInstrInfo();
271 
272  switch (MI.getOpcode()) {
273  default:
274  // Replace LDS_*_RET instruction that don't have any uses with the
275  // equivalent LDS_*_NORET instruction.
276  if (TII->isLDSRetInstr(MI.getOpcode())) {
277  int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
278  assert(DstIdx != -1);
279  MachineInstrBuilder NewMI;
280  // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
281  // LDS_1A2D support and remove this special case.
282  if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
283  MI.getOpcode() == R600::LDS_CMPST_RET)
284  return BB;
285 
286  NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
287  TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
288  for (unsigned i = 1, e = MI.getNumOperands(); i < e; ++i) {
289  NewMI.add(MI.getOperand(i));
290  }
291  } else {
293  }
294  break;
295 
296  case R600::FABS_R600: {
297  MachineInstr *NewMI = TII->buildDefaultInstruction(
298  *BB, I, R600::MOV, MI.getOperand(0).getReg(),
299  MI.getOperand(1).getReg());
300  TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
301  break;
302  }
303 
304  case R600::FNEG_R600: {
305  MachineInstr *NewMI = TII->buildDefaultInstruction(
306  *BB, I, R600::MOV, MI.getOperand(0).getReg(),
307  MI.getOperand(1).getReg());
308  TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
309  break;
310  }
311 
312  case R600::MASK_WRITE: {
313  Register maskedRegister = MI.getOperand(0).getReg();
314  assert(maskedRegister.isVirtual());
315  MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
316  TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
317  break;
318  }
319 
320  case R600::MOV_IMM_F32:
321  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
322  .getFPImm()
323  ->getValueAPF()
324  .bitcastToAPInt()
325  .getZExtValue());
326  break;
327 
328  case R600::MOV_IMM_I32:
329  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
330  MI.getOperand(1).getImm());
331  break;
332 
333  case R600::MOV_IMM_GLOBAL_ADDR: {
334  //TODO: Perhaps combine this instruction with the next if possible
335  auto MIB = TII->buildDefaultInstruction(
336  *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
337  int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
338  //TODO: Ugh this is rather ugly
339  MIB->getOperand(Idx) = MI.getOperand(1);
340  break;
341  }
342 
343  case R600::CONST_COPY: {
344  MachineInstr *NewMI = TII->buildDefaultInstruction(
345  *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
346  TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
347  MI.getOperand(1).getImm());
348  break;
349  }
350 
351  case R600::RAT_WRITE_CACHELESS_32_eg:
352  case R600::RAT_WRITE_CACHELESS_64_eg:
353  case R600::RAT_WRITE_CACHELESS_128_eg:
354  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
355  .add(MI.getOperand(0))
356  .add(MI.getOperand(1))
357  .addImm(isEOP(I)); // Set End of program bit
358  break;
359 
360  case R600::RAT_STORE_TYPED_eg:
361  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
362  .add(MI.getOperand(0))
363  .add(MI.getOperand(1))
364  .add(MI.getOperand(2))
365  .addImm(isEOP(I)); // Set End of program bit
366  break;
367 
368  case R600::BRANCH:
369  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
370  .add(MI.getOperand(0));
371  break;
372 
373  case R600::BRANCH_COND_f32: {
374  MachineInstr *NewMI =
375  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
376  R600::PREDICATE_BIT)
377  .add(MI.getOperand(1))
378  .addImm(R600::PRED_SETNE)
379  .addImm(0); // Flags
380  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
381  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
382  .add(MI.getOperand(0))
383  .addReg(R600::PREDICATE_BIT, RegState::Kill);
384  break;
385  }
386 
387  case R600::BRANCH_COND_i32: {
388  MachineInstr *NewMI =
389  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
390  R600::PREDICATE_BIT)
391  .add(MI.getOperand(1))
392  .addImm(R600::PRED_SETNE_INT)
393  .addImm(0); // Flags
394  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
395  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
396  .add(MI.getOperand(0))
397  .addReg(R600::PREDICATE_BIT, RegState::Kill);
398  break;
399  }
400 
401  case R600::EG_ExportSwz:
402  case R600::R600_ExportSwz: {
403  // Instruction is left unmodified if its not the last one of its type
404  bool isLastInstructionOfItsType = true;
405  unsigned InstExportType = MI.getOperand(1).getImm();
406  for (MachineBasicBlock::iterator NextExportInst = std::next(I),
407  EndBlock = BB->end(); NextExportInst != EndBlock;
408  NextExportInst = std::next(NextExportInst)) {
409  if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
410  NextExportInst->getOpcode() == R600::R600_ExportSwz) {
411  unsigned CurrentInstExportType = NextExportInst->getOperand(1)
412  .getImm();
413  if (CurrentInstExportType == InstExportType) {
414  isLastInstructionOfItsType = false;
415  break;
416  }
417  }
418  }
419  bool EOP = isEOP(I);
420  if (!EOP && !isLastInstructionOfItsType)
421  return BB;
422  unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
423  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
424  .add(MI.getOperand(0))
425  .add(MI.getOperand(1))
426  .add(MI.getOperand(2))
427  .add(MI.getOperand(3))
428  .add(MI.getOperand(4))
429  .add(MI.getOperand(5))
430  .add(MI.getOperand(6))
431  .addImm(CfInst)
432  .addImm(EOP);
433  break;
434  }
435  case R600::RETURN: {
436  return BB;
437  }
438  }
439 
440  MI.eraseFromParent();
441  return BB;
442 }
443 
444 //===----------------------------------------------------------------------===//
445 // Custom DAG Lowering Operations
446 //===----------------------------------------------------------------------===//
447 
451  switch (Op.getOpcode()) {
452  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
453  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
454  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
455  case ISD::SHL_PARTS:
456  case ISD::SRA_PARTS:
457  case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
458  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
459  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
460  case ISD::FCOS:
461  case ISD::FSIN: return LowerTrig(Op, DAG);
462  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
463  case ISD::STORE: return LowerSTORE(Op, DAG);
464  case ISD::LOAD: {
465  SDValue Result = LowerLOAD(Op, DAG);
466  assert((!Result.getNode() ||
467  Result.getNode()->getNumValues() == 2) &&
468  "Load should return a value and a chain");
469  return Result;
470  }
471 
472  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
473  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
474  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
475  case ISD::INTRINSIC_VOID: {
476  SDValue Chain = Op.getOperand(0);
477  unsigned IntrinsicID =
478  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
479  switch (IntrinsicID) {
480  case Intrinsic::r600_store_swizzle: {
481  SDLoc DL(Op);
482  const SDValue Args[8] = {
483  Chain,
484  Op.getOperand(2), // Export Value
485  Op.getOperand(3), // ArrayBase
486  Op.getOperand(4), // Type
487  DAG.getConstant(0, DL, MVT::i32), // SWZ_X
488  DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
489  DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
490  DAG.getConstant(3, DL, MVT::i32) // SWZ_W
491  };
492  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
493  }
494 
495  // default for switch(IntrinsicID)
496  default: break;
497  }
498  // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
499  break;
500  }
502  unsigned IntrinsicID =
503  cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
504  EVT VT = Op.getValueType();
505  SDLoc DL(Op);
506  switch (IntrinsicID) {
507  case Intrinsic::r600_tex:
508  case Intrinsic::r600_texc: {
509  unsigned TextureOp;
510  switch (IntrinsicID) {
511  case Intrinsic::r600_tex:
512  TextureOp = 0;
513  break;
514  case Intrinsic::r600_texc:
515  TextureOp = 1;
516  break;
517  default:
518  llvm_unreachable("unhandled texture operation");
519  }
520 
521  SDValue TexArgs[19] = {
522  DAG.getConstant(TextureOp, DL, MVT::i32),
523  Op.getOperand(1),
524  DAG.getConstant(0, DL, MVT::i32),
525  DAG.getConstant(1, DL, MVT::i32),
526  DAG.getConstant(2, DL, MVT::i32),
527  DAG.getConstant(3, DL, MVT::i32),
528  Op.getOperand(2),
529  Op.getOperand(3),
530  Op.getOperand(4),
531  DAG.getConstant(0, DL, MVT::i32),
532  DAG.getConstant(1, DL, MVT::i32),
533  DAG.getConstant(2, DL, MVT::i32),
534  DAG.getConstant(3, DL, MVT::i32),
535  Op.getOperand(5),
536  Op.getOperand(6),
537  Op.getOperand(7),
538  Op.getOperand(8),
539  Op.getOperand(9),
540  Op.getOperand(10)
541  };
542  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
543  }
544  case Intrinsic::r600_dot4: {
545  SDValue Args[8] = {
546  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
547  DAG.getConstant(0, DL, MVT::i32)),
548  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
549  DAG.getConstant(0, DL, MVT::i32)),
550  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
551  DAG.getConstant(1, DL, MVT::i32)),
552  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
553  DAG.getConstant(1, DL, MVT::i32)),
554  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
555  DAG.getConstant(2, DL, MVT::i32)),
556  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
557  DAG.getConstant(2, DL, MVT::i32)),
558  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
559  DAG.getConstant(3, DL, MVT::i32)),
560  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
561  DAG.getConstant(3, DL, MVT::i32))
562  };
563  return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
564  }
565 
566  case Intrinsic::r600_implicitarg_ptr: {
569  return DAG.getConstant(ByteOffset, DL, PtrVT);
570  }
571  case Intrinsic::r600_read_ngroups_x:
572  return LowerImplicitParameter(DAG, VT, DL, 0);
573  case Intrinsic::r600_read_ngroups_y:
574  return LowerImplicitParameter(DAG, VT, DL, 1);
575  case Intrinsic::r600_read_ngroups_z:
576  return LowerImplicitParameter(DAG, VT, DL, 2);
577  case Intrinsic::r600_read_global_size_x:
578  return LowerImplicitParameter(DAG, VT, DL, 3);
579  case Intrinsic::r600_read_global_size_y:
580  return LowerImplicitParameter(DAG, VT, DL, 4);
581  case Intrinsic::r600_read_global_size_z:
582  return LowerImplicitParameter(DAG, VT, DL, 5);
583  case Intrinsic::r600_read_local_size_x:
584  return LowerImplicitParameter(DAG, VT, DL, 6);
585  case Intrinsic::r600_read_local_size_y:
586  return LowerImplicitParameter(DAG, VT, DL, 7);
587  case Intrinsic::r600_read_local_size_z:
588  return LowerImplicitParameter(DAG, VT, DL, 8);
589 
590  case Intrinsic::r600_read_tgid_x:
591  case Intrinsic::amdgcn_workgroup_id_x:
592  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
593  R600::T1_X, VT);
594  case Intrinsic::r600_read_tgid_y:
595  case Intrinsic::amdgcn_workgroup_id_y:
596  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
597  R600::T1_Y, VT);
598  case Intrinsic::r600_read_tgid_z:
599  case Intrinsic::amdgcn_workgroup_id_z:
600  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
601  R600::T1_Z, VT);
602  case Intrinsic::r600_read_tidig_x:
603  case Intrinsic::amdgcn_workitem_id_x:
604  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
605  R600::T0_X, VT);
606  case Intrinsic::r600_read_tidig_y:
607  case Intrinsic::amdgcn_workitem_id_y:
608  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
609  R600::T0_Y, VT);
610  case Intrinsic::r600_read_tidig_z:
611  case Intrinsic::amdgcn_workitem_id_z:
612  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
613  R600::T0_Z, VT);
614 
615  case Intrinsic::r600_recipsqrt_ieee:
616  return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
617 
618  case Intrinsic::r600_recipsqrt_clamped:
619  return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
620  default:
621  return Op;
622  }
623 
624  // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
625  break;
626  }
627  } // end switch(Op.getOpcode())
628  return SDValue();
629 }
630 
633  SelectionDAG &DAG) const {
634  switch (N->getOpcode()) {
635  default:
637  return;
638  case ISD::FP_TO_UINT:
639  if (N->getValueType(0) == MVT::i1) {
640  Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
641  return;
642  }
643  // Since we don't care about out of bounds values we can use FP_TO_SINT for
644  // uints too. The DAGLegalizer code for uint considers some extra cases
645  // which are not necessary here.
647  case ISD::FP_TO_SINT: {
648  if (N->getValueType(0) == MVT::i1) {
649  Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
650  return;
651  }
652 
653  SDValue Result;
654  if (expandFP_TO_SINT(N, Result, DAG))
655  Results.push_back(Result);
656  return;
657  }
658  case ISD::SDIVREM: {
659  SDValue Op = SDValue(N, 1);
660  SDValue RES = LowerSDIVREM(Op, DAG);
661  Results.push_back(RES);
662  Results.push_back(RES.getValue(1));
663  break;
664  }
665  case ISD::UDIVREM: {
666  SDValue Op = SDValue(N, 0);
667  LowerUDIVREM64(Op, DAG, Results);
668  break;
669  }
670  }
671 }
672 
673 SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
674  SDValue Vector) const {
675  SDLoc DL(Vector);
676  EVT VecVT = Vector.getValueType();
677  EVT EltVT = VecVT.getVectorElementType();
679 
680  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
681  Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
682  DAG.getVectorIdxConstant(i, DL)));
683  }
684 
685  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
686 }
687 
688 SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
689  SelectionDAG &DAG) const {
690  SDLoc DL(Op);
691  SDValue Vector = Op.getOperand(0);
692  SDValue Index = Op.getOperand(1);
693 
694  if (isa<ConstantSDNode>(Index) ||
696  return Op;
697 
698  Vector = vectorToVerticalVector(DAG, Vector);
699  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
700  Vector, Index);
701 }
702 
703 SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
704  SelectionDAG &DAG) const {
705  SDLoc DL(Op);
706  SDValue Vector = Op.getOperand(0);
707  SDValue Value = Op.getOperand(1);
708  SDValue Index = Op.getOperand(2);
709 
710  if (isa<ConstantSDNode>(Index) ||
712  return Op;
713 
714  Vector = vectorToVerticalVector(DAG, Vector);
715  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
716  Vector, Value, Index);
717  return vectorToVerticalVector(DAG, Insert);
718 }
719 
720 SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
721  SDValue Op,
722  SelectionDAG &DAG) const {
723  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
726 
727  const DataLayout &DL = DAG.getDataLayout();
728  const GlobalValue *GV = GSD->getGlobal();
730 
731  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
732  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
733 }
734 
735 SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
736  // On hw >= R700, COS/SIN input must be between -1. and 1.
737  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
738  EVT VT = Op.getValueType();
739  SDValue Arg = Op.getOperand(0);
740  SDLoc DL(Op);
741 
742  // TODO: Should this propagate fast-math-flags?
743  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
744  DAG.getNode(ISD::FADD, DL, VT,
745  DAG.getNode(ISD::FMUL, DL, VT, Arg,
746  DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
747  DAG.getConstantFP(0.5, DL, MVT::f32)));
748  unsigned TrigNode;
749  switch (Op.getOpcode()) {
750  case ISD::FCOS:
751  TrigNode = AMDGPUISD::COS_HW;
752  break;
753  case ISD::FSIN:
754  TrigNode = AMDGPUISD::SIN_HW;
755  break;
756  default:
757  llvm_unreachable("Wrong trig opcode");
758  }
759  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
760  DAG.getNode(ISD::FADD, DL, VT, FractPart,
761  DAG.getConstantFP(-0.5, DL, MVT::f32)));
762  if (Gen >= AMDGPUSubtarget::R700)
763  return TrigVal;
764  // On R600 hw, COS/SIN input must be between -Pi and Pi.
765  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
767 }
768 
769 SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
770  SelectionDAG &DAG) const {
771  SDValue Lo, Hi;
772  expandShiftParts(Op.getNode(), Lo, Hi, DAG);
773  return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
774 }
775 
776 SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
777  unsigned mainop, unsigned ovf) const {
778  SDLoc DL(Op);
779  EVT VT = Op.getValueType();
780 
781  SDValue Lo = Op.getOperand(0);
782  SDValue Hi = Op.getOperand(1);
783 
784  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
785  // Extend sign.
786  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
787  DAG.getValueType(MVT::i1));
788 
789  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
790 
791  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
792 }
793 
794 SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
795  SDLoc DL(Op);
796  return DAG.getNode(
797  ISD::SETCC,
798  DL,
799  MVT::i1,
800  Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
801  DAG.getCondCode(ISD::SETEQ));
802 }
803 
804 SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
805  SDLoc DL(Op);
806  return DAG.getNode(
807  ISD::SETCC,
808  DL,
809  MVT::i1,
810  Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
811  DAG.getCondCode(ISD::SETEQ));
812 }
813 
814 SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
815  const SDLoc &DL,
816  unsigned DwordOffset) const {
817  unsigned ByteOffset = DwordOffset * 4;
818  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
820 
821  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
822  assert(isInt<16>(ByteOffset));
823 
824  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
825  DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
827 }
828 
829 bool R600TargetLowering::isZero(SDValue Op) const {
830  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
831  return Cst->isZero();
832  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
833  return CstFP->isZero();
834  } else {
835  return false;
836  }
837 }
838 
839 bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
840  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
841  return CFP->isExactlyValue(1.0);
842  }
843  return isAllOnesConstant(Op);
844 }
845 
846 bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
847  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
848  return CFP->getValueAPF().isZero();
849  }
850  return isNullConstant(Op);
851 }
852 
853 SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
854  SDLoc DL(Op);
855  EVT VT = Op.getValueType();
856 
857  SDValue LHS = Op.getOperand(0);
858  SDValue RHS = Op.getOperand(1);
859  SDValue True = Op.getOperand(2);
860  SDValue False = Op.getOperand(3);
861  SDValue CC = Op.getOperand(4);
862  SDValue Temp;
863 
864  if (VT == MVT::f32) {
865  DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
866  SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
867  if (MinMax)
868  return MinMax;
869  }
870 
871  // LHS and RHS are guaranteed to be the same value type
872  EVT CompareVT = LHS.getValueType();
873 
874  // Check if we can lower this to a native operation.
875 
876  // Try to lower to a SET* instruction:
877  //
878  // SET* can match the following patterns:
879  //
880  // select_cc f32, f32, -1, 0, cc_supported
881  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
882  // select_cc i32, i32, -1, 0, cc_supported
883  //
884 
885  // Move hardware True/False values to the correct operand.
886  if (isHWTrueValue(False) && isHWFalseValue(True)) {
887  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
888  ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
889  if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
890  std::swap(False, True);
891  CC = DAG.getCondCode(InverseCC);
892  } else {
893  ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
894  if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
895  std::swap(False, True);
896  std::swap(LHS, RHS);
897  CC = DAG.getCondCode(SwapInvCC);
898  }
899  }
900  }
901 
902  if (isHWTrueValue(True) && isHWFalseValue(False) &&
903  (CompareVT == VT || VT == MVT::i32)) {
904  // This can be matched by a SET* instruction.
905  return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
906  }
907 
908  // Try to lower to a CND* instruction:
909  //
910  // CND* can match the following patterns:
911  //
912  // select_cc f32, 0.0, f32, f32, cc_supported
913  // select_cc f32, 0.0, i32, i32, cc_supported
914  // select_cc i32, 0, f32, f32, cc_supported
915  // select_cc i32, 0, i32, i32, cc_supported
916  //
917 
918  // Try to move the zero value to the RHS
919  if (isZero(LHS)) {
920  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
921  // Try swapping the operands
922  ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
923  if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
924  std::swap(LHS, RHS);
925  CC = DAG.getCondCode(CCSwapped);
926  } else {
927  // Try inverting the conditon and then swapping the operands
928  ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
929  CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
930  if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
931  std::swap(True, False);
932  std::swap(LHS, RHS);
933  CC = DAG.getCondCode(CCSwapped);
934  }
935  }
936  }
937  if (isZero(RHS)) {
938  SDValue Cond = LHS;
939  SDValue Zero = RHS;
940  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
941  if (CompareVT != VT) {
942  // Bitcast True / False to the correct types. This will end up being
943  // a nop, but it allows us to define only a single pattern in the
944  // .TD files for each CND* instruction rather than having to have
945  // one pattern for integer True/False and one for fp True/False
946  True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
947  False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
948  }
949 
950  switch (CCOpcode) {
951  case ISD::SETONE:
952  case ISD::SETUNE:
953  case ISD::SETNE:
954  CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
955  Temp = True;
956  True = False;
957  False = Temp;
958  break;
959  default:
960  break;
961  }
962  SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
963  Cond, Zero,
964  True, False,
965  DAG.getCondCode(CCOpcode));
966  return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
967  }
968 
969  // If we make it this for it means we have no native instructions to handle
970  // this SELECT_CC, so we must lower it.
971  SDValue HWTrue, HWFalse;
972 
973  if (CompareVT == MVT::f32) {
974  HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
975  HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
976  } else if (CompareVT == MVT::i32) {
977  HWTrue = DAG.getConstant(-1, DL, CompareVT);
978  HWFalse = DAG.getConstant(0, DL, CompareVT);
979  }
980  else {
981  llvm_unreachable("Unhandled value type in LowerSELECT_CC");
982  }
983 
984  // Lower this unsupported SELECT_CC into a combination of two supported
985  // SELECT_CC operations.
986  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
987 
988  return DAG.getNode(ISD::SELECT_CC, DL, VT,
989  Cond, HWFalse,
990  True, False,
991  DAG.getCondCode(ISD::SETNE));
992 }
993 
994 /// LLVM generates byte-addressed pointers. For indirect addressing, we need to
995 /// convert these pointers to a register index. Each register holds
996 /// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
997 /// \p StackWidth, which tells us how many of the 4 sub-registrers will be used
998 /// for indirect addressing.
999 SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1000  unsigned StackWidth,
1001  SelectionDAG &DAG) const {
1002  unsigned SRLPad;
1003  switch(StackWidth) {
1004  case 1:
1005  SRLPad = 2;
1006  break;
1007  case 2:
1008  SRLPad = 3;
1009  break;
1010  case 4:
1011  SRLPad = 4;
1012  break;
1013  default: llvm_unreachable("Invalid stack width");
1014  }
1015 
1016  SDLoc DL(Ptr);
1017  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1018  DAG.getConstant(SRLPad, DL, MVT::i32));
1019 }
1020 
1021 void R600TargetLowering::getStackAddress(unsigned StackWidth,
1022  unsigned ElemIdx,
1023  unsigned &Channel,
1024  unsigned &PtrIncr) const {
1025  switch (StackWidth) {
1026  default:
1027  case 1:
1028  Channel = 0;
1029  if (ElemIdx > 0) {
1030  PtrIncr = 1;
1031  } else {
1032  PtrIncr = 0;
1033  }
1034  break;
1035  case 2:
1036  Channel = ElemIdx % 2;
1037  if (ElemIdx == 2) {
1038  PtrIncr = 1;
1039  } else {
1040  PtrIncr = 0;
1041  }
1042  break;
1043  case 4:
1044  Channel = ElemIdx;
1045  PtrIncr = 0;
1046  break;
1047  }
1048 }
1049 
1050 SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1051  SelectionDAG &DAG) const {
1052  SDLoc DL(Store);
1053  //TODO: Who creates the i8 stores?
1054  assert(Store->isTruncatingStore()
1055  || Store->getValue().getValueType() == MVT::i8);
1056  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1057 
1058  SDValue Mask;
1059  if (Store->getMemoryVT() == MVT::i8) {
1060  assert(Store->getAlignment() >= 1);
1061  Mask = DAG.getConstant(0xff, DL, MVT::i32);
1062  } else if (Store->getMemoryVT() == MVT::i16) {
1063  assert(Store->getAlignment() >= 2);
1064  Mask = DAG.getConstant(0xffff, DL, MVT::i32);
1065  } else {
1066  llvm_unreachable("Unsupported private trunc store");
1067  }
1068 
1069  SDValue OldChain = Store->getChain();
1070  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1071  // Skip dummy
1072  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1073  SDValue BasePtr = Store->getBasePtr();
1074  SDValue Offset = Store->getOffset();
1075  EVT MemVT = Store->getMemoryVT();
1076 
1077  SDValue LoadPtr = BasePtr;
1078  if (!Offset.isUndef()) {
1079  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1080  }
1081 
1082  // Get dword location
1083  // TODO: this should be eliminated by the future SHR ptr, 2
1084  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1085  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1086 
1087  // Load dword
1088  // TODO: can we be smarter about machine pointer info?
1090  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1091 
1092  Chain = Dst.getValue(1);
1093 
1094  // Get offset in dword
1095  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1096  DAG.getConstant(0x3, DL, MVT::i32));
1097 
1098  // Convert byte offset to bit shift
1099  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1100  DAG.getConstant(3, DL, MVT::i32));
1101 
1102  // TODO: Contrary to the name of the functiom,
1103  // it also handles sub i32 non-truncating stores (like i1)
1104  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1105  Store->getValue());
1106 
1107  // Mask the value to the right type
1108  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1109 
1110  // Shift the value in place
1111  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1112  MaskedValue, ShiftAmt);
1113 
1114  // Shift the mask in place
1115  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1116 
1117  // Invert the mask. NOTE: if we had native ROL instructions we could
1118  // use inverted mask
1119  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1120 
1121  // Cleanup the target bits
1122  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1123 
1124  // Add the new bits
1125  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1126 
1127  // Store dword
1128  // TODO: Can we be smarter about MachinePointerInfo?
1129  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
1130 
1131  // If we are part of expanded vector, make our neighbors depend on this store
1132  if (VectorTrunc) {
1133  // Make all other vector elements depend on this store
1134  Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1135  DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1136  }
1137  return NewStore;
1138 }
1139 
1140 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1141  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1142  unsigned AS = StoreNode->getAddressSpace();
1143 
1144  SDValue Chain = StoreNode->getChain();
1145  SDValue Ptr = StoreNode->getBasePtr();
1146  SDValue Value = StoreNode->getValue();
1147 
1148  EVT VT = Value.getValueType();
1149  EVT MemVT = StoreNode->getMemoryVT();
1150  EVT PtrVT = Ptr.getValueType();
1151 
1152  SDLoc DL(Op);
1153 
1154  const bool TruncatingStore = StoreNode->isTruncatingStore();
1155 
1156  // Neither LOCAL nor PRIVATE can do vectors at the moment
1157  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
1158  TruncatingStore) &&
1159  VT.isVector()) {
1160  if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
1161  // Add an extra level of chain to isolate this vector
1162  SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1163  // TODO: can the chain be replaced without creating a new store?
1164  SDValue NewStore = DAG.getTruncStore(
1165  NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
1166  MemVT, StoreNode->getAlignment(),
1167  StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
1168  StoreNode = cast<StoreSDNode>(NewStore);
1169  }
1170 
1171  return scalarizeVectorStore(StoreNode, DAG);
1172  }
1173 
1174  Align Alignment = StoreNode->getAlign();
1175  if (Alignment < MemVT.getStoreSize() &&
1176  !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
1177  StoreNode->getMemOperand()->getFlags(),
1178  nullptr)) {
1179  return expandUnalignedStore(StoreNode, DAG);
1180  }
1181 
1182  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1183  DAG.getConstant(2, DL, PtrVT));
1184 
1185  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1186  // It is beneficial to create MSKOR here instead of combiner to avoid
1187  // artificial dependencies introduced by RMW
1188  if (TruncatingStore) {
1189  assert(VT.bitsLE(MVT::i32));
1190  SDValue MaskConstant;
1191  if (MemVT == MVT::i8) {
1192  MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1193  } else {
1194  assert(MemVT == MVT::i16);
1195  assert(StoreNode->getAlignment() >= 2);
1196  MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1197  }
1198 
1199  SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1200  DAG.getConstant(0x00000003, DL, PtrVT));
1201  SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1202  DAG.getConstant(3, DL, VT));
1203 
1204  // Put the mask in correct place
1205  SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1206 
1207  // Put the value bits in correct place
1208  SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1209  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1210 
1211  // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1212  // vector instead.
1213  SDValue Src[4] = {
1214  ShiftedValue,
1215  DAG.getConstant(0, DL, MVT::i32),
1216  DAG.getConstant(0, DL, MVT::i32),
1217  Mask
1218  };
1219  SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1220  SDValue Args[3] = { Chain, Input, DWordAddr };
1222  Op->getVTList(), Args, MemVT,
1223  StoreNode->getMemOperand());
1224  } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1225  // Convert pointer from byte address to dword address.
1226  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1227 
1228  if (StoreNode->isIndexed()) {
1229  llvm_unreachable("Indexed stores not supported yet");
1230  } else {
1231  Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1232  }
1233  return Chain;
1234  }
1235  }
1236 
1237  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1238  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1239  return SDValue();
1240 
1241  if (MemVT.bitsLT(MVT::i32))
1242  return lowerPrivateTruncStore(StoreNode, DAG);
1243 
1244  // Standard i32+ store, tag it with DWORDADDR to note that the address
1245  // has been shifted
1246  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1247  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1248  return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1249  }
1250 
1251  // Tagged i32+ stores will be matched by patterns
1252  return SDValue();
1253 }
1254 
1255 // return (512 + (kc_bank << 12)
1256 static int
1258  switch (AddressSpace) {
1260  return 512;
1262  return 512 + 4096;
1264  return 512 + 4096 * 2;
1266  return 512 + 4096 * 3;
1268  return 512 + 4096 * 4;
1270  return 512 + 4096 * 5;
1272  return 512 + 4096 * 6;
1274  return 512 + 4096 * 7;
1276  return 512 + 4096 * 8;
1278  return 512 + 4096 * 9;
1280  return 512 + 4096 * 10;
1282  return 512 + 4096 * 11;
1284  return 512 + 4096 * 12;
1286  return 512 + 4096 * 13;
1288  return 512 + 4096 * 14;
1290  return 512 + 4096 * 15;
1291  default:
1292  return -1;
1293  }
1294 }
1295 
1296 SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1297  SelectionDAG &DAG) const {
1298  SDLoc DL(Op);
1299  LoadSDNode *Load = cast<LoadSDNode>(Op);
1300  ISD::LoadExtType ExtType = Load->getExtensionType();
1301  EVT MemVT = Load->getMemoryVT();
1302  assert(Load->getAlignment() >= MemVT.getStoreSize());
1303 
1304  SDValue BasePtr = Load->getBasePtr();
1305  SDValue Chain = Load->getChain();
1306  SDValue Offset = Load->getOffset();
1307 
1308  SDValue LoadPtr = BasePtr;
1309  if (!Offset.isUndef()) {
1310  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1311  }
1312 
1313  // Get dword location
1314  // NOTE: this should be eliminated by the future SHR ptr, 2
1315  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1316  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1317 
1318  // Load dword
1319  // TODO: can we be smarter about machine pointer info?
1321  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1322 
1323  // Get offset within the register.
1324  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1325  LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1326 
1327  // Bit offset of target byte (byteIdx * 8).
1328  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1329  DAG.getConstant(3, DL, MVT::i32));
1330 
1331  // Shift to the right.
1332  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1333 
1334  // Eliminate the upper bits by setting them to ...
1335  EVT MemEltVT = MemVT.getScalarType();
1336 
1337  if (ExtType == ISD::SEXTLOAD) { // ... ones.
1338  SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1339  Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1340  } else { // ... or zeros.
1341  Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1342  }
1343 
1344  SDValue Ops[] = {
1345  Ret,
1346  Read.getValue(1) // This should be our output chain
1347  };
1348 
1349  return DAG.getMergeValues(Ops, DL);
1350 }
1351 
1352 SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1353  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1354  unsigned AS = LoadNode->getAddressSpace();
1355  EVT MemVT = LoadNode->getMemoryVT();
1356  ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1357 
1358  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1359  ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1360  return lowerPrivateExtLoad(Op, DAG);
1361  }
1362 
1363  SDLoc DL(Op);
1364  EVT VT = Op.getValueType();
1365  SDValue Chain = LoadNode->getChain();
1366  SDValue Ptr = LoadNode->getBasePtr();
1367 
1368  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1369  LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1370  VT.isVector()) {
1371  SDValue Ops[2];
1372  std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
1373  return DAG.getMergeValues(Ops, DL);
1374  }
1375 
1376  // This is still used for explicit load from addrspace(8)
1377  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1378  if (ConstantBlock > -1 &&
1379  ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1380  (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1381  SDValue Result;
1382  if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1383  isa<ConstantSDNode>(Ptr)) {
1384  return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1385  } else {
1386  //TODO: Does this even work?
1387  // non-constant ptr can't be folded, keeps it as a v4f32 load
1389  DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1390  DAG.getConstant(4, DL, MVT::i32)),
1391  DAG.getConstant(LoadNode->getAddressSpace() -
1393  );
1394  }
1395 
1396  if (!VT.isVector()) {
1398  DAG.getConstant(0, DL, MVT::i32));
1399  }
1400 
1401  SDValue MergedValues[2] = {
1402  Result,
1403  Chain
1404  };
1405  return DAG.getMergeValues(MergedValues, DL);
1406  }
1407 
1408  // For most operations returning SDValue() will result in the node being
1409  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1410  // need to manually expand loads that may be legal in some address spaces and
1411  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1412  // compute shaders, since the data is sign extended when it is uploaded to the
1413  // buffer. However SEXT loads from other address spaces are not supported, so
1414  // we need to expand them here.
1415  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1416  assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1417  SDValue NewLoad = DAG.getExtLoad(
1418  ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1419  LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
1420  SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1421  DAG.getValueType(MemVT));
1422 
1423  SDValue MergedValues[2] = { Res, Chain };
1424  return DAG.getMergeValues(MergedValues, DL);
1425  }
1426 
1427  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1428  return SDValue();
1429  }
1430 
1431  // DWORDADDR ISD marks already shifted address
1432  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1433  assert(VT == MVT::i32);
1434  Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1435  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1436  return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1437  }
1438  return SDValue();
1439 }
1440 
1441 SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1442  SDValue Chain = Op.getOperand(0);
1443  SDValue Cond = Op.getOperand(1);
1444  SDValue Jump = Op.getOperand(2);
1445 
1446  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1447  Chain, Jump, Cond);
1448 }
1449 
1450 SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1451  SelectionDAG &DAG) const {
1452  MachineFunction &MF = DAG.getMachineFunction();
1453  const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1454 
1455  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1456 
1457  unsigned FrameIndex = FIN->getIndex();
1458  Register IgnoredFrameReg;
1460  TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1461  return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
1462  SDLoc(Op), Op.getValueType());
1463 }
1464 
1466  bool IsVarArg) const {
1467  switch (CC) {
1470  case CallingConv::C:
1471  case CallingConv::Fast:
1472  case CallingConv::Cold:
1473  llvm_unreachable("kernels should not be handled here");
1481  return CC_R600;
1482  default:
1483  report_fatal_error("Unsupported calling convention.");
1484  }
1485 }
1486 
1487 /// XXX Only kernel functions are supported, so we can assume for now that
1488 /// every function is a kernel function, but in the future we should use
1489 /// separate calling conventions for kernel and non-kernel functions.
1491  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1492  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1493  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1495  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1496  *DAG.getContext());
1497  MachineFunction &MF = DAG.getMachineFunction();
1499 
1500  if (AMDGPU::isShader(CallConv)) {
1501  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
1502  } else {
1504  }
1505 
1506  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1507  CCValAssign &VA = ArgLocs[i];
1508  const ISD::InputArg &In = Ins[i];
1509  EVT VT = In.VT;
1510  EVT MemVT = VA.getLocVT();
1511  if (!VT.isVector() && MemVT.isVector()) {
1512  // Get load source type if scalarized.
1513  MemVT = MemVT.getVectorElementType();
1514  }
1515 
1516  if (AMDGPU::isShader(CallConv)) {
1517  Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
1518  SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1519  InVals.push_back(Register);
1520  continue;
1521  }
1522 
1523  // i64 isn't a legal type, so the register type used ends up as i32, which
1524  // isn't expected here. It attempts to create this sextload, but it ends up
1525  // being invalid. Somehow this seems to work with i64 arguments, but breaks
1526  // for <1 x i64>.
1527 
1528  // The first 36 bytes of the input buffer contains information about
1529  // thread group and global sizes.
1531  if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1532  // FIXME: This should really check the extload type, but the handling of
1533  // extload vector parameters seems to be broken.
1534 
1535  // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1536  Ext = ISD::SEXTLOAD;
1537  }
1538 
1539  // Compute the offset from the value.
1540  // XXX - I think PartOffset should give you this, but it seems to give the
1541  // size of the register which isn't useful.
1542 
1543  unsigned PartOffset = VA.getLocMemOffset();
1544  unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);
1545 
1547  SDValue Arg = DAG.getLoad(
1548  ISD::UNINDEXED, Ext, VT, DL, Chain,
1549  DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
1550  PtrInfo,
1551  MemVT, Alignment, MachineMemOperand::MONonTemporal |
1554 
1555  InVals.push_back(Arg);
1556  }
1557  return Chain;
1558 }
1559 
1561  EVT VT) const {
1562  if (!VT.isVector())
1563  return MVT::i32;
1565 }
1566 
1568  const MachineFunction &MF) const {
1569  // Local and Private addresses do not handle vectors. Limit to i32
1570  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
1571  return (MemVT.getSizeInBits() <= 32);
1572  }
1573  return true;
1574 }
1575 
1577  EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1578  bool *IsFast) const {
1579  if (IsFast)
1580  *IsFast = false;
1581 
1582  if (!VT.isSimple() || VT == MVT::Other)
1583  return false;
1584 
1585  if (VT.bitsLT(MVT::i32))
1586  return false;
1587 
1588  // TODO: This is a rough estimate.
1589  if (IsFast)
1590  *IsFast = true;
1591 
1592  return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
1593 }
1594 
1596  SelectionDAG &DAG, SDValue VectorEntry,
1597  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1598  assert(RemapSwizzle.empty());
1599 
1600  SDLoc DL(VectorEntry);
1601  EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1602 
1603  SDValue NewBldVec[4];
1604  for (unsigned i = 0; i < 4; i++)
1605  NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1606  DAG.getIntPtrConstant(i, DL));
1607 
1608  for (unsigned i = 0; i < 4; i++) {
1609  if (NewBldVec[i].isUndef())
1610  // We mask write here to teach later passes that the ith element of this
1611  // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1612  // break false dependencies and additionnaly make assembly easier to read.
1613  RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1614  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1615  if (C->isZero()) {
1616  RemapSwizzle[i] = 4; // SEL_0
1617  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1618  } else if (C->isExactlyValue(1.0)) {
1619  RemapSwizzle[i] = 5; // SEL_1
1620  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1621  }
1622  }
1623 
1624  if (NewBldVec[i].isUndef())
1625  continue;
1626 
1627  for (unsigned j = 0; j < i; j++) {
1628  if (NewBldVec[i] == NewBldVec[j]) {
1629  NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1630  RemapSwizzle[i] = j;
1631  break;
1632  }
1633  }
1634  }
1635 
1636  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1637  NewBldVec);
1638 }
1639 
1641  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1642  assert(RemapSwizzle.empty());
1643 
1644  SDLoc DL(VectorEntry);
1645  EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1646 
1647  SDValue NewBldVec[4];
1648  bool isUnmovable[4] = {false, false, false, false};
1649  for (unsigned i = 0; i < 4; i++)
1650  NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1651  DAG.getIntPtrConstant(i, DL));
1652 
1653  for (unsigned i = 0; i < 4; i++) {
1654  RemapSwizzle[i] = i;
1655  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1656  unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1657  ->getZExtValue();
1658  if (i == Idx)
1659  isUnmovable[Idx] = true;
1660  }
1661  }
1662 
1663  for (unsigned i = 0; i < 4; i++) {
1664  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1665  unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1666  ->getZExtValue();
1667  if (isUnmovable[Idx])
1668  continue;
1669  // Swap i and Idx
1670  std::swap(NewBldVec[Idx], NewBldVec[i]);
1671  std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1672  break;
1673  }
1674  }
1675 
1676  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1677  NewBldVec);
1678 }
1679 
1680 SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
1681  SelectionDAG &DAG,
1682  const SDLoc &DL) const {
1683  // Old -> New swizzle values
1684  DenseMap<unsigned, unsigned> SwizzleRemap;
1685 
1686  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1687  for (unsigned i = 0; i < 4; i++) {
1688  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1689  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1690  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1691  }
1692 
1693  SwizzleRemap.clear();
1694  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1695  for (unsigned i = 0; i < 4; i++) {
1696  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1697  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1698  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1699  }
1700 
1701  return BuildVector;
1702 }
1703 
1704 SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
1705  SelectionDAG &DAG) const {
1706  SDLoc DL(LoadNode);
1707  EVT VT = LoadNode->getValueType(0);
1708  SDValue Chain = LoadNode->getChain();
1709  SDValue Ptr = LoadNode->getBasePtr();
1710  assert (isa<ConstantSDNode>(Ptr));
1711 
1712  //TODO: Support smaller loads
1713  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1714  return SDValue();
1715 
1716  if (LoadNode->getAlignment() < 4)
1717  return SDValue();
1718 
1719  int ConstantBlock = ConstantAddressBlock(Block);
1720 
1721  SDValue Slots[4];
1722  for (unsigned i = 0; i < 4; i++) {
1723  // We want Const position encoded with the following formula :
1724  // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1725  // const_index is Ptr computed by llvm using an alignment of 16.
1726  // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1727  // then div by 4 at the ISel step
1728  SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1729  DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1730  Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1731  }
1732  EVT NewVT = MVT::v4i32;
1733  unsigned NumElements = 4;
1734  if (VT.isVector()) {
1735  NewVT = VT;
1736  NumElements = VT.getVectorNumElements();
1737  }
1738  SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
1739  if (!VT.isVector()) {
1741  DAG.getConstant(0, DL, MVT::i32));
1742  }
1743  SDValue MergedValues[2] = {
1744  Result,
1745  Chain
1746  };
1747  return DAG.getMergeValues(MergedValues, DL);
1748 }
1749 
1750 //===----------------------------------------------------------------------===//
1751 // Custom DAG Optimizations
1752 //===----------------------------------------------------------------------===//
1753 
1755  DAGCombinerInfo &DCI) const {
1756  SelectionDAG &DAG = DCI.DAG;
1757  SDLoc DL(N);
1758 
1759  switch (N->getOpcode()) {
1760  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1761  case ISD::FP_ROUND: {
1762  SDValue Arg = N->getOperand(0);
1763  if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1764  return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1765  Arg.getOperand(0));
1766  }
1767  break;
1768  }
1769 
1770  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1771  // (i32 select_cc f32, f32, -1, 0 cc)
1772  //
1773  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1774  // this to one of the SET*_DX10 instructions.
1775  case ISD::FP_TO_SINT: {
1776  SDValue FNeg = N->getOperand(0);
1777  if (FNeg.getOpcode() != ISD::FNEG) {
1778  return SDValue();
1779  }
1780  SDValue SelectCC = FNeg.getOperand(0);
1781  if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1782  SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1783  SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1784  !isHWTrueValue(SelectCC.getOperand(2)) ||
1785  !isHWFalseValue(SelectCC.getOperand(3))) {
1786  return SDValue();
1787  }
1788 
1789  return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1790  SelectCC.getOperand(0), // LHS
1791  SelectCC.getOperand(1), // RHS
1792  DAG.getConstant(-1, DL, MVT::i32), // True
1793  DAG.getConstant(0, DL, MVT::i32), // False
1794  SelectCC.getOperand(4)); // CC
1795  }
1796 
1797  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1798  // => build_vector elt0, ... , NewEltIdx, ... , eltN
1799  case ISD::INSERT_VECTOR_ELT: {
1800  SDValue InVec = N->getOperand(0);
1801  SDValue InVal = N->getOperand(1);
1802  SDValue EltNo = N->getOperand(2);
1803 
1804  // If the inserted element is an UNDEF, just use the input vector.
1805  if (InVal.isUndef())
1806  return InVec;
1807 
1808  EVT VT = InVec.getValueType();
1809 
1810  // If we can't generate a legal BUILD_VECTOR, exit
1812  return SDValue();
1813 
1814  // Check that we know which element is being inserted
1815  if (!isa<ConstantSDNode>(EltNo))
1816  return SDValue();
1817  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1818 
1819  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1820  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1821  // vector elements.
1823  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1824  Ops.append(InVec.getNode()->op_begin(),
1825  InVec.getNode()->op_end());
1826  } else if (InVec.isUndef()) {
1827  unsigned NElts = VT.getVectorNumElements();
1828  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1829  } else {
1830  return SDValue();
1831  }
1832 
1833  // Insert the element
1834  if (Elt < Ops.size()) {
1835  // All the operands of BUILD_VECTOR must have the same type;
1836  // we enforce that here.
1837  EVT OpVT = Ops[0].getValueType();
1838  if (InVal.getValueType() != OpVT)
1839  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1840  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1841  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1842  Ops[Elt] = InVal;
1843  }
1844 
1845  // Return the new vector
1846  return DAG.getBuildVector(VT, DL, Ops);
1847  }
1848 
1849  // Extract_vec (Build_vector) generated by custom lowering
1850  // also needs to be customly combined
1851  case ISD::EXTRACT_VECTOR_ELT: {
1852  SDValue Arg = N->getOperand(0);
1853  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1854  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1855  unsigned Element = Const->getZExtValue();
1856  return Arg->getOperand(Element);
1857  }
1858  }
1859  if (Arg.getOpcode() == ISD::BITCAST &&
1860  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1861  (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1862  Arg.getValueType().getVectorNumElements())) {
1863  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1864  unsigned Element = Const->getZExtValue();
1865  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1866  Arg->getOperand(0).getOperand(Element));
1867  }
1868  }
1869  break;
1870  }
1871 
1872  case ISD::SELECT_CC: {
1873  // Try common optimizations
1875  return Ret;
1876 
1877  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1878  // selectcc x, y, a, b, inv(cc)
1879  //
1880  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1881  // selectcc x, y, a, b, cc
1882  SDValue LHS = N->getOperand(0);
1883  if (LHS.getOpcode() != ISD::SELECT_CC) {
1884  return SDValue();
1885  }
1886 
1887  SDValue RHS = N->getOperand(1);
1888  SDValue True = N->getOperand(2);
1889  SDValue False = N->getOperand(3);
1890  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1891 
1892  if (LHS.getOperand(2).getNode() != True.getNode() ||
1893  LHS.getOperand(3).getNode() != False.getNode() ||
1894  RHS.getNode() != False.getNode()) {
1895  return SDValue();
1896  }
1897 
1898  switch (NCC) {
1899  default: return SDValue();
1900  case ISD::SETNE: return LHS;
1901  case ISD::SETEQ: {
1902  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1903  LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
1904  if (DCI.isBeforeLegalizeOps() ||
1906  return DAG.getSelectCC(DL,
1907  LHS.getOperand(0),
1908  LHS.getOperand(1),
1909  LHS.getOperand(2),
1910  LHS.getOperand(3),
1911  LHSCC);
1912  break;
1913  }
1914  }
1915  return SDValue();
1916  }
1917 
1918  case AMDGPUISD::R600_EXPORT: {
1919  SDValue Arg = N->getOperand(1);
1920  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1921  break;
1922 
1923  SDValue NewArgs[8] = {
1924  N->getOperand(0), // Chain
1925  SDValue(),
1926  N->getOperand(2), // ArrayBase
1927  N->getOperand(3), // Type
1928  N->getOperand(4), // SWZ_X
1929  N->getOperand(5), // SWZ_Y
1930  N->getOperand(6), // SWZ_Z
1931  N->getOperand(7) // SWZ_W
1932  };
1933  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1934  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1935  }
1936  case AMDGPUISD::TEXTURE_FETCH: {
1937  SDValue Arg = N->getOperand(1);
1938  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1939  break;
1940 
1941  SDValue NewArgs[19] = {
1942  N->getOperand(0),
1943  N->getOperand(1),
1944  N->getOperand(2),
1945  N->getOperand(3),
1946  N->getOperand(4),
1947  N->getOperand(5),
1948  N->getOperand(6),
1949  N->getOperand(7),
1950  N->getOperand(8),
1951  N->getOperand(9),
1952  N->getOperand(10),
1953  N->getOperand(11),
1954  N->getOperand(12),
1955  N->getOperand(13),
1956  N->getOperand(14),
1957  N->getOperand(15),
1958  N->getOperand(16),
1959  N->getOperand(17),
1960  N->getOperand(18),
1961  };
1962  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1963  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1964  }
1965 
1966  case ISD::LOAD: {
1967  LoadSDNode *LoadNode = cast<LoadSDNode>(N);
1968  SDValue Ptr = LoadNode->getBasePtr();
1969  if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1970  isa<ConstantSDNode>(Ptr))
1971  return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1972  break;
1973  }
1974 
1975  default: break;
1976  }
1977 
1979 }
1980 
1981 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1982  SDValue &Src, SDValue &Neg, SDValue &Abs,
1983  SDValue &Sel, SDValue &Imm,
1984  SelectionDAG &DAG) const {
1985  const R600InstrInfo *TII = Subtarget->getInstrInfo();
1986  if (!Src.isMachineOpcode())
1987  return false;
1988 
1989  switch (Src.getMachineOpcode()) {
1990  case R600::FNEG_R600:
1991  if (!Neg.getNode())
1992  return false;
1993  Src = Src.getOperand(0);
1994  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1995  return true;
1996  case R600::FABS_R600:
1997  if (!Abs.getNode())
1998  return false;
1999  Src = Src.getOperand(0);
2000  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2001  return true;
2002  case R600::CONST_COPY: {
2003  unsigned Opcode = ParentNode->getMachineOpcode();
2004  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2005 
2006  if (!Sel.getNode())
2007  return false;
2008 
2009  SDValue CstOffset = Src.getOperand(0);
2010  if (ParentNode->getValueType(0).isVector())
2011  return false;
2012 
2013  // Gather constants values
2014  int SrcIndices[] = {
2015  TII->getOperandIdx(Opcode, R600::OpName::src0),
2016  TII->getOperandIdx(Opcode, R600::OpName::src1),
2017  TII->getOperandIdx(Opcode, R600::OpName::src2),
2018  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2019  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2020  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2021  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2022  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2023  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2024  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2025  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2026  };
2027  std::vector<unsigned> Consts;
2028  for (int OtherSrcIdx : SrcIndices) {
2029  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2030  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2031  continue;
2032  if (HasDst) {
2033  OtherSrcIdx--;
2034  OtherSelIdx--;
2035  }
2036  if (RegisterSDNode *Reg =
2037  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2038  if (Reg->getReg() == R600::ALU_CONST) {
2039  ConstantSDNode *Cst
2040  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2041  Consts.push_back(Cst->getZExtValue());
2042  }
2043  }
2044  }
2045 
2046  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2047  Consts.push_back(Cst->getZExtValue());
2048  if (!TII->fitsConstReadLimitations(Consts)) {
2049  return false;
2050  }
2051 
2052  Sel = CstOffset;
2053  Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2054  return true;
2055  }
2056  case R600::MOV_IMM_GLOBAL_ADDR:
2057  // Check if the Imm slot is used. Taken from below.
2058  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2059  return false;
2060  Imm = Src.getOperand(0);
2061  Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2062  return true;
2063  case R600::MOV_IMM_I32:
2064  case R600::MOV_IMM_F32: {
2065  unsigned ImmReg = R600::ALU_LITERAL_X;
2066  uint64_t ImmValue = 0;
2067 
2068  if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2069  ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2070  float FloatValue = FPC->getValueAPF().convertToFloat();
2071  if (FloatValue == 0.0) {
2072  ImmReg = R600::ZERO;
2073  } else if (FloatValue == 0.5) {
2074  ImmReg = R600::HALF;
2075  } else if (FloatValue == 1.0) {
2076  ImmReg = R600::ONE;
2077  } else {
2078  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2079  }
2080  } else {
2081  ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
2082  uint64_t Value = C->getZExtValue();
2083  if (Value == 0) {
2084  ImmReg = R600::ZERO;
2085  } else if (Value == 1) {
2086  ImmReg = R600::ONE_INT;
2087  } else {
2088  ImmValue = Value;
2089  }
2090  }
2091 
2092  // Check that we aren't already using an immediate.
2093  // XXX: It's possible for an instruction to have more than one
2094  // immediate operand, but this is not supported yet.
2095  if (ImmReg == R600::ALU_LITERAL_X) {
2096  if (!Imm.getNode())
2097  return false;
2098  ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2099  if (C->getZExtValue())
2100  return false;
2101  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2102  }
2103  Src = DAG.getRegister(ImmReg, MVT::i32);
2104  return true;
2105  }
2106  default:
2107  return false;
2108  }
2109 }
2110 
2111 /// Fold the instructions after selecting them
2112 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2113  SelectionDAG &DAG) const {
2114  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2115  if (!Node->isMachineOpcode())
2116  return Node;
2117 
2118  unsigned Opcode = Node->getMachineOpcode();
2119  SDValue FakeOp;
2120 
2121  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2122 
2123  if (Opcode == R600::DOT_4) {
2124  int OperandIdx[] = {
2125  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2126  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2127  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2128  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2129  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2130  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2131  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2132  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2133  };
2134  int NegIdx[] = {
2135  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2136  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2137  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2138  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2139  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2140  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2141  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2142  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2143  };
2144  int AbsIdx[] = {
2145  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2146  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2147  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2148  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2149  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2150  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2151  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2152  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2153  };
2154  for (unsigned i = 0; i < 8; i++) {
2155  if (OperandIdx[i] < 0)
2156  return Node;
2157  SDValue &Src = Ops[OperandIdx[i] - 1];
2158  SDValue &Neg = Ops[NegIdx[i] - 1];
2159  SDValue &Abs = Ops[AbsIdx[i] - 1];
2160  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2161  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2162  if (HasDst)
2163  SelIdx--;
2164  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2165  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2166  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2167  }
2168  } else if (Opcode == R600::REG_SEQUENCE) {
2169  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2170  SDValue &Src = Ops[i];
2171  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2172  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2173  }
2174  } else {
2175  if (!TII->hasInstrModifiers(Opcode))
2176  return Node;
2177  int OperandIdx[] = {
2178  TII->getOperandIdx(Opcode, R600::OpName::src0),
2179  TII->getOperandIdx(Opcode, R600::OpName::src1),
2180  TII->getOperandIdx(Opcode, R600::OpName::src2)
2181  };
2182  int NegIdx[] = {
2183  TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2184  TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2185  TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2186  };
2187  int AbsIdx[] = {
2188  TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2189  TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2190  -1
2191  };
2192  for (unsigned i = 0; i < 3; i++) {
2193  if (OperandIdx[i] < 0)
2194  return Node;
2195  SDValue &Src = Ops[OperandIdx[i] - 1];
2196  SDValue &Neg = Ops[NegIdx[i] - 1];
2197  SDValue FakeAbs;
2198  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2199  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2200  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2201  int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2202  if (HasDst) {
2203  SelIdx--;
2204  ImmIdx--;
2205  }
2206  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2207  SDValue &Imm = Ops[ImmIdx];
2208  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2209  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2210  }
2211  }
2212 
2213  return Node;
2214 }
llvm::CallingConv::C
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:153
i
i
Definition: README.txt:29
llvm::ISD::SETUGE
@ SETUGE
Definition: ISDOpcodes.h:1368
llvm::AMDGPUISD::STORE_MSKOR
@ STORE_MSKOR
Definition: AMDGPUISelLowering.h:485
llvm::TargetLowering::scalarizeVectorLoad
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
Definition: TargetLowering.cpp:7433
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1556
llvm::RegisterSDNode
Definition: SelectionDAGNodes.h:2098
llvm::APFloat::convertToFloat
float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:4915
llvm::StoreSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2331
llvm::TargetLoweringBase::setSchedulingPreference
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
Definition: TargetLowering.h:2134
llvm::ISD::SETLE
@ SETLE
Definition: ISDOpcodes.h:1379
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::ISD::SETO
@ SETO
Definition: ISDOpcodes.h:1364
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:103
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
---------------------— PointerInfo ------------------------------------—
Definition: AllocatorList.h:23
Reg
unsigned Reg
Definition: MachineSink.cpp:1566
llvm::SDNode::getValueType
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Definition: SelectionDAGNodes.h:966
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1086
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:197
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:112
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:633
llvm::TargetLowering::EmitInstrWithCustomInserter
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: SelectionDAGISel.cpp:294
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:848
llvm::AMDGPUAS::CONSTANT_BUFFER_8
@ CONSTANT_BUFFER_8
Definition: AMDGPU.h:380
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1380
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::AMDGPUTargetLowering
Definition: AMDGPUISelLowering.h:27
llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition: ISDOpcodes.h:1335
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:191
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:152
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
llvm::ISD::UADDO
@ UADDO
Definition: ISDOpcodes.h:311
llvm::ISD::CTTZ_ZERO_UNDEF
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:674
llvm::SelectionDAG::getValueType
SDValue getValueType(EVT)
Definition: SelectionDAG.cpp:1722
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:691
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:223
llvm::R600FrameLowering
Definition: R600FrameLowering.h:16
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1168
llvm::AMDGPUISD::CONST_ADDRESS
@ CONST_ADDRESS
Definition: AMDGPUISelLowering.h:437
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1375
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:8551
llvm::AMDGPUAS::CONSTANT_BUFFER_6
@ CONSTANT_BUFFER_6
Definition: AMDGPU.h:378
llvm::MVT::v2i1
@ v2i1
Definition: MachineValueType.h:65
llvm::R600Subtarget::getInstrInfo
const R600InstrInfo * getInstrInfo() const override
Definition: R600Subtarget.h:56
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition: SelectionDAGNodes.h:2746
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:145
llvm::SelectionDAG::getVectorIdxConstant
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1508
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:213
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1336
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1359
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:732
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:455
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:466
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:785
llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3550
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2281
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1370
llvm::CCState::AnalyzeFormalArguments
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
Definition: CallingConvLower.cpp:90
isEOP
static bool isEOP(MachineBasicBlock::iterator I)
Definition: R600ISelLowering.cpp:258
llvm::NVPTXISD::RETURN
@ RETURN
Definition: NVPTXISelLowering.h:49
llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:715
llvm::SelectionDAG::getZeroExtendInReg
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
Definition: SelectionDAG.cpp:1318
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition: SelectionDAG.cpp:7512
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:143
llvm::AMDGPUISD::CARRY
@ CARRY
Definition: AMDGPUISelLowering.h:417
llvm::R600Subtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition: R600Subtarget.h:98
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:702
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
llvm::R600Subtarget::hasCARRY
bool hasCARRY() const
Definition: R600Subtarget.h:109
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:81
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:847
llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
llvm::AMDGPUTargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition: AMDGPUISelLowering.cpp:1320
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::AMDGPUAS::CONSTANT_BUFFER_2
@ CONSTANT_BUFFER_2
Definition: AMDGPU.h:374
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1335
llvm::LSBaseSDNode::isIndexed
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Definition: SelectionDAGNodes.h:2269
llvm::R600Subtarget::hasFFBL
bool hasFFBL() const
Definition: R600Subtarget.h:117
llvm::EVT::bitsLE
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:281
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
R600ISelLowering.h
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1366
llvm::TargetLowering::expandUnalignedStore
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
Definition: TargetLowering.cpp:7752
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:447
llvm::R600MachineFunctionInfo
Definition: R600MachineFunctionInfo.h:19
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:216
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:956
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:1985
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
isUndef
static bool isUndef(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:912
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:130
llvm::SDNode::op_end
op_iterator op_end() const
Definition: SelectionDAGNodes.h:912
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:206
llvm::MVT::integer_valuetypes
static auto integer_valuetypes()
Definition: MachineValueType.h:1411
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:885
llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1335
llvm::R600TargetLowering::LowerFormalArguments
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
XXX Only kernel functions are supported, so we can assume for now that every function is a kernel fun...
Definition: R600ISelLowering.cpp:1490
llvm::R600Subtarget::hasBFE
bool hasBFE() const
Definition: R600Subtarget.h:90
llvm::ISD::BR_CC
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:963
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition: SelectionDAG.cpp:7462
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:629
llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:363
llvm::R600Subtarget::hasFMA
bool hasFMA() const
Definition: R600Subtarget.h:125
llvm::MinAlign
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:672
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:636
llvm::ISD::GlobalAddress
@ GlobalAddress
Definition: ISDOpcodes.h:78
llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:694
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1121
llvm::AMDGPUAS::CONSTANT_BUFFER_15
@ CONSTANT_BUFFER_15
Definition: AMDGPU.h:387
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:33
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1359
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:679
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalizeOps
bool isBeforeLegalizeOps() const
Definition: TargetLowering.h:3556
llvm::AMDGPUAS::CONSTANT_BUFFER_13
@ CONSTANT_BUFFER_13
Definition: AMDGPU.h:385
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
llvm::SelectionDAG::getTruncStore
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7563
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:648
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:150
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:724
MO_FLAG_PUSH
#define MO_FLAG_PUSH
Definition: R600Defines.h:18
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:951
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:747
llvm::AMDGPUISD::R600_EXPORT
@ R600_EXPORT
Definition: AMDGPUISelLowering.h:436
llvm::MachineMemOperand::getValue
const Value * getValue() const
Return the base address of the memory access.
Definition: MachineMemOperand.h:211
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetLoweringBase::isCondCodeLegal
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
Definition: TargetLowering.h:1364
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:56
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1373
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:309
llvm::TargetLowering::DAGCombinerInfo
Definition: TargetLowering.h:3544
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:735
llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition: TargetLowering.h:234
llvm::AMDGPUISD::DOT4
@ DOT4
Definition: AMDGPUISelLowering.h:416
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:129
llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition: TargetLowering.h:2181
llvm::AMDGPUAS::CONSTANT_BUFFER_7
@ CONSTANT_BUFFER_7
Definition: AMDGPU.h:379
MO_FLAG_MASK
#define MO_FLAG_MASK
Definition: R600Defines.h:17
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:140
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1453
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:881
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1118
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1744
llvm::R600TargetLowering::CCAssignFnForCall
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Definition: R600ISelLowering.cpp:1465
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7239
R600InstrInfo.h
llvm::R600TargetLowering::canMergeStoresTo
bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
Definition: R600ISelLowering.cpp:1567
llvm::R600TargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *IsFast=nullptr) const override
Determine if the target supports unaligned memory accesses.
Definition: R600ISelLowering.cpp:1576
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:632
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1361
Align
uint64_t Align
Definition: ELFObjHandler.cpp:83
llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition: SelectionDAGNodes.h:1730
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1132
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:151
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1304
llvm::TargetLoweringBase::isOperationLegal
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Definition: TargetLowering.h:1218
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:207
llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
Definition: TargetLowering.h:2129
llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:400
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:273
llvm::AMDGPUTargetLowering::LowerGlobalAddress
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
Definition: AMDGPUISelLowering.cpp:1345
llvm::MVT::v4i16
@ v4i16
Definition: MachineValueType.h:91
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:506
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:315
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:78
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
ConstantAddressBlock
static int ConstantAddressBlock(unsigned AddressSpace)
Definition: R600ISelLowering.cpp:1257
llvm::StoreSDNode::isTruncatingStore
bool isTruncatingStore() const
Return true if the op does a truncation before store.
Definition: SelectionDAGNodes.h:2325
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1362
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1367
llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:683
llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:231
R600MachineFunctionInfo.h
llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:177
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:377
llvm::MVT::v2i8
@ v2i8
Definition: MachineValueType.h:77
llvm::AMDGPUISD::BUILD_VERTICAL_VECTOR
@ BUILD_VERTICAL_VECTOR
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
Definition: AMDGPUISelLowering.h:471
llvm::ISD::SETUNE
@ SETUNE
Definition: ISDOpcodes.h:1371
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
R600MCTargetDesc.h
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition: TargetLowering.h:2198
llvm::MVT::v32i16
@ v32i16
Definition: MachineValueType.h:94
llvm::pdb::OMFSegDescFlags::Read
@ Read
llvm::AMDGPUTargetLowering::LowerUDIVREM64
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
Definition: AMDGPUISelLowering.cpp:1817
llvm::R600Subtarget
Definition: R600Subtarget.h:35
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
llvm::AMDGPUTargetLowering::LowerSDIVREM
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
Definition: AMDGPUISelLowering.cpp:2087
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:80
llvm::AMDGPUAS::CONSTANT_BUFFER_9
@ CONSTANT_BUFFER_9
Definition: AMDGPU.h:381
llvm::R600Subtarget::hasBORROW
bool hasBORROW() const
Definition: R600Subtarget.h:105
llvm::AMDGPUISD::BORROW
@ BORROW
Definition: AMDGPUISelLowering.h:418
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:341
llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:470
llvm::DenseMapBase::clear
void clear()
Definition: DenseMap.h:111
llvm::TargetLowering::expandFP_TO_SINT
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
Definition: TargetLowering.cpp:6799
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:93
Index
uint32_t Index
Definition: ELFObjHandler.cpp:84
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::TargetLoweringBase::setHasExtractBitsInsn
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
Definition: TargetLowering.h:2164
uint64_t
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:786
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1605
llvm::R600FrameLowering::getFrameIndexReference
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
Definition: R600FrameLowering.cpp:18
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1340
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:921
llvm::SelectionDAG::getIntPtrConstant
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1496
llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:204
llvm::TargetLoweringBase::Promote
@ Promote
Definition: TargetLowering.h:198
llvm::ConstantPointerNull::get
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1757
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:38
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:68
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap< unsigned, unsigned >
llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:761
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:511
llvm::R600Subtarget::hasBFI
bool hasBFI() const
Definition: R600Subtarget.h:94
llvm::ISD::CTLZ_ZERO_UNDEF
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:675
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:904
I
#define I(x, y, z)
Definition: MD5.cpp:59
llvm::FrameIndexSDNode::getIndex
int getIndex() const
Definition: SelectionDAGNodes.h:1755
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:8316
llvm::SelectionDAG::getNOT
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
Definition: SelectionDAG.cpp:1349
llvm::R600::getLDSNoRetOp
int getLDSNoRetOp(uint16_t Opcode)
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:476
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2296
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::TargetLowering::scalarizeVectorStore
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
Definition: TargetLowering.cpp:7523
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:157
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:152
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition: MachineMemOperand.h:131
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:354
llvm::ISD::InputArg
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Definition: TargetCallingConv.h:195
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2309
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1335
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:172
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:44
llvm::R600Subtarget::hasFFBH
bool hasFFBH() const
Definition: R600Subtarget.h:121
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:79
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::AMDGPUISD::SIN_HW
@ SIN_HW
Definition: AMDGPUISelLowering.h:384
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1369
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1571
llvm::TargetLowering::expandShiftParts
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
Definition: TargetLowering.cpp:6747
llvm::AMDGPUISD::TEXTURE_FETCH
@ TEXTURE_FETCH
Definition: AMDGPUISelLowering.h:435
llvm::AMDGPUISD::RSQ
@ RSQ
Definition: AMDGPUISelLowering.h:409
llvm::AMDGPUAS::CONSTANT_BUFFER_4
@ CONSTANT_BUFFER_4
Definition: AMDGPU.h:376
llvm::MachineMemOperand::MONonTemporal
@ MONonTemporal
The memory access is non-temporal.
Definition: MachineMemOperand.h:141
llvm::SelectionDAG::getSelectCC
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:1087
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1355
MO_FLAG_NEG
#define MO_FLAG_NEG
Definition: R600Defines.h:15
MO_FLAG_ABS
#define MO_FLAG_ABS
Definition: R600Defines.h:16
llvm::AMDGPUSubtarget::R700
@ R700
Definition: AMDGPUSubtarget.h:34
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:8989
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::isNON_EXTLoad
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
Definition: SelectionDAGNodes.h:2901
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2330
llvm::AfterLegalizeVectorOps
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition: ISDOpcodes.h:716
llvm::AMDGPUTargetLowering::combineFMinMaxLegacy
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
Definition: AMDGPUISelLowering.cpp:1430
llvm::MachineFunction
Definition: MachineFunction.h:230
llvm::R600Subtarget::getFrameLowering
const R600FrameLowering * getFrameLowering() const override
Definition: R600Subtarget.h:58
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition: MachineFunction.cpp:653
llvm::MemSDNode::getAAInfo
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Definition: SelectionDAGNodes.h:1304
llvm::AMDGPUTargetLowering::getImplicitParameterOffset
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
Definition: AMDGPUISelLowering.cpp:4314
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:100
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:155
llvm::AMDGPUAS::PARAM_I_ADDRESS
@ PARAM_I_ADDRESS
Address space for indirect addressable parameter memory (VTX1).
Definition: AMDGPU.h:364
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:47
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::MVT::v2i32
@ v2i32
Definition: MachineValueType.h:101
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:353
AMDGPU.h
llvm::AMDGPUAS::CONSTANT_BUFFER_10
@ CONSTANT_BUFFER_10
Definition: AMDGPU.h:382
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:136
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:353
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:9577
llvm::MVT::v2f32
@ v2f32
Definition: MachineValueType.h:155
uint32_t
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
llvm::AMDGPUTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: AMDGPUISelLowering.cpp:1278
llvm::MinMax
Definition: AssumeBundleQueries.h:72
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1129
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::AMDGPUAS::CONSTANT_BUFFER_1
@ CONSTANT_BUFFER_1
Definition: AMDGPU.h:373
llvm::TargetLoweringBase::setCondCodeAction
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
Definition: TargetLowering.h:2264
llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition: TargetLowering.h:2218
llvm::ISD::getSetCCInverse
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Definition: SelectionDAG.cpp:477
llvm::SDValue::getSimpleValueType
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:183
llvm::AMDGPUAS::CONSTANT_BUFFER_14
@ CONSTANT_BUFFER_14
Definition: AMDGPU.h:386
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:103
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:273
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:379
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::MVT::v32i32
@ v32i32
Definition: MachineValueType.h:109
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1335
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:790
llvm::SelectionDAG::getConstantFP
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1552
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2300
llvm::DenseMapBase::empty
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:97
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:108
llvm::ISD::FrameIndex
@ FrameIndex
Definition: ISDOpcodes.h:80
llvm::isInt< 16 >
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:370
llvm::AMDGPUTargetLowering::CreateLiveInRegisterRaw
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT) const
Definition: AMDGPUISelLowering.h:290
llvm::AMDGPUAS::CONSTANT_BUFFER_3
@ CONSTANT_BUFFER_3
Definition: AMDGPU.h:375
j
return j(j<< 16)
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1378
llvm::isAllOnesConstant
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition: SelectionDAG.cpp:9941
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:355
llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
Definition: AMDGPUISelLowering.cpp:1055
CompactSwizzlableVector
static SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
Definition: R600ISelLowering.cpp:1595
llvm::R600TargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: R600ISelLowering.cpp:448
llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition: TargetLowering.h:2206
llvm::GlobalAddressSDNode
Definition: SelectionDAGNodes.h:1718
llvm::AMDGPUISD::RSQ_CLAMP
@ RSQ_CLAMP
Definition: AMDGPUISelLowering.h:413
llvm::isNullConstant
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition: SelectionDAG.cpp:9931
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:296
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:321
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1347
llvm::AMDGPUAS::CONSTANT_BUFFER_11
@ CONSTANT_BUFFER_11
Definition: AMDGPU.h:383
llvm::MemSDNode::getAlignment
unsigned getAlignment() const
Definition: SelectionDAGNodes.h:1274
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:871
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:516
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:879
llvm::ISD::getSetCCSwappedOperands
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
Definition: SelectionDAG.cpp:454
llvm::R600TargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: R600ISelLowering.cpp:1754
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:870
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:440
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:92
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:491
llvm::AMDGPUAS::CONSTANT_BUFFER_0
@ CONSTANT_BUFFER_0
Definition: AMDGPU.h:372
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:201
llvm::R600Subtarget::getRegisterInfo
const R600RegisterInfo * getRegisterInfo() const override
Definition: R600Subtarget.h:66
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
R600Subtarget.h
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:46
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1365
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:138
llvm::R600InstrInfo
Definition: R600InstrInfo.h:39
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:922
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:107
llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition: ISDOpcodes.h:717
llvm::R600TargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override
Return the ValueType of the result of SETCC operations.
Definition: R600ISelLowering.cpp:1560
llvm::AMDGPUISD::BRANCH_COND
@ BRANCH_COND
Definition: AMDGPUISelLowering.h:341
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:740
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::SDValue::isUndef
bool isUndef() const
Definition: SelectionDAGNodes.h:1157
llvm::SDNode::getMachineOpcode
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
Definition: SelectionDAGNodes.h:686
Vector
So we should use XX3Form_Rcr to implement instrinsic Convert DP outs ins xscvdpsp No builtin are required Round &Convert QP DP(dword[1] is set to zero) No builtin are required Round to Quad Precision because you need to assign rounding mode in instruction Provide builtin(set f128:$vT,(int_ppc_vsx_xsrqpi f128:$vB))(set f128 yields< n x< ty > >< result > yields< ty >< result > No builtin are required Load Store Vector
Definition: README_P9.txt:497
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:476
llvm::AMDGPUTargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: AMDGPUISelLowering.cpp:4014
llvm::CallingConv::Fast
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:301
llvm::MemSDNode::getAlign
Align getAlign() const
Definition: SelectionDAGNodes.h:1272
llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition: TargetLowering.h:2115
llvm::SDNode::op_begin
op_iterator op_begin() const
Definition: SelectionDAGNodes.h:911
llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:236
llvm::AMDGPUISD::FRACT
@ FRACT
Definition: AMDGPUISelLowering.h:364
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:378
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:657
R600Defines.h
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:199
llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition: TargetLowering.h:2295
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
N
#define N
llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition: TargetLoweringBase.cpp:1285
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:659
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
llvm::GlobalAddressSDNode::getAddressSpace
unsigned getAddressSpace() const
Definition: SelectionDAG.cpp:10682
llvm::R600TargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition: R600ISelLowering.cpp:631
llvm::AMDGPUISD::COS_HW
@ COS_HW
Definition: AMDGPUISelLowering.h:383
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:157
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1117
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
llvm::numbers::pif
constexpr float pif
Definition: MathExtras.h:78
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1363
llvm::MVT::v32i8
@ v32i8
Definition: MachineValueType.h:81
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:257
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:45
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:866
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:437
BB
Common register allocation spilling lr str ldr sxth r3 ldr mla r4 can lr mov lr str ldr sxth r3 mla r4 and then merge mul and lr str ldr sxth r3 mla r4 It also increase the likelihood the store may become dead bb27 Successors according to LLVM BB
Definition: README.txt:39
llvm::R600TargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: R600ISelLowering.cpp:265
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:350
llvm::AMDGPUISD::DWORDADDR
@ DWORDADDR
Definition: AMDGPUISelLowering.h:363
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::EVT::bitsGE
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:265
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:726
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:79
llvm::AMDGPUFrameLowering::getStackWidth
unsigned getStackWidth(const MachineFunction &MF) const
Definition: AMDGPUFrameLowering.cpp:22
MachineFunction.h
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:880
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:89
llvm::AMDGPUMachineFunction
Definition: AMDGPUMachineFunction.h:20
llvm::ConstantFPSDNode::getValueAPF
const APFloat & getValueAPF() const
Definition: SelectionDAGNodes.h:1616
ReorganizeVector
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
Definition: R600ISelLowering.cpp:1640
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:500
llvm::SelectionDAG::getExtLoad
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7479
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:814
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:55
llvm::Value
LLVM Value Representation.
Definition: Value.h:75
llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value,.
Definition: MachineMemOperand.h:220
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:210
llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1114
llvm::MemSDNode::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition: SelectionDAGNodes.h:1342
llvm::AMDGPUISD::CONST_DATA_PTR
@ CONST_DATA_PTR
Pointer to the start of the shader's constant data.
Definition: AMDGPUISelLowering.h:473
llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition: TargetLowering.h:346
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:669
llvm::AMDGPUAS::CONSTANT_BUFFER_5
@ CONSTANT_BUFFER_5
Definition: AMDGPU.h:377
llvm::AMDGPUISD::DUMMY_CHAIN
@ DUMMY_CHAIN
Definition: AMDGPUISelLowering.h:476
llvm::R600TargetLowering::R600TargetLowering
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI)
Definition: R600ISelLowering.cpp:29
llvm::SelectionDAG::getMergeValues
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition: SelectionDAG.cpp:7228
llvm::AMDGPUAS::CONSTANT_BUFFER_12
@ CONSTANT_BUFFER_12
Definition: AMDGPU.h:384
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:289
llvm::MVT::v4i1
@ v4i1
Definition: MachineValueType.h:66
llvm::SelectionDAG::getCondCode
SDValue getCondCode(ISD::CondCode Cond)
Definition: SelectionDAG.cpp:1763
llvm::AMDGPUTargetLowering::FIRST_IMPLICIT
@ FIRST_IMPLICIT
Definition: AMDGPUISelLowering.h:315