1 //===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// Custom DAG lowering for R600
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "R600ISelLowering.h"
15 #include "AMDGPU.h"
17 #include "R600Defines.h"
18 #include "R600InstrInfo.h"
20 #include "R600Subtarget.h"
22 #include "llvm/IR/IntrinsicsAMDGPU.h"
23 #include "llvm/IR/IntrinsicsR600.h"
24 
25 using namespace llvm;
26 
27 #include "R600GenCallingConv.inc"
28 
29 R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
30  const R600Subtarget &STI)
31  : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
32  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
33  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
34  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
35  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
36  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
37  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
38 
41 
43 
44  // Legalize loads and stores to the private address space.
48 
49  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
50  // spaces, so it is custom lowered to handle those where it isn't.
51  for (MVT VT : MVT::integer_valuetypes()) {
55 
59 
63  }
64 
65  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
69 
73 
78 
81  // We need to include these since trunc STORES to PRIVATE need
82  // special handling to accommodate RMW
93 
94  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
97 
98  // Set condition code actions
111 
116 
119 
122 
126 
128 
133 
136 
143 
148 
149  // ADD, SUB overflow.
150  // TODO: turn these into Legal?
151  if (Subtarget->hasCARRY())
153 
154  if (Subtarget->hasBORROW())
156 
157  // Expand sign extension of vectors
158  if (!Subtarget->hasBFE())
160 
163 
164  if (!Subtarget->hasBFE())
168 
169  if (!Subtarget->hasBFE())
173 
177 
179 
181 
186 
191 
192  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
193  // to be Legal/Custom in order to avoid library calls.
197 
198  if (!Subtarget->hasFMA()) {
201  }
202 
203  // FIXME: May need no denormals check
205 
206  if (!Subtarget->hasBFI()) {
207  // fcopysign can be done in a single instruction with BFI.
210  }
211 
212  if (!Subtarget->hasBCNT(32))
214 
215  if (!Subtarget->hasBCNT(64))
217 
218  if (Subtarget->hasFFBH())
220 
221  if (Subtarget->hasFFBL())
223 
224  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
225  // need it for R600.
226  if (Subtarget->hasBFE())
227  setHasExtractBitsInsn(true);
228 
230 
231  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
232  for (MVT VT : ScalarIntVTs) {
237  }
238 
239  // LLVM will expand these to atomic_cmp_swap(0)
240  // and atomic_swap, respectively.
243 
244  // We need to custom lower some of the intrinsics
247 
249 
256 }
257 
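// Check whether the instruction following \p I is a RETURN, i.e. whether \p I
// is immediately followed by the end of the program; in that case the export
// or RAT-write instruction built from \p I sets the End Of Program (EOP) bit.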
258 static inline bool isEOP(MachineBasicBlock::iterator I) {
259  if (std::next(I) == I->getParent()->end())
260  return false;
261  return std::next(I)->getOpcode() == R600::RETURN;
262 }
263 
264 MachineBasicBlock *
265 R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
266  MachineBasicBlock *BB) const {
267  MachineFunction *MF = BB->getParent();
268  MachineRegisterInfo &MRI = MF->getRegInfo();
269  MachineBasicBlock::iterator I = MI;
270  const R600InstrInfo *TII = Subtarget->getInstrInfo();
271 
272  switch (MI.getOpcode()) {
273  default:
274  // Replace LDS_*_RET instructions that don't have any uses with the
275  // equivalent LDS_*_NORET instruction.
276  if (TII->isLDSRetInstr(MI.getOpcode())) {
277  int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
278  assert(DstIdx != -1);
279  MachineInstrBuilder NewMI;
280  // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
281  // LDS_1A2D support and remove this special case.
282  if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
283  MI.getOpcode() == R600::LDS_CMPST_RET)
284  return BB;
285 
286  NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
287  TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
288  for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
289  NewMI.add(MO);
290  } else {
291  return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
292  }
293  break;
294 
295  case R600::FABS_R600: {
296  MachineInstr *NewMI = TII->buildDefaultInstruction(
297  *BB, I, R600::MOV, MI.getOperand(0).getReg(),
298  MI.getOperand(1).getReg());
299  TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
300  break;
301  }
302 
303  case R600::FNEG_R600: {
304  MachineInstr *NewMI = TII->buildDefaultInstruction(
305  *BB, I, R600::MOV, MI.getOperand(0).getReg(),
306  MI.getOperand(1).getReg());
307  TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
308  break;
309  }
310 
311  case R600::MASK_WRITE: {
312  Register maskedRegister = MI.getOperand(0).getReg();
313  assert(maskedRegister.isVirtual());
314  MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
315  TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
316  break;
317  }
318 
319  case R600::MOV_IMM_F32:
320  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
321  .getFPImm()
322  ->getValueAPF()
323  .bitcastToAPInt()
324  .getZExtValue());
325  break;
326 
327  case R600::MOV_IMM_I32:
328  TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
329  MI.getOperand(1).getImm());
330  break;
331 
332  case R600::MOV_IMM_GLOBAL_ADDR: {
333  //TODO: Perhaps combine this instruction with the next if possible
334  auto MIB = TII->buildDefaultInstruction(
335  *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
336  int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
337  //TODO: Ugh this is rather ugly
338  const MachineOperand &MO = MI.getOperand(1);
339  MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
340  MO.getTargetFlags());
341  break;
342  }
343 
344  case R600::CONST_COPY: {
345  MachineInstr *NewMI = TII->buildDefaultInstruction(
346  *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
347  TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
348  MI.getOperand(1).getImm());
349  break;
350  }
351 
352  case R600::RAT_WRITE_CACHELESS_32_eg:
353  case R600::RAT_WRITE_CACHELESS_64_eg:
354  case R600::RAT_WRITE_CACHELESS_128_eg:
355  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
356  .add(MI.getOperand(0))
357  .add(MI.getOperand(1))
358  .addImm(isEOP(I)); // Set End of program bit
359  break;
360 
361  case R600::RAT_STORE_TYPED_eg:
362  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
363  .add(MI.getOperand(0))
364  .add(MI.getOperand(1))
365  .add(MI.getOperand(2))
366  .addImm(isEOP(I)); // Set End of program bit
367  break;
368 
369  case R600::BRANCH:
370  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
371  .add(MI.getOperand(0));
372  break;
373 
374  case R600::BRANCH_COND_f32: {
375  MachineInstr *NewMI =
376  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
377  R600::PREDICATE_BIT)
378  .add(MI.getOperand(1))
379  .addImm(R600::PRED_SETNE)
380  .addImm(0); // Flags
381  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
382  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
383  .add(MI.getOperand(0))
384  .addReg(R600::PREDICATE_BIT, RegState::Kill);
385  break;
386  }
387 
388  case R600::BRANCH_COND_i32: {
389  MachineInstr *NewMI =
390  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
391  R600::PREDICATE_BIT)
392  .add(MI.getOperand(1))
393  .addImm(R600::PRED_SETNE_INT)
394  .addImm(0); // Flags
395  TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
396  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
397  .add(MI.getOperand(0))
398  .addReg(R600::PREDICATE_BIT, RegState::Kill);
399  break;
400  }
401 
402  case R600::EG_ExportSwz:
403  case R600::R600_ExportSwz: {
404  // Instruction is left unmodified if it's not the last one of its type.
405  bool isLastInstructionOfItsType = true;
406  unsigned InstExportType = MI.getOperand(1).getImm();
407  for (MachineBasicBlock::iterator NextExportInst = std::next(I),
408  EndBlock = BB->end(); NextExportInst != EndBlock;
409  NextExportInst = std::next(NextExportInst)) {
410  if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
411  NextExportInst->getOpcode() == R600::R600_ExportSwz) {
412  unsigned CurrentInstExportType = NextExportInst->getOperand(1)
413  .getImm();
414  if (CurrentInstExportType == InstExportType) {
415  isLastInstructionOfItsType = false;
416  break;
417  }
418  }
419  }
420  bool EOP = isEOP(I);
421  if (!EOP && !isLastInstructionOfItsType)
422  return BB;
423  unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
424  BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
425  .add(MI.getOperand(0))
426  .add(MI.getOperand(1))
427  .add(MI.getOperand(2))
428  .add(MI.getOperand(3))
429  .add(MI.getOperand(4))
430  .add(MI.getOperand(5))
431  .add(MI.getOperand(6))
432  .addImm(CfInst)
433  .addImm(EOP);
434  break;
435  }
436  case R600::RETURN: {
437  return BB;
438  }
439  }
440 
441  MI.eraseFromParent();
442  return BB;
443 }
444 
445 //===----------------------------------------------------------------------===//
446 // Custom DAG Lowering Operations
447 //===----------------------------------------------------------------------===//
448 
449 SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
450  MachineFunction &MF = DAG.getMachineFunction();
451  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
452  switch (Op.getOpcode()) {
453  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
454  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
455  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
456  case ISD::SHL_PARTS:
457  case ISD::SRA_PARTS:
458  case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
459  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
460  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
461  case ISD::FCOS:
462  case ISD::FSIN: return LowerTrig(Op, DAG);
463  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
464  case ISD::STORE: return LowerSTORE(Op, DAG);
465  case ISD::LOAD: {
466  SDValue Result = LowerLOAD(Op, DAG);
467  assert((!Result.getNode() ||
468  Result.getNode()->getNumValues() == 2) &&
469  "Load should return a value and a chain");
470  return Result;
471  }
472 
473  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
474  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
475  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
476  case ISD::INTRINSIC_VOID: {
477  SDValue Chain = Op.getOperand(0);
478  unsigned IntrinsicID =
479  cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
480  switch (IntrinsicID) {
481  case Intrinsic::r600_store_swizzle: {
482  SDLoc DL(Op);
483  const SDValue Args[8] = {
484  Chain,
485  Op.getOperand(2), // Export Value
486  Op.getOperand(3), // ArrayBase
487  Op.getOperand(4), // Type
488  DAG.getConstant(0, DL, MVT::i32), // SWZ_X
489  DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
490  DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
491  DAG.getConstant(3, DL, MVT::i32) // SWZ_W
492  };
493  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
494  }
495 
496  // default for switch(IntrinsicID)
497  default: break;
498  }
499  // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
500  break;
501  }
502  case ISD::INTRINSIC_WO_CHAIN: {
503  unsigned IntrinsicID =
504  cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
505  EVT VT = Op.getValueType();
506  SDLoc DL(Op);
507  switch (IntrinsicID) {
508  case Intrinsic::r600_tex:
509  case Intrinsic::r600_texc: {
510  unsigned TextureOp;
511  switch (IntrinsicID) {
512  case Intrinsic::r600_tex:
513  TextureOp = 0;
514  break;
515  case Intrinsic::r600_texc:
516  TextureOp = 1;
517  break;
518  default:
519  llvm_unreachable("unhandled texture operation");
520  }
521 
522  SDValue TexArgs[19] = {
523  DAG.getConstant(TextureOp, DL, MVT::i32),
524  Op.getOperand(1),
525  DAG.getConstant(0, DL, MVT::i32),
526  DAG.getConstant(1, DL, MVT::i32),
527  DAG.getConstant(2, DL, MVT::i32),
528  DAG.getConstant(3, DL, MVT::i32),
529  Op.getOperand(2),
530  Op.getOperand(3),
531  Op.getOperand(4),
532  DAG.getConstant(0, DL, MVT::i32),
533  DAG.getConstant(1, DL, MVT::i32),
534  DAG.getConstant(2, DL, MVT::i32),
535  DAG.getConstant(3, DL, MVT::i32),
536  Op.getOperand(5),
537  Op.getOperand(6),
538  Op.getOperand(7),
539  Op.getOperand(8),
540  Op.getOperand(9),
541  Op.getOperand(10)
542  };
543  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
544  }
545  case Intrinsic::r600_dot4: {
546  SDValue Args[8] = {
547  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
548  DAG.getConstant(0, DL, MVT::i32)),
549  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
550  DAG.getConstant(0, DL, MVT::i32)),
551  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
552  DAG.getConstant(1, DL, MVT::i32)),
553  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
554  DAG.getConstant(1, DL, MVT::i32)),
555  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
556  DAG.getConstant(2, DL, MVT::i32)),
557  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
558  DAG.getConstant(2, DL, MVT::i32)),
559  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
560  DAG.getConstant(3, DL, MVT::i32)),
561  DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
562  DAG.getConstant(3, DL, MVT::i32))
563  };
564  return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
565  }
566 
567  case Intrinsic::r600_implicitarg_ptr: {
568  MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
569  uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
570  return DAG.getConstant(ByteOffset, DL, PtrVT);
571  }
572  case Intrinsic::r600_read_ngroups_x:
573  return LowerImplicitParameter(DAG, VT, DL, 0);
574  case Intrinsic::r600_read_ngroups_y:
575  return LowerImplicitParameter(DAG, VT, DL, 1);
576  case Intrinsic::r600_read_ngroups_z:
577  return LowerImplicitParameter(DAG, VT, DL, 2);
578  case Intrinsic::r600_read_global_size_x:
579  return LowerImplicitParameter(DAG, VT, DL, 3);
580  case Intrinsic::r600_read_global_size_y:
581  return LowerImplicitParameter(DAG, VT, DL, 4);
582  case Intrinsic::r600_read_global_size_z:
583  return LowerImplicitParameter(DAG, VT, DL, 5);
584  case Intrinsic::r600_read_local_size_x:
585  return LowerImplicitParameter(DAG, VT, DL, 6);
586  case Intrinsic::r600_read_local_size_y:
587  return LowerImplicitParameter(DAG, VT, DL, 7);
588  case Intrinsic::r600_read_local_size_z:
589  return LowerImplicitParameter(DAG, VT, DL, 8);
590 
591  case Intrinsic::r600_read_tgid_x:
592  case Intrinsic::amdgcn_workgroup_id_x:
593  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
594  R600::T1_X, VT);
595  case Intrinsic::r600_read_tgid_y:
596  case Intrinsic::amdgcn_workgroup_id_y:
597  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
598  R600::T1_Y, VT);
599  case Intrinsic::r600_read_tgid_z:
600  case Intrinsic::amdgcn_workgroup_id_z:
601  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
602  R600::T1_Z, VT);
603  case Intrinsic::r600_read_tidig_x:
604  case Intrinsic::amdgcn_workitem_id_x:
605  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
606  R600::T0_X, VT);
607  case Intrinsic::r600_read_tidig_y:
608  case Intrinsic::amdgcn_workitem_id_y:
609  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
610  R600::T0_Y, VT);
611  case Intrinsic::r600_read_tidig_z:
612  case Intrinsic::amdgcn_workitem_id_z:
613  return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
614  R600::T0_Z, VT);
615 
616  case Intrinsic::r600_recipsqrt_ieee:
617  return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
618 
619  case Intrinsic::r600_recipsqrt_clamped:
620  return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
621  default:
622  return Op;
623  }
624 
625  // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
626  break;
627  }
628  } // end switch(Op.getOpcode())
629  return SDValue();
630 }
631 
632 void R600TargetLowering::ReplaceNodeResults(SDNode *N,
633  SmallVectorImpl<SDValue> &Results,
634  SelectionDAG &DAG) const {
635  switch (N->getOpcode()) {
636  default:
637  AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
638  return;
639  case ISD::FP_TO_UINT:
640  if (N->getValueType(0) == MVT::i1) {
641  Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
642  return;
643  }
644  // Since we don't care about out of bounds values we can use FP_TO_SINT for
645  // uints too. The DAGLegalizer code for uint considers some extra cases
646  // which are not necessary here.
647  LLVM_FALLTHROUGH;
648  case ISD::FP_TO_SINT: {
649  if (N->getValueType(0) == MVT::i1) {
650  Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
651  return;
652  }
653 
654  SDValue Result;
655  if (expandFP_TO_SINT(N, Result, DAG))
656  Results.push_back(Result);
657  return;
658  }
659  case ISD::SDIVREM: {
660  SDValue Op = SDValue(N, 1);
661  SDValue RES = LowerSDIVREM(Op, DAG);
662  Results.push_back(RES);
663  Results.push_back(RES.getValue(1));
664  break;
665  }
666  case ISD::UDIVREM: {
667  SDValue Op = SDValue(N, 0);
668  LowerUDIVREM64(Op, DAG, Results);
669  break;
670  }
671  }
672 }
673 
674 SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
675  SDValue Vector) const {
676  SDLoc DL(Vector);
677  EVT VecVT = Vector.getValueType();
678  EVT EltVT = VecVT.getVectorElementType();
679  SmallVector<SDValue, 8> Args;
680 
681  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
682  Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
683  DAG.getVectorIdxConstant(i, DL)));
684  }
685 
686  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
687 }
688 
689 SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
690  SelectionDAG &DAG) const {
691  SDLoc DL(Op);
692  SDValue Vector = Op.getOperand(0);
693  SDValue Index = Op.getOperand(1);
694 
695  if (isa<ConstantSDNode>(Index) ||
696  Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
697  return Op;
698 
699  Vector = vectorToVerticalVector(DAG, Vector);
700  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
701  Vector, Index);
702 }
703 
704 SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
705  SelectionDAG &DAG) const {
706  SDLoc DL(Op);
707  SDValue Vector = Op.getOperand(0);
708  SDValue Value = Op.getOperand(1);
709  SDValue Index = Op.getOperand(2);
710 
711  if (isa<ConstantSDNode>(Index) ||
712  Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
713  return Op;
714 
715  Vector = vectorToVerticalVector(DAG, Vector);
716  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
717  Vector, Value, Index);
718  return vectorToVerticalVector(DAG, Insert);
719 }
720 
721 SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
722  SDValue Op,
723  SelectionDAG &DAG) const {
724  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
725  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
726  return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
727 
728  const DataLayout &DL = DAG.getDataLayout();
729  const GlobalValue *GV = GSD->getGlobal();
730  EVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
731 
732  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
733  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
734 }
735 
736 SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
737  // On hw >= R700, COS/SIN input must be between -1. and 1.
738  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
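  // (The 0.15915494309 constant below is 1/(2*Pi); for pre-R700 parts the
  // result is scaled back up by 2*Pi into the [-Pi, Pi] range the hardware
  // expects.)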
739  EVT VT = Op.getValueType();
740  SDValue Arg = Op.getOperand(0);
741  SDLoc DL(Op);
742 
743  // TODO: Should this propagate fast-math-flags?
744  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
745  DAG.getNode(ISD::FADD, DL, VT,
746  DAG.getNode(ISD::FMUL, DL, VT, Arg,
747  DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
748  DAG.getConstantFP(0.5, DL, MVT::f32)));
749  unsigned TrigNode;
750  switch (Op.getOpcode()) {
751  case ISD::FCOS:
752  TrigNode = AMDGPUISD::COS_HW;
753  break;
754  case ISD::FSIN:
755  TrigNode = AMDGPUISD::SIN_HW;
756  break;
757  default:
758  llvm_unreachable("Wrong trig opcode");
759  }
760  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
761  DAG.getNode(ISD::FADD, DL, VT, FractPart,
762  DAG.getConstantFP(-0.5, DL, MVT::f32)));
763  if (Gen >= AMDGPUSubtarget::R700)
764  return TrigVal;
765  // On R600 hw, COS/SIN input must be between -Pi and Pi.
766  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
767  DAG.getConstantFP(numbers::pif * 2.0f, DL, MVT::f32));
768 }
769 
770 SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
771  SelectionDAG &DAG) const {
772  SDValue Lo, Hi;
773  expandShiftParts(Op.getNode(), Lo, Hi, DAG);
774  return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
775 }
776 
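// UADDO/USUBO are lowered to the plain ADD/SUB ("mainop") plus the target
// CARRY/BORROW node ("ovf"); the carry bit is sign-extended from i1 so the
// overflow result becomes 0 or -1.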
777 SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
778  unsigned mainop, unsigned ovf) const {
779  SDLoc DL(Op);
780  EVT VT = Op.getValueType();
781 
782  SDValue Lo = Op.getOperand(0);
783  SDValue Hi = Op.getOperand(1);
784 
785  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
786  // Extend sign.
787  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
788  DAG.getValueType(MVT::i1));
789 
790  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
791 
792  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
793 }
794 
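// These two helpers are only reached from ReplaceNodeResults for i1 results:
// rather than performing a real conversion, the i1 value is produced by
// comparing the input against 1.0 (unsigned) or -1.0 (signed).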
795 SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
796  SDLoc DL(Op);
797  return DAG.getNode(
798  ISD::SETCC,
799  DL,
800  MVT::i1,
801  Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
802  DAG.getCondCode(ISD::SETEQ));
803 }
804 
805 SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
806  SDLoc DL(Op);
807  return DAG.getNode(
808  ISD::SETCC,
809  DL,
810  MVT::i1,
811  Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
812  DAG.getCondCode(ISD::SETEQ));
813 }
814 
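// Implicit kernel parameters (ngroups, global and local sizes) live at fixed
// dword offsets at the start of the implicit parameter buffer; lower the
// intrinsic to a load from that constant byte offset.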
815 SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
816  const SDLoc &DL,
817  unsigned DwordOffset) const {
818  unsigned ByteOffset = DwordOffset * 4;
819  PointerType * PtrType = PointerType::get(VT.getTypeForEVT(*DAG.getContext()),
820  AMDGPUAS::PARAM_I_ADDRESS);
821 
822  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
823  assert(isInt<16>(ByteOffset));
824 
825  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
826  DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
827  MachinePointerInfo(ConstantPointerNull::get(PtrType)));
828 }
829 
830 bool R600TargetLowering::isZero(SDValue Op) const {
831  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
832  return Cst->isZero();
833  } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
834  return CstFP->isZero();
835  } else {
836  return false;
837  }
838 }
839 
840 bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
841  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
842  return CFP->isExactlyValue(1.0);
843  }
844  return isAllOnesConstant(Op);
845 }
846 
847 bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
848  if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
849  return CFP->getValueAPF().isZero();
850  }
851  return isNullConstant(Op);
852 }
853 
854 SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
855  SDLoc DL(Op);
856  EVT VT = Op.getValueType();
857 
858  SDValue LHS = Op.getOperand(0);
859  SDValue RHS = Op.getOperand(1);
860  SDValue True = Op.getOperand(2);
861  SDValue False = Op.getOperand(3);
862  SDValue CC = Op.getOperand(4);
863  SDValue Temp;
864 
865  if (VT == MVT::f32) {
866  DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
867  SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
868  if (MinMax)
869  return MinMax;
870  }
871 
872  // LHS and RHS are guaranteed to be the same value type
873  EVT CompareVT = LHS.getValueType();
874 
875  // Check if we can lower this to a native operation.
876 
877  // Try to lower to a SET* instruction:
878  //
879  // SET* can match the following patterns:
880  //
881  // select_cc f32, f32, -1, 0, cc_supported
882  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
883  // select_cc i32, i32, -1, 0, cc_supported
884  //
885 
886  // Move hardware True/False values to the correct operand.
887  if (isHWTrueValue(False) && isHWFalseValue(True)) {
888  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
889  ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
890  if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
891  std::swap(False, True);
892  CC = DAG.getCondCode(InverseCC);
893  } else {
894  ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
895  if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
896  std::swap(False, True);
897  std::swap(LHS, RHS);
898  CC = DAG.getCondCode(SwapInvCC);
899  }
900  }
901  }
902 
903  if (isHWTrueValue(True) && isHWFalseValue(False) &&
904  (CompareVT == VT || VT == MVT::i32)) {
905  // This can be matched by a SET* instruction.
906  return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
907  }
908 
909  // Try to lower to a CND* instruction:
910  //
911  // CND* can match the following patterns:
912  //
913  // select_cc f32, 0.0, f32, f32, cc_supported
914  // select_cc f32, 0.0, i32, i32, cc_supported
915  // select_cc i32, 0, f32, f32, cc_supported
916  // select_cc i32, 0, i32, i32, cc_supported
917  //
918 
919  // Try to move the zero value to the RHS
920  if (isZero(LHS)) {
921  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
922  // Try swapping the operands
923  ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
924  if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
925  std::swap(LHS, RHS);
926  CC = DAG.getCondCode(CCSwapped);
927  } else {
928  // Try inverting the condition and then swapping the operands
929  ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
930  CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
931  if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
932  std::swap(True, False);
933  std::swap(LHS, RHS);
934  CC = DAG.getCondCode(CCSwapped);
935  }
936  }
937  }
938  if (isZero(RHS)) {
939  SDValue Cond = LHS;
940  SDValue Zero = RHS;
941  ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
942  if (CompareVT != VT) {
943  // Bitcast True / False to the correct types. This will end up being
944  // a nop, but it allows us to define only a single pattern in the
945  // .TD files for each CND* instruction rather than having to have
946  // one pattern for integer True/False and one for fp True/False
947  True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
948  False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
949  }
950 
951  switch (CCOpcode) {
952  case ISD::SETONE:
953  case ISD::SETUNE:
954  case ISD::SETNE:
955  CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
956  Temp = True;
957  True = False;
958  False = Temp;
959  break;
960  default:
961  break;
962  }
963  SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
964  Cond, Zero,
965  True, False,
966  DAG.getCondCode(CCOpcode));
967  return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
968  }
969 
970  // If we make it this far, it means we have no native instructions to handle
971  // this SELECT_CC, so we must lower it.
972  SDValue HWTrue, HWFalse;
973 
974  if (CompareVT == MVT::f32) {
975  HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
976  HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
977  } else if (CompareVT == MVT::i32) {
978  HWTrue = DAG.getConstant(-1, DL, CompareVT);
979  HWFalse = DAG.getConstant(0, DL, CompareVT);
980  }
981  else {
982  llvm_unreachable("Unhandled value type in LowerSELECT_CC");
983  }
984 
985  // Lower this unsupported SELECT_CC into a combination of two supported
986  // SELECT_CC operations.
987  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
988 
989  return DAG.getNode(ISD::SELECT_CC, DL, VT,
990  Cond, HWFalse,
991  True, False,
992  DAG.getCondCode(ISD::SETNE));
993 }
994 
995 /// LLVM generates byte-addressed pointers. For indirect addressing, we need to
996 /// convert these pointers to a register index. Each register holds
997  /// 16 bytes (4 x 32-bit sub-registers), but we need to take into account the
998  /// \p StackWidth, which tells us how many of the 4 sub-registers will be used
999 /// for indirect addressing.
1000 SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
1001  unsigned StackWidth,
1002  SelectionDAG &DAG) const {
1003  unsigned SRLPad;
1004  switch(StackWidth) {
1005  case 1:
1006  SRLPad = 2;
1007  break;
1008  case 2:
1009  SRLPad = 3;
1010  break;
1011  case 4:
1012  SRLPad = 4;
1013  break;
1014  default: llvm_unreachable("Invalid stack width");
1015  }
1016 
1017  SDLoc DL(Ptr);
1018  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
1019  DAG.getConstant(SRLPad, DL, MVT::i32));
1020 }
1021 
1022 void R600TargetLowering::getStackAddress(unsigned StackWidth,
1023  unsigned ElemIdx,
1024  unsigned &Channel,
1025  unsigned &PtrIncr) const {
1026  switch (StackWidth) {
1027  default:
1028  case 1:
1029  Channel = 0;
1030  if (ElemIdx > 0) {
1031  PtrIncr = 1;
1032  } else {
1033  PtrIncr = 0;
1034  }
1035  break;
1036  case 2:
1037  Channel = ElemIdx % 2;
1038  if (ElemIdx == 2) {
1039  PtrIncr = 1;
1040  } else {
1041  PtrIncr = 0;
1042  }
1043  break;
1044  case 4:
1045  Channel = ElemIdx;
1046  PtrIncr = 0;
1047  break;
1048  }
1049 }
1050 
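// Private (scratch) memory is only dword-addressable, so sub-dword stores are
// expanded into a read-modify-write of the containing dword: load the dword,
// clear the destination bits with a shifted mask, OR in the shifted value and
// store the dword back.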
1051 SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1052  SelectionDAG &DAG) const {
1053  SDLoc DL(Store);
1054  //TODO: Who creates the i8 stores?
1055  assert(Store->isTruncatingStore()
1056  || Store->getValue().getValueType() == MVT::i8);
1057  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1058 
1059  SDValue Mask;
1060  if (Store->getMemoryVT() == MVT::i8) {
1061  assert(Store->getAlignment() >= 1);
1062  Mask = DAG.getConstant(0xff, DL, MVT::i32);
1063  } else if (Store->getMemoryVT() == MVT::i16) {
1064  assert(Store->getAlignment() >= 2);
1065  Mask = DAG.getConstant(0xffff, DL, MVT::i32);
1066  } else {
1067  llvm_unreachable("Unsupported private trunc store");
1068  }
1069 
1070  SDValue OldChain = Store->getChain();
1071  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1072  // Skip dummy
1073  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1074  SDValue BasePtr = Store->getBasePtr();
1075  SDValue Offset = Store->getOffset();
1076  EVT MemVT = Store->getMemoryVT();
1077 
1078  SDValue LoadPtr = BasePtr;
1079  if (!Offset.isUndef()) {
1080  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1081  }
1082 
1083  // Get dword location
1084  // TODO: this should be eliminated by the future SHR ptr, 2
1085  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1086  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1087 
1088  // Load dword
1089  // TODO: can we be smarter about machine pointer info?
1090  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1091  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1092 
1093  Chain = Dst.getValue(1);
1094 
1095  // Get offset in dword
1096  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1097  DAG.getConstant(0x3, DL, MVT::i32));
1098 
1099  // Convert byte offset to bit shift
1100  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1101  DAG.getConstant(3, DL, MVT::i32));
1102 
1103  // TODO: Contrary to the name of the function,
1104  // it also handles sub i32 non-truncating stores (like i1)
1105  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1106  Store->getValue());
1107 
1108  // Mask the value to the right type
1109  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1110 
1111  // Shift the value in place
1112  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1113  MaskedValue, ShiftAmt);
1114 
1115  // Shift the mask in place
1116  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1117 
1118  // Invert the mask. NOTE: if we had native ROL instructions we could
1119  // use inverted mask
1120  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1121 
1122  // Cleanup the target bits
1123  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1124 
1125  // Add the new bits
1126  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1127 
1128  // Store dword
1129  // TODO: Can we be smarter about MachinePointerInfo?
1130  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
1131 
1132  // If we are part of an expanded vector, make our neighbors depend on this store
1133  if (VectorTrunc) {
1134  // Make all other vector elements depend on this store
1135  Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1136  DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1137  }
1138  return NewStore;
1139 }
1140 
1141 SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1142  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1143  unsigned AS = StoreNode->getAddressSpace();
1144 
1145  SDValue Chain = StoreNode->getChain();
1146  SDValue Ptr = StoreNode->getBasePtr();
1147  SDValue Value = StoreNode->getValue();
1148 
1149  EVT VT = Value.getValueType();
1150  EVT MemVT = StoreNode->getMemoryVT();
1151  EVT PtrVT = Ptr.getValueType();
1152 
1153  SDLoc DL(Op);
1154 
1155  const bool TruncatingStore = StoreNode->isTruncatingStore();
1156 
1157  // Neither LOCAL nor PRIVATE can do vectors at the moment
1158  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
1159  TruncatingStore) &&
1160  VT.isVector()) {
1161  if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
1162  // Add an extra level of chain to isolate this vector
1163  SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1164  // TODO: can the chain be replaced without creating a new store?
1165  SDValue NewStore = DAG.getTruncStore(
1166  NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(),
1167  MemVT, StoreNode->getAlignment(),
1168  StoreNode->getMemOperand()->getFlags(), StoreNode->getAAInfo());
1169  StoreNode = cast<StoreSDNode>(NewStore);
1170  }
1171 
1172  return scalarizeVectorStore(StoreNode, DAG);
1173  }
1174 
1175  Align Alignment = StoreNode->getAlign();
1176  if (Alignment < MemVT.getStoreSize() &&
1177  !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
1178  StoreNode->getMemOperand()->getFlags(),
1179  nullptr)) {
1180  return expandUnalignedStore(StoreNode, DAG);
1181  }
1182 
1183  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1184  DAG.getConstant(2, DL, PtrVT));
1185 
1186  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1187  // It is beneficial to create MSKOR here instead of in the combiner to avoid
1188  // artificial dependencies introduced by RMW
1189  if (TruncatingStore) {
1190  assert(VT.bitsLE(MVT::i32));
1191  SDValue MaskConstant;
1192  if (MemVT == MVT::i8) {
1193  MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1194  } else {
1195  assert(MemVT == MVT::i16);
1196  assert(StoreNode->getAlignment() >= 2);
1197  MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1198  }
1199 
1200  SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1201  DAG.getConstant(0x00000003, DL, PtrVT));
1202  SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1203  DAG.getConstant(3, DL, VT));
1204 
1205  // Put the mask in correct place
1206  SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1207 
1208  // Put the value bits in correct place
1209  SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1210  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1211 
1212  // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1213  // vector instead.
1214  SDValue Src[4] = {
1215  ShiftedValue,
1216  DAG.getConstant(0, DL, MVT::i32),
1217  DAG.getConstant(0, DL, MVT::i32),
1218  Mask
1219  };
1220  SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1221  SDValue Args[3] = { Chain, Input, DWordAddr };
1222  return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1223  Op->getVTList(), Args, MemVT,
1224  StoreNode->getMemOperand());
1225  } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1226  // Convert pointer from byte address to dword address.
1227  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1228 
1229  if (StoreNode->isIndexed()) {
1230  llvm_unreachable("Indexed stores not supported yet");
1231  } else {
1232  Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1233  }
1234  return Chain;
1235  }
1236  }
1237 
1238  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1239  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1240  return SDValue();
1241 
1242  if (MemVT.bitsLT(MVT::i32))
1243  return lowerPrivateTruncStore(StoreNode, DAG);
1244 
1245  // Standard i32+ store, tag it with DWORDADDR to note that the address
1246  // has been shifted
1247  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1248  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1249  return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1250  }
1251 
1252  // Tagged i32+ stores will be matched by patterns
1253  return SDValue();
1254 }
1255 
1256 // Return 512 + (kc_bank << 12) for the given constant-buffer address space.
1257 static int
1258 ConstantAddressBlock(unsigned AddressSpace) {
1259  switch (AddressSpace) {
1260  case AMDGPUAS::CONSTANT_BUFFER_0:
1261  return 512;
1262  case AMDGPUAS::CONSTANT_BUFFER_1:
1263  return 512 + 4096;
1264  case AMDGPUAS::CONSTANT_BUFFER_2:
1265  return 512 + 4096 * 2;
1266  case AMDGPUAS::CONSTANT_BUFFER_3:
1267  return 512 + 4096 * 3;
1268  case AMDGPUAS::CONSTANT_BUFFER_4:
1269  return 512 + 4096 * 4;
1270  case AMDGPUAS::CONSTANT_BUFFER_5:
1271  return 512 + 4096 * 5;
1272  case AMDGPUAS::CONSTANT_BUFFER_6:
1273  return 512 + 4096 * 6;
1274  case AMDGPUAS::CONSTANT_BUFFER_7:
1275  return 512 + 4096 * 7;
1276  case AMDGPUAS::CONSTANT_BUFFER_8:
1277  return 512 + 4096 * 8;
1278  case AMDGPUAS::CONSTANT_BUFFER_9:
1279  return 512 + 4096 * 9;
1280  case AMDGPUAS::CONSTANT_BUFFER_10:
1281  return 512 + 4096 * 10;
1282  case AMDGPUAS::CONSTANT_BUFFER_11:
1283  return 512 + 4096 * 11;
1284  case AMDGPUAS::CONSTANT_BUFFER_12:
1285  return 512 + 4096 * 12;
1286  case AMDGPUAS::CONSTANT_BUFFER_13:
1287  return 512 + 4096 * 13;
1288  case AMDGPUAS::CONSTANT_BUFFER_14:
1289  return 512 + 4096 * 14;
1290  case AMDGPUAS::CONSTANT_BUFFER_15:
1291  return 512 + 4096 * 15;
1292  default:
1293  return -1;
1294  }
1295 }
1296 
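// Counterpart of lowerPrivateTruncStore for sub-dword extending loads from
// private memory: read the containing dword, shift the addressed byte/word
// down to bit 0 and then sign- or zero-extend it according to the extension
// type.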
1297 SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1298  SelectionDAG &DAG) const {
1299  SDLoc DL(Op);
1300  LoadSDNode *Load = cast<LoadSDNode>(Op);
1301  ISD::LoadExtType ExtType = Load->getExtensionType();
1302  EVT MemVT = Load->getMemoryVT();
1303  assert(Load->getAlignment() >= MemVT.getStoreSize());
1304 
1305  SDValue BasePtr = Load->getBasePtr();
1306  SDValue Chain = Load->getChain();
1307  SDValue Offset = Load->getOffset();
1308 
1309  SDValue LoadPtr = BasePtr;
1310  if (!Offset.isUndef()) {
1311  LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1312  }
1313 
1314  // Get dword location
1315  // NOTE: this should be eliminated by the future SHR ptr, 2
1316  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1317  DAG.getConstant(0xfffffffc, DL, MVT::i32));
1318 
1319  // Load dword
1320  // TODO: can we be smarter about machine pointer info?
1321  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1322  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1323 
1324  // Get offset within the register.
1325  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1326  LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1327 
1328  // Bit offset of target byte (byteIdx * 8).
1329  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1330  DAG.getConstant(3, DL, MVT::i32));
1331 
1332  // Shift to the right.
1333  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1334 
1335  // Eliminate the upper bits by setting them to ...
1336  EVT MemEltVT = MemVT.getScalarType();
1337 
1338  if (ExtType == ISD::SEXTLOAD) { // ... ones.
1339  SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1340  Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1341  } else { // ... or zeros.
1342  Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1343  }
1344 
1345  SDValue Ops[] = {
1346  Ret,
1347  Read.getValue(1) // This should be our output chain
1348  };
1349 
1350  return DAG.getMergeValues(Ops, DL);
1351 }
1352 
1353 SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1354  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1355  unsigned AS = LoadNode->getAddressSpace();
1356  EVT MemVT = LoadNode->getMemoryVT();
1357  ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1358 
1359  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1360  ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1361  return lowerPrivateExtLoad(Op, DAG);
1362  }
1363 
1364  SDLoc DL(Op);
1365  EVT VT = Op.getValueType();
1366  SDValue Chain = LoadNode->getChain();
1367  SDValue Ptr = LoadNode->getBasePtr();
1368 
1369  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1370  LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1371  VT.isVector()) {
1372  SDValue Ops[2];
1373  std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
1374  return DAG.getMergeValues(Ops, DL);
1375  }
1376 
1377  // This is still used for explicit load from addrspace(8)
1378  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1379  if (ConstantBlock > -1 &&
1380  ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1381  (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1382  SDValue Result;
1383  if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1384  isa<ConstantSDNode>(Ptr)) {
1385  return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1386  } else {
1387  //TODO: Does this even work?
1388  // non-constant ptr can't be folded, keeps it as a v4f32 load
1389  Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
1390  DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1391  DAG.getConstant(4, DL, MVT::i32)),
1392  DAG.getConstant(LoadNode->getAddressSpace() -
1393  AMDGPUAS::CONSTANT_BUFFER_0, DL, MVT::i32)
1394  );
1395  }
1396 
1397  if (!VT.isVector()) {
1398  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1399  DAG.getConstant(0, DL, MVT::i32));
1400  }
1401 
1402  SDValue MergedValues[2] = {
1403  Result,
1404  Chain
1405  };
1406  return DAG.getMergeValues(MergedValues, DL);
1407  }
1408 
1409  // For most operations returning SDValue() will result in the node being
1410  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1411  // need to manually expand loads that may be legal in some address spaces and
1412  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1413  // compute shaders, since the data is sign extended when it is uploaded to the
1414  // buffer. However SEXT loads from other address spaces are not supported, so
1415  // we need to expand them here.
1416  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1417  assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1418  SDValue NewLoad = DAG.getExtLoad(
1419  ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1420  LoadNode->getAlignment(), LoadNode->getMemOperand()->getFlags());
1421  SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1422  DAG.getValueType(MemVT));
1423 
1424  SDValue MergedValues[2] = { Res, Chain };
1425  return DAG.getMergeValues(MergedValues, DL);
1426  }
1427 
1428  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1429  return SDValue();
1430  }
1431 
1432  // DWORDADDR ISD marks already shifted address
1433  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1434  assert(VT == MVT::i32);
1435  Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1436  Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1437  return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1438  }
1439  return SDValue();
1440 }
1441 
1442 SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1443  SDValue Chain = Op.getOperand(0);
1444  SDValue Cond = Op.getOperand(1);
1445  SDValue Jump = Op.getOperand(2);
1446 
1447  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1448  Chain, Jump, Cond);
1449 }
1450 
1451 SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1452  SelectionDAG &DAG) const {
1453  MachineFunction &MF = DAG.getMachineFunction();
1454  const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1455 
1456  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1457 
1458  unsigned FrameIndex = FIN->getIndex();
1459  Register IgnoredFrameReg;
1460  StackOffset Offset =
1461  TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1462  return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
1463  SDLoc(Op), Op.getValueType());
1464 }
1465 
1466 CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1467  bool IsVarArg) const {
1468  switch (CC) {
1469  case CallingConv::AMDGPU_KERNEL:
1470  case CallingConv::SPIR_KERNEL:
1471  case CallingConv::C:
1472  case CallingConv::Fast:
1473  case CallingConv::Cold:
1474  llvm_unreachable("kernels should not be handled here");
1475  case CallingConv::AMDGPU_VS:
1476  case CallingConv::AMDGPU_GS:
1477  case CallingConv::AMDGPU_PS:
1478  case CallingConv::AMDGPU_CS:
1479  case CallingConv::AMDGPU_HS:
1480  case CallingConv::AMDGPU_ES:
1481  case CallingConv::AMDGPU_LS:
1482  return CC_R600;
1483  default:
1484  report_fatal_error("Unsupported calling convention.");
1485  }
1486 }
1487 
1488 /// XXX Only kernel functions are supported, so we can assume for now that
1489 /// every function is a kernel function, but in the future we should use
1490 /// separate calling conventions for kernel and non-kernel functions.
1491 SDValue R600TargetLowering::LowerFormalArguments(
1492  SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1493  const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1494  SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1495  SmallVector<CCValAssign, 16> ArgLocs;
1496  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1497  *DAG.getContext());
1498  MachineFunction &MF = DAG.getMachineFunction();
1500 
1501  if (AMDGPU::isShader(CallConv)) {
1502  CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
1503  } else {
1504  analyzeFormalArgumentsCompute(CCInfo, Ins);
1505  }
1506 
1507  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1508  CCValAssign &VA = ArgLocs[i];
1509  const ISD::InputArg &In = Ins[i];
1510  EVT VT = In.VT;
1511  EVT MemVT = VA.getLocVT();
1512  if (!VT.isVector() && MemVT.isVector()) {
1513  // Get load source type if scalarized.
1514  MemVT = MemVT.getVectorElementType();
1515  }
1516 
1517  if (AMDGPU::isShader(CallConv)) {
1518  Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
1519  SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1520  InVals.push_back(Register);
1521  continue;
1522  }
1523 
1524  // i64 isn't a legal type, so the register type used ends up as i32, which
1525  // isn't expected here. It attempts to create this sextload, but it ends up
1526  // being invalid. Somehow this seems to work with i64 arguments, but breaks
1527  // for <1 x i64>.
1528 
1529  // The first 36 bytes of the input buffer contain information about
1530  // thread group and global sizes.
1531  ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1532  if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1533  // FIXME: This should really check the extload type, but the handling of
1534  // extload vector parameters seems to be broken.
1535 
1536  // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1537  Ext = ISD::SEXTLOAD;
1538  }
1539 
1540  // Compute the offset from the value.
1541  // XXX - I think PartOffset should give you this, but it seems to give the
1542  // size of the register which isn't useful.
1543 
1544  unsigned PartOffset = VA.getLocMemOffset();
1545  unsigned Alignment = MinAlign(VT.getStoreSize(), PartOffset);
1546 
1547  MachinePointerInfo PtrInfo(AMDGPUAS::PARAM_I_ADDRESS);
1548  SDValue Arg = DAG.getLoad(
1549  ISD::UNINDEXED, Ext, VT, DL, Chain,
1550  DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
1551  PtrInfo,
1552  MemVT, Alignment, MachineMemOperand::MONonTemporal |
1553  MachineMemOperand::MODereferenceable |
1554  MachineMemOperand::MOInvariant);
1555 
1556  InVals.push_back(Arg);
1557  }
1558  return Chain;
1559 }
1560 
1561 EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1562  EVT VT) const {
1563  if (!VT.isVector())
1564  return MVT::i32;
1565  return VT.changeVectorElementTypeToInteger();
1566 }
1567 
1568 bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
1569  const MachineFunction &MF) const {
1570  // Local and Private addresses do not handle vectors. Limit to i32
1571  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
1572  return (MemVT.getSizeInBits() <= 32);
1573  }
1574  return true;
1575 }
1576 
1577 bool R600TargetLowering::allowsMisalignedMemoryAccesses(
1578  EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1579  bool *IsFast) const {
1580  if (IsFast)
1581  *IsFast = false;
1582 
1583  if (!VT.isSimple() || VT == MVT::Other)
1584  return false;
1585 
1586  if (VT.bitsLT(MVT::i32))
1587  return false;
1588 
1589  // TODO: This is a rough estimate.
1590  if (IsFast)
1591  *IsFast = true;
1592 
1593  return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
1594 }
1595 
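// Rewrite a 4-element build_vector used as an export/texture source: lanes
// that are undef, constant 0.0, constant 1.0 or duplicates of an earlier lane
// are replaced by the corresponding hardware selector (SEL_MASK_WRITE, SEL_0,
// SEL_1 or the earlier lane index), recorded in RemapSwizzle.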
1596 static SDValue CompactSwizzlableVector(
1597  SelectionDAG &DAG, SDValue VectorEntry,
1598  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1599  assert(RemapSwizzle.empty());
1600 
1601  SDLoc DL(VectorEntry);
1602  EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1603 
1604  SDValue NewBldVec[4];
1605  for (unsigned i = 0; i < 4; i++)
1606  NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1607  DAG.getIntPtrConstant(i, DL));
1608 
1609  for (unsigned i = 0; i < 4; i++) {
1610  if (NewBldVec[i].isUndef())
1611  // We mask write here to teach later passes that the ith element of this
1612  // vector is undef. Thus we can use it to reduce 128-bit register usage,
1613  // break false dependencies and additionally make assembly easier to read.
1614  RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1615  if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1616  if (C->isZero()) {
1617  RemapSwizzle[i] = 4; // SEL_0
1618  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1619  } else if (C->isExactlyValue(1.0)) {
1620  RemapSwizzle[i] = 5; // SEL_1
1621  NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1622  }
1623  }
1624 
1625  if (NewBldVec[i].isUndef())
1626  continue;
1627 
1628  for (unsigned j = 0; j < i; j++) {
1629  if (NewBldVec[i] == NewBldVec[j]) {
1630  NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1631  RemapSwizzle[i] = j;
1632  break;
1633  }
1634  }
1635  }
1636 
1637  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1638  NewBldVec);
1639 }
1640 
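// Try to move each lane that is an extract_vector_elt to the position matching
// its source index, so that the final swizzle becomes a simple lane
// permutation; RemapSwizzle records the permutation that was applied.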
1641 static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1642  DenseMap<unsigned, unsigned> &RemapSwizzle) {
1643  assert(RemapSwizzle.empty());
1644 
1645  SDLoc DL(VectorEntry);
1646  EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1647 
1648  SDValue NewBldVec[4];
1649  bool isUnmovable[4] = {false, false, false, false};
1650  for (unsigned i = 0; i < 4; i++)
1651  NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1652  DAG.getIntPtrConstant(i, DL));
1653 
1654  for (unsigned i = 0; i < 4; i++) {
1655  RemapSwizzle[i] = i;
1656  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1657  unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1658  ->getZExtValue();
1659  if (i == Idx)
1660  isUnmovable[Idx] = true;
1661  }
1662  }
1663 
1664  for (unsigned i = 0; i < 4; i++) {
1665  if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1666  unsigned Idx = cast<ConstantSDNode>(NewBldVec[i].getOperand(1))
1667  ->getZExtValue();
1668  if (isUnmovable[Idx])
1669  continue;
1670  // Swap i and Idx
1671  std::swap(NewBldVec[Idx], NewBldVec[i]);
1672  std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1673  break;
1674  }
1675  }
1676 
1677  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1678  NewBldVec);
1679 }
1680 
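// Combine the two passes above: first fold constants and duplicates into
// selector values, then permute the remaining lanes, patching the swizzle
// operands in Swz after each pass.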
1681 SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[4],
1682  SelectionDAG &DAG,
1683  const SDLoc &DL) const {
1684  // Old -> New swizzle values
1685  DenseMap<unsigned, unsigned> SwizzleRemap;
1686 
1687  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1688  for (unsigned i = 0; i < 4; i++) {
1689  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1690  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1691  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1692  }
1693 
1694  SwizzleRemap.clear();
1695  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1696  for (unsigned i = 0; i < 4; i++) {
1697  unsigned Idx = cast<ConstantSDNode>(Swz[i])->getZExtValue();
1698  if (SwizzleRemap.find(Idx) != SwizzleRemap.end())
1699  Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1700  }
1701 
1702  return BuildVector;
1703 }
1704 
1705 SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
1706  SelectionDAG &DAG) const {
1707  SDLoc DL(LoadNode);
1708  EVT VT = LoadNode->getValueType(0);
1709  SDValue Chain = LoadNode->getChain();
1710  SDValue Ptr = LoadNode->getBasePtr();
1711  assert (isa<ConstantSDNode>(Ptr));
1712 
1713  //TODO: Support smaller loads
1714  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1715  return SDValue();
1716 
1717  if (LoadNode->getAlignment() < 4)
1718  return SDValue();
1719 
1720  int ConstantBlock = ConstantAddressBlock(Block);
1721 
1722  SDValue Slots[4];
1723  for (unsigned i = 0; i < 4; i++) {
1724  // We want the Const position encoded with the following formula:
1725  // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1726  // const_index is Ptr computed by llvm using an alignment of 16.
1727  // Thus we add ((512 + (kc_bank << 12)) + chan) * 4 here and
1728  // then divide by 4 at the ISel step.
1729  SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1730  DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1731  Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1732  }
1733  EVT NewVT = MVT::v4i32;
1734  unsigned NumElements = 4;
1735  if (VT.isVector()) {
1736  NewVT = VT;
1737  NumElements = VT.getVectorNumElements();
1738  }
1739  SDValue Result = DAG.getBuildVector(NewVT, DL, makeArrayRef(Slots, NumElements));
1740  if (!VT.isVector()) {
1741  Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1742  DAG.getConstant(0, DL, MVT::i32));
1743  }
1744  SDValue MergedValues[2] = {
1745  Result,
1746  Chain
1747  };
1748  return DAG.getMergeValues(MergedValues, DL);
1749 }
1750 
1751 //===----------------------------------------------------------------------===//
1752 // Custom DAG Optimizations
1753 //===----------------------------------------------------------------------===//
1754 
1755 SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1756  DAGCombinerInfo &DCI) const {
1757  SelectionDAG &DAG = DCI.DAG;
1758  SDLoc DL(N);
1759 
1760  switch (N->getOpcode()) {
1761  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1762  case ISD::FP_ROUND: {
1763  SDValue Arg = N->getOperand(0);
1764  if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1765  return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1766  Arg.getOperand(0));
1767  }
1768  break;
1769  }
1770 
1771  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1772  // (i32 select_cc f32, f32, -1, 0 cc)
1773  //
1774  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1775  // this to one of the SET*_DX10 instructions.
1776  case ISD::FP_TO_SINT: {
1777  SDValue FNeg = N->getOperand(0);
1778  if (FNeg.getOpcode() != ISD::FNEG) {
1779  return SDValue();
1780  }
1781  SDValue SelectCC = FNeg.getOperand(0);
1782  if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1783  SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1784  SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1785  !isHWTrueValue(SelectCC.getOperand(2)) ||
1786  !isHWFalseValue(SelectCC.getOperand(3))) {
1787  return SDValue();
1788  }
1789 
1790  return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1791  SelectCC.getOperand(0), // LHS
1792  SelectCC.getOperand(1), // RHS
1793  DAG.getConstant(-1, DL, MVT::i32), // True
1794  DAG.getConstant(0, DL, MVT::i32), // False
1795  SelectCC.getOperand(4)); // CC
1796  }
1797 
1798  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1799  // => build_vector elt0, ... , NewEltIdx, ... , eltN
1800  case ISD::INSERT_VECTOR_ELT: {
1801  SDValue InVec = N->getOperand(0);
1802  SDValue InVal = N->getOperand(1);
1803  SDValue EltNo = N->getOperand(2);
1804 
1805  // If the inserted element is an UNDEF, just use the input vector.
1806  if (InVal.isUndef())
1807  return InVec;
1808 
1809  EVT VT = InVec.getValueType();
1810 
1811  // If we can't generate a legal BUILD_VECTOR, exit
1812  if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1813  return SDValue();
1814 
1815  // Check that we know which element is being inserted
1816  if (!isa<ConstantSDNode>(EltNo))
1817  return SDValue();
1818  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
1819 
1820  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1821  // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1822  // vector elements.
1823  SmallVector<SDValue, 8> Ops;
1824  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1825  Ops.append(InVec.getNode()->op_begin(),
1826  InVec.getNode()->op_end());
1827  } else if (InVec.isUndef()) {
1828  unsigned NElts = VT.getVectorNumElements();
1829  Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1830  } else {
1831  return SDValue();
1832  }
1833 
1834  // Insert the element
1835  if (Elt < Ops.size()) {
1836  // All the operands of BUILD_VECTOR must have the same type;
1837  // we enforce that here.
1838  EVT OpVT = Ops[0].getValueType();
1839  if (InVal.getValueType() != OpVT)
1840  InVal = OpVT.bitsGT(InVal.getValueType()) ?
1841  DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1842  DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1843  Ops[Elt] = InVal;
1844  }
1845 
1846  // Return the new vector
1847  return DAG.getBuildVector(VT, DL, Ops);
1848  }
1849 
1850  // Extract_vec (Build_vector) generated by custom lowering
1851  // also needs to be custom combined.
1852  case ISD::EXTRACT_VECTOR_ELT: {
1853  SDValue Arg = N->getOperand(0);
1854  if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1855  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1856  unsigned Element = Const->getZExtValue();
1857  return Arg->getOperand(Element);
1858  }
1859  }
1860  if (Arg.getOpcode() == ISD::BITCAST &&
1861  Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1862  (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1863  Arg.getValueType().getVectorNumElements())) {
1864  if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1865  unsigned Element = Const->getZExtValue();
1866  return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1867  Arg->getOperand(0).getOperand(Element));
1868  }
1869  }
1870  break;
1871  }
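  // Illustrative instances of the two folds above:
  //   extract_vector_elt (build_vector a, b, c, d), 1                  -> b
  //   extract_vector_elt (v4f32 bitcast (v4i32 build_vector a, b, c, d)), 1
  //     -> (f32 bitcast b)
  // The bitcast form is only taken when source and destination vectors have
  // the same element count, so the per-element bitcast is well defined.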
1872 
1873  case ISD::SELECT_CC: {
1874  // Try common optimizations
1875  if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1876  return Ret;
1877 
1878  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1879  // selectcc x, y, a, b, inv(cc)
1880  //
1881  // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1882  // selectcc x, y, a, b, cc
1883  SDValue LHS = N->getOperand(0);
1884  if (LHS.getOpcode() != ISD::SELECT_CC) {
1885  return SDValue();
1886  }
1887 
1888  SDValue RHS = N->getOperand(1);
1889  SDValue True = N->getOperand(2);
1890  SDValue False = N->getOperand(3);
1891  ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1892 
1893  if (LHS.getOperand(2).getNode() != True.getNode() ||
1894  LHS.getOperand(3).getNode() != False.getNode() ||
1895  RHS.getNode() != False.getNode()) {
1896  return SDValue();
1897  }
1898 
1899  switch (NCC) {
1900  default: return SDValue();
1901  case ISD::SETNE: return LHS;
1902  case ISD::SETEQ: {
1903  ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1904  LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
1905  if (DCI.isBeforeLegalizeOps() ||
1906  isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1907  return DAG.getSelectCC(DL,
1908  LHS.getOperand(0),
1909  LHS.getOperand(1),
1910  LHS.getOperand(2),
1911  LHS.getOperand(3),
1912  LHSCC);
1913  break;
1914  }
1915  }
1916  return SDValue();
1917  }
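  // Worked instance of the seteq fold above (illustrative): with x = 1, y = 2,
  // a = 10, b = 20 and cc = setlt, the inner selectcc yields a = 10, the outer
  // (selectcc inner, b, a, b, seteq) compares 10 against 20 and yields b = 20,
  // and the folded form selectcc x, y, a, b, setge also yields 20, since
  // 1 >= 2 is false.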
1918 
1919  case AMDGPUISD::R600_EXPORT: {
1920  SDValue Arg = N->getOperand(1);
1921  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1922  break;
1923 
1924  SDValue NewArgs[8] = {
1925  N->getOperand(0), // Chain
1926  SDValue(),
1927  N->getOperand(2), // ArrayBase
1928  N->getOperand(3), // Type
1929  N->getOperand(4), // SWZ_X
1930  N->getOperand(5), // SWZ_Y
1931  N->getOperand(6), // SWZ_Z
1932  N->getOperand(7) // SWZ_W
1933  };
1934  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1935  return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1936  }
1937  case AMDGPUISD::TEXTURE_FETCH: {
1938  SDValue Arg = N->getOperand(1);
1939  if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1940  break;
1941 
1942  SDValue NewArgs[19] = {
1943  N->getOperand(0),
1944  N->getOperand(1),
1945  N->getOperand(2),
1946  N->getOperand(3),
1947  N->getOperand(4),
1948  N->getOperand(5),
1949  N->getOperand(6),
1950  N->getOperand(7),
1951  N->getOperand(8),
1952  N->getOperand(9),
1953  N->getOperand(10),
1954  N->getOperand(11),
1955  N->getOperand(12),
1956  N->getOperand(13),
1957  N->getOperand(14),
1958  N->getOperand(15),
1959  N->getOperand(16),
1960  N->getOperand(17),
1961  N->getOperand(18),
1962  };
1963  NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1964  return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1965  }
1966 
1967  case ISD::LOAD: {
1968  LoadSDNode *LoadNode = cast<LoadSDNode>(N);
1969  SDValue Ptr = LoadNode->getBasePtr();
1970  if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1971  isa<ConstantSDNode>(Ptr))
1972  return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1973  break;
1974  }
1975 
1976  default: break;
1977  }
1978 
1979  return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1980 }
1981 
1982 bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1983  SDValue &Src, SDValue &Neg, SDValue &Abs,
1984  SDValue &Sel, SDValue &Imm,
1985  SelectionDAG &DAG) const {
1986  const R600InstrInfo *TII = Subtarget->getInstrInfo();
1987  if (!Src.isMachineOpcode())
1988  return false;
1989 
1990  switch (Src.getMachineOpcode()) {
1991  case R600::FNEG_R600:
1992  if (!Neg.getNode())
1993  return false;
1994  Src = Src.getOperand(0);
1995  Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1996  return true;
1997  case R600::FABS_R600:
1998  if (!Abs.getNode())
1999  return false;
2000  Src = Src.getOperand(0);
2001  Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
2002  return true;
2003  case R600::CONST_COPY: {
2004  unsigned Opcode = ParentNode->getMachineOpcode();
2005  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2006 
2007  if (!Sel.getNode())
2008  return false;
2009 
2010  SDValue CstOffset = Src.getOperand(0);
2011  if (ParentNode->getValueType(0).isVector())
2012  return false;
2013 
2014  // Gather constant values
2015  int SrcIndices[] = {
2016  TII->getOperandIdx(Opcode, R600::OpName::src0),
2017  TII->getOperandIdx(Opcode, R600::OpName::src1),
2018  TII->getOperandIdx(Opcode, R600::OpName::src2),
2019  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2020  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2021  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2022  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2023  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2024  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2025  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2026  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2027  };
2028  std::vector<unsigned> Consts;
2029  for (int OtherSrcIdx : SrcIndices) {
2030  int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2031  if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2032  continue;
2033  if (HasDst) {
2034  OtherSrcIdx--;
2035  OtherSelIdx--;
2036  }
2037  if (RegisterSDNode *Reg =
2038  dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2039  if (Reg->getReg() == R600::ALU_CONST) {
2040  ConstantSDNode *Cst
2041  = cast<ConstantSDNode>(ParentNode->getOperand(OtherSelIdx));
2042  Consts.push_back(Cst->getZExtValue());
2043  }
2044  }
2045  }
2046 
2047  ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2048  Consts.push_back(Cst->getZExtValue());
2049  if (!TII->fitsConstReadLimitations(Consts)) {
2050  return false;
2051  }
2052 
2053  Sel = CstOffset;
2054  Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2055  return true;
2056  }
2057  case R600::MOV_IMM_GLOBAL_ADDR:
2058  // Check if the Imm slot is used. Taken from below.
2059  if (cast<ConstantSDNode>(Imm)->getZExtValue())
2060  return false;
2061  Imm = Src.getOperand(0);
2062  Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2063  return true;
2064  case R600::MOV_IMM_I32:
2065  case R600::MOV_IMM_F32: {
2066  unsigned ImmReg = R600::ALU_LITERAL_X;
2067  uint64_t ImmValue = 0;
2068 
2069  if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2070  ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2071  float FloatValue = FPC->getValueAPF().convertToFloat();
2072  if (FloatValue == 0.0) {
2073  ImmReg = R600::ZERO;
2074  } else if (FloatValue == 0.5) {
2075  ImmReg = R600::HALF;
2076  } else if (FloatValue == 1.0) {
2077  ImmReg = R600::ONE;
2078  } else {
2079  ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2080  }
2081  } else {
2082  ConstantSDNode *C = cast<ConstantSDNode>(Src.getOperand(0));
2083  uint64_t Value = C->getZExtValue();
2084  if (Value == 0) {
2085  ImmReg = R600::ZERO;
2086  } else if (Value == 1) {
2087  ImmReg = R600::ONE_INT;
2088  } else {
2089  ImmValue = Value;
2090  }
2091  }
2092 
2093  // Check that we aren't already using an immediate.
2094  // XXX: It's possible for an instruction to have more than one
2095  // immediate operand, but this is not supported yet.
2096  if (ImmReg == R600::ALU_LITERAL_X) {
2097  if (!Imm.getNode())
2098  return false;
2099  ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2100  if (C->getZExtValue())
2101  return false;
2102  Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2103  }
2104  Src = DAG.getRegister(ImmReg, MVT::i32);
2105  return true;
2106  }
2107  default:
2108  return false;
2109  }
2110 }
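// Illustrative effect of FoldOperand above: a source produced by FNEG_R600 is
// replaced by its operand with the corresponding *_neg modifier set to 1, and
// a MOV_IMM_F32 of 0.5 is replaced by a read of the HALF register, so no
// literal slot is consumed; immediates other than 0, 0.5 and 1 fall back to
// ALU_LITERAL_X and occupy the instruction's literal slot.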
2111 
2112 /// Fold the instructions after selecting them
2113 SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2114  SelectionDAG &DAG) const {
2115  const R600InstrInfo *TII = Subtarget->getInstrInfo();
2116  if (!Node->isMachineOpcode())
2117  return Node;
2118 
2119  unsigned Opcode = Node->getMachineOpcode();
2120  SDValue FakeOp;
2121 
2122  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2123 
2124  if (Opcode == R600::DOT_4) {
2125  int OperandIdx[] = {
2126  TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2127  TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2128  TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2129  TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2130  TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2131  TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2132  TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2133  TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2134  };
2135  int NegIdx[] = {
2136  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2137  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2138  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2139  TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2140  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2141  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2142  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2143  TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2144  };
2145  int AbsIdx[] = {
2146  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2147  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2148  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2149  TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2150  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2151  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2152  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2153  TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2154  };
2155  for (unsigned i = 0; i < 8; i++) {
2156  if (OperandIdx[i] < 0)
2157  return Node;
2158  SDValue &Src = Ops[OperandIdx[i] - 1];
2159  SDValue &Neg = Ops[NegIdx[i] - 1];
2160  SDValue &Abs = Ops[AbsIdx[i] - 1];
2161  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2162  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2163  if (HasDst)
2164  SelIdx--;
2165  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2166  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2167  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2168  }
2169  } else if (Opcode == R600::REG_SEQUENCE) {
2170  for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2171  SDValue &Src = Ops[i];
2172  if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2173  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2174  }
2175  } else {
2176  if (!TII->hasInstrModifiers(Opcode))
2177  return Node;
2178  int OperandIdx[] = {
2179  TII->getOperandIdx(Opcode, R600::OpName::src0),
2180  TII->getOperandIdx(Opcode, R600::OpName::src1),
2181  TII->getOperandIdx(Opcode, R600::OpName::src2)
2182  };
2183  int NegIdx[] = {
2184  TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2185  TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2186  TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2187  };
2188  int AbsIdx[] = {
2189  TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2190  TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2191  -1
2192  };
2193  for (unsigned i = 0; i < 3; i++) {
2194  if (OperandIdx[i] < 0)
2195  return Node;
2196  SDValue &Src = Ops[OperandIdx[i] - 1];
2197  SDValue &Neg = Ops[NegIdx[i] - 1];
2198  SDValue FakeAbs;
2199  SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2200  bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2201  int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2202  int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2203  if (HasDst) {
2204  SelIdx--;
2205  ImmIdx--;
2206  }
2207  SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2208  SDValue &Imm = Ops[ImmIdx];
2209  if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2210  return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2211  }
2212  }
2213 
2214  return Node;
2215 }
llvm::ISD::SUB
@ SUB
Definition: ISDOpcodes.h:240
llvm::CCValAssign::getLocVT
MVT getLocVT() const
Definition: CallingConvLower.h:153
i
i
Definition: README.txt:29
llvm::ISD::SETUGE
@ SETUGE
Definition: ISDOpcodes.h:1383
llvm::AMDGPUISD::STORE_MSKOR
@ STORE_MSKOR
Definition: AMDGPUISelLowering.h:495
llvm::TargetLowering::scalarizeVectorLoad
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
Definition: TargetLowering.cpp:7398
llvm::ConstantSDNode
Definition: SelectionDAGNodes.h:1558
llvm::RegisterSDNode
Definition: SelectionDAGNodes.h:2117
llvm::APFloat::convertToFloat
float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:4908
llvm::StoreSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2350
llvm::TargetLoweringBase::setSchedulingPreference
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
Definition: TargetLowering.h:2167
llvm::ISD::SETLE
@ SETLE
Definition: ISDOpcodes.h:1394
llvm::ISD::INTRINSIC_VOID
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
llvm::ISD::SETO
@ SETO
Definition: ISDOpcodes.h:1379
MI
IRTranslator LLVM IR MI
Definition: IRTranslator.cpp:105
llvm::MachineInstrBuilder::addImm
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
Definition: MachineInstrBuilder.h:131
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AllocatorList.h:23
llvm::AMDGPUAS::CONSTANT_BUFFER_8
@ CONSTANT_BUFFER_8
Definition: AMDGPU.h:389
Reg
unsigned Reg
Definition: MachineSink.cpp:1563
llvm::SDNode::getValueType
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
Definition: SelectionDAGNodes.h:968
llvm::drop_begin
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:321
llvm::SDLoc
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Definition: SelectionDAGNodes.h:1088
llvm::TargetLoweringBase::Legal
@ Legal
Definition: TargetLowering.h:196
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::ISD::OR
@ OR
Definition: ISDOpcodes.h:633
llvm::TargetLowering::EmitInstrWithCustomInserter
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: SelectionDAGISel.cpp:295
llvm::ISD::BITCAST
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:852
llvm::ISD::SETNE
@ SETNE
Definition: ISDOpcodes.h:1395
Insert
Vector Rotate Left Mask Mask Insert
Definition: README_P9.txt:112
llvm::AMDGPUTargetLowering
Definition: AMDGPUISelLowering.h:27
llvm::ISD::NON_EXTLOAD
@ NON_EXTLOAD
Definition: ISDOpcodes.h:1350
llvm::CCState
CCState - This class holds information needed while lowering arguments and return values.
Definition: CallingConvLower.h:191
llvm::MachineOperand::getGlobal
const GlobalValue * getGlobal() const
Definition: MachineOperand.h:563
llvm::MachineRegisterInfo
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Definition: MachineRegisterInfo.h:52
llvm::SDValue::getNode
SDNode * getNode() const
get the SDNode which holds the desired result
Definition: SelectionDAGNodes.h:151
llvm::MachineInstrBuilder::add
const MachineInstrBuilder & add(const MachineOperand &MO) const
Definition: MachineInstrBuilder.h:224
llvm::ISD::UADDO
@ UADDO
Definition: ISDOpcodes.h:311
llvm::ISD::CTTZ_ZERO_UNDEF
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:674
llvm::SelectionDAG::getValueType
SDValue getValueType(EVT)
Definition: SelectionDAG.cpp:1762
llvm::AMDGPUAS::CONSTANT_BUFFER_6
@ CONSTANT_BUFFER_6
Definition: AMDGPU.h:387
llvm::PointerType::get
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
Definition: Type.cpp:729
llvm::ISD::ADDC
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
llvm::R600FrameLowering
Definition: R600FrameLowering.h:16
llvm::SmallVector
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1177
llvm::AMDGPUISD::CONST_ADDRESS
@ CONST_ADDRESS
Definition: AMDGPUISelLowering.h:447
llvm::ISD::SETEQ
@ SETEQ
Definition: ISDOpcodes.h:1390
llvm::SelectionDAG::getVTList
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
Definition: SelectionDAG.cpp:8543
llvm::MVT::v2i1
@ v2i1
Definition: MachineValueType.h:65
llvm::AMDGPUAS::CONSTANT_BUFFER_11
@ CONSTANT_BUFFER_11
Definition: AMDGPU.h:392
llvm::R600Subtarget::getInstrInfo
const R600InstrInfo * getInstrInfo() const override
Definition: R600Subtarget.h:50
llvm::CallingConv::AMDGPU_PS
@ AMDGPU_PS
Calling convention used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:210
llvm::MipsISD::Lo
@ Lo
Definition: MipsISelLowering.h:79
llvm::MachineSDNode
An SDNode that represents everything that will be needed to construct a MachineInstr.
Definition: SelectionDAGNodes.h:2765
llvm::MachineMemOperand::MOInvariant
@ MOInvariant
The memory access always returns the same value (or traps).
Definition: MachineMemOperand.h:145
llvm::SelectionDAG::getVectorIdxConstant
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1548
llvm::AMDGPUAS::CONSTANT_BUFFER_9
@ CONSTANT_BUFFER_9
Definition: AMDGPU.h:390
llvm::MemSDNode::getMemoryVT
EVT getMemoryVT() const
Return the type of the in-memory value.
Definition: SelectionDAGNodes.h:1338
llvm::MemSDNode::getChain
const SDValue & getChain() const
Definition: SelectionDAGNodes.h:1361
llvm::ISD::ANY_EXTEND
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:732
llvm::SDNode
Represents one node in the SelectionDAG.
Definition: SelectionDAGNodes.h:454
llvm::ISD::FMA
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:466
llvm::ISD::FP_TO_SINT
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:785
llvm::TargetLowering::DAGCombinerInfo::DAG
SelectionDAG & DAG
Definition: TargetLowering.h:3645
llvm::LoadSDNode
This class is used to represent ISD::LOAD nodes.
Definition: SelectionDAGNodes.h:2300
llvm::ISD::SETULE
@ SETULE
Definition: ISDOpcodes.h:1385
llvm::CCState::AnalyzeFormalArguments
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
Definition: CallingConvLower.cpp:90
isEOP
static bool isEOP(MachineBasicBlock::iterator I)
Definition: R600ISelLowering.cpp:258
llvm::NVPTXISD::RETURN
@ RETURN
Definition: NVPTXISelLowering.h:49
llvm::ISD::SHL_PARTS
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:715
llvm::SelectionDAG::getZeroExtendInReg
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
Definition: SelectionDAG.cpp:1358
llvm::SelectionDAG::getStore
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
Definition: SelectionDAG.cpp:7538
llvm::MachineMemOperand::MODereferenceable
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
Definition: MachineMemOperand.h:143
llvm::AMDGPUISD::CARRY
@ CARRY
Definition: AMDGPUISelLowering.h:427
llvm::R600Subtarget::hasBCNT
bool hasBCNT(unsigned Size) const
Definition: R600Subtarget.h:92
llvm::ISD::SETCC
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:702
llvm::AMDGPUAS::CONSTANT_BUFFER_2
@ CONSTANT_BUFFER_2
Definition: AMDGPU.h:383
llvm::R600Subtarget::hasCARRY
bool hasCARRY() const
Definition: R600Subtarget.h:103
Offset
uint64_t Offset
Definition: ELFObjHandler.cpp:80
llvm::RegState::Kill
@ Kill
The last use of a register.
Definition: MachineInstrBuilder.h:48
Results
Function Alias Analysis Results
Definition: AliasAnalysis.cpp:849
llvm::ISD::MERGE_VALUES
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
llvm::SPII::Store
@ Store
Definition: SparcInstrInfo.h:33
llvm::AMDGPUTargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition: AMDGPUISelLowering.cpp:1316
llvm::MipsISD::Ret
@ Ret
Definition: MipsISelLowering.h:116
llvm::ISD::EXTLOAD
@ EXTLOAD
Definition: ISDOpcodes.h:1350
llvm::LSBaseSDNode::isIndexed
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
Definition: SelectionDAGNodes.h:2288
RHS
Value * RHS
Definition: X86PartialReduction.cpp:74
llvm::R600Subtarget::hasFFBL
bool hasFFBL() const
Definition: R600Subtarget.h:111
llvm::EVT::bitsLE
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:281
llvm::BitmaskEnumDetail::Mask
std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::MachineOperand::getOffset
int64_t getOffset() const
Return the offset from the symbol in this operand.
Definition: MachineOperand.h:600
R600ISelLowering.h
llvm::AMDGPUAS::CONSTANT_BUFFER_1
@ CONSTANT_BUFFER_1
Definition: AMDGPU.h:382
llvm::ISD::SETUEQ
@ SETUEQ
Definition: ISDOpcodes.h:1381
llvm::TargetLowering::expandUnalignedStore
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
Definition: TargetLowering.cpp:7717
llvm::SelectionDAG::getContext
LLVMContext * getContext() const
Definition: SelectionDAG.h:447
llvm::R600MachineFunctionInfo
Definition: R600MachineFunctionInfo.h:19
llvm::ISD::BRCOND
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:960
llvm::SelectionDAG::getRegister
SDValue getRegister(unsigned Reg, EVT VT)
Definition: SelectionDAG.cpp:2025
llvm::MipsISD::Hi
@ Hi
Definition: MipsISelLowering.h:75
isUndef
static bool isUndef(ArrayRef< int > Mask)
Definition: HexagonISelDAGToDAGHVX.cpp:909
llvm::EVT::isSimple
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:130
llvm::SDNode::op_end
op_iterator op_end() const
Definition: SelectionDAGNodes.h:914
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:185
llvm::MVT::integer_valuetypes
static auto integer_valuetypes()
Definition: MachineValueType.h:1416
llvm::ISD::FFLOOR
@ FFLOOR
Definition: ISDOpcodes.h:889
llvm::ISD::LoadExtType
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1350
llvm::R600TargetLowering::LowerFormalArguments
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
XXX Only kernel functions are supported, so we can assume for now that every function is a kernel fun...
Definition: R600ISelLowering.cpp:1491
llvm::R600Subtarget::hasBFE
bool hasBFE() const
Definition: R600Subtarget.h:84
LHS
Value * LHS
Definition: X86PartialReduction.cpp:73
llvm::ISD::BR_CC
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:967
x3
In x86 we generate this spiffy xmm0 xmm0 ret in x86 we generate this which could be xmm1 movss xmm1 xmm0 ret In sse4 we could use insertps to make both better Here s another testcase that could use x3
Definition: README-SSE.txt:547
llvm::SelectionDAG::getLoad
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
Definition: SelectionDAG.cpp:7488
llvm::MVT::i1
@ i1
Definition: MachineValueType.h:43
llvm::SDNode::getOpcode
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Definition: SelectionDAGNodes.h:628
llvm::EVT::getStoreSize
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:363
llvm::R600Subtarget::hasFMA
bool hasFMA() const
Definition: R600Subtarget.h:119
llvm::MinAlign
constexpr uint64_t MinAlign(uint64_t A, uint64_t B)
A and B are either alignments or offsets.
Definition: MathExtras.h:672
llvm::MachineFunction::getRegInfo
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Definition: MachineFunction.h:651
llvm::ISD::GlobalAddress
@ GlobalAddress
Definition: ISDOpcodes.h:78
llvm::ISD::SELECT_CC
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:694
llvm::SDValue::getValueType
EVT getValueType() const
Return the ValueType of the referenced return value.
Definition: SelectionDAGNodes.h:1123
llvm::CCValAssign
CCValAssign - Represent assignment of one arg/retval to a location.
Definition: CallingConvLower.h:33
llvm::AMDGPU::isShader
bool isShader(CallingConv::ID cc)
Definition: AMDGPUBaseInfo.cpp:1369
llvm::SelectionDAG
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:216
llvm::ISD::SELECT
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:679
llvm::CallingConv::Fast
@ Fast
Fast - This calling convention attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:42
llvm::TargetLowering::DAGCombinerInfo::isBeforeLegalizeOps
bool isBeforeLegalizeOps() const
Definition: TargetLowering.h:3651
llvm::AMDGPUAS::CONSTANT_BUFFER_7
@ CONSTANT_BUFFER_7
Definition: AMDGPU.h:388
f
Itanium Name Demangler i e convert the string _Z1fv into f()". You can also use the CRTP base ManglingParser to perform some simple analysis on the mangled name
llvm::SelectionDAG::getTruncStore
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7589
llvm::SmallVectorImpl::append
void append(in_iter in_start, in_iter in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:657
llvm::CCValAssign::getLocReg
Register getLocReg() const
Definition: CallingConvLower.h:150
llvm::MachineFunction::getInfo
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Definition: MachineFunction.h:739
MO_FLAG_PUSH
#define MO_FLAG_PUSH
Definition: R600Defines.h:18
llvm::SelectionDAG::getUNDEF
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
Definition: SelectionDAG.h:951
llvm::ISD::SIGN_EXTEND_INREG
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:747
llvm::AMDGPUISD::R600_EXPORT
@ R600_EXPORT
Definition: AMDGPUISelLowering.h:446
llvm::MachineMemOperand::getValue
const Value * getValue() const
Return the base address of the memory access.
Definition: MachineMemOperand.h:211
llvm::EVT
Extended Value Type.
Definition: ValueTypes.h:35
C
(vector float) vec_cmpeq(*A, *B) C
Definition: README_ALTIVEC.txt:86
llvm::TargetLoweringBase::isCondCodeLegal
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
Definition: TargetLowering.h:1394
llvm::MVT::f64
@ f64
Definition: MachineValueType.h:56
llvm::AMDGPUAS::CONSTANT_BUFFER_10
@ CONSTANT_BUFFER_10
Definition: AMDGPU.h:391
llvm::SelectionDAG::getConstant
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1413
llvm::CallingConv::AMDGPU_GS
@ AMDGPU_GS
Calling convention used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:207
llvm::EVT::getVectorNumElements
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:309
llvm::TargetLowering::DAGCombinerInfo
Definition: TargetLowering.h:3639
getOpcode
static Optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:199
llvm::ms_demangle::QualifierMangleMode::Result
@ Result
llvm::ISD::TRUNCATE
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:735
llvm::TargetLoweringBase::ZeroOrNegativeOneBooleanContent
@ ZeroOrNegativeOneBooleanContent
Definition: TargetLowering.h:233
llvm::AMDGPUISD::DOT4
@ DOT4
Definition: AMDGPUISelLowering.h:426
llvm::SPII::Load
@ Load
Definition: SparcInstrInfo.h:32
llvm::ISD::UDIVREM
@ UDIVREM
Definition: ISDOpcodes.h:256
TII
const HexagonInstrInfo * TII
Definition: HexagonCopyToCombine.cpp:127
llvm::TargetLoweringBase::addRegisterClass
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
Definition: TargetLowering.h:2214
llvm::MachineOperand
MachineOperand class - Representation of each machine instruction operand.
Definition: MachineOperand.h:49
MO_FLAG_MASK
#define MO_FLAG_MASK
Definition: R600Defines.h:17
llvm::report_fatal_error
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:143
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1467
llvm::ISD::FRINT
@ FRINT
Definition: ISDOpcodes.h:885
llvm::codeview::EncodedFramePtrReg::BasePtr
@ BasePtr
llvm::ISD::ATOMIC_STORE
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1122
llvm::FrameIndexSDNode
Definition: SelectionDAGNodes.h:1746
llvm::R600TargetLowering::CCAssignFnForCall
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
Definition: R600ISelLowering.cpp:1466
llvm::SelectionDAG::getMemIntrinsicNode
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
Definition: SelectionDAG.cpp:7265
R600InstrInfo.h
llvm::R600TargetLowering::canMergeStoresTo
bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
Definition: R600ISelLowering.cpp:1568
llvm::R600TargetLowering::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, bool *IsFast=nullptr) const override
Determine if the target supports unaligned memory accesses.
Definition: R600ISelLowering.cpp:1577
llvm::ISD::AND
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:632
llvm::ISD::SETOLT
@ SETOLT
Definition: ISDOpcodes.h:1376
Align
uint64_t Align
Definition: ELFObjHandler.cpp:82
llvm::GlobalAddressSDNode::getGlobal
const GlobalValue * getGlobal() const
Definition: SelectionDAGNodes.h:1732
llvm::APFloat::bitcastToAPInt
APInt bitcastToAPInt() const
Definition: APFloat.h:1130
llvm::CCValAssign::getLocMemOffset
unsigned getLocMemOffset() const
Definition: CallingConvLower.h:151
llvm::ISD::UNINDEXED
@ UNINDEXED
Definition: ISDOpcodes.h:1319
llvm::TargetLoweringBase::isOperationLegal
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
Definition: TargetLowering.h:1248
llvm::Align
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
llvm::TargetLoweringBase::setBooleanVectorContents
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
Definition: TargetLowering.h:2162
llvm::EVT::changeVectorElementTypeToInteger
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
llvm::AddressSpace
AddressSpace
Definition: NVPTXBaseInfo.h:21
llvm::MachineRegisterInfo::getVRegDef
MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
Definition: MachineRegisterInfo.cpp:398
llvm::EVT::bitsLT
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:273
llvm::AMDGPUTargetLowering::LowerGlobalAddress
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
Definition: AMDGPUISelLowering.cpp:1333
llvm::MVT::v4i16
@ v4i16
Definition: MachineValueType.h:91
llvm::EVT::getTypeForEVT
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:181
llvm::MachineRegisterInfo::use_empty
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
Definition: MachineRegisterInfo.h:506
llvm::ISD::USUBO
@ USUBO
Definition: ISDOpcodes.h:315
llvm::MVT::v4i8
@ v4i8
Definition: MachineValueType.h:78
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
ConstantAddressBlock
static int ConstantAddressBlock(unsigned AddressSpace)
Definition: R600ISelLowering.cpp:1258
llvm::StoreSDNode::isTruncatingStore
bool isTruncatingStore() const
Return true if the op does a truncation before store.
Definition: SelectionDAGNodes.h:2344
llvm::ISD::SETOLE
@ SETOLE
Definition: ISDOpcodes.h:1377
llvm::MachineBasicBlock
Definition: MachineBasicBlock.h:95
llvm::ISD::SETUGT
@ SETUGT
Definition: ISDOpcodes.h:1382
llvm::SelectionDAG::getTargetGlobalAddress
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:683
R600MachineFunctionInfo.h
llvm::CCAssignFn
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
Definition: CallingConvLower.h:177
llvm::ISD::FADD
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:377
llvm::MVT::v2i8
@ v2i8
Definition: MachineValueType.h:77
llvm::AMDGPUISD::BUILD_VERTICAL_VECTOR
@ BUILD_VERTICAL_VECTOR
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
Definition: AMDGPUISelLowering.h:481
llvm::ISD::SETUNE
@ SETUNE
Definition: ISDOpcodes.h:1386
llvm::Register::isVirtual
bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
R600MCTargetDesc.h
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
llvm::AMDGPUAS::PRIVATE_ADDRESS
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:364
llvm::TargetLoweringBase::setOperationAction
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
Definition: TargetLowering.h:2231
llvm::MVT::v32i16
@ v32i16
Definition: MachineValueType.h:94
llvm::pdb::OMFSegDescFlags::Read
@ Read
llvm::AMDGPUTargetLowering::LowerUDIVREM64
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
Definition: AMDGPUISelLowering.cpp:1805
llvm::R600Subtarget
Definition: R600Subtarget.h:29
llvm::GlobalValue
Definition: GlobalValue.h:44
llvm::MipsISD::Ext
@ Ext
Definition: MipsISelLowering.h:156
llvm::AMDGPUTargetLowering::LowerSDIVREM
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
Definition: AMDGPUISelLowering.cpp:2076
llvm::CallingConv::AMDGPU_CS
@ AMDGPU_CS
Calling convention used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:213
llvm::MVT::v16i8
@ v16i8
Definition: MachineValueType.h:80
llvm::R600Subtarget::hasBORROW
bool hasBORROW() const
Definition: R600Subtarget.h:99
llvm::AMDGPUISD::BORROW
@ BORROW
Definition: AMDGPUISelLowering.h:428
llvm::EVT::getSizeInBits
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:341
llvm::ISD::FMAD
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:470
llvm::MachineOperand::getTargetFlags
unsigned getTargetFlags() const
Definition: MachineOperand.h:221
llvm::DenseMapBase::clear
void clear()
Definition: DenseMap.h:111
llvm::TargetLowering::expandFP_TO_SINT
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
Definition: TargetLowering.cpp:6771
llvm::MVT::v16i16
@ v16i16
Definition: MachineValueType.h:93
Index
uint32_t Index
Definition: ELFObjHandler.cpp:83
llvm::MachineInstr
Representation of each machine instruction.
Definition: MachineInstr.h:64
llvm::MachineInstrBuilder
Definition: MachineInstrBuilder.h:69
llvm::TargetLoweringBase::setHasExtractBitsInsn
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
Definition: TargetLowering.h:2197
uint64_t
llvm::ISD::FP_TO_UINT
@ FP_TO_UINT
Definition: ISDOpcodes.h:786
llvm::ConstantFPSDNode
Definition: SelectionDAGNodes.h:1607
llvm::R600FrameLowering::getFrameIndexReference
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
Definition: R600FrameLowering.cpp:18
llvm::MemSDNode::getMemOperand
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
Definition: SelectionDAGNodes.h:1342
llvm::ISD::LOAD
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:925
llvm::SelectionDAG::getIntPtrConstant
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
Definition: SelectionDAG.cpp:1536
llvm::TargetLoweringBase::Promote
@ Promote
Definition: TargetLowering.h:197
llvm::ConstantPointerNull::get
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1768
llvm::AMDGPUAS::CONSTANT_BUFFER_13
@ CONSTANT_BUFFER_13
Definition: AMDGPU.h:394
llvm::MachinePointerInfo
This class contains a discriminated union of information about pointers in memory operands,...
Definition: MachineMemOperand.h:38
llvm::AMDGPUAS::CONSTANT_BUFFER_14
@ CONSTANT_BUFFER_14
Definition: AMDGPU.h:395
llvm::LLVMContext
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
llvm::numbers::e
constexpr double e
Definition: MathExtras.h:57
llvm::DenseMap< unsigned, unsigned >
llvm::SelectionDAG::getCopyFromReg
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:761
llvm::ISD::EXTRACT_VECTOR_ELT
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:511
llvm::R600Subtarget::hasBFI
bool hasBFI() const
Definition: R600Subtarget.h:88
llvm::ISD::CTLZ_ZERO_UNDEF
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:675
llvm::SDNode::getOperand
const SDValue & getOperand(unsigned Num) const
Definition: SelectionDAGNodes.h:906
I
#define I(x, y, z)
Definition: MD5.cpp:58
llvm::FrameIndexSDNode::getIndex
int getIndex() const
Definition: SelectionDAGNodes.h:1757
llvm::SelectionDAG::getNode
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
Definition: SelectionDAG.cpp:8308
llvm::SelectionDAG::getNOT
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
Definition: SelectionDAG.cpp:1389
llvm::R600::getLDSNoRetOp
int getLDSNoRetOp(uint16_t Opcode)
llvm::ISD::FCOPYSIGN
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:476
llvm::LoadSDNode::getExtensionType
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Definition: SelectionDAGNodes.h:2315
llvm::PointerType
Class to represent pointers.
Definition: DerivedTypes.h:632
llvm::TargetLowering::scalarizeVectorStore
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
Definition: TargetLowering.cpp:7488
llvm::MVT::v4f32
@ v4f32
Definition: MachineValueType.h:157
llvm::DenseMapBase::find
iterator find(const_arg_type_t< KeyT > Val)
Definition: DenseMap.h:150
llvm::MachineMemOperand::Flags
Flags
Flags values. These may be or'd together.
Definition: MachineMemOperand.h:131
llvm::ISD::InputArg
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
Definition: TargetCallingConv.h:195
llvm::StoreSDNode
This class is used to represent ISD::STORE nodes.
Definition: SelectionDAGNodes.h:2328
llvm::ISD::ZEXTLOAD
@ ZEXTLOAD
Definition: ISDOpcodes.h:1350
llvm::SDValue::getValue
SDValue getValue(unsigned R) const
Definition: SelectionDAGNodes.h:171
llvm::MVT::i8
@ i8
Definition: MachineValueType.h:44
llvm::R600Subtarget::hasFFBH
bool hasFFBH() const
Definition: R600Subtarget.h:115
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
llvm::TargetMachine
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:80
llvm::MVT::Other
@ Other
Definition: MachineValueType.h:42
llvm::AMDGPUISD::SIN_HW
@ SIN_HW
Definition: AMDGPUISelLowering.h:394
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:840
llvm::ISD::SETULT
@ SETULT
Definition: ISDOpcodes.h:1384
llvm::ConstantSDNode::getZExtValue
uint64_t getZExtValue() const
Definition: SelectionDAGNodes.h:1573
llvm::TargetLowering::expandShiftParts
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
Definition: TargetLowering.cpp:6719
llvm::AMDGPUISD::TEXTURE_FETCH
@ TEXTURE_FETCH
Definition: AMDGPUISelLowering.h:445
llvm::AMDGPUISD::RSQ
@ RSQ
Definition: AMDGPUISelLowering.h:419
llvm::MachineMemOperand::MONonTemporal
@ MONonTemporal
The memory access is non-temporal.
Definition: MachineMemOperand.h:141
llvm::SelectionDAG::getSelectCC
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
Definition: SelectionDAG.h:1087
llvm::ISD::CondCode
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1370
MO_FLAG_NEG
#define MO_FLAG_NEG
Definition: R600Defines.h:15
MO_FLAG_ABS
#define MO_FLAG_ABS
Definition: R600Defines.h:16
llvm::AMDGPUSubtarget::R700
@ R700
Definition: AMDGPUSubtarget.h:34
llvm::SelectionDAG::getMachineNode
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
Definition: SelectionDAG.cpp:8981
llvm::MVT
Machine Value Type.
Definition: MachineValueType.h:31
llvm::ISD::isNON_EXTLoad
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
Definition: SelectionDAGNodes.h:2920
llvm::MachineInstrBuilder::addReg
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Definition: MachineInstrBuilder.h:97
llvm::StoreSDNode::getValue
const SDValue & getValue() const
Definition: SelectionDAGNodes.h:2349
llvm::AfterLegalizeVectorOps
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
llvm::ISD::SRA_PARTS
@ SRA_PARTS
Definition: ISDOpcodes.h:716
llvm::AMDGPUTargetLowering::combineFMinMaxLegacy
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
Definition: AMDGPUISelLowering.cpp:1418
llvm::MachineFunction
Definition: MachineFunction.h:241
llvm::AMDGPUAS::CONSTANT_BUFFER_4
@ CONSTANT_BUFFER_4
Definition: AMDGPU.h:385
llvm::R600Subtarget::getFrameLowering
const R600FrameLowering * getFrameLowering() const override
Definition: R600Subtarget.h:52
llvm::MachineFunction::addLiveIn
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Definition: MachineFunction.cpp:658
llvm::MemSDNode::getAAInfo
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
Definition: SelectionDAGNodes.h:1306
llvm::AMDGPUTargetLowering::getImplicitParameterOffset
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
Definition: AMDGPUISelLowering.cpp:4366
llvm::Sched::Source
@ Source
Definition: TargetLowering.h:99
llvm::EVT::isVector
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:155
llvm::AMDGPUAS::GLOBAL_ADDRESS
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
Definition: AMDGPU.h:359
llvm::CallingConv::AMDGPU_HS
@ AMDGPU_HS
Calling convention used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:223
llvm::MVT::i64
@ i64
Definition: MachineValueType.h:47
Cond
SmallVector< MachineOperand, 4 > Cond
Definition: BasicBlockSections.cpp:179
llvm::MVT::v2i32
@ v2i32
Definition: MachineValueType.h:101
llvm::EVT::getScalarSizeInBits
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:353
AMDGPU.h
llvm_unreachable
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
Definition: ErrorHandling.h:134
llvm::SelectionDAG::ReplaceAllUsesOfValueWith
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
Definition: SelectionDAG.cpp:9569
llvm::MVT::v2f32
@ v2f32
Definition: MachineValueType.h:155
uint32_t
llvm::StackOffset
StackOffset is a class to represent an offset with 2 dimensions, named fixed and scalable,...
Definition: TypeSize.h:134
llvm::AMDGPUTargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: AMDGPUISelLowering.cpp:1274
llvm::MinMax
Definition: AssumeBundleQueries.h:71
llvm::SDValue::getOperand
const SDValue & getOperand(unsigned i) const
Definition: SelectionDAGNodes.h:1131
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::CallingConv::AMDGPU_ES
@ AMDGPU_ES
Calling convention used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:236
llvm::TargetLoweringBase::setCondCodeAction
void setCondCodeAction(ISD::CondCode CC, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
Definition: TargetLowering.h:2295
llvm::TargetLoweringBase::setTruncStoreAction
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
Definition: TargetLowering.h:2250
llvm::ISD::getSetCCInverse
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
Definition: SelectionDAG.cpp:517
llvm::MVT::v4i32
@ v4i32
Definition: MachineValueType.h:103
LLVM_FALLTHROUGH
#define LLVM_FALLTHROUGH
LLVM_FALLTHROUGH - Mark fallthrough cases in switch statements.
Definition: Compiler.h:290
llvm::CallingConv::C
@ C
C - The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
llvm::ISD::FMUL
@ FMUL
Definition: ISDOpcodes.h:379
llvm::MVT::v32i32
@ v32i32
Definition: MachineValueType.h:109
llvm::ISD::SEXTLOAD
@ SEXTLOAD
Definition: ISDOpcodes.h:1350
llvm::SelectionDAG::getBuildVector
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:790
llvm::SelectionDAG::getConstantFP
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
Definition: SelectionDAG.cpp:1592
MRI
unsigned const MachineRegisterInfo * MRI
Definition: AArch64AdvSIMDScalarPass.cpp:105
llvm::Register
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
llvm::ISD::INTRINSIC_WO_CHAIN
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
llvm::LoadSDNode::getBasePtr
const SDValue & getBasePtr() const
Definition: SelectionDAGNodes.h:2319
llvm::DenseMapBase::empty
LLVM_NODISCARD bool empty() const
Definition: DenseMap.h:97
llvm::MVT::v16i32
@ v16i32
Definition: MachineValueType.h:108
llvm::ISD::FrameIndex
@ FrameIndex
Definition: ISDOpcodes.h:80
llvm::isInt< 16 >
constexpr bool isInt< 16 >(int64_t x)
Definition: MathExtras.h:370
llvm::AMDGPUTargetLowering::CreateLiveInRegisterRaw
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT) const
Definition: AMDGPUISelLowering.h:297
j
return j(j<< 16)
llvm::ISD::SETLT
@ SETLT
Definition: ISDOpcodes.h:1393
llvm::isAllOnesConstant
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
Definition: SelectionDAG.cpp:9929
llvm::AMDGPUTargetLowering::analyzeFormalArgumentsCompute
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
Definition: AMDGPUISelLowering.cpp:1053
CompactSwizzlableVector
static SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
Definition: R600ISelLowering.cpp:1596
llvm::R600TargetLowering::LowerOperation
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Definition: R600ISelLowering.cpp:449
llvm::TargetLoweringBase::setLoadExtAction
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
Definition: TargetLowering.h:2238
llvm::GlobalAddressSDNode
Definition: SelectionDAGNodes.h:1720
llvm::AMDGPUISD::RSQ_CLAMP
@ RSQ_CLAMP
Definition: AMDGPUISelLowering.h:423
llvm::isNullConstant
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
Definition: SelectionDAG.cpp:9919
llvm::EVT::getScalarType
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:296
llvm::DenseMapBase::end
iterator end()
Definition: DenseMap.h:83
llvm::AMDGPU::SendMsg::Op
Op
Definition: SIDefines.h:325
llvm::CallingConv::Cold
@ Cold
Definition: CallingConv.h:48
llvm::MemSDNode::getAddressSpace
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Definition: SelectionDAGNodes.h:1349
llvm::MemSDNode::getAlignment
unsigned getAlignment() const
Definition: SelectionDAGNodes.h:1276
llvm::ISD::FCOS
@ FCOS
Definition: ISDOpcodes.h:875
llvm::SelectionDAG::getEntryNode
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:516
llvm::ISD::FCEIL
@ FCEIL
Definition: ISDOpcodes.h:883
llvm::ISD::getSetCCSwappedOperands
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
Definition: SelectionDAG.cpp:494
llvm::R600TargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: R600ISelLowering.cpp:1755
llvm::ISD::FSIN
@ FSIN
Definition: ISDOpcodes.h:874
llvm::SelectionDAG::getDataLayout
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:440
llvm::MVT::v8i16
@ v8i16
Definition: MachineValueType.h:92
llvm::ISD::BUILD_VECTOR
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:491
llvm::TargetLoweringBase::Custom
@ Custom
Definition: TargetLowering.h:200
llvm::R600Subtarget::getRegisterInfo
const R600RegisterInfo * getRegisterInfo() const override
Definition: R600Subtarget.h:60
llvm::ISD::SUBC
@ SUBC
Definition: ISDOpcodes.h:270
R600Subtarget.h
llvm::MVT::i32
@ i32
Definition: MachineValueType.h:46
llvm::TargetStackID::Value
Value
Definition: TargetFrameLowering.h:27
llvm::AMDGPUAS::CONSTANT_BUFFER_0
@ CONSTANT_BUFFER_0
Definition: AMDGPU.h:381
llvm::ISD::SETUO
@ SETUO
Definition: ISDOpcodes.h:1380
llvm::AMDGPUAS::CONSTANT_BUFFER_15
@ CONSTANT_BUFFER_15
Definition: AMDGPU.h:396
llvm::SDValue
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
Definition: SelectionDAGNodes.h:137
llvm::R600InstrInfo
Definition: R600InstrInfo.h:38
llvm::ISD::STORE
@ STORE
Definition: ISDOpcodes.h:926
llvm::MVT::v8i32
@ v8i32
Definition: MachineValueType.h:107
llvm::ISD::SRL_PARTS
@ SRL_PARTS
Definition: ISDOpcodes.h:717
llvm::R600TargetLowering::getSetCCResultType
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override
Return the ValueType of the result of SETCC operations.
Definition: R600ISelLowering.cpp:1561
llvm::AMDGPUISD::BRANCH_COND
@ BRANCH_COND
Definition: AMDGPUISelLowering.h:348
llvm::ISD::UINT_TO_FP
@ UINT_TO_FP
Definition: ISDOpcodes.h:740
llvm::ISD::ADD
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
llvm::CallingConv::SPIR_KERNEL
@ SPIR_KERNEL
SPIR_KERNEL - Calling convention for SPIR kernel functions.
Definition: CallingConv.h:152
llvm::SDValue::isUndef
bool isUndef() const
Definition: SelectionDAGNodes.h:1159
llvm::SDNode::getMachineOpcode
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
Definition: SelectionDAGNodes.h:688
llvm::makeArrayRef
ArrayRef< T > makeArrayRef(const T &OneElt)
Construct an ArrayRef from a single element.
Definition: ArrayRef.h:474
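A small sketch, assuming Lo and Hi are SDValues in scope:
  // View a local C array as an ArrayRef without copying it.
  SDValue Parts[] = { Lo, Hi };
  ArrayRef<SDValue> Ref = makeArrayRef(Parts);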
llvm::AMDGPUTargetLowering::PerformDAGCombine
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
Definition: AMDGPUISelLowering.cpp:4063
llvm::EVT::getVectorElementType
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:301
llvm::MemSDNode::getAlign
Align getAlign() const
Definition: SelectionDAGNodes.h:1274
llvm::TargetLoweringBase::setBooleanContents
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
Definition: TargetLowering.h:2148
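An illustrative call from a target's TargetLowering constructor; the particular content kind here is only an example, not necessarily what R600 selects:
  // Booleans produced by setcc are 0 or 1 in the wider integer register.
  setBooleanContents(ZeroOrOneBooleanContent);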
llvm::SDNode::op_begin
op_iterator op_begin() const
Definition: SelectionDAGNodes.h:913
llvm::AMDGPUISD::FRACT
@ FRACT
Definition: AMDGPUISelLowering.h:374
llvm::ISD::FSUB
@ FSUB
Definition: ISDOpcodes.h:378
llvm::ISD::SHL
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:657
R600Defines.h
llvm::TargetLoweringBase::Expand
@ Expand
Definition: TargetLowering.h:198
llvm::TargetLoweringBase::setTargetDAGCombine
void setTargetDAGCombine(ISD::NodeType NT)
Targets should invoke this method for each target independent node that they want to provide a custom...
Definition: TargetLowering.h:2326
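A sketch of the registration as it would appear in a target constructor; the opcode is just an example:
  // Ask the combiner to call PerformDAGCombine for every FP_ROUND node.
  setTargetDAGCombine(ISD::FP_ROUND);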
llvm::BuildMI
MachineInstrBuilder BuildMI(MachineFunction &MF, const DebugLoc &DL, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
Definition: MachineInstrBuilder.h:328
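A sketch of the builder interface inside a custom inserter; TII, *BB, I, DL, DstReg and SrcReg are assumed to be in scope, and R600::MOV stands in for whatever opcode is actually needed:
  // Create a MOV at the insertion point and append its operands fluently.
  BuildMI(*BB, I, DL, TII->get(R600::MOV), DstReg)
      .addReg(SrcReg);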
N
#define N
llvm::TargetLoweringBase::computeRegisterProperties
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
Definition: TargetLoweringBase.cpp:1293
llvm::ISD::SUBE
@ SUBE
Definition: ISDOpcodes.h:280
llvm::ISD::SRL
@ SRL
Definition: ISDOpcodes.h:659
llvm::GlobalAddressSDNode::getAddressSpace
unsigned getAddressSpace() const
Definition: SelectionDAG.cpp:10685
llvm::AMDGPUAS::CONSTANT_BUFFER_12
@ CONSTANT_BUFFER_12
Definition: AMDGPU.h:393
llvm::R600TargetLowering::ReplaceNodeResults
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
Definition: R600ISelLowering.cpp:632
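A hedged outline of what such an override typically does; the case and helper below are hypothetical, not R600's actual handling:
  void MyTargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
    switch (N->getOpcode()) {
    default:
      return; // let the common legalizer handle everything else
    case ISD::FP_TO_UINT:
      // Hypothetical helper producing a replacement value of a legal type.
      Results.push_back(lowerFPToUIntToLegalType(N, DAG));
      return;
    }
  }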
llvm::AMDGPUISD::COS_HW
@ COS_HW
Definition: AMDGPUISelLowering.h:393
llvm::MipsISD::Ins
@ Ins
Definition: MipsISelLowering.h:157
llvm::SmallVectorImpl
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: APFloat.h:43
llvm::AMDGPUAS::CONSTANT_BUFFER_3
@ CONSTANT_BUFFER_3
Definition: AMDGPU.h:384
llvm::CallingConv::AMDGPU_KERNEL
@ AMDGPU_KERNEL
Calling convention for AMDGPU code object kernels.
Definition: CallingConv.h:216
llvm::SDValue::getOpcode
unsigned getOpcode() const
Definition: SelectionDAGNodes.h:1119
llvm::CallingConv::AMDGPU_VS
@ AMDGPU_VS
Calling convention used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (ve...
Definition: CallingConv.h:204
llvm::SelectionDAG::getTargetConstant
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:637
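A sketch; target constants are used for operands that must remain immediates in the selected instruction (DAG and DL assumed in scope):
  // An immediate 0 that instruction selection will not try to materialize
  // into a register.
  SDValue Imm = DAG.getTargetConstant(0, DL, MVT::i32);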
llvm::numbers::pif
constexpr float pif
Definition: MathExtras.h:78
TM
const char LLVMTargetMachineRef TM
Definition: PassBuilderBindings.cpp:47
llvm::ISD::SETONE
@ SETONE
Definition: ISDOpcodes.h:1378
llvm::MVT::v32i8
@ v32i8
Definition: MachineValueType.h:81
llvm::EVT::bitsGT
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:257
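A sketch of the usual comparison pattern when deciding between extension and truncation (DAG, DL, Val, SrcVT and DstVT assumed in scope):
  if (DstVT.bitsGT(SrcVT))
    Val = DAG.getNode(ISD::ZERO_EXTEND, DL, DstVT, Val); // widen
  else if (SrcVT.bitsGT(DstVT))
    Val = DAG.getNode(ISD::TRUNCATE, DL, DstVT, Val);    // narrow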
llvm::AMDGPUAS::PARAM_I_ADDRESS
@ PARAM_I_ADDRESS
Address space for indirect addressable parameter memory (VTX1).
Definition: AMDGPU.h:373
llvm::MVT::i16
@ i16
Definition: MachineValueType.h:45
llvm::AMDGPUAS::CONSTANT_BUFFER_5
@ CONSTANT_BUFFER_5
Definition: AMDGPU.h:386
llvm::ISD::FNEG
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:870
llvm::SelectionDAG::getMachineFunction
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:437
llvm::R600TargetLowering::EmitInstrWithCustomInserter
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
Definition: R600ISelLowering.cpp:265
llvm::ISD::SDIVREM
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
llvm::AMDGPUISD::DWORDADDR
@ DWORDADDR
Definition: AMDGPUISelLowering.h:373
llvm::AMDGPU::HSAMD::Kernel::Key::Args
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
Definition: AMDGPUMetadata.h:389
llvm::EVT::bitsGE
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition: ValueTypes.h:265
llvm::ISD::SIGN_EXTEND
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:726
llvm::MVT::v8i8
@ v8i8
Definition: MachineValueType.h:79
llvm::AMDGPUFrameLowering::getStackWidth
unsigned getStackWidth(const MachineFunction &MF) const
Definition: AMDGPUFrameLowering.cpp:22
MachineFunction.h
llvm::ISD::FTRUNC
@ FTRUNC
Definition: ISDOpcodes.h:884
llvm::MachineInstrBundleIterator< MachineInstr >
llvm::MVT::v2i16
@ v2i16
Definition: MachineValueType.h:89
llvm::AMDGPUMachineFunction
Definition: AMDGPUMachineFunction.h:18
llvm::ConstantFPSDNode::getValueAPF
const APFloat & getValueAPF() const
Definition: SelectionDAGNodes.h:1618
ReorganizeVector
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
Definition: R600ISelLowering.cpp:1641
llvm::ISD::INSERT_VECTOR_ELT
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:500
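A sketch replacing one lane of a v4f32 (DAG, DL, Vec and Scalar assumed in scope):
  // Replace lane 2 of Vec with Scalar.
  SDValue Idx = DAG.getConstant(2, DL, MVT::i32);
  SDValue NewVec =
      DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v4f32, Vec, Scalar, Idx);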
llvm::SelectionDAG::getExtLoad
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Definition: SelectionDAG.cpp:7505
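A sketch matching the signature above: widen an i8 memory value to an i32 register value with zero extension (Chain, Ptr and PtrInfo assumed in scope):
  SDValue Wide = DAG.getExtLoad(ISD::ZEXTLOAD, DL, MVT::i32,
                                Chain, Ptr, PtrInfo, MVT::i8);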
llvm::ISD::ADDE
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
llvm::ISD::FP_ROUND
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:818
llvm::MVT::f32
@ f32
Definition: MachineValueType.h:55
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::AMDGPUAS::LOCAL_ADDRESS
@ LOCAL_ADDRESS
Address space for local memory.
Definition: AMDGPU.h:363
llvm::MachineMemOperand::getFlags
Flags getFlags() const
Return the raw flags of the source value.
Definition: MachineMemOperand.h:220
llvm::AMDGPUAS::CONSTANT_ADDRESS
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
Definition: AMDGPU.h:362
llvm::ISD::ATOMIC_LOAD
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1118
llvm::MemSDNode::getPointerInfo
const MachinePointerInfo & getPointerInfo() const
Definition: SelectionDAGNodes.h:1344
llvm::AMDGPUISD::CONST_DATA_PTR
@ CONST_DATA_PTR
Pointer to the start of the shader's constant data.
Definition: AMDGPUISelLowering.h:483
llvm::TargetLoweringBase::getPointerTy
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
Definition: TargetLowering.h:345
llvm::ISD::CTPOP
@ CTPOP
Definition: ISDOpcodes.h:669
llvm::AMDGPUISD::DUMMY_CHAIN
@ DUMMY_CHAIN
Definition: AMDGPUISelLowering.h:486
llvm::R600TargetLowering::R600TargetLowering
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI)
Definition: R600ISelLowering.cpp:29
llvm::SelectionDAG::getMergeValues
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
Definition: SelectionDAG.cpp:7254
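A sketch of the common pattern at the end of a custom load lowering, where a value and its chain are returned together (LoadedVal and NewChain assumed in scope):
  SDValue Ops[] = { LoadedVal, NewChain };
  return DAG.getMergeValues(Ops, DL);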
llvm::EVT::getSimpleVT
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:289
llvm::MVT::v4i1
@ v4i1
Definition: MachineValueType.h:66
llvm::CallingConv::AMDGPU_LS
@ AMDGPU_LS
Calling convention used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:231
llvm::SelectionDAG::getCondCode
SDValue getCondCode(ISD::CondCode Cond)
Definition: SelectionDAG.cpp:1803
llvm::AMDGPUTargetLowering::FIRST_IMPLICIT
@ FIRST_IMPLICIT
Definition: AMDGPUISelLowering.h:322