LLVM 23.0.0git
R600ISelLowering.cpp
Go to the documentation of this file.
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Custom DAG lowering for R600
11//
12//===----------------------------------------------------------------------===//
13
#include "R600ISelLowering.h"
#include "AMDGPU.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600Defines.h"
#include "R600InstrInfo.h"
#include "R600MachineFunctionInfo.h"
#include "R600Subtarget.h"
#include "R600TargetMachine.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"
26
27using namespace llvm;
28
29#include "R600GenCallingConv.inc"
30
// R600TargetLowering constructor: registers the legal register classes for
// the target and configures operation legality / custom lowering.
// NOTE(review): this listing still carries the embedded line numbers of the
// original file, and the jumps in that numbering show that many statements
// were dropped during extraction (e.g. the opening signature line, the
// setBooleanContents calls, the first lines of several multi-line
// setOperationAction/setCondCodeAction/setLoadExtAction calls, and the
// single-line bodies of the guarded if-statements near the end). Recover the
// missing statements from the upstream file before attempting to compile.
32 const R600Subtarget &STI)
33 : AMDGPUTargetLowering(TM, STI, STI), Subtarget(&STI),
34 Gen(STI.getGeneration()) {
// Map each supported value type onto the R600 register class of matching
// width: 32-bit scalars, 64-bit pairs, 128-bit quads.
35 addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
36 addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
37 addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
38 addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
39 addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
40 addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
41
// NOTE(review): lines 42-43 (embedded numbering) dropped here.
44
45 computeRegisterProperties(Subtarget->getRegisterInfo());
46
47 // Legalize loads and stores to the private address space.
48 setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
49
50 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
51 // spaces, so it is custom lowered to handle those where it isn't.
// NOTE(review): line 52 dropped here — it declared the `Op` loop variable
// used by the setLoadExtAction calls below (an outer loop over the three
// extending-load opcodes).
53 for (MVT VT : MVT::integer_valuetypes()) {
54 setLoadExtAction(Op, VT, MVT::i1, Promote);
55 setLoadExtAction(Op, VT, MVT::i8, Custom);
56 setLoadExtAction(Op, VT, MVT::i16, Custom);
57 }
58
59 // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
// NOTE(review): the setLoadExtAction(...) first lines (60 and 63) dropped.
61 MVT::v2i1, Expand);
62
64 MVT::v4i1, Expand);
65
66 setOperationAction(ISD::STORE, {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32},
67 Custom);
68
69 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
70 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
71 // We need to include these since trunc STORES to PRIVATE need
72 // special handling to accommodate RMW
73 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
74 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
75 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
76 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
77 setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
78 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
79 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
80 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
81 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
82 setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);
83
84 // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
85 setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
86 setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
87
88 // Set condition code actions
// NOTE(review): lines 89-91 dropped — the setCondCodeAction call listing the
// unsupported f32 condition codes; only its final line survived.
92 MVT::f32, Expand);
93
// NOTE(review): line 94 dropped — the setCondCodeAction call for i32.
95 MVT::i32, Expand);
96
// NOTE(review): line 97 dropped (single statement).
98
99 setOperationAction(ISD::SETCC, {MVT::v4i32, MVT::v2i32}, Expand);
100
101 setOperationAction(ISD::BR_CC, {MVT::i32, MVT::f32}, Expand);
// NOTE(review): lines 102 and 104 dropped (single statements).
103
105
// NOTE(review): line 106 dropped — the opcode list of the call whose
// f32-vector type list follows.
107 {MVT::f32, MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
108 MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32},
109 Expand);
110
// NOTE(review): line 111 dropped — the opcode list of the f64 Custom call.
112 MVT::f64, Custom);
113
114 setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom);
115
116 setOperationAction(ISD::SETCC, {MVT::i32, MVT::f32}, Expand);
117 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT}, {MVT::i1, MVT::i64},
118 Custom);
119
120 setOperationAction(ISD::SELECT, {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32},
121 Expand);
122
123 // ADD, SUB overflow.
124 // TODO: turn these into Legal?
// NOTE(review): the guarded bodies at lines 126 and 129 dropped.
125 if (Subtarget->hasCARRY())
127
128 if (Subtarget->hasBORROW())
130
131 // Expand sign extension of vectors
// NOTE(review): the guarded scalar SIGN_EXTEND_INREG expansions at lines
// 133, 138 and 142 dropped; the vector variants survived.
132 if (!Subtarget->hasBFE())
134
135 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i1, MVT::v4i1}, Expand);
136
137 if (!Subtarget->hasBFE())
139 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i8, MVT::v4i8}, Expand);
140
141 if (!Subtarget->hasBFE())
143 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v4i16}, Expand);
144
// NOTE(review): line 145 dropped.
146 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i32, MVT::v4i32}, Expand);
147
// NOTE(review): single statements at lines 148 and 150 dropped, as well as
// the first lines (152, 155) of the EXTRACT/INSERT_VECTOR_ELT Custom calls.
149
151
153 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
154
156 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
157
158 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
159 // to be Legal/Custom in order to avoid library calls.
// NOTE(review): line 160 dropped — the *_PARTS opcode/type list.
161 Custom);
162
163 if (!Subtarget->hasFMA())
164 setOperationAction(ISD::FMA, {MVT::f32, MVT::f64}, Expand);
165
166 // FIXME: May need no denormals check
// NOTE(review): line 167 dropped (single statement).
168
// NOTE(review): the guarded bodies at 174, 177, 180, 183 and 188 dropped.
169 if (!Subtarget->hasBFI())
170 // fcopysign can be done in a single instruction with BFI.
171 setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);
172
173 if (!Subtarget->hasBCNT(32))
175
176 if (!Subtarget->hasBCNT(64))
178
179 if (Subtarget->hasFFBH())
181
182 if (Subtarget->hasFFBL())
184
185 // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
186 // need it for R600.
187 if (Subtarget->hasBFE())
189
// NOTE(review): lines 190-191 dropped.
192
193 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
// NOTE(review): line 195 dropped — the per-VT expansion whose trailing
// `Expand);` follows.
194 for (MVT VT : ScalarIntVTs)
196 Expand);
197
198 // LLVM will expand these to atomic_cmp_swap(0)
199 // and atomic_swap, respectively.
// NOTE(review): line 200 dropped (single statement).
201
202 // We need to custom lower some of the intrinsics
// NOTE(review): line 203 dropped — the INTRINSIC opcode/type list.
204 Custom);
205
// NOTE(review): lines 206-209 dropped (scheduling preference and the
// setTargetDAGCombine list).
207
210}
211
213 if (std::next(I) == I->getParent()->end())
214 return false;
215 return std::next(I)->getOpcode() == R600::RETURN;
216}
217
220 MachineBasicBlock *BB) const {
221 MachineFunction *MF = BB->getParent();
222 MachineRegisterInfo &MRI = MF->getRegInfo();
224 const R600InstrInfo *TII = Subtarget->getInstrInfo();
225
226 switch (MI.getOpcode()) {
227 default:
228 // Replace LDS_*_RET instruction that don't have any uses with the
229 // equivalent LDS_*_NORET instruction.
230 if (TII->isLDSRetInstr(MI.getOpcode())) {
231 int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
232 assert(DstIdx != -1);
234 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
235 // LDS_1A2D support and remove this special case.
236 if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
237 MI.getOpcode() == R600::LDS_CMPST_RET)
238 return BB;
239
240 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
241 TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
242 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
243 NewMI.add(MO);
244 } else {
246 }
247 break;
248
249 case R600::FABS_R600: {
250 MachineInstr *NewMI = TII->buildDefaultInstruction(
251 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
252 MI.getOperand(1).getReg());
253 TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
254 break;
255 }
256
257 case R600::FNEG_R600: {
258 MachineInstr *NewMI = TII->buildDefaultInstruction(
259 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
260 MI.getOperand(1).getReg());
261 TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
262 break;
263 }
264
265 case R600::MASK_WRITE: {
266 Register maskedRegister = MI.getOperand(0).getReg();
267 assert(maskedRegister.isVirtual());
268 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
269 TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
270 break;
271 }
272
273 case R600::MOV_IMM_F32:
274 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
275 .getFPImm()
276 ->getValueAPF()
277 .bitcastToAPInt()
278 .getZExtValue());
279 break;
280
281 case R600::MOV_IMM_I32:
282 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
283 MI.getOperand(1).getImm());
284 break;
285
286 case R600::MOV_IMM_GLOBAL_ADDR: {
287 //TODO: Perhaps combine this instruction with the next if possible
288 auto MIB = TII->buildDefaultInstruction(
289 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
290 int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
291 //TODO: Ugh this is rather ugly
292 const MachineOperand &MO = MI.getOperand(1);
293 MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
294 MO.getTargetFlags());
295 break;
296 }
297
298 case R600::CONST_COPY: {
299 MachineInstr *NewMI = TII->buildDefaultInstruction(
300 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
301 TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
302 MI.getOperand(1).getImm());
303 break;
304 }
305
306 case R600::RAT_WRITE_CACHELESS_32_eg:
307 case R600::RAT_WRITE_CACHELESS_64_eg:
308 case R600::RAT_WRITE_CACHELESS_128_eg:
309 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
310 .add(MI.getOperand(0))
311 .add(MI.getOperand(1))
312 .addImm(isEOP(I)); // Set End of program bit
313 break;
314
315 case R600::RAT_STORE_TYPED_eg:
316 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
317 .add(MI.getOperand(0))
318 .add(MI.getOperand(1))
319 .add(MI.getOperand(2))
320 .addImm(isEOP(I)); // Set End of program bit
321 break;
322
323 case R600::BRANCH:
324 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
325 .add(MI.getOperand(0));
326 break;
327
328 case R600::BRANCH_COND_f32: {
329 MachineInstr *NewMI =
330 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
331 R600::PREDICATE_BIT)
332 .add(MI.getOperand(1))
333 .addImm(R600::PRED_SETNE)
334 .addImm(0); // Flags
335 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
336 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
337 .add(MI.getOperand(0))
338 .addReg(R600::PREDICATE_BIT, RegState::Kill);
339 break;
340 }
341
342 case R600::BRANCH_COND_i32: {
343 MachineInstr *NewMI =
344 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
345 R600::PREDICATE_BIT)
346 .add(MI.getOperand(1))
347 .addImm(R600::PRED_SETNE_INT)
348 .addImm(0); // Flags
349 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
350 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
351 .add(MI.getOperand(0))
352 .addReg(R600::PREDICATE_BIT, RegState::Kill);
353 break;
354 }
355
356 case R600::EG_ExportSwz:
357 case R600::R600_ExportSwz: {
358 // Instruction is left unmodified if its not the last one of its type
359 bool isLastInstructionOfItsType = true;
360 unsigned InstExportType = MI.getOperand(1).getImm();
361 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
362 EndBlock = BB->end(); NextExportInst != EndBlock;
363 NextExportInst = std::next(NextExportInst)) {
364 if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
365 NextExportInst->getOpcode() == R600::R600_ExportSwz) {
366 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
367 .getImm();
368 if (CurrentInstExportType == InstExportType) {
369 isLastInstructionOfItsType = false;
370 break;
371 }
372 }
373 }
374 bool EOP = isEOP(I);
375 if (!EOP && !isLastInstructionOfItsType)
376 return BB;
377 unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
378 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
379 .add(MI.getOperand(0))
380 .add(MI.getOperand(1))
381 .add(MI.getOperand(2))
382 .add(MI.getOperand(3))
383 .add(MI.getOperand(4))
384 .add(MI.getOperand(5))
385 .add(MI.getOperand(6))
386 .addImm(CfInst)
387 .addImm(EOP);
388 break;
389 }
390 case R600::RETURN: {
391 return BB;
392 }
393 }
394
395 MI.eraseFromParent();
396 return BB;
397}
398
399//===----------------------------------------------------------------------===//
400// Custom DAG Lowering Operations
401//===----------------------------------------------------------------------===//
402
406 switch (Op.getOpcode()) {
407 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
408 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
409 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
410 case ISD::SHL_PARTS:
411 case ISD::SRA_PARTS:
412 case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
413 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
414 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
415 case ISD::FCOS:
416 case ISD::FSIN: return LowerTrig(Op, DAG);
417 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
418 case ISD::STORE: return LowerSTORE(Op, DAG);
419 case ISD::LOAD: {
420 SDValue Result = LowerLOAD(Op, DAG);
421 assert((!Result.getNode() ||
422 Result.getNode()->getNumValues() == 2) &&
423 "Load should return a value and a chain");
424 return Result;
425 }
426
427 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
428 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
429 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
431 return lowerADDRSPACECAST(Op, DAG);
432 case ISD::INTRINSIC_VOID: {
433 SDValue Chain = Op.getOperand(0);
434 unsigned IntrinsicID = Op.getConstantOperandVal(1);
435 switch (IntrinsicID) {
436 case Intrinsic::r600_store_swizzle: {
437 SDLoc DL(Op);
438 const SDValue Args[8] = {
439 Chain,
440 Op.getOperand(2), // Export Value
441 Op.getOperand(3), // ArrayBase
442 Op.getOperand(4), // Type
443 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
444 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
445 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
446 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
447 };
448 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
449 }
450
451 // default for switch(IntrinsicID)
452 default: break;
453 }
454 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
455 break;
456 }
458 unsigned IntrinsicID = Op.getConstantOperandVal(0);
459 EVT VT = Op.getValueType();
460 SDLoc DL(Op);
461 switch (IntrinsicID) {
462 case Intrinsic::r600_tex:
463 case Intrinsic::r600_texc: {
464 unsigned TextureOp;
465 switch (IntrinsicID) {
466 case Intrinsic::r600_tex:
467 TextureOp = 0;
468 break;
469 case Intrinsic::r600_texc:
470 TextureOp = 1;
471 break;
472 default:
473 llvm_unreachable("unhandled texture operation");
474 }
475
476 SDValue TexArgs[19] = {
477 DAG.getConstant(TextureOp, DL, MVT::i32),
478 Op.getOperand(1),
479 DAG.getConstant(0, DL, MVT::i32),
480 DAG.getConstant(1, DL, MVT::i32),
481 DAG.getConstant(2, DL, MVT::i32),
482 DAG.getConstant(3, DL, MVT::i32),
483 Op.getOperand(2),
484 Op.getOperand(3),
485 Op.getOperand(4),
486 DAG.getConstant(0, DL, MVT::i32),
487 DAG.getConstant(1, DL, MVT::i32),
488 DAG.getConstant(2, DL, MVT::i32),
489 DAG.getConstant(3, DL, MVT::i32),
490 Op.getOperand(5),
491 Op.getOperand(6),
492 Op.getOperand(7),
493 Op.getOperand(8),
494 Op.getOperand(9),
495 Op.getOperand(10)
496 };
497 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
498 }
499 case Intrinsic::r600_dot4: {
500 SDValue Args[8] = {
501 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
502 DAG.getConstant(0, DL, MVT::i32)),
503 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
504 DAG.getConstant(0, DL, MVT::i32)),
505 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
506 DAG.getConstant(1, DL, MVT::i32)),
507 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
508 DAG.getConstant(1, DL, MVT::i32)),
509 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
510 DAG.getConstant(2, DL, MVT::i32)),
511 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
512 DAG.getConstant(2, DL, MVT::i32)),
513 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
514 DAG.getConstant(3, DL, MVT::i32)),
515 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
516 DAG.getConstant(3, DL, MVT::i32))
517 };
518 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
519 }
520
521 case Intrinsic::r600_implicitarg_ptr: {
524 return DAG.getConstant(ByteOffset, DL, PtrVT);
525 }
526 case Intrinsic::r600_read_ngroups_x:
527 return LowerImplicitParameter(DAG, VT, DL, 0);
528 case Intrinsic::r600_read_ngroups_y:
529 return LowerImplicitParameter(DAG, VT, DL, 1);
530 case Intrinsic::r600_read_ngroups_z:
531 return LowerImplicitParameter(DAG, VT, DL, 2);
532 case Intrinsic::r600_read_global_size_x:
533 return LowerImplicitParameter(DAG, VT, DL, 3);
534 case Intrinsic::r600_read_global_size_y:
535 return LowerImplicitParameter(DAG, VT, DL, 4);
536 case Intrinsic::r600_read_global_size_z:
537 return LowerImplicitParameter(DAG, VT, DL, 5);
538 case Intrinsic::r600_read_local_size_x:
539 return LowerImplicitParameter(DAG, VT, DL, 6);
540 case Intrinsic::r600_read_local_size_y:
541 return LowerImplicitParameter(DAG, VT, DL, 7);
542 case Intrinsic::r600_read_local_size_z:
543 return LowerImplicitParameter(DAG, VT, DL, 8);
544
545 case Intrinsic::r600_read_tgid_x:
546 case Intrinsic::amdgcn_workgroup_id_x:
547 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
548 R600::T1_X, VT);
549 case Intrinsic::r600_read_tgid_y:
550 case Intrinsic::amdgcn_workgroup_id_y:
551 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
552 R600::T1_Y, VT);
553 case Intrinsic::r600_read_tgid_z:
554 case Intrinsic::amdgcn_workgroup_id_z:
555 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
556 R600::T1_Z, VT);
557 case Intrinsic::r600_read_tidig_x:
558 case Intrinsic::amdgcn_workitem_id_x:
559 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
560 R600::T0_X, VT);
561 case Intrinsic::r600_read_tidig_y:
562 case Intrinsic::amdgcn_workitem_id_y:
563 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
564 R600::T0_Y, VT);
565 case Intrinsic::r600_read_tidig_z:
566 case Intrinsic::amdgcn_workitem_id_z:
567 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
568 R600::T0_Z, VT);
569
570 case Intrinsic::r600_recipsqrt_ieee:
571 return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
572
573 case Intrinsic::r600_recipsqrt_clamped:
574 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
575 default:
576 return Op;
577 }
578
579 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
580 break;
581 }
582 } // end switch(Op.getOpcode())
583 return SDValue();
584}
585
588 SelectionDAG &DAG) const {
589 switch (N->getOpcode()) {
590 default:
592 return;
593 case ISD::FP_TO_UINT:
594 if (N->getValueType(0) == MVT::i1) {
595 Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
596 return;
597 }
598 // Since we don't care about out of bounds values we can use FP_TO_SINT for
599 // uints too. The DAGLegalizer code for uint considers some extra cases
600 // which are not necessary here.
601 [[fallthrough]];
602 case ISD::FP_TO_SINT: {
603 if (N->getValueType(0) == MVT::i1) {
604 Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
605 return;
606 }
607
608 SDValue Result;
609 if (expandFP_TO_SINT(N, Result, DAG))
610 Results.push_back(Result);
611 return;
612 }
613 case ISD::SDIVREM: {
614 SDValue Op = SDValue(N, 1);
615 SDValue RES = LowerSDIVREM(Op, DAG);
616 Results.push_back(RES);
617 Results.push_back(RES.getValue(1));
618 break;
619 }
620 case ISD::UDIVREM: {
621 SDValue Op = SDValue(N, 0);
623 break;
624 }
625 }
626}
627
628SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
629 SDValue Vector) const {
630 SDLoc DL(Vector);
631 EVT VecVT = Vector.getValueType();
632 EVT EltVT = VecVT.getVectorElementType();
634
635 for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
636 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
637 DAG.getVectorIdxConstant(i, DL)));
638 }
639
640 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
641}
642
643SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
644 SelectionDAG &DAG) const {
645 SDLoc DL(Op);
646 SDValue Vector = Op.getOperand(0);
647 SDValue Index = Op.getOperand(1);
648
649 if (isa<ConstantSDNode>(Index) ||
651 return Op;
652
653 Vector = vectorToVerticalVector(DAG, Vector);
654 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
655 Vector, Index);
656}
657
658SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
659 SelectionDAG &DAG) const {
660 SDLoc DL(Op);
661 SDValue Vector = Op.getOperand(0);
662 SDValue Value = Op.getOperand(1);
663 SDValue Index = Op.getOperand(2);
664
665 if (isa<ConstantSDNode>(Index) ||
667 return Op;
668
669 Vector = vectorToVerticalVector(DAG, Vector);
670 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
671 Vector, Value, Index);
672 return vectorToVerticalVector(DAG, Insert);
673}
674
675SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunctionInfo *MFI,
676 SDValue Op,
677 SelectionDAG &DAG) const {
678 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
681
682 const DataLayout &DL = DAG.getDataLayout();
683 const GlobalValue *GV = GSD->getGlobal();
684 MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
685
686 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
687 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
688}
689
690SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
691 // On hw >= R700, COS/SIN input must be between -1. and 1.
692 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
693 EVT VT = Op.getValueType();
694 SDValue Arg = Op.getOperand(0);
695 SDLoc DL(Op);
696
697 // TODO: Should this propagate fast-math-flags?
698 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
699 DAG.getNode(ISD::FADD, DL, VT,
700 DAG.getNode(ISD::FMUL, DL, VT, Arg,
701 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
702 DAG.getConstantFP(0.5, DL, MVT::f32)));
703 unsigned TrigNode;
704 switch (Op.getOpcode()) {
705 case ISD::FCOS:
706 TrigNode = AMDGPUISD::COS_HW;
707 break;
708 case ISD::FSIN:
709 TrigNode = AMDGPUISD::SIN_HW;
710 break;
711 default:
712 llvm_unreachable("Wrong trig opcode");
713 }
714 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
715 DAG.getNode(ISD::FADD, DL, VT, FractPart,
716 DAG.getConstantFP(-0.5, DL, MVT::f32)));
717 if (Gen >= AMDGPUSubtarget::R700)
718 return TrigVal;
719 // On R600 hw, COS/SIN input must be between -Pi and Pi.
720 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
721 DAG.getConstantFP(numbers::pif, DL, MVT::f32));
722}
723
724SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
725 SelectionDAG &DAG) const {
726 SDValue Lo, Hi;
727 expandShiftParts(Op.getNode(), Lo, Hi, DAG);
728 return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
729}
730
731SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
732 unsigned mainop, unsigned ovf) const {
733 SDLoc DL(Op);
734 EVT VT = Op.getValueType();
735
736 SDValue Lo = Op.getOperand(0);
737 SDValue Hi = Op.getOperand(1);
738
739 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
740 // Extend sign.
741 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
742 DAG.getValueType(MVT::i1));
743
744 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
745
746 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
747}
748
749SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
750 SDLoc DL(Op);
751 return DAG.getNode(
753 DL,
754 MVT::i1,
755 Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
757}
758
759SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
760 SDLoc DL(Op);
761 return DAG.getNode(
763 DL,
764 MVT::i1,
765 Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
767}
768
769SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
770 const SDLoc &DL,
771 unsigned DwordOffset) const {
772 unsigned ByteOffset = DwordOffset * 4;
773 PointerType *PtrType =
775
776 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
777 assert(isInt<16>(ByteOffset));
778
779 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
780 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
781 MachinePointerInfo(ConstantPointerNull::get(PtrType)));
782}
783
784bool R600TargetLowering::isZero(SDValue Op) const {
785 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op))
786 return Cst->isZero();
787 if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op))
788 return CstFP->isZero();
789 return false;
790}
791
792bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
793 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
794 return CFP->isExactlyValue(1.0);
795 }
796 return isAllOnesConstant(Op);
797}
798
799bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
800 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
801 return CFP->getValueAPF().isZero();
802 }
803 return isNullConstant(Op);
804}
805
806SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
807 SDLoc DL(Op);
808 EVT VT = Op.getValueType();
809
810 SDValue LHS = Op.getOperand(0);
811 SDValue RHS = Op.getOperand(1);
812 SDValue True = Op.getOperand(2);
813 SDValue False = Op.getOperand(3);
814 SDValue CC = Op.getOperand(4);
815 SDValue Temp;
816
817 if (VT == MVT::f32) {
818 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
819 SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
820 if (MinMax)
821 return MinMax;
822 }
823
824 // LHS and RHS are guaranteed to be the same value type
825 EVT CompareVT = LHS.getValueType();
826
827 // Check if we can lower this to a native operation.
828
829 // Try to lower to a SET* instruction:
830 //
831 // SET* can match the following patterns:
832 //
833 // select_cc f32, f32, -1, 0, cc_supported
834 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
835 // select_cc i32, i32, -1, 0, cc_supported
836 //
837
838 // Move hardware True/False values to the correct operand.
839 if (isHWTrueValue(False) && isHWFalseValue(True)) {
840 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
841 ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
842 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
843 std::swap(False, True);
844 CC = DAG.getCondCode(InverseCC);
845 } else {
846 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
847 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
848 std::swap(False, True);
849 std::swap(LHS, RHS);
850 CC = DAG.getCondCode(SwapInvCC);
851 }
852 }
853 }
854
855 if (isHWTrueValue(True) && isHWFalseValue(False) &&
856 (CompareVT == VT || VT == MVT::i32)) {
857 // This can be matched by a SET* instruction.
858 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
859 }
860
861 // Try to lower to a CND* instruction:
862 //
863 // CND* can match the following patterns:
864 //
865 // select_cc f32, 0.0, f32, f32, cc_supported
866 // select_cc f32, 0.0, i32, i32, cc_supported
867 // select_cc i32, 0, f32, f32, cc_supported
868 // select_cc i32, 0, i32, i32, cc_supported
869 //
870
871 // Try to move the zero value to the RHS
872 if (isZero(LHS)) {
873 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
874 // Try swapping the operands
875 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
876 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
877 std::swap(LHS, RHS);
878 CC = DAG.getCondCode(CCSwapped);
879 } else {
880 // Try inverting the condition and then swapping the operands
881 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
882 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
883 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
884 std::swap(True, False);
885 std::swap(LHS, RHS);
886 CC = DAG.getCondCode(CCSwapped);
887 }
888 }
889 }
890 if (isZero(RHS)) {
891 SDValue Cond = LHS;
892 SDValue Zero = RHS;
893 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
894 if (CompareVT != VT) {
895 // Bitcast True / False to the correct types. This will end up being
896 // a nop, but it allows us to define only a single pattern in the
897 // .TD files for each CND* instruction rather than having to have
898 // one pattern for integer True/False and one for fp True/False
899 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
900 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
901 }
902
903 switch (CCOpcode) {
904 case ISD::SETONE:
905 case ISD::SETUNE:
906 case ISD::SETNE:
907 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
908 Temp = True;
909 True = False;
910 False = Temp;
911 break;
912 default:
913 break;
914 }
915 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
916 Cond, Zero,
917 True, False,
918 DAG.getCondCode(CCOpcode));
919 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
920 }
921
922 // If we make it this for it means we have no native instructions to handle
923 // this SELECT_CC, so we must lower it.
924 SDValue HWTrue, HWFalse;
925
926 if (CompareVT == MVT::f32) {
927 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
928 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
929 } else if (CompareVT == MVT::i32) {
930 HWTrue = DAG.getAllOnesConstant(DL, CompareVT);
931 HWFalse = DAG.getConstant(0, DL, CompareVT);
932 }
933 else {
934 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
935 }
936
937 // Lower this unsupported SELECT_CC into a combination of two supported
938 // SELECT_CC operations.
939 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
940
941 return DAG.getNode(ISD::SELECT_CC, DL, VT,
942 Cond, HWFalse,
943 True, False,
945}
946
947SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
948 SelectionDAG &DAG) const {
949 SDLoc SL(Op);
950 EVT VT = Op.getValueType();
951
952 const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
953 unsigned SrcAS = ASC->getSrcAddressSpace();
954 unsigned DestAS = ASC->getDestAddressSpace();
955
956 if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS)
957 return DAG.getSignedConstant(AMDGPU::getNullPointerValue(DestAS), SL, VT);
958
959 return Op;
960}
961
962/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
963/// convert these pointers to a register index. Each register holds
964/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
965/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
966/// for indirect addressing.
967SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
968 unsigned StackWidth,
969 SelectionDAG &DAG) const {
970 unsigned SRLPad;
971 switch(StackWidth) {
972 case 1:
973 SRLPad = 2;
974 break;
975 case 2:
976 SRLPad = 3;
977 break;
978 case 4:
979 SRLPad = 4;
980 break;
981 default: llvm_unreachable("Invalid stack width");
982 }
983
984 SDLoc DL(Ptr);
985 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
986 DAG.getConstant(SRLPad, DL, MVT::i32));
987}
988
989void R600TargetLowering::getStackAddress(unsigned StackWidth,
990 unsigned ElemIdx,
991 unsigned &Channel,
992 unsigned &PtrIncr) const {
993 switch (StackWidth) {
994 default:
995 case 1:
996 Channel = 0;
997 if (ElemIdx > 0) {
998 PtrIncr = 1;
999 } else {
1000 PtrIncr = 0;
1001 }
1002 break;
1003 case 2:
1004 Channel = ElemIdx % 2;
1005 if (ElemIdx == 2) {
1006 PtrIncr = 1;
1007 } else {
1008 PtrIncr = 0;
1009 }
1010 break;
1011 case 4:
1012 Channel = ElemIdx;
1013 PtrIncr = 0;
1014 break;
1015 }
1016}
1017
/// Lower a sub-dword (i8/i16) store to the private address space by turning
/// it into a read-modify-write of the containing 32-bit dword: load the
/// dword, clear the destination bits, OR in the shifted value and store the
/// dword back.
SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  //TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);

  // Mask selecting the stored bits within the dword.
  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlign() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlign() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  // When this store came from a scalarized vector store, LowerSTORE inserted
  // a DUMMY_CHAIN above it; peel it off here and re-insert it below so the
  // sibling element stores are ordered after this RMW sequence.
  SDValue OldChain = Store->getChain();
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  // Effective byte address = base (+ offset, if any).
  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}
1107
/// Custom store lowering: scalarizes vector stores that the target cannot
/// handle, expands under-aligned stores, emits STORE_MSKOR for global
/// truncating stores, and tags dword-addressed stores with DWORDADDR.
SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  const bool TruncatingStore = StoreNode->isTruncatingStore();

  // Neither LOCAL nor PRIVATE can do vectors at the moment
                             TruncatingStore) &&
      VT.isVector()) {
    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      SmallVector<SDValue, 4> NewOps(StoreNode->ops());
      NewOps[0] = NewChain;
      StoreNode = cast<StoreSDNode>(DAG.UpdateNodeOperands(StoreNode, NewOps));
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  // Expand stores that are smaller than their natural alignment and not
  // allowed to be misaligned in this address space.
  Align Alignment = StoreNode->getAlign();
  if (Alignment < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
                                      StoreNode->getMemOperand()->getFlags(),
                                      nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  // Byte pointer shifted down to a dword address.
  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
    if (TruncatingStore) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlign() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      // Bit position of the value within its dword (byte index * 8).
      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    }
    if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isIndexed()) {
        llvm_unreachable("Indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  // Sub-dword private stores need the read-modify-write expansion.
  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}
1220
// return (512 + (kc_bank << 12)
//
// Maps an address space to the base of its kcache constant bank: sixteen
// consecutive 4096-entry windows starting at offset 512.  Returns -1 for
// address spaces that are not constant buffers.
static int
  switch (AddressSpace) {
    return 512;
    return 512 + 4096;
    return 512 + 4096 * 2;
    return 512 + 4096 * 3;
    return 512 + 4096 * 4;
    return 512 + 4096 * 5;
    return 512 + 4096 * 6;
    return 512 + 4096 * 7;
    return 512 + 4096 * 8;
    return 512 + 4096 * 9;
    return 512 + 4096 * 10;
    return 512 + 4096 * 11;
    return 512 + 4096 * 12;
    return 512 + 4096 * 13;
    return 512 + 4096 * 14;
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}
1261
1262SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1263 SelectionDAG &DAG) const {
1264 SDLoc DL(Op);
1265 LoadSDNode *Load = cast<LoadSDNode>(Op);
1266 ISD::LoadExtType ExtType = Load->getExtensionType();
1267 EVT MemVT = Load->getMemoryVT();
1268 assert(Load->getAlign() >= MemVT.getStoreSize());
1269
1270 SDValue BasePtr = Load->getBasePtr();
1271 SDValue Chain = Load->getChain();
1272 SDValue Offset = Load->getOffset();
1273
1274 SDValue LoadPtr = BasePtr;
1275 if (!Offset.isUndef()) {
1276 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1277 }
1278
1279 // Get dword location
1280 // NOTE: this should be eliminated by the future SHR ptr, 2
1281 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1282 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1283
1284 // Load dword
1285 // TODO: can we be smarter about machine pointer info?
1286 MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1287 SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1288
1289 // Get offset within the register.
1290 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1291 LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1292
1293 // Bit offset of target byte (byteIdx * 8).
1294 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1295 DAG.getConstant(3, DL, MVT::i32));
1296
1297 // Shift to the right.
1298 SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1299
1300 // Eliminate the upper bits by setting them to ...
1301 EVT MemEltVT = MemVT.getScalarType();
1302
1303 if (ExtType == ISD::SEXTLOAD) { // ... ones.
1304 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1305 Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1306 } else { // ... or zeros.
1307 Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1308 }
1309
1310 SDValue Ops[] = {
1311 Ret,
1312 Read.getValue(1) // This should be our output chain
1313 };
1314
1315 return DAG.getMergeValues(Ops, DL);
1316}
1317
/// Custom load lowering: expands private sub-dword extloads, scalarizes
/// vector loads the target cannot handle, folds constant-buffer loads into
/// CONST_ADDRESS nodes, and tags private dword loads with DWORDADDR.
SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  // Sub-dword extending loads from private memory use the RMW-style expansion.
  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
      VT.isVector()) {
    SDValue Ops[2];
    std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
    return DAG.getMergeValues(Ops, DL);
  }

  // This is still used for explicit load from addrspace(8)
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
    }
    // TODO: Does this even work?
    // non-constant ptr can't be folded, keeps it as a v4f32 load
    Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
                         DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                                     DAG.getConstant(4, DL, MVT::i32)),
                         DAG.getConstant(LoadNode->getAddressSpace() -
                                         DL, MVT::i32));

    // Scalar results come from channel 0 of the 4-channel fetch.
    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }
  return SDValue();
}
1405
1406SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1407 SDValue Chain = Op.getOperand(0);
1408 SDValue Cond = Op.getOperand(1);
1409 SDValue Jump = Op.getOperand(2);
1410
1411 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1412 Chain, Jump, Cond);
1413}
1414
1415SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1416 SelectionDAG &DAG) const {
1417 MachineFunction &MF = DAG.getMachineFunction();
1418 const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1419
1420 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1421
1422 unsigned FrameIndex = FIN->getIndex();
1423 Register IgnoredFrameReg;
1424 StackOffset Offset =
1425 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1426 return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
1427 SDLoc(Op), Op.getValueType());
1428}
1429
/// Pick the CCAssignFn used to analyze arguments for the given calling
/// convention; kernel conventions are rejected here (they are handled by
/// analyzeFormalArgumentsCompute), everything else supported uses CC_R600.
                                                  bool IsVarArg) const {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    llvm_unreachable("kernels should not be handled here");
    return CC_R600;
  default:
    reportFatalUsageError("unsupported calling convention");
  }
}
1451
/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
///
/// Shader arguments arrive in live-in registers; compute (kernel) arguments
/// are loaded from the parameter buffer at their assigned byte offsets.
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());

  // Shaders get register assignments from the CC table; kernels get byte
  // offsets into the parameter buffer.
  if (AMDGPU::isShader(CallConv)) {
    CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
  } else {
    analyzeFormalArgumentsCompute(CCInfo, Ins);
  }

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (VT.isInteger() && !MemVT.isInteger())
      MemVT = MemVT.changeTypeToInteger();

    if (AMDGPU::isShader(CallConv)) {
      Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      if (VT.isFloatingPoint()) {
        Ext = ISD::EXTLOAD;
      } else {
        // FIXME: This should really check the extload type, but the handling of
        // extload vector parameters seems to be broken.

        // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
        Ext = ISD::SEXTLOAD;
      }
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned PartOffset = VA.getLocMemOffset();
    Align Alignment = commonAlignment(Align(VT.getStoreSize()), PartOffset);

    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
        PtrInfo,
        MemVT, Alignment, MachineMemOperand::MONonTemporal |

    InVals.push_back(Arg);
  }
  return Chain;
}
1530
/// SetCC results are plain i32 for scalar types.
                                         EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
}
1537
/// Merged stores are limited to at most 32 bits for address spaces whose
/// lowering cannot handle vectors (see the comment below); other address
/// spaces allow any merged width.
                                          const MachineFunction &MF) const {
  // Local and Private addresses do not handle vectors. Limit to i32
    return (MemVT.getSizeInBits() <= 32);
  }
  return true;
}
1546
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *IsFast) const {
  // Default to "not fast" until proven otherwise.
  if (IsFast)
    *IsFast = 0;

  if (!VT.isSimple() || VT == MVT::Other)
    return false;

  // Sub-dword accesses are never allowed misaligned here.
  if (VT.bitsLT(MVT::i32))
    return false;

  // TODO: This is a rough estimate.
  if (IsFast)
    *IsFast = 1;

  // Only multi-dword accesses with at least dword alignment are accepted.
  return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
}
1565
/// Fold special lanes of a 4-element build_vector into swizzle selectors:
/// 0.0 becomes SEL_0, 1.0 becomes SEL_1, undef lanes become SEL_MASK_WRITE,
/// and a lane equal to an earlier lane is remapped to that earlier lane.
/// \p RemapSwizzle receives the old-selector -> new-selector map.
                                        SelectionDAG &DAG, SDValue VectorEntry,
                                        DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  // Split the vector into its four scalar lanes.
  SDValue NewBldVec[4];
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128 bits reg usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;

    // Reuse an identical earlier lane instead of materializing it twice.
    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}
1610
/// Try to move one extract_vector_elt lane of a build_vector into the lane
/// matching its source index (by swapping it with the lane currently there),
/// without disturbing lanes that are already in their natural position.
/// \p RemapSwizzle receives the resulting lane permutation.
                             DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  // Split the vector into its four scalar lanes.
  SDValue NewBldVec[4];
  bool isUnmovable[4] = {false, false, false, false};
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  // Start with the identity mapping and pin lanes that already sit at their
  // source index.
  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  // Perform at most one swap that moves an extracted element home.
  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}
1648
1649SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
1650 SelectionDAG &DAG,
1651 const SDLoc &DL) const {
1652 // Old -> New swizzle values
1653 DenseMap<unsigned, unsigned> SwizzleRemap;
1654
1655 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1656 for (unsigned i = 0; i < 4; i++) {
1657 unsigned Idx = Swz[i]->getAsZExtVal();
1658 auto It = SwizzleRemap.find(Idx);
1659 if (It != SwizzleRemap.end())
1660 Swz[i] = DAG.getConstant(It->second, DL, MVT::i32);
1661 }
1662
1663 SwizzleRemap.clear();
1664 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1665 for (unsigned i = 0; i < 4; i++) {
1666 unsigned Idx = Swz[i]->getAsZExtVal();
1667 auto It = SwizzleRemap.find(Idx);
1668 if (It != SwizzleRemap.end())
1669 Swz[i] = DAG.getConstant(It->second, DL, MVT::i32);
1670 }
1671
1672 return BuildVector;
1673}
1674
/// Fold a dword-aligned, non-extending i32 (or vector-of-i32) load from a
/// constant buffer into per-channel CONST_ADDRESS nodes.
SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
                                            SelectionDAG &DAG) const {
  SDLoc DL(LoadNode);
  EVT VT = LoadNode->getValueType(0);
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  //TODO: Support smaller loads
  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
    return SDValue();

  // Only dword-aligned accesses can be folded.
  if (LoadNode->getAlign() < Align(4))
    return SDValue();

  int ConstantBlock = ConstantAddressBlock(Block);

  SDValue Slots[4];
  for (unsigned i = 0; i < 4; i++) {
    // We want Const position encoded with the following formula :
    // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
    // const_index is Ptr computed by llvm using an alignment of 16.
    // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
    // then div by 4 at the ISel step
    SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
        DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
    Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
  }
  // Scalar loads are fetched as one channel of a v4i32 and extracted below.
  EVT NewVT = MVT::v4i32;
  unsigned NumElements = 4;
  if (VT.isVector()) {
    NewVT = VT;
    NumElements = VT.getVectorNumElements();
  }
  SDValue Result = DAG.getBuildVector(NewVT, DL, ArrayRef(Slots, NumElements));
  if (!VT.isVector()) {
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                         DAG.getConstant(0, DL, MVT::i32));
  }
  SDValue MergedValues[2] = {
    Result,
    Chain
  };
  return DAG.getMergeValues(MergedValues, DL);
}
1720
1721//===----------------------------------------------------------------------===//
1722// Custom DAG Optimizations
1723//===----------------------------------------------------------------------===//
1724
/// Target-specific DAG combines for R600: fp_round/fp_to_sint folds,
/// insert/extract-vector-element simplification on build_vectors, nested
/// select_cc folding, swizzle optimization on export/texture nodes, and
/// constant-buffer load folding.
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getAllOnesConstant(DL, MVT::i32), // True
                       DAG.getConstant(0, DL, MVT::i32), // False
                       SelectCC.getOperand(4)); // CC
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.isUndef())
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = EltNo->getAsZExtVal();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
      Ops.append(InVec.getNode()->op_begin(),
                 InVec.getNode()->op_end());
    } else if (InVec.isUndef()) {
      unsigned NElts = VT.getVectorNumElements();
      Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
    } else {
      return SDValue();
    }

    // Insert the element
    if (Elt < Ops.size()) {
      // All the operands of BUILD_VECTOR must have the same type;
      // we enforce that here.
      EVT OpVT = Ops[0].getValueType();
      if (InVal.getValueType() != OpVT)
        InVal = OpVT.bitsGT(InVal.getValueType()) ?
          DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
          DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
      Ops[Elt] = InVal;
    }

    // Return the new vector
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Extract_vec (Build_vector) generated by custom lowering
  // also needs to be customly combined
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return Arg->getOperand(Element);
      }
    }
    if (Arg.getOpcode() == ISD::BITCAST &&
      if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
        unsigned Element = Const->getZExtValue();
        return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
                           Arg->getOperand(0).getOperand(Element));
      }
    }
    break;
  }

  case ISD::SELECT_CC: {
    // Try common optimizations
      return Ret;

    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
    // selectcc x, y, a, b, inv(cc)
    //
    // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
    // selectcc x, y, a, b, cc
    SDValue LHS = N->getOperand(0);
    if (LHS.getOpcode() != ISD::SELECT_CC) {
      return SDValue();
    }

    SDValue RHS = N->getOperand(1);
    SDValue True = N->getOperand(2);
    SDValue False = N->getOperand(3);
    ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();

    // The inner select must produce exactly this node's (a, b) pair and be
    // compared against b for the fold to apply.
    if (LHS.getOperand(2).getNode() != True.getNode() ||
        LHS.getOperand(3).getNode() != False.getNode() ||
        RHS.getNode() != False.getNode()) {
      return SDValue();
    }

    switch (NCC) {
    default: return SDValue();
    case ISD::SETNE: return LHS;
    case ISD::SETEQ: {
      ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
      LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
      if (DCI.isBeforeLegalizeOps() ||
          isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
        return DAG.getSelectCC(DL,
                               LHS.getOperand(0),
                               LHS.getOperand(1),
                               LHS.getOperand(2),
                               LHS.getOperand(3),
                               LHSCC);
      break;
    }
    }
    return SDValue();
  }

    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[8] = {
      N->getOperand(0), // Chain
      SDValue(),
      N->getOperand(2), // ArrayBase
      N->getOperand(3), // Type
      N->getOperand(4), // SWZ_X
      N->getOperand(5), // SWZ_Y
      N->getOperand(6), // SWZ_Z
      N->getOperand(7) // SWZ_W
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
    return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
  }
    SDValue Arg = N->getOperand(1);
    if (Arg.getOpcode() != ISD::BUILD_VECTOR)
      break;

    SDValue NewArgs[19] = {
      N->getOperand(0),
      N->getOperand(1),
      N->getOperand(2),
      N->getOperand(3),
      N->getOperand(4),
      N->getOperand(5),
      N->getOperand(6),
      N->getOperand(7),
      N->getOperand(8),
      N->getOperand(9),
      N->getOperand(10),
      N->getOperand(11),
      N->getOperand(12),
      N->getOperand(13),
      N->getOperand(14),
      N->getOperand(15),
      N->getOperand(16),
      N->getOperand(17),
      N->getOperand(18),
    };
    NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
    return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
  }

  case ISD::LOAD: {
    LoadSDNode *LoadNode = cast<LoadSDNode>(N);
    SDValue Ptr = LoadNode->getBasePtr();
    if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
      return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
    break;
  }

  default: break;
  }

}
1951
/// Try to fold the machine node feeding operand \p SrcIdx of \p ParentNode
/// into the operand itself by updating \p Src and the matching modifier
/// operands (\p Neg, \p Abs, \p Sel, \p Imm).  A null modifier SDValue means
/// the parent instruction has no such modifier slot.  Returns true if a fold
/// was performed.
bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
                                     SDValue &Src, SDValue &Neg, SDValue &Abs,
                                     SDValue &Sel, SDValue &Imm,
                                     SelectionDAG &DAG) const {
  const R600InstrInfo *TII = Subtarget->getInstrInfo();
  if (!Src.isMachineOpcode())
    return false;

  switch (Src.getMachineOpcode()) {
  case R600::FNEG_R600:
    // Fold fneg into the source's neg modifier, if the parent has one.
    if (!Neg.getNode())
      return false;
    Src = Src.getOperand(0);
    Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
    return true;
  case R600::FABS_R600:
    // Fold fabs into the source's abs modifier, if the parent has one.
    if (!Abs.getNode())
      return false;
    Src = Src.getOperand(0);
    Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
    return true;
  case R600::CONST_COPY: {
    unsigned Opcode = ParentNode->getMachineOpcode();
    bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;

    if (!Sel.getNode())
      return false;

    SDValue CstOffset = Src.getOperand(0);
    if (ParentNode->getValueType(0).isVector())
      return false;

    // Gather constants values
    int SrcIndices[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0),
      TII->getOperandIdx(Opcode, R600::OpName::src1),
      TII->getOperandIdx(Opcode, R600::OpName::src2),
      TII->getOperandIdx(Opcode, R600::OpName::src0_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_W)
    };
    std::vector<unsigned> Consts;
    for (int OtherSrcIdx : SrcIndices) {
      int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
      if (OtherSrcIdx < 0 || OtherSelIdx < 0)
        continue;
      if (HasDst) {
        // Operand indices from getOperandIdx include the dst operand, which
        // SDNode operands do not; shift both indices down by one.
        OtherSrcIdx--;
        OtherSelIdx--;
      }
      if (RegisterSDNode *Reg =
          dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
        if (Reg->getReg() == R600::ALU_CONST) {
          Consts.push_back(ParentNode->getConstantOperandVal(OtherSelIdx));
        }
      }
    }

    // Only fold if the instruction stays within the hardware's constant
    // read limits after adding this constant.
    ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
    Consts.push_back(Cst->getZExtValue());
    if (!TII->fitsConstReadLimitations(Consts)) {
      return false;
    }

    Sel = CstOffset;
    Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
    return true;
  }
  case R600::MOV_IMM_GLOBAL_ADDR:
    // Check if the Imm slot is used. Taken from below.
    if (Imm->getAsZExtVal())
      return false;
    Imm = Src.getOperand(0);
    Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
    return true;
  case R600::MOV_IMM_I32:
  case R600::MOV_IMM_F32: {
    unsigned ImmReg = R600::ALU_LITERAL_X;
    uint64_t ImmValue = 0;

    // Prefer the hardware's inline constants (0, 0.5, 1) over a literal.
    if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
      ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
      float FloatValue = FPC->getValueAPF().convertToFloat();
      if (FloatValue == 0.0) {
        ImmReg = R600::ZERO;
      } else if (FloatValue == 0.5) {
        ImmReg = R600::HALF;
      } else if (FloatValue == 1.0) {
        ImmReg = R600::ONE;
      } else {
        ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
      }
    } else {
      uint64_t Value = Src.getConstantOperandVal(0);
      if (Value == 0) {
        ImmReg = R600::ZERO;
      } else if (Value == 1) {
        ImmReg = R600::ONE_INT;
      } else {
        ImmValue = Value;
      }
    }

    // Check that we aren't already using an immediate.
    // XXX: It's possible for an instruction to have more than one
    // immediate operand, but this is not supported yet.
    if (ImmReg == R600::ALU_LITERAL_X) {
      if (!Imm.getNode())
        return false;
      ConstantSDNode *C = cast<ConstantSDNode>(Imm);
      if (C->getZExtValue())
        return false;
      Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
    }
    Src = DAG.getRegister(ImmReg, MVT::i32);
    return true;
  }
  default:
    return false;
  }
}
2078
/// Fold the instructions after selecting them.
///
/// For each source operand of \p Node, try to fold a neg/abs modifier, a
/// constant-buffer read, or an inline/literal immediate directly into the
/// instruction via FoldOperand. On the first successful fold a new machine
/// node is built from the (mutated) operand list and returned; otherwise the
/// original node is returned unchanged.
///
/// \param Node machine SDNode produced by instruction selection.
/// \param DAG  the selection DAG the node lives in.
/// \return \p Node, or a freshly created machine node with folded operands.
SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
                                            SelectionDAG &DAG) const {
  const R600InstrInfo *TII = Subtarget->getInstrInfo();
  // Only machine opcodes carry the R600 operand/modifier tables we need.
  if (!Node->isMachineOpcode())
    return Node;

  unsigned Opcode = Node->getMachineOpcode();
  // Placeholder passed to FoldOperand for modifier slots this opcode lacks.
  SDValue FakeOp;

  // Local copy of the operand list; FoldOperand mutates entries through the
  // references taken below, and the copy becomes the new node's operands.
  std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());

  if (Opcode == R600::DOT_4) {
    // DOT_4 has eight scalar sources (src0/src1 x XYZW), each with its own
    // neg and abs modifier operands.
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_W)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
    };
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
    };
    for (unsigned i = 0; i < 8; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      // NOTE(review): getOperandIdx indices appear to count the MI-level dst
      // operand, which is a result rather than an operand at the SDNode
      // level, hence the "- 1" here and the HasDst adjustments below —
      // confirm against R600InstrInfo::getOperandIdx.
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue &Abs = Ops[AbsIdx[i] - 1];
      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      if (HasDst)
        SelIdx--;
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      // DOT_4 takes no literal operand, so the Imm slot is FakeOp.
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else if (Opcode == R600::REG_SEQUENCE) {
    // REG_SEQUENCE operands are (value, subreg-index) pairs after the
    // register-class operand at index 0; visit only the values.
    for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
      SDValue &Src = Ops[i];
      if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  } else {
    // Generic ALU instruction: up to three sources, each with neg and
    // (except src2) abs modifiers.
    if (!TII->hasInstrModifiers(Opcode))
      return Node;
    int OperandIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0),
      TII->getOperandIdx(Opcode, R600::OpName::src1),
      TII->getOperandIdx(Opcode, R600::OpName::src2)
    };
    int NegIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
      TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
      TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
    };
    // src2 has no abs modifier on R600 ALU instructions.
    int AbsIdx[] = {
      TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
      TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
      -1
    };
    for (unsigned i = 0; i < 3; i++) {
      if (OperandIdx[i] < 0)
        return Node;
      SDValue &Src = Ops[OperandIdx[i] - 1];
      SDValue &Neg = Ops[NegIdx[i] - 1];
      SDValue FakeAbs;
      SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
      bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
      int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
      int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
      // Shift MI-level indices down by one to address the SDNode operand
      // list, which does not include the def.
      if (HasDst) {
        SelIdx--;
        ImmIdx--;
      }
      SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
      SDValue &Imm = Ops[ImmIdx];
      if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
        return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
    }
  }

  return Node;
}
2183
2185R600TargetLowering::shouldExpandAtomicRMWInIR(const AtomicRMWInst *RMW) const {
2186 switch (RMW->getOperation()) {
2197 // FIXME: Cayman at least appears to have instructions for this, but the
2198 // instruction definitions appear to be missing.
2200 case AtomicRMWInst::Xchg: {
2201 const DataLayout &DL = RMW->getFunction()->getDataLayout();
2202 unsigned ValSize = DL.getTypeSizeInBits(RMW->getType());
2203 if (ValSize == 32 || ValSize == 64)
2206 }
2207 default:
2208 if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) {
2209 unsigned Size = IntTy->getBitWidth();
2210 if (Size == 32 || Size == 64)
2212 }
2213
2215 }
2216
2217 llvm_unreachable("covered atomicrmw op switch");
2218}
return SDValue()
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Interfaces for producing common pass manager configurations.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
#define I(x, y, z)
Definition MD5.cpp:57
static bool isUndef(const MachineInstr &MI)
Register Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define MO_FLAG_NEG
Definition R600Defines.h:15
#define MO_FLAG_ABS
Definition R600Defines.h:16
#define MO_FLAG_MASK
Definition R600Defines.h:17
#define MO_FLAG_PUSH
Definition R600Defines.h:18
static bool isEOP(MachineBasicBlock::iterator I)
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
static int ConstantAddressBlock(unsigned AddressSpace)
static SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
R600 DAG Lowering interface definition.
Provides R600 specific target descriptions.
AMDGPU R600 specific subclass of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
const SmallVectorImpl< MachineOperand > & Cond
static Type * getValueType(Value *V)
Returns the type of the given value/instruction V.
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition VPlanSLP.cpp:247
Value * RHS
Value * LHS
unsigned getStackWidth(const MachineFunction &MF) const
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunctionInfo *MFI, SDValue Op, SelectionDAG &DAG) const
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT) const
AMDGPUTargetLowering(const TargetMachine &TM, const TargetSubtargetInfo &STI, const AMDGPUSubtarget &AMDGPUSTI)
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
LLVM_ABI float convertToFloat() const
Converts this APFloat to host float value.
Definition APFloat.cpp:6066
APInt bitcastToAPInt() const
Definition APFloat.h:1408
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
unsigned getSrcAddressSpace() const
unsigned getDestAddressSpace() const
an instruction that atomically reads a memory location, combines it with another value,...
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ FSub
*p = old - v
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
@ Nand
*p = ~(old & v)
BinOp getOperation() const
CCState - This class holds information needed while lowering arguments and return values.
LLVM_ABI void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
int64_t getLocMemOffset() const
const APFloat & getValueAPF() const
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
uint64_t getZExtValue() const
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
iterator find(const_arg_type_t< KeyT > Val)
Definition DenseMap.h:178
bool empty() const
Definition DenseMap.h:109
iterator end()
Definition DenseMap.h:81
const DataLayout & getDataLayout() const
Get the data layout of the module this function belongs to.
Definition Function.cpp:362
LLVM_ABI unsigned getAddressSpace() const
const GlobalValue * getGlobal() const
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Machine Value Type.
static auto integer_valuetypes()
LLVM_ABI DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
Representation of each machine instruction.
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
Flags getFlags() const
Return the raw flags of the source value,.
const Value * getValue() const
Return the base address of the memory access.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
unsigned getTargetFlags() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
bool use_empty(Register RegNo) const
use_empty - Return true if there are no instructions using the specified register.
An SDNode that represents everything that will be needed to construct a MachineInstr.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
MachineMemOperand * getMemOperand() const
Return the unique MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
static LLVM_ABI PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
const R600InstrInfo * getInstrInfo() const override
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI)
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override
Return the ValueType of the result of SETCC operations.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *IsFast=nullptr) const override
Determine if the target supports unaligned memory accesses.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
XXX Only kernel functions are supported, so we can assume for now that every function is a kernel fun...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
ArrayRef< SDUse > ops() const
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
op_iterator op_end() const
op_iterator op_begin() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
LLVM_ABI SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getAllOnesConstant(const SDLoc &DL, EVT VT, bool IsTarget=false, bool IsOpaque=false)
LLVM_ABI MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
LLVM_ABI SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
LLVM_ABI SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
LLVM_ABI SDValue getSignedConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond, SDNodeFlags Flags=SDNodeFlags())
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
LLVM_ABI SDValue getCondCode(ISD::CondCode Cond)
LLVMContext * getContext() const
LLVM_ABI SDNode * UpdateNodeOperands(SDNode *N, SDValue Op)
Mutate the specified node in-place to have the specified operands.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal for a comparison of the specified types on this ...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
Primary interface to the complete machine description for the target machine.
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ PARAM_I_ADDRESS
Address space for indirect addressable parameter memory (VTX1).
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUILD_VERTICAL_VECTOR
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
LLVM_READNONE constexpr bool isShader(CallingConv::ID CC)
constexpr int64_t getNullPointerValue(unsigned AS)
Get the null pointer value for the given address space.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition CallingConv.h:47
@ SPIR_KERNEL
Used for SPIR kernel functions.
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition ISDOpcodes.h:819
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition ISDOpcodes.h:261
@ CTLZ_ZERO_UNDEF
Definition ISDOpcodes.h:788
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, val, ptr) This corresponds to "store atomic" instruction.
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:294
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition ISDOpcodes.h:522
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition ISDOpcodes.h:518
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition ISDOpcodes.h:220
@ GlobalAddress
Definition ISDOpcodes.h:88
@ FADD
Simple binary floating point operators.
Definition ISDOpcodes.h:417
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition ISDOpcodes.h:280
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition ISDOpcodes.h:787
@ FNEG
Perform various unary floating-point operations inspired by libm.
@ BR_CC
BR_CC - Conditional branch.
@ IS_FPCLASS
Performs a check of floating point class property, defined by IEEE-754.
Definition ISDOpcodes.h:548
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition ISDOpcodes.h:796
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
@ SHL
Shift and rotation operations.
Definition ISDOpcodes.h:765
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition ISDOpcodes.h:811
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition ISDOpcodes.h:888
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition ISDOpcodes.h:926
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition ISDOpcodes.h:739
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition ISDOpcodes.h:205
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition ISDOpcodes.h:304
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition ISDOpcodes.h:565
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition ISDOpcodes.h:997
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ BRCOND
BRCOND - Conditional branch.
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition ISDOpcodes.h:833
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition ISDOpcodes.h:534
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition ISDOpcodes.h:556
LLVM_ABI CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
LLVM_ABI CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
int32_t getLDSNoRetOp(uint32_t Opcode)
constexpr float pif
The mathematical constant pi as a single-precision (float) value, from the llvm::numbers namespace.
Definition MathExtras.h:53
NodeAddr< NodeBase * > Node
Definition RDFGraph.h:381
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Offset
Definition DWP.cpp:532
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
LLVM_ABI bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
@ Kill
The last use of a register.
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ AfterLegalizeVectorOps
Definition DAGCombine.h:18
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition Alignment.h:201
LLVM_ABI bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition ValueTypes.h:145
EVT changeTypeToInteger() const
Return the type converted to an equivalently sized integer or vector with integer element type.
Definition ValueTypes.h:129
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition ValueTypes.h:292
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
bool isFloatingPoint() const
Return true if this is a FP or a vector FP type.
Definition ValueTypes.h:155
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
uint64_t getScalarSizeInBits() const
Definition ValueTypes.h:393
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition ValueTypes.h:331
bool bitsGE(EVT VT) const
Return true if this has no less bits than VT.
Definition ValueTypes.h:300
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition ValueTypes.h:316
bool isInteger() const
Return true if this is an integer or a vector integer type.
Definition ValueTypes.h:160
InputArg - This struct carries flags and type information about a single incoming (formal) argument o...
This class contains a discriminated union of information about pointers in memory operands,...