R600ISelLowering.cpp
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Custom DAG lowering for R600
11//
12//===----------------------------------------------------------------------===//
13
14#include "R600ISelLowering.h"
15#include "AMDGPU.h"
17#include "R600Defines.h"
19#include "R600Subtarget.h"
20#include "R600TargetMachine.h"
22#include "llvm/IR/IntrinsicsAMDGPU.h"
23#include "llvm/IR/IntrinsicsR600.h"
25
26using namespace llvm;
27
28#include "R600GenCallingConv.inc"
29
30R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
31 const R600Subtarget &STI)
32 : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
33 addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
34 addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
35 addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
36 addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
37 addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
38 addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
39
42
43 computeRegisterProperties(Subtarget->getRegisterInfo());
44
45 // Legalize loads and stores to the private address space.
46 setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
47
48 // 32-bit ABS is legal for AMDGPU except for R600
50
51 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
52 // spaces, so it is custom lowered to handle those where it isn't.
53 for (auto Op : {ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD})
54 for (MVT VT : MVT::integer_valuetypes()) {
55 setLoadExtAction(Op, VT, MVT::i1, Promote);
56 setLoadExtAction(Op, VT, MVT::i8, Custom);
57 setLoadExtAction(Op, VT, MVT::i16, Custom);
58 }
59
60 // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
62 MVT::v2i1, Expand);
63
65 MVT::v4i1, Expand);
66
67 setOperationAction(ISD::STORE, {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32},
68 Custom);
69
70 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
71 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
72 // We need to include these since trunc STORES to PRIVATE need
73 // special handling to accommodate RMW
74 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
75 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
76 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
77 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
78 setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
79 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
80 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
81 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
82 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
83 setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);
84
85 // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
86 setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
87 setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
88
89 // Set condition code actions
93 MVT::f32, Expand);
94
96 MVT::i32, Expand);
97
98 setOperationAction({ISD::FCOS, ISD::FSIN}, MVT::f32, Custom);
99
100 setOperationAction(ISD::SETCC, {MVT::v4i32, MVT::v2i32}, Expand);
101
102 setOperationAction(ISD::BR_CC, {MVT::i32, MVT::f32}, Expand);
103 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
104
106
108 {MVT::f32, MVT::v2f32, MVT::v3f32, MVT::v4f32, MVT::v5f32,
109 MVT::v6f32, MVT::v7f32, MVT::v8f32, MVT::v16f32},
110 Expand);
111
112 setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR},
113 MVT::f64, Custom);
114
115 setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom);
116
117 setOperationAction(ISD::SETCC, {MVT::i32, MVT::f32}, Expand);
118 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT}, {MVT::i1, MVT::i64},
119 Custom);
120
121 setOperationAction(ISD::SELECT, {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32},
122 Expand);
123
124 // ADD, SUB overflow.
125 // TODO: turn these into Legal?
126 if (Subtarget->hasCARRY())
128
129 if (Subtarget->hasBORROW())
131
132 // Expand sign extension of vectors
133 if (!Subtarget->hasBFE())
135
136 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i1, MVT::v4i1}, Expand);
137
138 if (!Subtarget->hasBFE())
140 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i8, MVT::v4i8}, Expand);
141
142 if (!Subtarget->hasBFE())
144 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v4i16}, Expand);
145
147 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i32, MVT::v4i32}, Expand);
148
150
152
154 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
155
157 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
158
159 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
160 // to be Legal/Custom in order to avoid library calls.
161 setOperationAction({ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS}, MVT::i32,
162 Custom);
163
164 if (!Subtarget->hasFMA())
165 setOperationAction(ISD::FMA, {MVT::f32, MVT::f64}, Expand);
166
167 // FIXME: May need no denormals check
169
170 if (!Subtarget->hasBFI())
171 // fcopysign can be done in a single instruction with BFI.
172 setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);
173
174 if (!Subtarget->hasBCNT(32))
176
177 if (!Subtarget->hasBCNT(64))
179
180 if (Subtarget->hasFFBH())
182
183 if (Subtarget->hasFFBL())
185
186 // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
187 // need it for R600.
188 if (Subtarget->hasBFE())
190
192 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
193
194 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
195 for (MVT VT : ScalarIntVTs)
197 Expand);
198
199 // LLVM will expand these to atomic_cmp_swap(0)
200 // and atomic_swap, respectively.
201 setOperationAction({ISD::ATOMIC_LOAD, ISD::ATOMIC_STORE}, MVT::i32, Expand);
202
203 // We need to custom lower some of the intrinsics
205 Custom);
206
208
211}
212
213static bool isEOP(MachineBasicBlock::iterator I) {
214 if (std::next(I) == I->getParent()->end())
215 return false;
216 return std::next(I)->getOpcode() == R600::RETURN;
217}
218
219MachineBasicBlock *
220R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
221 MachineBasicBlock *BB) const {
222 MachineFunction *MF = BB->getParent();
223 MachineRegisterInfo &MRI = MF->getRegInfo();
224 MachineBasicBlock::iterator I = MI;
225 const R600InstrInfo *TII = Subtarget->getInstrInfo();
226
227 switch (MI.getOpcode()) {
228 default:
229 // Replace LDS_*_RET instructions that don't have any uses with the
230 // equivalent LDS_*_NORET instruction.
231 if (TII->isLDSRetInstr(MI.getOpcode())) {
232 int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
233 assert(DstIdx != -1);
234 MachineInstrBuilder NewMI;
235 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
236 // LDS_1A2D support and remove this special case.
237 if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
238 MI.getOpcode() == R600::LDS_CMPST_RET)
239 return BB;
240
241 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
242 TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
243 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
244 NewMI.add(MO);
245 } else {
246 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
247 }
248 break;
249
250 case R600::FABS_R600: {
251 MachineInstr *NewMI = TII->buildDefaultInstruction(
252 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
253 MI.getOperand(1).getReg());
254 TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
255 break;
256 }
257
258 case R600::FNEG_R600: {
259 MachineInstr *NewMI = TII->buildDefaultInstruction(
260 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
261 MI.getOperand(1).getReg());
262 TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
263 break;
264 }
265
266 case R600::MASK_WRITE: {
267 Register maskedRegister = MI.getOperand(0).getReg();
268 assert(maskedRegister.isVirtual());
269 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
270 TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
271 break;
272 }
273
274 case R600::MOV_IMM_F32:
275 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
276 .getFPImm()
277 ->getValueAPF()
278 .bitcastToAPInt()
279 .getZExtValue());
280 break;
281
282 case R600::MOV_IMM_I32:
283 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
284 MI.getOperand(1).getImm());
285 break;
286
287 case R600::MOV_IMM_GLOBAL_ADDR: {
288 //TODO: Perhaps combine this instruction with the next if possible
289 auto MIB = TII->buildDefaultInstruction(
290 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
291 int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
292 //TODO: Ugh this is rather ugly
293 const MachineOperand &MO = MI.getOperand(1);
294 MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
295 MO.getTargetFlags());
296 break;
297 }
298
299 case R600::CONST_COPY: {
300 MachineInstr *NewMI = TII->buildDefaultInstruction(
301 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
302 TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
303 MI.getOperand(1).getImm());
304 break;
305 }
306
307 case R600::RAT_WRITE_CACHELESS_32_eg:
308 case R600::RAT_WRITE_CACHELESS_64_eg:
309 case R600::RAT_WRITE_CACHELESS_128_eg:
310 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
311 .add(MI.getOperand(0))
312 .add(MI.getOperand(1))
313 .addImm(isEOP(I)); // Set End of program bit
314 break;
315
316 case R600::RAT_STORE_TYPED_eg:
317 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
318 .add(MI.getOperand(0))
319 .add(MI.getOperand(1))
320 .add(MI.getOperand(2))
321 .addImm(isEOP(I)); // Set End of program bit
322 break;
323
324 case R600::BRANCH:
325 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
326 .add(MI.getOperand(0));
327 break;
328
329 case R600::BRANCH_COND_f32: {
330 MachineInstr *NewMI =
331 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
332 R600::PREDICATE_BIT)
333 .add(MI.getOperand(1))
334 .addImm(R600::PRED_SETNE)
335 .addImm(0); // Flags
336 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
337 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
338 .add(MI.getOperand(0))
339 .addReg(R600::PREDICATE_BIT, RegState::Kill);
340 break;
341 }
342
343 case R600::BRANCH_COND_i32: {
344 MachineInstr *NewMI =
345 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
346 R600::PREDICATE_BIT)
347 .add(MI.getOperand(1))
348 .addImm(R600::PRED_SETNE_INT)
349 .addImm(0); // Flags
350 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
351 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
352 .add(MI.getOperand(0))
353 .addReg(R600::PREDICATE_BIT, RegState::Kill);
354 break;
355 }
356
357 case R600::EG_ExportSwz:
358 case R600::R600_ExportSwz: {
359 // Instruction is left unmodified if it's not the last one of its type
360 bool isLastInstructionOfItsType = true;
361 unsigned InstExportType = MI.getOperand(1).getImm();
362 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
363 EndBlock = BB->end(); NextExportInst != EndBlock;
364 NextExportInst = std::next(NextExportInst)) {
365 if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
366 NextExportInst->getOpcode() == R600::R600_ExportSwz) {
367 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
368 .getImm();
369 if (CurrentInstExportType == InstExportType) {
370 isLastInstructionOfItsType = false;
371 break;
372 }
373 }
374 }
375 bool EOP = isEOP(I);
376 if (!EOP && !isLastInstructionOfItsType)
377 return BB;
378 unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
379 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
380 .add(MI.getOperand(0))
381 .add(MI.getOperand(1))
382 .add(MI.getOperand(2))
383 .add(MI.getOperand(3))
384 .add(MI.getOperand(4))
385 .add(MI.getOperand(5))
386 .add(MI.getOperand(6))
387 .addImm(CfInst)
388 .addImm(EOP);
389 break;
390 }
391 case R600::RETURN: {
392 return BB;
393 }
394 }
395
396 MI.eraseFromParent();
397 return BB;
398}
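// A small illustration of the default-case rewrite above, using LDS_ADD_RET
// as an example (a sketch, not tied to one specific opcode): given
//   %dst = LDS_ADD_RET %addr, %val
// where %dst has no further uses, the pseudo is rebuilt as the no-return form
//   LDS_ADD %addr, %val
// keeping every operand except the dead destination register.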
399
400//===----------------------------------------------------------------------===//
401// Custom DAG Lowering Operations
402//===----------------------------------------------------------------------===//
403
404SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
405 MachineFunction &MF = DAG.getMachineFunction();
406 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
407 switch (Op.getOpcode()) {
408 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
409 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
410 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
411 case ISD::SHL_PARTS:
412 case ISD::SRA_PARTS:
413 case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
414 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
415 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
416 case ISD::FCOS:
417 case ISD::FSIN: return LowerTrig(Op, DAG);
418 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
419 case ISD::STORE: return LowerSTORE(Op, DAG);
420 case ISD::LOAD: {
421 SDValue Result = LowerLOAD(Op, DAG);
422 assert((!Result.getNode() ||
423 Result.getNode()->getNumValues() == 2) &&
424 "Load should return a value and a chain");
425 return Result;
426 }
427
428 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
429 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
430 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
431 case ISD::ADDRSPACECAST:
432 return lowerADDRSPACECAST(Op, DAG);
433 case ISD::INTRINSIC_VOID: {
434 SDValue Chain = Op.getOperand(0);
435 unsigned IntrinsicID = Op.getConstantOperandVal(1);
436 switch (IntrinsicID) {
437 case Intrinsic::r600_store_swizzle: {
438 SDLoc DL(Op);
439 const SDValue Args[8] = {
440 Chain,
441 Op.getOperand(2), // Export Value
442 Op.getOperand(3), // ArrayBase
443 Op.getOperand(4), // Type
444 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
445 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
446 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
447 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
448 };
449 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
450 }
451
452 // default for switch(IntrinsicID)
453 default: break;
454 }
455 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
456 break;
457 }
458 case ISD::INTRINSIC_WO_CHAIN: {
459 unsigned IntrinsicID = Op.getConstantOperandVal(0);
460 EVT VT = Op.getValueType();
461 SDLoc DL(Op);
462 switch (IntrinsicID) {
463 case Intrinsic::r600_tex:
464 case Intrinsic::r600_texc: {
465 unsigned TextureOp;
466 switch (IntrinsicID) {
467 case Intrinsic::r600_tex:
468 TextureOp = 0;
469 break;
470 case Intrinsic::r600_texc:
471 TextureOp = 1;
472 break;
473 default:
474 llvm_unreachable("unhandled texture operation");
475 }
476
477 SDValue TexArgs[19] = {
478 DAG.getConstant(TextureOp, DL, MVT::i32),
479 Op.getOperand(1),
480 DAG.getConstant(0, DL, MVT::i32),
481 DAG.getConstant(1, DL, MVT::i32),
482 DAG.getConstant(2, DL, MVT::i32),
483 DAG.getConstant(3, DL, MVT::i32),
484 Op.getOperand(2),
485 Op.getOperand(3),
486 Op.getOperand(4),
487 DAG.getConstant(0, DL, MVT::i32),
488 DAG.getConstant(1, DL, MVT::i32),
489 DAG.getConstant(2, DL, MVT::i32),
490 DAG.getConstant(3, DL, MVT::i32),
491 Op.getOperand(5),
492 Op.getOperand(6),
493 Op.getOperand(7),
494 Op.getOperand(8),
495 Op.getOperand(9),
496 Op.getOperand(10)
497 };
498 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
499 }
500 case Intrinsic::r600_dot4: {
501 SDValue Args[8] = {
502 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
503 DAG.getConstant(0, DL, MVT::i32)),
504 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
505 DAG.getConstant(0, DL, MVT::i32)),
506 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
507 DAG.getConstant(1, DL, MVT::i32)),
508 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
509 DAG.getConstant(1, DL, MVT::i32)),
510 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
511 DAG.getConstant(2, DL, MVT::i32)),
512 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
513 DAG.getConstant(2, DL, MVT::i32)),
514 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
515 DAG.getConstant(3, DL, MVT::i32)),
516 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
517 DAG.getConstant(3, DL, MVT::i32))
518 };
519 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
520 }
521
522 case Intrinsic::r600_implicitarg_ptr: {
525 return DAG.getConstant(ByteOffset, DL, PtrVT);
526 }
527 case Intrinsic::r600_read_ngroups_x:
528 return LowerImplicitParameter(DAG, VT, DL, 0);
529 case Intrinsic::r600_read_ngroups_y:
530 return LowerImplicitParameter(DAG, VT, DL, 1);
531 case Intrinsic::r600_read_ngroups_z:
532 return LowerImplicitParameter(DAG, VT, DL, 2);
533 case Intrinsic::r600_read_global_size_x:
534 return LowerImplicitParameter(DAG, VT, DL, 3);
535 case Intrinsic::r600_read_global_size_y:
536 return LowerImplicitParameter(DAG, VT, DL, 4);
537 case Intrinsic::r600_read_global_size_z:
538 return LowerImplicitParameter(DAG, VT, DL, 5);
539 case Intrinsic::r600_read_local_size_x:
540 return LowerImplicitParameter(DAG, VT, DL, 6);
541 case Intrinsic::r600_read_local_size_y:
542 return LowerImplicitParameter(DAG, VT, DL, 7);
543 case Intrinsic::r600_read_local_size_z:
544 return LowerImplicitParameter(DAG, VT, DL, 8);
545
546 case Intrinsic::r600_read_tgid_x:
547 case Intrinsic::amdgcn_workgroup_id_x:
548 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
549 R600::T1_X, VT);
550 case Intrinsic::r600_read_tgid_y:
551 case Intrinsic::amdgcn_workgroup_id_y:
552 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
553 R600::T1_Y, VT);
554 case Intrinsic::r600_read_tgid_z:
555 case Intrinsic::amdgcn_workgroup_id_z:
556 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
557 R600::T1_Z, VT);
558 case Intrinsic::r600_read_tidig_x:
559 case Intrinsic::amdgcn_workitem_id_x:
560 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
561 R600::T0_X, VT);
562 case Intrinsic::r600_read_tidig_y:
563 case Intrinsic::amdgcn_workitem_id_y:
564 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
565 R600::T0_Y, VT);
566 case Intrinsic::r600_read_tidig_z:
567 case Intrinsic::amdgcn_workitem_id_z:
568 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
569 R600::T0_Z, VT);
570
571 case Intrinsic::r600_recipsqrt_ieee:
572 return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
573
574 case Intrinsic::r600_recipsqrt_clamped:
575 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
576 default:
577 return Op;
578 }
579
580 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
581 break;
582 }
583 } // end switch(Op.getOpcode())
584 return SDValue();
585}
586
587void R600TargetLowering::ReplaceNodeResults(SDNode *N,
588 SmallVectorImpl<SDValue> &Results,
589 SelectionDAG &DAG) const {
590 switch (N->getOpcode()) {
591 default:
592 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
593 return;
594 case ISD::FP_TO_UINT:
595 if (N->getValueType(0) == MVT::i1) {
596 Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
597 return;
598 }
599 // Since we don't care about out of bounds values we can use FP_TO_SINT for
600 // uints too. The DAGLegalizer code for uint considers some extra cases
601 // which are not necessary here.
602 [[fallthrough]];
603 case ISD::FP_TO_SINT: {
604 if (N->getValueType(0) == MVT::i1) {
605 Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
606 return;
607 }
608
609 SDValue Result;
610 if (expandFP_TO_SINT(N, Result, DAG))
611 Results.push_back(Result);
612 return;
613 }
614 case ISD::SDIVREM: {
615 SDValue Op = SDValue(N, 1);
616 SDValue RES = LowerSDIVREM(Op, DAG);
617 Results.push_back(RES);
618 Results.push_back(RES.getValue(1));
619 break;
620 }
621 case ISD::UDIVREM: {
622 SDValue Op = SDValue(N, 0);
623 LowerUDIVREM64(Op, DAG, Results);
624 break;
625 }
626 }
627}
628
629SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
630 SDValue Vector) const {
631 SDLoc DL(Vector);
632 EVT VecVT = Vector.getValueType();
633 EVT EltVT = VecVT.getVectorElementType();
634 SmallVector<SDValue, 8> Args;
635
636 for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
637 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
638 DAG.getVectorIdxConstant(i, DL)));
639 }
640
641 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
642}
643
644SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
645 SelectionDAG &DAG) const {
646 SDLoc DL(Op);
647 SDValue Vector = Op.getOperand(0);
648 SDValue Index = Op.getOperand(1);
649
650 if (isa<ConstantSDNode>(Index) ||
652 return Op;
653
654 Vector = vectorToVerticalVector(DAG, Vector);
655 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
656 Vector, Index);
657}
658
659SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
660 SelectionDAG &DAG) const {
661 SDLoc DL(Op);
662 SDValue Vector = Op.getOperand(0);
663 SDValue Value = Op.getOperand(1);
664 SDValue Index = Op.getOperand(2);
665
666 if (isa<ConstantSDNode>(Index) ||
668 return Op;
669
670 Vector = vectorToVerticalVector(DAG, Vector);
671 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
672 Vector, Value, Index);
673 return vectorToVerticalVector(DAG, Insert);
674}
675
676SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
677 SDValue Op,
678 SelectionDAG &DAG) const {
679 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
682
683 const DataLayout &DL = DAG.getDataLayout();
684 const GlobalValue *GV = GSD->getGlobal();
685 MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
686
687 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
688 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
689}
690
691SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
692 // On hw >= R700, COS/SIN input must be between -1. and 1.
693 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
694 EVT VT = Op.getValueType();
695 SDValue Arg = Op.getOperand(0);
696 SDLoc DL(Op);
697
698 // TODO: Should this propagate fast-math-flags?
699 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
700 DAG.getNode(ISD::FADD, DL, VT,
701 DAG.getNode(ISD::FMUL, DL, VT, Arg,
702 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
703 DAG.getConstantFP(0.5, DL, MVT::f32)));
704 unsigned TrigNode;
705 switch (Op.getOpcode()) {
706 case ISD::FCOS:
707 TrigNode = AMDGPUISD::COS_HW;
708 break;
709 case ISD::FSIN:
710 TrigNode = AMDGPUISD::SIN_HW;
711 break;
712 default:
713 llvm_unreachable("Wrong trig opcode");
714 }
715 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
716 DAG.getNode(ISD::FADD, DL, VT, FractPart,
717 DAG.getConstantFP(-0.5, DL, MVT::f32)));
718 if (Gen >= AMDGPUSubtarget::R700)
719 return TrigVal;
720 // On R600 hw, COS/SIN input must be between -Pi and Pi.
721 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
722 DAG.getConstantFP(numbers::pif, DL, MVT::f32));
723}
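// Worked example of the range reduction done in LowerTrig above, for
// x = 7*pi (approximately 21.99):
//   x * (1 / (2*pi)) + 0.5  ~=  3.5 + 0.5  =  4.0
//   FRACT(4.0)              =   0.0
//   0.0 - 0.5               =  -0.5
// so SIN_HW/COS_HW always see an argument in [-0.5, 0.5), i.e. one period
// mapped onto a unit interval. On pre-R700 parts the value is additionally
// multiplied by pi (the final FMUL) before reaching the hardware.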
724
725SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
726 SelectionDAG &DAG) const {
727 SDValue Lo, Hi;
728 expandShiftParts(Op.getNode(), Lo, Hi, DAG);
729 return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
730}
731
732SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
733 unsigned mainop, unsigned ovf) const {
734 SDLoc DL(Op);
735 EVT VT = Op.getValueType();
736
737 SDValue Lo = Op.getOperand(0);
738 SDValue Hi = Op.getOperand(1);
739
740 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
741 // Extend sign.
742 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
743 DAG.getValueType(MVT::i1));
744
745 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
746
747 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
748}
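// Shape of the expansion above for an i32 uaddo(a, b):
//   ovf = CARRY(a, b)                      // AMDGPUISD::CARRY: 1 on wrap
//   ovf = sign_extend_inreg(ovf, i1)       // normalize to 0 / -1
//   res = a + b
//   uaddo -> merge_values(res, ovf)
// usubo is identical, with SUB and AMDGPUISD::BORROW.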
749
750SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
751 SDLoc DL(Op);
752 return DAG.getNode(
753 ISD::SETCC,
754 DL,
755 MVT::i1,
756 Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
757 DAG.getCondCode(ISD::SETEQ));
758}
759
760SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
761 SDLoc DL(Op);
762 return DAG.getNode(
763 ISD::SETCC,
764 DL,
765 MVT::i1,
766 Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
767 DAG.getCondCode(ISD::SETEQ));
768}
769
770SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
771 const SDLoc &DL,
772 unsigned DwordOffset) const {
773 unsigned ByteOffset = DwordOffset * 4;
774 PointerType *PtrType =
776
777 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
778 assert(isInt<16>(ByteOffset));
779
780 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
781 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
782 MachinePointerInfo(ConstantPointerNull::get(PtrType)));
783}
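// The DwordOffset values passed in from the intrinsic lowering above map to
// byte offsets in the implicit parameter buffer as follows:
//   r600_read_ngroups_x/y/z      -> dwords 0-2 (bytes  0,  4,  8)
//   r600_read_global_size_x/y/z  -> dwords 3-5 (bytes 12, 16, 20)
//   r600_read_local_size_x/y/z   -> dwords 6-8 (bytes 24, 28, 32)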
784
785bool R600TargetLowering::isZero(SDValue Op) const {
786 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op))
787 return Cst->isZero();
788 if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op))
789 return CstFP->isZero();
790 return false;
791}
792
793bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
794 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
795 return CFP->isExactlyValue(1.0);
796 }
797 return isAllOnesConstant(Op);
798}
799
800bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
801 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
802 return CFP->getValueAPF().isZero();
803 }
804 return isNullConstant(Op);
805}
806
807SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
808 SDLoc DL(Op);
809 EVT VT = Op.getValueType();
810
811 SDValue LHS = Op.getOperand(0);
812 SDValue RHS = Op.getOperand(1);
813 SDValue True = Op.getOperand(2);
814 SDValue False = Op.getOperand(3);
815 SDValue CC = Op.getOperand(4);
816 SDValue Temp;
817
818 if (VT == MVT::f32) {
819 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
820 SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
821 if (MinMax)
822 return MinMax;
823 }
824
825 // LHS and RHS are guaranteed to be the same value type
826 EVT CompareVT = LHS.getValueType();
827
828 // Check if we can lower this to a native operation.
829
830 // Try to lower to a SET* instruction:
831 //
832 // SET* can match the following patterns:
833 //
834 // select_cc f32, f32, -1, 0, cc_supported
835 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
836 // select_cc i32, i32, -1, 0, cc_supported
837 //
838
839 // Move hardware True/False values to the correct operand.
840 if (isHWTrueValue(False) && isHWFalseValue(True)) {
841 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
842 ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
843 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
844 std::swap(False, True);
845 CC = DAG.getCondCode(InverseCC);
846 } else {
847 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
848 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
849 std::swap(False, True);
850 std::swap(LHS, RHS);
851 CC = DAG.getCondCode(SwapInvCC);
852 }
853 }
854 }
855
856 if (isHWTrueValue(True) && isHWFalseValue(False) &&
857 (CompareVT == VT || VT == MVT::i32)) {
858 // This can be matched by a SET* instruction.
859 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
860 }
861
862 // Try to lower to a CND* instruction:
863 //
864 // CND* can match the following patterns:
865 //
866 // select_cc f32, 0.0, f32, f32, cc_supported
867 // select_cc f32, 0.0, i32, i32, cc_supported
868 // select_cc i32, 0, f32, f32, cc_supported
869 // select_cc i32, 0, i32, i32, cc_supported
870 //
871
872 // Try to move the zero value to the RHS
873 if (isZero(LHS)) {
874 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
875 // Try swapping the operands
876 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
877 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
878 std::swap(LHS, RHS);
879 CC = DAG.getCondCode(CCSwapped);
880 } else {
881 // Try inverting the condition and then swapping the operands
882 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
883 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
884 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
885 std::swap(True, False);
886 std::swap(LHS, RHS);
887 CC = DAG.getCondCode(CCSwapped);
888 }
889 }
890 }
891 if (isZero(RHS)) {
892 SDValue Cond = LHS;
893 SDValue Zero = RHS;
894 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
895 if (CompareVT != VT) {
896 // Bitcast True / False to the correct types. This will end up being
897 // a nop, but it allows us to define only a single pattern in the
898 // .TD files for each CND* instruction rather than having to have
899 // one pattern for integer True/False and one for fp True/False
900 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
901 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
902 }
903
904 switch (CCOpcode) {
905 case ISD::SETONE:
906 case ISD::SETUNE:
907 case ISD::SETNE:
908 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
909 Temp = True;
910 True = False;
911 False = Temp;
912 break;
913 default:
914 break;
915 }
916 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
917 Cond, Zero,
918 True, False,
919 DAG.getCondCode(CCOpcode));
920 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
921 }
922
923 // If we make it this far, it means we have no native instructions to handle
924 // this SELECT_CC, so we must lower it.
925 SDValue HWTrue, HWFalse;
926
927 if (CompareVT == MVT::f32) {
928 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
929 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
930 } else if (CompareVT == MVT::i32) {
931 HWTrue = DAG.getAllOnesConstant(DL, CompareVT);
932 HWFalse = DAG.getConstant(0, DL, CompareVT);
933 }
934 else {
935 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
936 }
937
938 // Lower this unsupported SELECT_CC into a combination of two supported
939 // SELECT_CC operations.
940 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
941
942 return DAG.getNode(ISD::SELECT_CC, DL, VT,
943 Cond, HWFalse,
944 True, False,
945 DAG.getCondCode(ISD::SETNE));
946}
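// When neither the SET* nor the CND* pattern applies, the fallback above
// splits the operation in two. A sketch for an f32 compare selecting i32
// values:
//   select_cc a, b, x, y, setolt
// becomes
//   c = select_cc a, b, 1.0f, 0.0f, setolt   // SET*-style: materialize bool
//   r = select_cc c, 0.0f, x, y, setne       // CND*-style: select on c != 0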
947
948SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
949 SelectionDAG &DAG) const {
950 SDLoc SL(Op);
951 EVT VT = Op.getValueType();
952
953 const R600TargetMachine &TM =
954 static_cast<const R600TargetMachine &>(getTargetMachine());
955
956 const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
957 unsigned SrcAS = ASC->getSrcAddressSpace();
958 unsigned DestAS = ASC->getDestAddressSpace();
959
960 if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS)
961 return DAG.getSignedConstant(TM.getNullPointerValue(DestAS), SL, VT);
962
963 return Op;
964}
965
966/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
967/// convert these pointers to a register index. Each register holds
968/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
969/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
970/// for indirect addressing.
971SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
972 unsigned StackWidth,
973 SelectionDAG &DAG) const {
974 unsigned SRLPad;
975 switch(StackWidth) {
976 case 1:
977 SRLPad = 2;
978 break;
979 case 2:
980 SRLPad = 3;
981 break;
982 case 4:
983 SRLPad = 4;
984 break;
985 default: llvm_unreachable("Invalid stack width");
986 }
987
988 SDLoc DL(Ptr);
989 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
990 DAG.getConstant(SRLPad, DL, MVT::i32));
991}
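// Example of the conversion above: with StackWidth == 1 only one 4-byte
// channel per register is used, so reg_index = byte_ptr >> 2; with
// StackWidth == 2 (8 bytes used per register) it is byte_ptr >> 3; and with
// StackWidth == 4 (the full 16-byte register) it is byte_ptr >> 4.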
992
993void R600TargetLowering::getStackAddress(unsigned StackWidth,
994 unsigned ElemIdx,
995 unsigned &Channel,
996 unsigned &PtrIncr) const {
997 switch (StackWidth) {
998 default:
999 case 1:
1000 Channel = 0;
1001 if (ElemIdx > 0) {
1002 PtrIncr = 1;
1003 } else {
1004 PtrIncr = 0;
1005 }
1006 break;
1007 case 2:
1008 Channel = ElemIdx % 2;
1009 if (ElemIdx == 2) {
1010 PtrIncr = 1;
1011 } else {
1012 PtrIncr = 0;
1013 }
1014 break;
1015 case 4:
1016 Channel = ElemIdx;
1017 PtrIncr = 0;
1018 break;
1019 }
1020}
1021
1022SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1023 SelectionDAG &DAG) const {
1024 SDLoc DL(Store);
1025 //TODO: Who creates the i8 stores?
1026 assert(Store->isTruncatingStore()
1027 || Store->getValue().getValueType() == MVT::i8);
1028 assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1029
1030 SDValue Mask;
1031 if (Store->getMemoryVT() == MVT::i8) {
1032 assert(Store->getAlign() >= 1);
1033 Mask = DAG.getConstant(0xff, DL, MVT::i32);
1034 } else if (Store->getMemoryVT() == MVT::i16) {
1035 assert(Store->getAlign() >= 2);
1036 Mask = DAG.getConstant(0xffff, DL, MVT::i32);
1037 } else {
1038 llvm_unreachable("Unsupported private trunc store");
1039 }
1040
1041 SDValue OldChain = Store->getChain();
1042 bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1043 // Skip dummy
1044 SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1045 SDValue BasePtr = Store->getBasePtr();
1046 SDValue Offset = Store->getOffset();
1047 EVT MemVT = Store->getMemoryVT();
1048
1049 SDValue LoadPtr = BasePtr;
1050 if (!Offset.isUndef()) {
1051 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1052 }
1053
1054 // Get dword location
1055 // TODO: this should be eliminated by the future SHR ptr, 2
1056 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1057 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1058
1059 // Load dword
1060 // TODO: can we be smarter about machine pointer info?
1061 MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1062 SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1063
1064 Chain = Dst.getValue(1);
1065
1066 // Get offset in dword
1067 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1068 DAG.getConstant(0x3, DL, MVT::i32));
1069
1070 // Convert byte offset to bit shift
1071 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1072 DAG.getConstant(3, DL, MVT::i32));
1073
1074 // TODO: Contrary to the name of the function,
1075 // it also handles sub i32 non-truncating stores (like i1)
1076 SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1077 Store->getValue());
1078
1079 // Mask the value to the right type
1080 SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1081
1082 // Shift the value in place
1083 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1084 MaskedValue, ShiftAmt);
1085
1086 // Shift the mask in place
1087 SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1088
1089 // Invert the mask. NOTE: if we had native ROL instructions we could
1090 // use inverted mask
1091 DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1092
1093 // Cleanup the target bits
1094 Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1095
1096 // Add the new bits
1097 SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1098
1099 // Store dword
1100 // TODO: Can we be smarter about MachinePointerInfo?
1101 SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
1102
1103 // If we are part of expanded vector, make our neighbors depend on this store
1104 if (VectorTrunc) {
1105 // Make all other vector elements depend on this store
1106 Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1107 DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1108 }
1109 return NewStore;
1110}
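// The sequence built above is, in effect (a sketch for an i8 store of V to
// private byte address A):
//   dword   = load i32 [A & ~3]
//   shift   = (A & 3) * 8
//   cleared = dword & ~(0xff << shift)
//   merged  = cleared | ((V & 0xff) << shift)
//   store i32 merged, [A & ~3]
// i.e. a read-modify-write of the containing dword, which is why the extra
// DUMMY_CHAIN glue is needed when the store came from a scalarized vector.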
1111
1112SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1113 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1114 unsigned AS = StoreNode->getAddressSpace();
1115
1116 SDValue Chain = StoreNode->getChain();
1117 SDValue Ptr = StoreNode->getBasePtr();
1118 SDValue Value = StoreNode->getValue();
1119
1120 EVT VT = Value.getValueType();
1121 EVT MemVT = StoreNode->getMemoryVT();
1122 EVT PtrVT = Ptr.getValueType();
1123
1124 SDLoc DL(Op);
1125
1126 const bool TruncatingStore = StoreNode->isTruncatingStore();
1127
1128 // Neither LOCAL nor PRIVATE can do vectors at the moment
1129 if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
1130 TruncatingStore) &&
1131 VT.isVector()) {
1132 if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
1133 // Add an extra level of chain to isolate this vector
1134 SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1135 // TODO: can the chain be replaced without creating a new store?
1136 SDValue NewStore = DAG.getTruncStore(
1137 NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT,
1138 StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(),
1139 StoreNode->getAAInfo());
1140 StoreNode = cast<StoreSDNode>(NewStore);
1141 }
1142
1143 return scalarizeVectorStore(StoreNode, DAG);
1144 }
1145
1146 Align Alignment = StoreNode->getAlign();
1147 if (Alignment < MemVT.getStoreSize() &&
1148 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
1149 StoreNode->getMemOperand()->getFlags(),
1150 nullptr)) {
1151 return expandUnalignedStore(StoreNode, DAG);
1152 }
1153
1154 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1155 DAG.getConstant(2, DL, PtrVT));
1156
1157 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1158 // It is beneficial to create MSKOR here instead of in the combiner to avoid
1159 // artificial dependencies introduced by RMW
1160 if (TruncatingStore) {
1161 assert(VT.bitsLE(MVT::i32));
1162 SDValue MaskConstant;
1163 if (MemVT == MVT::i8) {
1164 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1165 } else {
1166 assert(MemVT == MVT::i16);
1167 assert(StoreNode->getAlign() >= 2);
1168 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1169 }
1170
1171 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1172 DAG.getConstant(0x00000003, DL, PtrVT));
1173 SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1174 DAG.getConstant(3, DL, VT));
1175
1176 // Put the mask in correct place
1177 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1178
1179 // Put the value bits in correct place
1180 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1181 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1182
1183 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1184 // vector instead.
1185 SDValue Src[4] = {
1186 ShiftedValue,
1187 DAG.getConstant(0, DL, MVT::i32),
1188 DAG.getConstant(0, DL, MVT::i32),
1189 Mask
1190 };
1191 SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1192 SDValue Args[3] = { Chain, Input, DWordAddr };
1193 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1194 Op->getVTList(), Args, MemVT,
1195 StoreNode->getMemOperand());
1196 }
1197 if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1198 // Convert pointer from byte address to dword address.
1199 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1200
1201 if (StoreNode->isIndexed()) {
1202 llvm_unreachable("Indexed stores not supported yet");
1203 } else {
1204 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1205 }
1206 return Chain;
1207 }
1208 }
1209
1210 // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1211 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1212 return SDValue();
1213
1214 if (MemVT.bitsLT(MVT::i32))
1215 return lowerPrivateTruncStore(StoreNode, DAG);
1216
1217 // Standard i32+ store, tag it with DWORDADDR to note that the address
1218 // has been shifted
1219 if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1220 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1221 return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1222 }
1223
1224 // Tagged i32+ stores will be matched by patterns
1225 return SDValue();
1226}
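// For the global truncating-store path above, the MSKOR node receives the
// value and mask already shifted into their byte lane. For an i8 store of V
// to byte address A it packs (a sketch):
//   Input = { (V & 0xFF) << ((A & 3) * 8), 0, 0, 0xFF << ((A & 3) * 8) }
//   addr  = A >> 2                           // dword address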
1227
1228// return 512 + (kc_bank << 12)
1229 static int
1230ConstantAddressBlock(unsigned AddressSpace) {
1231 switch (AddressSpace) {
1232 case AMDGPUAS::CONSTANT_BUFFER_0:
1233 return 512;
1234 case AMDGPUAS::CONSTANT_BUFFER_1:
1235 return 512 + 4096;
1236 case AMDGPUAS::CONSTANT_BUFFER_2:
1237 return 512 + 4096 * 2;
1238 case AMDGPUAS::CONSTANT_BUFFER_3:
1239 return 512 + 4096 * 3;
1240 case AMDGPUAS::CONSTANT_BUFFER_4:
1241 return 512 + 4096 * 4;
1242 case AMDGPUAS::CONSTANT_BUFFER_5:
1243 return 512 + 4096 * 5;
1244 case AMDGPUAS::CONSTANT_BUFFER_6:
1245 return 512 + 4096 * 6;
1246 case AMDGPUAS::CONSTANT_BUFFER_7:
1247 return 512 + 4096 * 7;
1248 case AMDGPUAS::CONSTANT_BUFFER_8:
1249 return 512 + 4096 * 8;
1250 case AMDGPUAS::CONSTANT_BUFFER_9:
1251 return 512 + 4096 * 9;
1252 case AMDGPUAS::CONSTANT_BUFFER_10:
1253 return 512 + 4096 * 10;
1254 case AMDGPUAS::CONSTANT_BUFFER_11:
1255 return 512 + 4096 * 11;
1256 case AMDGPUAS::CONSTANT_BUFFER_12:
1257 return 512 + 4096 * 12;
1258 case AMDGPUAS::CONSTANT_BUFFER_13:
1259 return 512 + 4096 * 13;
1260 case AMDGPUAS::CONSTANT_BUFFER_14:
1261 return 512 + 4096 * 14;
1262 case AMDGPUAS::CONSTANT_BUFFER_15:
1263 return 512 + 4096 * 15;
1264 default:
1265 return -1;
1266 }
1267}
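// e.g. AMDGPUAS::CONSTANT_BUFFER_2 maps to 512 + (2 << 12) = 8704, matching
// the kc_bank formula in the comment above.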
1268
1269SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1270 SelectionDAG &DAG) const {
1271 SDLoc DL(Op);
1272 LoadSDNode *Load = cast<LoadSDNode>(Op);
1273 ISD::LoadExtType ExtType = Load->getExtensionType();
1274 EVT MemVT = Load->getMemoryVT();
1275 assert(Load->getAlign() >= MemVT.getStoreSize());
1276
1277 SDValue BasePtr = Load->getBasePtr();
1278 SDValue Chain = Load->getChain();
1279 SDValue Offset = Load->getOffset();
1280
1281 SDValue LoadPtr = BasePtr;
1282 if (!Offset.isUndef()) {
1283 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1284 }
1285
1286 // Get dword location
1287 // NOTE: this should be eliminated by the future SHR ptr, 2
1288 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1289 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1290
1291 // Load dword
1292 // TODO: can we be smarter about machine pointer info?
1293 MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1294 SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1295
1296 // Get offset within the register.
1297 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1298 LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1299
1300 // Bit offset of target byte (byteIdx * 8).
1301 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1302 DAG.getConstant(3, DL, MVT::i32));
1303
1304 // Shift to the right.
1305 SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1306
1307 // Eliminate the upper bits by setting them to ...
1308 EVT MemEltVT = MemVT.getScalarType();
1309
1310 if (ExtType == ISD::SEXTLOAD) { // ... ones.
1311 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1312 Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1313 } else { // ... or zeros.
1314 Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1315 }
1316
1317 SDValue Ops[] = {
1318 Ret,
1319 Read.getValue(1) // This should be our output chain
1320 };
1321
1322 return DAG.getMergeValues(Ops, DL);
1323}
1324
1325SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1326 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1327 unsigned AS = LoadNode->getAddressSpace();
1328 EVT MemVT = LoadNode->getMemoryVT();
1329 ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1330
1331 if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1332 ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1333 return lowerPrivateExtLoad(Op, DAG);
1334 }
1335
1336 SDLoc DL(Op);
1337 EVT VT = Op.getValueType();
1338 SDValue Chain = LoadNode->getChain();
1339 SDValue Ptr = LoadNode->getBasePtr();
1340
1341 if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1342 LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1343 VT.isVector()) {
1344 SDValue Ops[2];
1345 std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
1346 return DAG.getMergeValues(Ops, DL);
1347 }
1348
1349 // This is still used for explicit load from addrspace(8)
1350 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1351 if (ConstantBlock > -1 &&
1352 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1353 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1354 SDValue Result;
1355 if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1356 isa<ConstantSDNode>(Ptr)) {
1357 return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1358 }
1359 // TODO: Does this even work?
1360 // non-constant ptr can't be folded, keeps it as a v4f32 load
1361 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
1362 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1363 DAG.getConstant(4, DL, MVT::i32)),
1364 DAG.getConstant(LoadNode->getAddressSpace() -
1365 AMDGPUAS::CONSTANT_BUFFER_0,
1366 DL, MVT::i32));
1367
1368 if (!VT.isVector()) {
1369 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1370 DAG.getConstant(0, DL, MVT::i32));
1371 }
1372
1373 SDValue MergedValues[2] = {
1374 Result,
1375 Chain
1376 };
1377 return DAG.getMergeValues(MergedValues, DL);
1378 }
1379
1380 // For most operations returning SDValue() will result in the node being
1381 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1382 // need to manually expand loads that may be legal in some address spaces and
1383 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1384 // compute shaders, since the data is sign extended when it is uploaded to the
1385 // buffer. However SEXT loads from other address spaces are not supported, so
1386 // we need to expand them here.
1387 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1388 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1389 SDValue NewLoad = DAG.getExtLoad(
1390 ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1391 LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags());
1392 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1393 DAG.getValueType(MemVT));
1394
1395 SDValue MergedValues[2] = { Res, Chain };
1396 return DAG.getMergeValues(MergedValues, DL);
1397 }
1398
1399 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1400 return SDValue();
1401 }
1402
1403 // DWORDADDR ISD marks already shifted address
1404 if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1405 assert(VT == MVT::i32);
1406 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1407 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1408 return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1409 }
1410 return SDValue();
1411}
1412
1413SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1414 SDValue Chain = Op.getOperand(0);
1415 SDValue Cond = Op.getOperand(1);
1416 SDValue Jump = Op.getOperand(2);
1417
1418 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1419 Chain, Jump, Cond);
1420}
1421
1422SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1423 SelectionDAG &DAG) const {
1424 MachineFunction &MF = DAG.getMachineFunction();
1425 const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1426
1427 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1428
1429 unsigned FrameIndex = FIN->getIndex();
1430 Register IgnoredFrameReg;
1431 StackOffset Offset =
1432 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1433 return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
1434 SDLoc(Op), Op.getValueType());
1435}
1436
1437CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1438 bool IsVarArg) const {
1439 switch (CC) {
1440 case CallingConv::AMDGPU_KERNEL:
1441 case CallingConv::SPIR_KERNEL:
1442 case CallingConv::C:
1443 case CallingConv::Fast:
1444 case CallingConv::Cold:
1445 llvm_unreachable("kernels should not be handled here");
1446 case CallingConv::AMDGPU_VS:
1447 case CallingConv::AMDGPU_GS:
1448 case CallingConv::AMDGPU_PS:
1449 case CallingConv::AMDGPU_CS:
1450 case CallingConv::AMDGPU_HS:
1451 case CallingConv::AMDGPU_ES:
1452 case CallingConv::AMDGPU_LS:
1453 return CC_R600;
1454 default:
1455 reportFatalUsageError("unsupported calling convention");
1456 }
1457}
1458
1459/// XXX Only kernel functions are supported, so we can assume for now that
1460/// every function is a kernel function, but in the future we should use
1461/// separate calling conventions for kernel and non-kernel functions.
1462SDValue R600TargetLowering::LowerFormalArguments(
1463 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1464 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1465 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1466 SmallVector<CCValAssign, 16> ArgLocs;
1467 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1468 *DAG.getContext());
1469 MachineFunction &MF = DAG.getMachineFunction();
1470
1471 if (AMDGPU::isShader(CallConv)) {
1472 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
1473 } else {
1474 analyzeFormalArgumentsCompute(CCInfo, Ins);
1475 }
1476
1477 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1478 CCValAssign &VA = ArgLocs[i];
1479 const ISD::InputArg &In = Ins[i];
1480 EVT VT = In.VT;
1481 EVT MemVT = VA.getLocVT();
1482 if (!VT.isVector() && MemVT.isVector()) {
1483 // Get load source type if scalarized.
1484 MemVT = MemVT.getVectorElementType();
1485 }
1486
1487 if (AMDGPU::isShader(CallConv)) {
1488 Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
1489 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1490 InVals.push_back(Register);
1491 continue;
1492 }
1493
1494 // i64 isn't a legal type, so the register type used ends up as i32, which
1495 // isn't expected here. It attempts to create this sextload, but it ends up
1496 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1497 // for <1 x i64>.
1498
1499 // The first 36 bytes of the input buffer contain information about
1500 // thread group and global sizes.
1501 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1502 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1503 // FIXME: This should really check the extload type, but the handling of
1504 // extload vector parameters seems to be broken.
1505
1506 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1507 Ext = ISD::SEXTLOAD;
1508 }
1509
1510 // Compute the offset from the value.
1511 // XXX - I think PartOffset should give you this, but it seems to give the
1512 // size of the register which isn't useful.
1513
1514 unsigned PartOffset = VA.getLocMemOffset();
1515 Align Alignment = commonAlignment(Align(VT.getStoreSize()), PartOffset);
1516
1517 MachinePointerInfo PtrInfo(AMDGPUAS::PARAM_I_ADDRESS);
1518 SDValue Arg = DAG.getLoad(
1519 ISD::UNINDEXED, Ext, VT, DL, Chain,
1520 DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
1521 PtrInfo,
1522 MemVT, Alignment, MachineMemOperand::MONonTemporal |
1523 MachineMemOperand::MODereferenceable |
1524 MachineMemOperand::MOInvariant);
1525
1526 InVals.push_back(Arg);
1527 }
1528 return Chain;
1529}
1530
1532 EVT VT) const {
1533 if (!VT.isVector())
1534 return MVT::i32;
1535 return VT.changeVectorElementTypeToInteger();
1536}
1537
1539 const MachineFunction &MF) const {
1540 // Local and Private addresses do not handle vectors. Limit to i32
1542 return (MemVT.getSizeInBits() <= 32);
1543 }
1544 return true;
1545}
1546
1547bool R600TargetLowering::allowsMisalignedMemoryAccesses(
1548 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1549 unsigned *IsFast) const {
1550 if (IsFast)
1551 *IsFast = 0;
1552
1553 if (!VT.isSimple() || VT == MVT::Other)
1554 return false;
1555
1556 if (VT.bitsLT(MVT::i32))
1557 return false;
1558
1559 // TODO: This is a rough estimate.
1560 if (IsFast)
1561 *IsFast = 1;
1562
1563 return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
1564}
1565
1566static SDValue CompactSwizzlableVector(
1567 SelectionDAG &DAG, SDValue VectorEntry,
1568 DenseMap<unsigned, unsigned> &RemapSwizzle) {
1569 assert(RemapSwizzle.empty());
1570
1571 SDLoc DL(VectorEntry);
1572 EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1573
1574 SDValue NewBldVec[4];
1575 for (unsigned i = 0; i < 4; i++)
1576 NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1577 DAG.getIntPtrConstant(i, DL));
1578
1579 for (unsigned i = 0; i < 4; i++) {
1580 if (NewBldVec[i].isUndef())
1581 // We mask the write here to teach later passes that the ith element of
1582 // this vector is undef. That lets them reduce 128-bit register usage,
1583 // break false dependencies, and make the assembly easier to read.
1584 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1585 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1586 if (C->isZero()) {
1587 RemapSwizzle[i] = 4; // SEL_0
1588 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1589 } else if (C->isExactlyValue(1.0)) {
1590 RemapSwizzle[i] = 5; // SEL_1
1591 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1592 }
1593 }
1594
1595 if (NewBldVec[i].isUndef())
1596 continue;
1597
1598 for (unsigned j = 0; j < i; j++) {
1599 if (NewBldVec[i] == NewBldVec[j]) {
1600 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1601 RemapSwizzle[i] = j;
1602 break;
1603 }
1604 }
1605 }
1606
1607 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1608 NewBldVec);
1609}
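// Example of the compaction above: for build_vector (x, 0.0, x, undef) the
// returned vector is build_vector (x, undef, undef, undef) and RemapSwizzle
// records {1 -> 4 (SEL_0), 2 -> 0 (reuse lane 0), 3 -> 7 (SEL_MASK_WRITE)};
// lane 0 keeps its original swizzle because it is not remapped.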
1610
1611static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1612 DenseMap<unsigned, unsigned> &RemapSwizzle) {
1613 assert(RemapSwizzle.empty());
1614
1615 SDLoc DL(VectorEntry);
1616 EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1617
1618 SDValue NewBldVec[4];
1619 bool isUnmovable[4] = {false, false, false, false};
1620 for (unsigned i = 0; i < 4; i++)
1621 NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1622 DAG.getIntPtrConstant(i, DL));
1623
1624 for (unsigned i = 0; i < 4; i++) {
1625 RemapSwizzle[i] = i;
1626 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1627 unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
1628 if (i == Idx)
1629 isUnmovable[Idx] = true;
1630 }
1631 }
1632
1633 for (unsigned i = 0; i < 4; i++) {
1634 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1635 unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
1636 if (isUnmovable[Idx])
1637 continue;
1638 // Swap i and Idx
1639 std::swap(NewBldVec[Idx], NewBldVec[i]);
1640 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1641 break;
1642 }
1643 }
1644
1645 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1646 NewBldVec);
1647}
1648
1649SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
1650 SelectionDAG &DAG,
1651 const SDLoc &DL) const {
1652 // Old -> New swizzle values
1653 DenseMap<unsigned, unsigned> SwizzleRemap;
1654
1655 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1656 for (unsigned i = 0; i < 4; i++) {
1657 unsigned Idx = Swz[i]->getAsZExtVal();
1658 auto It = SwizzleRemap.find(Idx);
1659 if (It != SwizzleRemap.end())
1660 Swz[i] = DAG.getConstant(It->second, DL, MVT::i32);
1661 }
1662
1663 SwizzleRemap.clear();
1664 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1665 for (unsigned i = 0; i < 4; i++) {
1666 unsigned Idx = Swz[i]->getAsZExtVal();
1667 auto It = SwizzleRemap.find(Idx);
1668 if (It != SwizzleRemap.end())
1669 Swz[i] = DAG.getConstant(It->second, DL, MVT::i32);
1670 }
1671
1672 return BuildVector;
1673}
1674
1675SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
1676 SelectionDAG &DAG) const {
1677 SDLoc DL(LoadNode);
1678 EVT VT = LoadNode->getValueType(0);
1679 SDValue Chain = LoadNode->getChain();
1680 SDValue Ptr = LoadNode->getBasePtr();
1682
1683 //TODO: Support smaller loads
1684 if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1685 return SDValue();
1686
1687 if (LoadNode->getAlign() < Align(4))
1688 return SDValue();
1689
1690 int ConstantBlock = ConstantAddressBlock(Block);
1691
1692 SDValue Slots[4];
1693 for (unsigned i = 0; i < 4; i++) {
1694 // We want Const position encoded with the following formula :
1695 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1696 // const_index is Ptr computed by llvm using an alignment of 16.
1697 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1698 // then div by 4 at the ISel step
1699 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1700 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1701 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1702 }
1703 EVT NewVT = MVT::v4i32;
1704 unsigned NumElements = 4;
1705 if (VT.isVector()) {
1706 NewVT = VT;
1707 NumElements = VT.getVectorNumElements();
1708 }
1709 SDValue Result = DAG.getBuildVector(NewVT, DL, ArrayRef(Slots, NumElements));
1710 if (!VT.isVector()) {
1711 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1712 DAG.getConstant(0, DL, MVT::i32));
1713 }
1714 SDValue MergedValues[2] = {
1715 Result,
1716 Chain
1717 };
1718 return DAG.getMergeValues(MergedValues, DL);
1719}
1720
1721//===----------------------------------------------------------------------===//
1722// Custom DAG Optimizations
1723//===----------------------------------------------------------------------===//
1724
1725SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1726 DAGCombinerInfo &DCI) const {
1727 SelectionDAG &DAG = DCI.DAG;
1728 SDLoc DL(N);
1729
1730 switch (N->getOpcode()) {
1731 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1732 case ISD::FP_ROUND: {
1733 SDValue Arg = N->getOperand(0);
1734 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1735 return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1736 Arg.getOperand(0));
1737 }
1738 break;
1739 }
1740
1741 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1742 // (i32 select_cc f32, f32, -1, 0 cc)
1743 //
1744 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1745 // this to one of the SET*_DX10 instructions.
1746 case ISD::FP_TO_SINT: {
1747 SDValue FNeg = N->getOperand(0);
1748 if (FNeg.getOpcode() != ISD::FNEG) {
1749 return SDValue();
1750 }
1751 SDValue SelectCC = FNeg.getOperand(0);
1752 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1753 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1754 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1755 !isHWTrueValue(SelectCC.getOperand(2)) ||
1756 !isHWFalseValue(SelectCC.getOperand(3))) {
1757 return SDValue();
1758 }
1759
1760 return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1761 SelectCC.getOperand(0), // LHS
1762 SelectCC.getOperand(1), // RHS
1763 DAG.getAllOnesConstant(DL, MVT::i32), // True
1764 DAG.getConstant(0, DL, MVT::i32), // False
1765 SelectCC.getOperand(4)); // CC
1766 }
1767
1768 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1769 // => build_vector elt0, ... , NewEltIdx, ... , eltN
1770 case ISD::INSERT_VECTOR_ELT: {
1771 SDValue InVec = N->getOperand(0);
1772 SDValue InVal = N->getOperand(1);
1773 SDValue EltNo = N->getOperand(2);
1774
1775 // If the inserted element is an UNDEF, just use the input vector.
1776 if (InVal.isUndef())
1777 return InVec;
1778
1779 EVT VT = InVec.getValueType();
1780
1781 // If we can't generate a legal BUILD_VECTOR, exit
1782 if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
1783 return SDValue();
1784
1785 // Check that we know which element is being inserted
1786 if (!isa<ConstantSDNode>(EltNo))
1787 return SDValue();
1788 unsigned Elt = EltNo->getAsZExtVal();
1789
1790 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1791 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1792 // vector elements.
1793 SmallVector<SDValue, 8> Ops;
1794 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1795 Ops.append(InVec.getNode()->op_begin(),
1796 InVec.getNode()->op_end());
1797 } else if (InVec.isUndef()) {
1798 unsigned NElts = VT.getVectorNumElements();
1799 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1800 } else {
1801 return SDValue();
1802 }
1803
1804 // Insert the element
1805 if (Elt < Ops.size()) {
1806 // All the operands of BUILD_VECTOR must have the same type;
1807 // we enforce that here.
1808 EVT OpVT = Ops[0].getValueType();
1809 if (InVal.getValueType() != OpVT)
1810 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1811 DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1812 DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1813 Ops[Elt] = InVal;
1814 }
1815
1816 // Return the new vector
1817 return DAG.getBuildVector(VT, DL, Ops);
1818 }
1819
1820 // Extract_vec (Build_vector) generated by custom lowering
1821 // also needs to be custom combined.
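// For example (illustrative element names):
//   (extract_vector_elt (build_vector a, b, c, d), 1) -> b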
1822 case ISD::EXTRACT_VECTOR_ELT: {
1823 SDValue Arg = N->getOperand(0);
1824 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1825 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1826 unsigned Element = Const->getZExtValue();
1827 return Arg->getOperand(Element);
1828 }
1829 }
1830 if (Arg.getOpcode() == ISD::BITCAST &&
1831 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1832 (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1833 Arg.getValueType().getVectorNumElements())) {
1834 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1835 unsigned Element = Const->getZExtValue();
1836 return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1837 Arg->getOperand(0).getOperand(Element));
1838 }
1839 }
1840 break;
1841 }
1842
1843 case ISD::SELECT_CC: {
1844 // Try common optimizations
1845 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1846 return Ret;
1847
1848 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1849 // selectcc x, y, a, b, inv(cc)
1850 //
1851 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1852 // selectcc x, y, a, b, cc
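// For example, since inv(setolt) = setuge:
//   (selectcc (selectcc x, y, a, b, setolt), b, a, b, seteq)
//     -> (selectcc x, y, a, b, setuge)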
1853 SDValue LHS = N->getOperand(0);
1854 if (LHS.getOpcode() != ISD::SELECT_CC) {
1855 return SDValue();
1856 }
1857
1858 SDValue RHS = N->getOperand(1);
1859 SDValue True = N->getOperand(2);
1860 SDValue False = N->getOperand(3);
1861 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1862
1863 if (LHS.getOperand(2).getNode() != True.getNode() ||
1864 LHS.getOperand(3).getNode() != False.getNode() ||
1865 RHS.getNode() != False.getNode()) {
1866 return SDValue();
1867 }
1868
1869 switch (NCC) {
1870 default: return SDValue();
1871 case ISD::SETNE: return LHS;
1872 case ISD::SETEQ: {
1873 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1874 LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
1875 if (DCI.isBeforeLegalizeOps() ||
1876 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1877 return DAG.getSelectCC(DL,
1878 LHS.getOperand(0),
1879 LHS.getOperand(1),
1880 LHS.getOperand(2),
1881 LHS.getOperand(3),
1882 LHSCC);
1883 break;
1884 }
1885 }
1886 return SDValue();
1887 }
1888
1889 case AMDGPUISD::R600_EXPORT: {
1890 SDValue Arg = N->getOperand(1);
1891 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1892 break;
1893
1894 SDValue NewArgs[8] = {
1895 N->getOperand(0), // Chain
1896 SDValue(),
1897 N->getOperand(2), // ArrayBase
1898 N->getOperand(3), // Type
1899 N->getOperand(4), // SWZ_X
1900 N->getOperand(5), // SWZ_Y
1901 N->getOperand(6), // SWZ_Z
1902 N->getOperand(7) // SWZ_W
1903 };
1904 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1905 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1906 }
1907 case AMDGPUISD::TEXTURE_FETCH: {
1908 SDValue Arg = N->getOperand(1);
1909 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1910 break;
1911
1912 SDValue NewArgs[19] = {
1913 N->getOperand(0),
1914 N->getOperand(1),
1915 N->getOperand(2),
1916 N->getOperand(3),
1917 N->getOperand(4),
1918 N->getOperand(5),
1919 N->getOperand(6),
1920 N->getOperand(7),
1921 N->getOperand(8),
1922 N->getOperand(9),
1923 N->getOperand(10),
1924 N->getOperand(11),
1925 N->getOperand(12),
1926 N->getOperand(13),
1927 N->getOperand(14),
1928 N->getOperand(15),
1929 N->getOperand(16),
1930 N->getOperand(17),
1931 N->getOperand(18),
1932 };
1933 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1934 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1935 }
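// Roughly, in both cases above OptimizeSwizzle (defined earlier in this file)
// returns the rewritten BUILD_VECTOR operand and fills in the swizzle slots it
// is handed (&NewArgs[4] for exports, &NewArgs[2] for texture fetches); see
// its definition for the exact behaviour.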
1936
1937 case ISD::LOAD: {
1938 LoadSDNode *LoadNode = cast<LoadSDNode>(N);
1939 SDValue Ptr = LoadNode->getBasePtr();
1940 if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1941 isa<ConstantSDNode>(Ptr))
1942 return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1943 break;
1944 }
1945
1946 default: break;
1947 }
1948
1949 return AMDGPUTargetLowering::PerformDAGCombine(N, DCI);
1950 }
1951
1952bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1953 SDValue &Src, SDValue &Neg, SDValue &Abs,
1954 SDValue &Sel, SDValue &Imm,
1955 SelectionDAG &DAG) const {
1956 const R600InstrInfo *TII = Subtarget->getInstrInfo();
1957 if (!Src.isMachineOpcode())
1958 return false;
1959
1960 switch (Src.getMachineOpcode()) {
1961 case R600::FNEG_R600:
1962 if (!Neg.getNode())
1963 return false;
1964 Src = Src.getOperand(0);
1965 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1966 return true;
1967 case R600::FABS_R600:
1968 if (!Abs.getNode())
1969 return false;
1970 Src = Src.getOperand(0);
1971 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1972 return true;
1973 case R600::CONST_COPY: {
1974 unsigned Opcode = ParentNode->getMachineOpcode();
1975 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
1976
1977 if (!Sel.getNode())
1978 return false;
1979
1980 SDValue CstOffset = Src.getOperand(0);
1981 if (ParentNode->getValueType(0).isVector())
1982 return false;
1983
1984 // Gather constant values
1985 int SrcIndices[] = {
1986 TII->getOperandIdx(Opcode, R600::OpName::src0),
1987 TII->getOperandIdx(Opcode, R600::OpName::src1),
1988 TII->getOperandIdx(Opcode, R600::OpName::src2),
1989 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
1990 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
1991 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
1992 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
1993 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
1994 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
1995 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
1996 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
1997 };
1998 std::vector<unsigned> Consts;
1999 for (int OtherSrcIdx : SrcIndices) {
2000 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
2001 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
2002 continue;
2003 if (HasDst) {
2004 OtherSrcIdx--;
2005 OtherSelIdx--;
2006 }
2007 if (RegisterSDNode *Reg =
2008 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2009 if (Reg->getReg() == R600::ALU_CONST) {
2010 Consts.push_back(ParentNode->getConstantOperandVal(OtherSelIdx));
2011 }
2012 }
2013 }
2014
2015 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2016 Consts.push_back(Cst->getZExtValue());
2017 if (!TII->fitsConstReadLimitations(Consts)) {
2018 return false;
2019 }
2020
2021 Sel = CstOffset;
2022 Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2023 return true;
2024 }
2025 case R600::MOV_IMM_GLOBAL_ADDR:
2026 // Check if the Imm slot is used. Taken from below.
2027 if (Imm->getAsZExtVal())
2028 return false;
2029 Imm = Src.getOperand(0);
2030 Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2031 return true;
2032 case R600::MOV_IMM_I32:
2033 case R600::MOV_IMM_F32: {
2034 unsigned ImmReg = R600::ALU_LITERAL_X;
2035 uint64_t ImmValue = 0;
2036
2037 if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2038 ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2039 float FloatValue = FPC->getValueAPF().convertToFloat();
2040 if (FloatValue == 0.0) {
2041 ImmReg = R600::ZERO;
2042 } else if (FloatValue == 0.5) {
2043 ImmReg = R600::HALF;
2044 } else if (FloatValue == 1.0) {
2045 ImmReg = R600::ONE;
2046 } else {
2047 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2048 }
2049 } else {
2050 uint64_t Value = Src.getConstantOperandVal(0);
2051 if (Value == 0) {
2052 ImmReg = R600::ZERO;
2053 } else if (Value == 1) {
2054 ImmReg = R600::ONE_INT;
2055 } else {
2056 ImmValue = Value;
2057 }
2058 }
2059
2060 // Check that we aren't already using an immediate.
2061 // XXX: It's possible for an instruction to have more than one
2062 // immediate operand, but this is not supported yet.
2063 if (ImmReg == R600::ALU_LITERAL_X) {
2064 if (!Imm.getNode())
2065 return false;
2066 ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2067 if (C->getZExtValue())
2068 return false;
2069 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2070 }
2071 Src = DAG.getRegister(ImmReg, MVT::i32);
2072 return true;
2073 }
2074 default:
2075 return false;
2076 }
2077}
2078
2079/// Fold the instructions after selecting them
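/// For example, an FNEG_R600 or FABS_R600 feeding a source operand is folded
/// into that operand's neg/abs modifier, and MOV_IMM_* sources become inline
/// constants (ZERO, HALF, ONE, ...) or an ALU_LITERAL_X read with the value
/// placed in the literal slot; see FoldOperand above for the individual cases.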
2080SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2081 SelectionDAG &DAG) const {
2082 const R600InstrInfo *TII = Subtarget->getInstrInfo();
2083 if (!Node->isMachineOpcode())
2084 return Node;
2085
2086 unsigned Opcode = Node->getMachineOpcode();
2087 SDValue FakeOp;
2088
2089 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2090
2091 if (Opcode == R600::DOT_4) {
2092 int OperandIdx[] = {
2093 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2094 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2095 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2096 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2097 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2098 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2099 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2100 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2101 };
2102 int NegIdx[] = {
2103 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2104 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2105 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2106 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2107 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2108 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2109 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2110 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2111 };
2112 int AbsIdx[] = {
2113 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2114 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2115 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2116 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2117 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2118 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2119 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2120 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2121 };
2122 for (unsigned i = 0; i < 8; i++) {
2123 if (OperandIdx[i] < 0)
2124 return Node;
2125 SDValue &Src = Ops[OperandIdx[i] - 1];
2126 SDValue &Neg = Ops[NegIdx[i] - 1];
2127 SDValue &Abs = Ops[AbsIdx[i] - 1];
2128 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2129 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2130 if (HasDst)
2131 SelIdx--;
2132 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2133 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2134 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2135 }
2136 } else if (Opcode == R600::REG_SEQUENCE) {
2137 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2138 SDValue &Src = Ops[i];
2139 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2140 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2141 }
2142 } else {
2143 if (!TII->hasInstrModifiers(Opcode))
2144 return Node;
2145 int OperandIdx[] = {
2146 TII->getOperandIdx(Opcode, R600::OpName::src0),
2147 TII->getOperandIdx(Opcode, R600::OpName::src1),
2148 TII->getOperandIdx(Opcode, R600::OpName::src2)
2149 };
2150 int NegIdx[] = {
2151 TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2152 TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2153 TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2154 };
2155 int AbsIdx[] = {
2156 TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2157 TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2158 -1
2159 };
2160 for (unsigned i = 0; i < 3; i++) {
2161 if (OperandIdx[i] < 0)
2162 return Node;
2163 SDValue &Src = Ops[OperandIdx[i] - 1];
2164 SDValue &Neg = Ops[NegIdx[i] - 1];
2165 SDValue FakeAbs;
2166 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2167 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2168 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2169 int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2170 if (HasDst) {
2171 SelIdx--;
2172 ImmIdx--;
2173 }
2174 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2175 SDValue &Imm = Ops[ImmIdx];
2176 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2177 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2178 }
2179 }
2180
2181 return Node;
2182}
2183
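// Roughly: returning CmpXChg below asks the AtomicExpand pass to rewrite the
// atomicrmw (e.g. an i32 nand) as a compare-exchange retry loop, while
// returning None leaves the 32/64-bit cases for normal instruction selection.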
2184 TargetLowering::AtomicExpansionKind
2185 R600TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
2186 switch (RMW->getOperation()) {
2187 case AtomicRMWInst::Nand:
2188 case AtomicRMWInst::FAdd:
2189 case AtomicRMWInst::FSub:
2190 case AtomicRMWInst::FMax:
2191 case AtomicRMWInst::FMin:
2192 return AtomicExpansionKind::CmpXChg;
2193 case AtomicRMWInst::UIncWrap:
2194 case AtomicRMWInst::UDecWrap:
2195 // FIXME: Cayman at least appears to have instructions for this, but the
2196 // instruction definitions appear to be missing.
2197 return AtomicExpansionKind::CmpXChg;
2198 case AtomicRMWInst::Xchg: {
2199 const DataLayout &DL = RMW->getFunction()->getDataLayout();
2200 unsigned ValSize = DL.getTypeSizeInBits(RMW->getType());
2201 if (ValSize == 32 || ValSize == 64)
2202 return AtomicExpansionKind::None;
2203 return AtomicExpansionKind::CmpXChg;
2204 }
2205 default:
2206 if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) {
2207 unsigned Size = IntTy->getBitWidth();
2208 if (Size == 32 || Size == 64)
2209 return AtomicExpansionKind::None;
2210 }
2211
2212 return AtomicExpansionKind::CmpXChg;
2213 }
2214
2215 llvm_unreachable("covered atomicrmw op switch");
2216}