LLVM 20.0.0git — R600ISelLowering.cpp
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Custom DAG lowering for R600
11//
12//===----------------------------------------------------------------------===//
13
14#include "R600ISelLowering.h"
15#include "AMDGPU.h"
16#include "MCTargetDesc/R600MCTargetDesc.h"
17#include "R600Defines.h"
18#include "R600MachineFunctionInfo.h"
19#include "R600Subtarget.h"
20#include "R600TargetMachine.h"
21#include "llvm/CodeGen/MachineFunction.h"
22#include "llvm/IR/IntrinsicsAMDGPU.h"
23#include "llvm/IR/IntrinsicsR600.h"
24
25using namespace llvm;
26
27#include "R600GenCallingConv.inc"
28
29R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
30 const R600Subtarget &STI)
31 : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
32 addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
33 addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
34 addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
35 addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
36 addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
37 addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
38
39 setBooleanContents(ZeroOrNegativeOneBooleanContent);
40 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
41
42 computeRegisterProperties(Subtarget->getRegisterInfo());
43
44 // Legalize loads and stores to the private address space.
45 setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
46
47 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
48 // spaces, so it is custom lowered to handle those where it isn't.
49 for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
50 for (MVT VT : MVT::integer_valuetypes()) {
51 setLoadExtAction(Op, VT, MVT::i1, Promote);
52 setLoadExtAction(Op, VT, MVT::i8, Custom);
53 setLoadExtAction(Op, VT, MVT::i16, Custom);
54 }
55
56 // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
57 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i32,
58 MVT::v2i1, Expand);
59
60 setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v4i32,
61 MVT::v4i1, Expand);
62
63 setOperationAction(ISD::STORE, {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32},
64 Custom);
65
66 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
67 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
68 // We need to include these since trunc STORES to PRIVATE need
69 // special handling to accommodate RMW
70 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
71 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
72 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
73 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
74 setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
75 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
76 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
77 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
78 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
79 setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);
80
81 // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
82 setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
83 setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
84
85 // Set condition code actions
86 setCondCodeAction({ISD::SETO, ISD::SETUO, ISD::SETLT, ISD::SETLE,
87 ISD::SETOLT, ISD::SETOLE, ISD::SETONE, ISD::SETUEQ,
88 ISD::SETUNE, ISD::SETULT, ISD::SETULE},
89 MVT::f32, Expand);
90
91 setCondCodeAction({ISD::SETLE, ISD::SETLT, ISD::SETULE, ISD::SETULT},
92 MVT::i32, Expand);
93
94 setOperationAction({ISD::FCOS, ISD::FSIN}, MVT::f32, Custom);
95
96 setOperationAction(ISD::SETCC, {MVT::v4i32, MVT::v2i32}, Expand);
97
98 setOperationAction(ISD::BR_CC, {MVT::i32, MVT::f32}, Expand);
99 setOperationAction(ISD::BRCOND, MVT::Other, Custom);
100
101 setOperationAction(ISD::FSUB, MVT::f32, Expand);
102
103 setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR},
104 MVT::f64, Custom);
105
106 setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom);
107
108 setOperationAction(ISD::SETCC, {MVT::i32, MVT::f32}, Expand);
109 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT}, {MVT::i1, MVT::i64},
110 Custom);
111
112 setOperationAction(ISD::SELECT, {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32},
113 Expand);
114
115 // ADD, SUB overflow.
116 // TODO: turn these into Legal?
117 if (Subtarget->hasCARRY())
118 setOperationAction(ISD::UADDO, MVT::i32, Custom);
119
120 if (Subtarget->hasBORROW())
121 setOperationAction(ISD::USUBO, MVT::i32, Custom);
122
123 // Expand sign extension of vectors
124 if (!Subtarget->hasBFE())
125 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
126
127 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i1, MVT::v4i1}, Expand);
128
129 if (!Subtarget->hasBFE())
130 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
131 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i8, MVT::v4i8}, Expand);
132
133 if (!Subtarget->hasBFE())
134 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
135 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v4i16}, Expand);
136
137 setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
138 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i32, MVT::v4i32}, Expand);
139
140 setOperationAction(ISD::FrameIndex, MVT::i32, Custom);
141
142 setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);
143
144 setOperationAction(ISD::EXTRACT_VECTOR_ELT,
145 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
146
147 setOperationAction(ISD::INSERT_VECTOR_ELT,
148 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
149
150 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
151 // to be Legal/Custom in order to avoid library calls.
152 setOperationAction({ISD::SHL_PARTS, ISD::SRA_PARTS, ISD::SRL_PARTS}, MVT::i32,
153 Custom);
154
155 if (!Subtarget->hasFMA())
156 setOperationAction(ISD::FMA, {MVT::f32, MVT::f64}, Expand);
157
158 // FIXME: May need no denormals check
159 setOperationAction(ISD::FMAD, MVT::f32, Legal);
160
161 if (!Subtarget->hasBFI())
162 // fcopysign can be done in a single instruction with BFI.
163 setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);
164
165 if (!Subtarget->hasBCNT(32))
166 setOperationAction(ISD::CTPOP, MVT::i32, Expand);
167
168 if (!Subtarget->hasBCNT(64))
169 setOperationAction(ISD::CTPOP, MVT::i64, Expand);
170
171 if (Subtarget->hasFFBH())
172 setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);
173
174 if (Subtarget->hasFFBL())
175 setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);
176
177 // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
178 // need it for R600.
179 if (Subtarget->hasBFE())
180 setHasExtractBitsInsn(true);
181
184
185 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
186 for (MVT VT : ScalarIntVTs)
187 setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT,
188 Expand);
189
190 // LLVM will expand these to atomic_cmp_swap(0)
191 // and atomic_swap, respectively.
192 setOperationAction({ISD::ATOMIC_LOAD, ISD::ATOMIC_STORE}, MVT::i32, Expand);
193
194 // We need to custom lower some of the intrinsics
195 setOperationAction({ISD::INTRINSIC_VOID, ISD::INTRINSIC_WO_CHAIN}, MVT::Other,
196 Custom);
197
198 setSchedulingPreference(Sched::Source);
199
200 setTargetDAGCombine({ISD::FP_ROUND, ISD::FP_TO_SINT, ISD::EXTRACT_VECTOR_ELT,
201 ISD::SELECT_CC, ISD::INSERT_VECTOR_ELT});
202}
203
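// Returns true when the instruction is immediately followed by a RETURN, in
// which case the preceding export/store may set its End Of Program bit.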
204static bool isEOP(MachineBasicBlock::iterator I) {
205 if (std::next(I) == I->getParent()->end())
206 return false;
207 return std::next(I)->getOpcode() == R600::RETURN;
208}
209
210MachineBasicBlock *
211R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
212 MachineBasicBlock *BB) const {
213 MachineFunction *MF = BB->getParent();
214 MachineRegisterInfo &MRI = MF->getRegInfo();
215 MachineBasicBlock::iterator I = MI;
216 const R600InstrInfo *TII = Subtarget->getInstrInfo();
217
218 switch (MI.getOpcode()) {
219 default:
220 // Replace LDS_*_RET instructions that don't have any uses with the
221 // equivalent LDS_*_NORET instruction.
222 if (TII->isLDSRetInstr(MI.getOpcode())) {
223 int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
224 assert(DstIdx != -1);
225 MachineInstrBuilder NewMI;
226 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
227 // LDS_1A2D support and remove this special case.
228 if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
229 MI.getOpcode() == R600::LDS_CMPST_RET)
230 return BB;
231
232 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
233 TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
234 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
235 NewMI.add(MO);
236 } else {
237 return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
238 }
239 break;
240
241 case R600::FABS_R600: {
242 MachineInstr *NewMI = TII->buildDefaultInstruction(
243 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
244 MI.getOperand(1).getReg());
245 TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
246 break;
247 }
248
249 case R600::FNEG_R600: {
250 MachineInstr *NewMI = TII->buildDefaultInstruction(
251 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
252 MI.getOperand(1).getReg());
253 TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
254 break;
255 }
256
257 case R600::MASK_WRITE: {
258 Register maskedRegister = MI.getOperand(0).getReg();
259 assert(maskedRegister.isVirtual());
260 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
261 TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
262 break;
263 }
264
265 case R600::MOV_IMM_F32:
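// An FP immediate is materialized by moving its raw bit pattern into the
// ALU literal slot, hence the bitcast to an integer below.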
266 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
267 .getFPImm()
268 ->getValueAPF()
269 .bitcastToAPInt()
270 .getZExtValue());
271 break;
272
273 case R600::MOV_IMM_I32:
274 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
275 MI.getOperand(1).getImm());
276 break;
277
278 case R600::MOV_IMM_GLOBAL_ADDR: {
279 //TODO: Perhaps combine this instruction with the next if possible
280 auto MIB = TII->buildDefaultInstruction(
281 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
282 int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
283 //TODO: Ugh this is rather ugly
284 const MachineOperand &MO = MI.getOperand(1);
285 MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
286 MO.getTargetFlags());
287 break;
288 }
289
290 case R600::CONST_COPY: {
291 MachineInstr *NewMI = TII->buildDefaultInstruction(
292 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
293 TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
294 MI.getOperand(1).getImm());
295 break;
296 }
297
298 case R600::RAT_WRITE_CACHELESS_32_eg:
299 case R600::RAT_WRITE_CACHELESS_64_eg:
300 case R600::RAT_WRITE_CACHELESS_128_eg:
301 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
302 .add(MI.getOperand(0))
303 .add(MI.getOperand(1))
304 .addImm(isEOP(I)); // Set End of program bit
305 break;
306
307 case R600::RAT_STORE_TYPED_eg:
308 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
309 .add(MI.getOperand(0))
310 .add(MI.getOperand(1))
311 .add(MI.getOperand(2))
312 .addImm(isEOP(I)); // Set End of program bit
313 break;
314
315 case R600::BRANCH:
316 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
317 .add(MI.getOperand(0));
318 break;
319
320 case R600::BRANCH_COND_f32: {
321 MachineInstr *NewMI =
322 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
323 R600::PREDICATE_BIT)
324 .add(MI.getOperand(1))
325 .addImm(R600::PRED_SETNE)
326 .addImm(0); // Flags
327 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
328 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
329 .add(MI.getOperand(0))
330 .addReg(R600::PREDICATE_BIT, RegState::Kill);
331 break;
332 }
333
334 case R600::BRANCH_COND_i32: {
335 MachineInstr *NewMI =
336 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
337 R600::PREDICATE_BIT)
338 .add(MI.getOperand(1))
339 .addImm(R600::PRED_SETNE_INT)
340 .addImm(0); // Flags
341 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
342 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
343 .add(MI.getOperand(0))
344 .addReg(R600::PREDICATE_BIT, RegState::Kill);
345 break;
346 }
347
348 case R600::EG_ExportSwz:
349 case R600::R600_ExportSwz: {
350 // Instruction is left unmodified if it's not the last one of its type.
351 bool isLastInstructionOfItsType = true;
352 unsigned InstExportType = MI.getOperand(1).getImm();
353 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
354 EndBlock = BB->end(); NextExportInst != EndBlock;
355 NextExportInst = std::next(NextExportInst)) {
356 if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
357 NextExportInst->getOpcode() == R600::R600_ExportSwz) {
358 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
359 .getImm();
360 if (CurrentInstExportType == InstExportType) {
361 isLastInstructionOfItsType = false;
362 break;
363 }
364 }
365 }
366 bool EOP = isEOP(I);
367 if (!EOP && !isLastInstructionOfItsType)
368 return BB;
369 unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
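// 84 and 40 are the CF_INST_EXPORT_DONE encodings for Evergreen and R600
// respectively.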
370 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
371 .add(MI.getOperand(0))
372 .add(MI.getOperand(1))
373 .add(MI.getOperand(2))
374 .add(MI.getOperand(3))
375 .add(MI.getOperand(4))
376 .add(MI.getOperand(5))
377 .add(MI.getOperand(6))
378 .addImm(CfInst)
379 .addImm(EOP);
380 break;
381 }
382 case R600::RETURN: {
383 return BB;
384 }
385 }
386
387 MI.eraseFromParent();
388 return BB;
389}
390
391//===----------------------------------------------------------------------===//
392// Custom DAG Lowering Operations
393//===----------------------------------------------------------------------===//
394
395SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
396 MachineFunction &MF = DAG.getMachineFunction();
397 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
398 switch (Op.getOpcode()) {
399 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
400 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
401 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
402 case ISD::SHL_PARTS:
403 case ISD::SRA_PARTS:
404 case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
405 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
406 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
407 case ISD::FCOS:
408 case ISD::FSIN: return LowerTrig(Op, DAG);
409 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
410 case ISD::STORE: return LowerSTORE(Op, DAG);
411 case ISD::LOAD: {
412 SDValue Result = LowerLOAD(Op, DAG);
413 assert((!Result.getNode() ||
414 Result.getNode()->getNumValues() == 2) &&
415 "Load should return a value and a chain");
416 return Result;
417 }
418
419 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
420 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
421 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
422 case ISD::ADDRSPACECAST:
423 return lowerADDRSPACECAST(Op, DAG);
424 case ISD::INTRINSIC_VOID: {
425 SDValue Chain = Op.getOperand(0);
426 unsigned IntrinsicID = Op.getConstantOperandVal(1);
427 switch (IntrinsicID) {
428 case Intrinsic::r600_store_swizzle: {
429 SDLoc DL(Op);
430 const SDValue Args[8] = {
431 Chain,
432 Op.getOperand(2), // Export Value
433 Op.getOperand(3), // ArrayBase
434 Op.getOperand(4), // Type
435 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
436 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
437 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
438 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
439 };
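// The identity swizzle (0,1,2,3) written here is compacted later by
// OptimizeSwizzle when the export node is revisited in PerformDAGCombine.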
440 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
441 }
442
443 // default for switch(IntrinsicID)
444 default: break;
445 }
446 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
447 break;
448 }
449 case ISD::INTRINSIC_WO_CHAIN: {
450 unsigned IntrinsicID = Op.getConstantOperandVal(0);
451 EVT VT = Op.getValueType();
452 SDLoc DL(Op);
453 switch (IntrinsicID) {
454 case Intrinsic::r600_tex:
455 case Intrinsic::r600_texc: {
456 unsigned TextureOp;
457 switch (IntrinsicID) {
458 case Intrinsic::r600_tex:
459 TextureOp = 0;
460 break;
461 case Intrinsic::r600_texc:
462 TextureOp = 1;
463 break;
464 default:
465 llvm_unreachable("unhandled texture operation");
466 }
467
468 SDValue TexArgs[19] = {
469 DAG.getConstant(TextureOp, DL, MVT::i32),
470 Op.getOperand(1),
471 DAG.getConstant(0, DL, MVT::i32),
472 DAG.getConstant(1, DL, MVT::i32),
473 DAG.getConstant(2, DL, MVT::i32),
474 DAG.getConstant(3, DL, MVT::i32),
475 Op.getOperand(2),
476 Op.getOperand(3),
477 Op.getOperand(4),
478 DAG.getConstant(0, DL, MVT::i32),
479 DAG.getConstant(1, DL, MVT::i32),
480 DAG.getConstant(2, DL, MVT::i32),
481 DAG.getConstant(3, DL, MVT::i32),
482 Op.getOperand(5),
483 Op.getOperand(6),
484 Op.getOperand(7),
485 Op.getOperand(8),
486 Op.getOperand(9),
487 Op.getOperand(10)
488 };
489 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
490 }
491 case Intrinsic::r600_dot4: {
492 SDValue Args[8] = {
493 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
494 DAG.getConstant(0, DL, MVT::i32)),
495 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
496 DAG.getConstant(0, DL, MVT::i32)),
497 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
498 DAG.getConstant(1, DL, MVT::i32)),
499 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
500 DAG.getConstant(1, DL, MVT::i32)),
501 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
502 DAG.getConstant(2, DL, MVT::i32)),
503 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
504 DAG.getConstant(2, DL, MVT::i32)),
505 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
506 DAG.getConstant(3, DL, MVT::i32)),
507 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
508 DAG.getConstant(3, DL, MVT::i32))
509 };
510 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
511 }
512
513 case Intrinsic::r600_implicitarg_ptr: {
514 MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
515 uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
516 return DAG.getConstant(ByteOffset, DL, PtrVT);
517 }
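// The next nine intrinsics read consecutive dwords of the implicit kernel
// argument buffer: ngroups.{x,y,z}, global_size.{x,y,z}, local_size.{x,y,z}.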
518 case Intrinsic::r600_read_ngroups_x:
519 return LowerImplicitParameter(DAG, VT, DL, 0);
520 case Intrinsic::r600_read_ngroups_y:
521 return LowerImplicitParameter(DAG, VT, DL, 1);
522 case Intrinsic::r600_read_ngroups_z:
523 return LowerImplicitParameter(DAG, VT, DL, 2);
524 case Intrinsic::r600_read_global_size_x:
525 return LowerImplicitParameter(DAG, VT, DL, 3);
526 case Intrinsic::r600_read_global_size_y:
527 return LowerImplicitParameter(DAG, VT, DL, 4);
528 case Intrinsic::r600_read_global_size_z:
529 return LowerImplicitParameter(DAG, VT, DL, 5);
530 case Intrinsic::r600_read_local_size_x:
531 return LowerImplicitParameter(DAG, VT, DL, 6);
532 case Intrinsic::r600_read_local_size_y:
533 return LowerImplicitParameter(DAG, VT, DL, 7);
534 case Intrinsic::r600_read_local_size_z:
535 return LowerImplicitParameter(DAG, VT, DL, 8);
536
537 case Intrinsic::r600_read_tgid_x:
538 case Intrinsic::amdgcn_workgroup_id_x:
539 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
540 R600::T1_X, VT);
541 case Intrinsic::r600_read_tgid_y:
542 case Intrinsic::amdgcn_workgroup_id_y:
543 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
544 R600::T1_Y, VT);
545 case Intrinsic::r600_read_tgid_z:
546 case Intrinsic::amdgcn_workgroup_id_z:
547 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
548 R600::T1_Z, VT);
549 case Intrinsic::r600_read_tidig_x:
550 case Intrinsic::amdgcn_workitem_id_x:
551 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
552 R600::T0_X, VT);
553 case Intrinsic::r600_read_tidig_y:
554 case Intrinsic::amdgcn_workitem_id_y:
555 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
556 R600::T0_Y, VT);
557 case Intrinsic::r600_read_tidig_z:
558 case Intrinsic::amdgcn_workitem_id_z:
559 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
560 R600::T0_Z, VT);
561
562 case Intrinsic::r600_recipsqrt_ieee:
563 return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
564
565 case Intrinsic::r600_recipsqrt_clamped:
566 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
567 default:
568 return Op;
569 }
570
571 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
572 break;
573 }
574 } // end switch(Op.getOpcode())
575 return SDValue();
576}
577
578void R600TargetLowering::ReplaceNodeResults(SDNode *N,
579 SmallVectorImpl<SDValue> &Results,
580 SelectionDAG &DAG) const {
581 switch (N->getOpcode()) {
582 default:
583 AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
584 return;
585 case ISD::FP_TO_UINT:
586 if (N->getValueType(0) == MVT::i1) {
587 Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
588 return;
589 }
590 // Since we don't care about out of bounds values we can use FP_TO_SINT for
591 // uints too. The DAGLegalizer code for uint considers some extra cases
592 // which are not necessary here.
593 [[fallthrough]];
594 case ISD::FP_TO_SINT: {
595 if (N->getValueType(0) == MVT::i1) {
596 Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
597 return;
598 }
599
600 SDValue Result;
601 if (expandFP_TO_SINT(N, Result, DAG))
602 Results.push_back(Result);
603 return;
604 }
605 case ISD::SDIVREM: {
606 SDValue Op = SDValue(N, 1);
607 SDValue RES = LowerSDIVREM(Op, DAG);
608 Results.push_back(RES);
609 Results.push_back(RES.getValue(1));
610 break;
611 }
612 case ISD::UDIVREM: {
613 SDValue Op = SDValue(N, 0);
614 LowerUDIVREM64(Op, DAG, Results);
615 break;
616 }
617 }
618}
619
620SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
621 SDValue Vector) const {
622 SDLoc DL(Vector);
623 EVT VecVT = Vector.getValueType();
624 EVT EltVT = VecVT.getVectorElementType();
625 SmallVector<SDValue, 8> Args;
626
627 for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
628 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
629 DAG.getVectorIdxConstant(i, DL)));
630 }
631
632 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
633}
634
635SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
636 SelectionDAG &DAG) const {
637 SDLoc DL(Op);
638 SDValue Vector = Op.getOperand(0);
639 SDValue Index = Op.getOperand(1);
640
641 if (isa<ConstantSDNode>(Index) ||
642 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
643 return Op;
644
645 Vector = vectorToVerticalVector(DAG, Vector);
646 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
647 Vector, Index);
648}
649
650SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
651 SelectionDAG &DAG) const {
652 SDLoc DL(Op);
653 SDValue Vector = Op.getOperand(0);
654 SDValue Value = Op.getOperand(1);
655 SDValue Index = Op.getOperand(2);
656
657 if (isa<ConstantSDNode>(Index) ||
658 Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
659 return Op;
660
661 Vector = vectorToVerticalVector(DAG, Vector);
662 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
663 Vector, Value, Index);
664 return vectorToVerticalVector(DAG, Insert);
665}
666
667SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
668 SDValue Op,
669 SelectionDAG &DAG) const {
670 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
671 if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
672 return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);
673
674 const DataLayout &DL = DAG.getDataLayout();
675 const GlobalValue *GV = GSD->getGlobal();
676 MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);
677
678 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
679 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
680}
681
682SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
683 // On hw >= R700, COS/SIN input must be between -1.0 and 1.0.
684 // Thus we lower them to TRIG(FRACT(x / 2Pi + 0.5) - 0.5).
685 EVT VT = Op.getValueType();
686 SDValue Arg = Op.getOperand(0);
687 SDLoc DL(Op);
688
689 // TODO: Should this propagate fast-math-flags?
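// 0.15915494309 ~= 1/(2*Pi): rescale the input so one trig period maps to
// a unit interval that FRACT can wrap.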
690 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
691 DAG.getNode(ISD::FADD, DL, VT,
692 DAG.getNode(ISD::FMUL, DL, VT, Arg,
693 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
694 DAG.getConstantFP(0.5, DL, MVT::f32)));
695 unsigned TrigNode;
696 switch (Op.getOpcode()) {
697 case ISD::FCOS:
698 TrigNode = AMDGPUISD::COS_HW;
699 break;
700 case ISD::FSIN:
701 TrigNode = AMDGPUISD::SIN_HW;
702 break;
703 default:
704 llvm_unreachable("Wrong trig opcode");
705 }
706 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
707 DAG.getNode(ISD::FADD, DL, VT, FractPart,
708 DAG.getConstantFP(-0.5, DL, MVT::f32)));
709 if (Gen >= AMDGPUSubtarget::R700)
710 return TrigVal;
711 // On R600 hw, COS/SIN input must be between -Pi and Pi.
712 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
713 DAG.getConstantFP(numbers::pif, DL, MVT::f32));
714}
715
716SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
717 SelectionDAG &DAG) const {
718 SDValue Lo, Hi;
719 expandShiftParts(Op.getNode(), Lo, Hi, DAG);
720 return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
721}
722
723SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
724 unsigned mainop, unsigned ovf) const {
725 SDLoc DL(Op);
726 EVT VT = Op.getValueType();
727
728 SDValue Lo = Op.getOperand(0);
729 SDValue Hi = Op.getOperand(1);
730
731 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
732 // Extend sign.
733 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
734 DAG.getValueType(MVT::i1));
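// CARRY/BORROW produce their flag in bit 0; sign-extending from i1 gives the
// overflow result the target's 0 / -1 boolean encoding.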
735
736 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
737
738 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
739}
740
741SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
742 SDLoc DL(Op);
743 return DAG.getNode(
744 ISD::SETCC,
745 DL,
746 MVT::i1,
747 Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
748 DAG.getCondCode(ISD::SETEQ));
749}
750
751SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
752 SDLoc DL(Op);
753 return DAG.getNode(
754 ISD::SETCC,
755 DL,
756 MVT::i1,
757 Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
758 DAG.getCondCode(ISD::SETEQ));
759}
760
761SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
762 const SDLoc &DL,
763 unsigned DwordOffset) const {
764 unsigned ByteOffset = DwordOffset * 4;
765 PointerType *PtrType =
766 PointerType::get(*DAG.getContext(), AMDGPUAS::PARAM_I_ADDRESS);
767
768 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
769 assert(isInt<16>(ByteOffset));
770
771 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
772 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
773 MachinePointerInfo(ConstantPointerNull::get(PtrType)));
774}
775
776bool R600TargetLowering::isZero(SDValue Op) const {
777 if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op))
778 return Cst->isZero();
779 if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op))
780 return CstFP->isZero();
781 return false;
782}
783
784bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
785 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
786 return CFP->isExactlyValue(1.0);
787 }
788 return isAllOnesConstant(Op);
789}
790
791bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
792 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
793 return CFP->getValueAPF().isZero();
794 }
795 return isNullConstant(Op);
796}
797
798SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
799 SDLoc DL(Op);
800 EVT VT = Op.getValueType();
801
802 SDValue LHS = Op.getOperand(0);
803 SDValue RHS = Op.getOperand(1);
804 SDValue True = Op.getOperand(2);
805 SDValue False = Op.getOperand(3);
806 SDValue CC = Op.getOperand(4);
807 SDValue Temp;
808
809 if (VT == MVT::f32) {
810 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
811 SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
812 if (MinMax)
813 return MinMax;
814 }
815
816 // LHS and RHS are guaranteed to be the same value type
817 EVT CompareVT = LHS.getValueType();
818
819 // Check if we can lower this to a native operation.
820
821 // Try to lower to a SET* instruction:
822 //
823 // SET* can match the following patterns:
824 //
825 // select_cc f32, f32, -1, 0, cc_supported
826 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
827 // select_cc i32, i32, -1, 0, cc_supported
828 //
829
830 // Move hardware True/False values to the correct operand.
831 if (isHWTrueValue(False) && isHWFalseValue(True)) {
832 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
833 ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
834 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
835 std::swap(False, True);
836 CC = DAG.getCondCode(InverseCC);
837 } else {
838 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
839 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
840 std::swap(False, True);
841 std::swap(LHS, RHS);
842 CC = DAG.getCondCode(SwapInvCC);
843 }
844 }
845 }
846
847 if (isHWTrueValue(True) && isHWFalseValue(False) &&
848 (CompareVT == VT || VT == MVT::i32)) {
849 // This can be matched by a SET* instruction.
850 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
851 }
852
853 // Try to lower to a CND* instruction:
854 //
855 // CND* can match the following patterns:
856 //
857 // select_cc f32, 0.0, f32, f32, cc_supported
858 // select_cc f32, 0.0, i32, i32, cc_supported
859 // select_cc i32, 0, f32, f32, cc_supported
860 // select_cc i32, 0, i32, i32, cc_supported
861 //
862
863 // Try to move the zero value to the RHS
864 if (isZero(LHS)) {
865 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
866 // Try swapping the operands
867 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
868 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
869 std::swap(LHS, RHS);
870 CC = DAG.getCondCode(CCSwapped);
871 } else {
872 // Try inverting the condition and then swapping the operands
873 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
874 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
875 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
876 std::swap(True, False);
877 std::swap(LHS, RHS);
878 CC = DAG.getCondCode(CCSwapped);
879 }
880 }
881 }
882 if (isZero(RHS)) {
883 SDValue Cond = LHS;
884 SDValue Zero = RHS;
885 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
886 if (CompareVT != VT) {
887 // Bitcast True / False to the correct types. This will end up being
888 // a nop, but it allows us to define only a single pattern in the
889 // .TD files for each CND* instruction rather than having to have
890 // one pattern for integer True/False and one for fp True/False
891 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
892 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
893 }
894
895 switch (CCOpcode) {
896 case ISD::SETONE:
897 case ISD::SETUNE:
898 case ISD::SETNE:
899 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
900 Temp = True;
901 True = False;
902 False = Temp;
903 break;
904 default:
905 break;
906 }
907 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
908 Cond, Zero,
909 True, False,
910 DAG.getCondCode(CCOpcode));
911 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
912 }
913
914 // If we make it this far, it means we have no native instructions to handle
915 // this SELECT_CC, so we must lower it.
916 SDValue HWTrue, HWFalse;
917
918 if (CompareVT == MVT::f32) {
919 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
920 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
921 } else if (CompareVT == MVT::i32) {
922 HWTrue = DAG.getConstant(-1, DL, CompareVT);
923 HWFalse = DAG.getConstant(0, DL, CompareVT);
924 }
925 else {
926 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
927 }
928
929 // Lower this unsupported SELECT_CC into a combination of two supported
930 // SELECT_CC operations.
931 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
932
933 return DAG.getNode(ISD::SELECT_CC, DL, VT,
934 Cond, HWFalse,
935 True, False,
936 DAG.getCondCode(ISD::SETNE));
937}
938
939SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
940 SelectionDAG &DAG) const {
941 SDLoc SL(Op);
942 EVT VT = Op.getValueType();
943
944 const R600TargetMachine &TM =
945 static_cast<const R600TargetMachine &>(getTargetMachine());
946
947 const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
948 unsigned SrcAS = ASC->getSrcAddressSpace();
949 unsigned DestAS = ASC->getDestAddressSpace();
950
951 if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS)
952 return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT);
953
954 return Op;
955}
956
957/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
958/// convert these pointers to a register index. Each register holds
959/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
960/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
961/// for indirect addressing.
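/// For example, with a StackWidth of 1 each row holds a single dword, so a
/// byte pointer becomes a register index via ptr >> 2; widths 2 and 4 pack 8
/// or 16 bytes per row and shift by 3 or 4 instead.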
962SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
963 unsigned StackWidth,
964 SelectionDAG &DAG) const {
965 unsigned SRLPad;
966 switch(StackWidth) {
967 case 1:
968 SRLPad = 2;
969 break;
970 case 2:
971 SRLPad = 3;
972 break;
973 case 4:
974 SRLPad = 4;
975 break;
976 default: llvm_unreachable("Invalid stack width");
977 }
978
979 SDLoc DL(Ptr);
980 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
981 DAG.getConstant(SRLPad, DL, MVT::i32));
982}
983
984void R600TargetLowering::getStackAddress(unsigned StackWidth,
985 unsigned ElemIdx,
986 unsigned &Channel,
987 unsigned &PtrIncr) const {
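// Maps a flat element index onto a (channel, row increment) pair for the
// given stack width; elements beyond the row width move to the next row.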
988 switch (StackWidth) {
989 default:
990 case 1:
991 Channel = 0;
992 if (ElemIdx > 0) {
993 PtrIncr = 1;
994 } else {
995 PtrIncr = 0;
996 }
997 break;
998 case 2:
999 Channel = ElemIdx % 2;
1000 if (ElemIdx == 2) {
1001 PtrIncr = 1;
1002 } else {
1003 PtrIncr = 0;
1004 }
1005 break;
1006 case 4:
1007 Channel = ElemIdx;
1008 PtrIncr = 0;
1009 break;
1010 }
1011}
1012
1013SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1014 SelectionDAG &DAG) const {
1015 SDLoc DL(Store);
1016 //TODO: Who creates the i8 stores?
1017 assert(Store->isTruncatingStore()
1018 || Store->getValue().getValueType() == MVT::i8);
1019 assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1020
1021 SDValue Mask;
1022 if (Store->getMemoryVT() == MVT::i8) {
1023 assert(Store->getAlign() >= 1);
1024 Mask = DAG.getConstant(0xff, DL, MVT::i32);
1025 } else if (Store->getMemoryVT() == MVT::i16) {
1026 assert(Store->getAlign() >= 2);
1027 Mask = DAG.getConstant(0xffff, DL, MVT::i32);
1028 } else {
1029 llvm_unreachable("Unsupported private trunc store");
1030 }
1031
1032 SDValue OldChain = Store->getChain();
1033 bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1034 // Skip dummy
1035 SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1036 SDValue BasePtr = Store->getBasePtr();
1037 SDValue Offset = Store->getOffset();
1038 EVT MemVT = Store->getMemoryVT();
1039
1040 SDValue LoadPtr = BasePtr;
1041 if (!Offset.isUndef()) {
1042 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1043 }
1044
1045 // Get dword location
1046 // TODO: this should be eliminated by the future SHR ptr, 2
1047 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1048 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1049
1050 // Load dword
1051 // TODO: can we be smarter about machine pointer info?
1052 MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1053 SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1054
1055 Chain = Dst.getValue(1);
1056
1057 // Get offset in dword
1058 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1059 DAG.getConstant(0x3, DL, MVT::i32));
1060
1061 // Convert byte offset to bit shift
1062 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1063 DAG.getConstant(3, DL, MVT::i32));
1064
1065 // TODO: Contrary to the name of the function,
1066 // it also handles sub i32 non-truncating stores (like i1)
1067 SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1068 Store->getValue());
1069
1070 // Mask the value to the right type
1071 SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1072
1073 // Shift the value in place
1074 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1075 MaskedValue, ShiftAmt);
1076
1077 // Shift the mask in place
1078 SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1079
1080 // Invert the mask. NOTE: if we had native ROL instructions we could
1081 // use inverted mask
1082 DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1083
1084 // Cleanup the target bits
1085 Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1086
1087 // Add the new bits
1088 SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1089
1090 // Store dword
1091 // TODO: Can we be smarter about MachinePointerInfo?
1092 SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
1093
1094 // If we are part of an expanded vector, make our neighbors depend on this store
1095 if (VectorTrunc) {
1096 // Make all other vector elements depend on this store
1097 Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1098 DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1099 }
1100 return NewStore;
1101}
1102
1103SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1104 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1105 unsigned AS = StoreNode->getAddressSpace();
1106
1107 SDValue Chain = StoreNode->getChain();
1108 SDValue Ptr = StoreNode->getBasePtr();
1109 SDValue Value = StoreNode->getValue();
1110
1111 EVT VT = Value.getValueType();
1112 EVT MemVT = StoreNode->getMemoryVT();
1113 EVT PtrVT = Ptr.getValueType();
1114
1115 SDLoc DL(Op);
1116
1117 const bool TruncatingStore = StoreNode->isTruncatingStore();
1118
1119 // Neither LOCAL nor PRIVATE can do vectors at the moment
1120 if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
1121 TruncatingStore) &&
1122 VT.isVector()) {
1123 if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
1124 // Add an extra level of chain to isolate this vector
1125 SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1126 // TODO: can the chain be replaced without creating a new store?
1127 SDValue NewStore = DAG.getTruncStore(
1128 NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT,
1129 StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(),
1130 StoreNode->getAAInfo());
1131 StoreNode = cast<StoreSDNode>(NewStore);
1132 }
1133
1134 return scalarizeVectorStore(StoreNode, DAG);
1135 }
1136
1137 Align Alignment = StoreNode->getAlign();
1138 if (Alignment < MemVT.getStoreSize() &&
1139 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
1140 StoreNode->getMemOperand()->getFlags(),
1141 nullptr)) {
1142 return expandUnalignedStore(StoreNode, DAG);
1143 }
1144
1145 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1146 DAG.getConstant(2, DL, PtrVT));
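// R600 global/private stores address memory in 32-bit words, so the byte
// address is pre-shifted right by two for the dword-addressed patterns below.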
1147
1148 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1149 // It is beneficial to create MSKOR here instead of combiner to avoid
1150 // artificial dependencies introduced by RMW
1151 if (TruncatingStore) {
1152 assert(VT.bitsLE(MVT::i32));
1153 SDValue MaskConstant;
1154 if (MemVT == MVT::i8) {
1155 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1156 } else {
1157 assert(MemVT == MVT::i16);
1158 assert(StoreNode->getAlign() >= 2);
1159 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1160 }
1161
1162 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1163 DAG.getConstant(0x00000003, DL, PtrVT));
1164 SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1165 DAG.getConstant(3, DL, VT));
1166
1167 // Put the mask in correct place
1168 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1169
1170 // Put the value bits in correct place
1171 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1172 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1173
1174 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1175 // vector instead.
1176 SDValue Src[4] = {
1177 ShiftedValue,
1178 DAG.getConstant(0, DL, MVT::i32),
1179 DAG.getConstant(0, DL, MVT::i32),
1180 Mask
1181 };
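// STORE_MSKOR effectively performs dst = (dst & ~Mask) | ShiftedValue,
// folding the read-modify-write of a byte/short store into one operation.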
1182 SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1183 SDValue Args[3] = { Chain, Input, DWordAddr };
1184 return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
1185 Op->getVTList(), Args, MemVT,
1186 StoreNode->getMemOperand());
1187 }
1188 if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1189 // Convert pointer from byte address to dword address.
1190 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1191
1192 if (StoreNode->isIndexed()) {
1193 llvm_unreachable("Indexed stores not supported yet");
1194 } else {
1195 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1196 }
1197 return Chain;
1198 }
1199 }
1200
1201 // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1202 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1203 return SDValue();
1204
1205 if (MemVT.bitsLT(MVT::i32))
1206 return lowerPrivateTruncStore(StoreNode, DAG);
1207
1208 // Standard i32+ store, tag it with DWORDADDR to note that the address
1209 // has been shifted
1210 if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1211 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1212 return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1213 }
1214
1215 // Tagged i32+ stores will be matched by patterns
1216 return SDValue();
1217}
1218
1219 // return 512 + (kc_bank << 12)
1220 static int
1221 ConstantAddressBlock(unsigned AddressSpace) {
1222 switch (AddressSpace) {
1223 case AMDGPUAS::CONSTANT_BUFFER_0:
1224 return 512;
1225 case AMDGPUAS::CONSTANT_BUFFER_1:
1226 return 512 + 4096;
1227 case AMDGPUAS::CONSTANT_BUFFER_2:
1228 return 512 + 4096 * 2;
1229 case AMDGPUAS::CONSTANT_BUFFER_3:
1230 return 512 + 4096 * 3;
1231 case AMDGPUAS::CONSTANT_BUFFER_4:
1232 return 512 + 4096 * 4;
1233 case AMDGPUAS::CONSTANT_BUFFER_5:
1234 return 512 + 4096 * 5;
1235 case AMDGPUAS::CONSTANT_BUFFER_6:
1236 return 512 + 4096 * 6;
1237 case AMDGPUAS::CONSTANT_BUFFER_7:
1238 return 512 + 4096 * 7;
1239 case AMDGPUAS::CONSTANT_BUFFER_8:
1240 return 512 + 4096 * 8;
1241 case AMDGPUAS::CONSTANT_BUFFER_9:
1242 return 512 + 4096 * 9;
1243 case AMDGPUAS::CONSTANT_BUFFER_10:
1244 return 512 + 4096 * 10;
1245 case AMDGPUAS::CONSTANT_BUFFER_11:
1246 return 512 + 4096 * 11;
1247 case AMDGPUAS::CONSTANT_BUFFER_12:
1248 return 512 + 4096 * 12;
1249 case AMDGPUAS::CONSTANT_BUFFER_13:
1250 return 512 + 4096 * 13;
1251 case AMDGPUAS::CONSTANT_BUFFER_14:
1252 return 512 + 4096 * 14;
1253 case AMDGPUAS::CONSTANT_BUFFER_15:
1254 return 512 + 4096 * 15;
1255 default:
1256 return -1;
1257 }
1258}
1259
1260SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1261 SelectionDAG &DAG) const {
1262 SDLoc DL(Op);
1263 LoadSDNode *Load = cast<LoadSDNode>(Op);
1264 ISD::LoadExtType ExtType = Load->getExtensionType();
1265 EVT MemVT = Load->getMemoryVT();
1266 assert(Load->getAlign() >= MemVT.getStoreSize());
1267
1268 SDValue BasePtr = Load->getBasePtr();
1269 SDValue Chain = Load->getChain();
1270 SDValue Offset = Load->getOffset();
1271
1272 SDValue LoadPtr = BasePtr;
1273 if (!Offset.isUndef()) {
1274 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1275 }
1276
1277 // Get dword location
1278 // NOTE: this should be eliminated by the future SHR ptr, 2
1279 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1280 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1281
1282 // Load dword
1283 // TODO: can we be smarter about machine pointer info?
1284 MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
1285 SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1286
1287 // Get offset within the register.
1288 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1289 LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1290
1291 // Bit offset of target byte (byteIdx * 8).
1292 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1293 DAG.getConstant(3, DL, MVT::i32));
1294
1295 // Shift to the right.
1296 SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1297
1298 // Eliminate the upper bits by setting them to ...
1299 EVT MemEltVT = MemVT.getScalarType();
1300
1301 if (ExtType == ISD::SEXTLOAD) { // ... ones.
1302 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1303 Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1304 } else { // ... or zeros.
1305 Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1306 }
1307
1308 SDValue Ops[] = {
1309 Ret,
1310 Read.getValue(1) // This should be our output chain
1311 };
1312
1313 return DAG.getMergeValues(Ops, DL);
1314}
1315
1316SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1317 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1318 unsigned AS = LoadNode->getAddressSpace();
1319 EVT MemVT = LoadNode->getMemoryVT();
1320 ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1321
1322 if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1323 ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1324 return lowerPrivateExtLoad(Op, DAG);
1325 }
1326
1327 SDLoc DL(Op);
1328 EVT VT = Op.getValueType();
1329 SDValue Chain = LoadNode->getChain();
1330 SDValue Ptr = LoadNode->getBasePtr();
1331
1332 if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1333 LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
1334 VT.isVector()) {
1335 SDValue Ops[2];
1336 std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
1337 return DAG.getMergeValues(Ops, DL);
1338 }
1339
1340 // This is still used for explicit load from addrspace(8)
1341 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1342 if (ConstantBlock > -1 &&
1343 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1344 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1345 SDValue Result;
1346 if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1347 isa<ConstantSDNode>(Ptr)) {
1348 return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1349 }
1350 // TODO: Does this even work?
1351 // non-constant ptr can't be folded, keeps it as a v4f32 load
1352 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
1353 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1354 DAG.getConstant(4, DL, MVT::i32)),
1355 DAG.getConstant(LoadNode->getAddressSpace() -
1356 AMDGPUAS::CONSTANT_BUFFER_0,
1357 DL, MVT::i32));
1358
1359 if (!VT.isVector()) {
1360 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1361 DAG.getConstant(0, DL, MVT::i32));
1362 }
1363
1364 SDValue MergedValues[2] = {
1365 Result,
1366 Chain
1367 };
1368 return DAG.getMergeValues(MergedValues, DL);
1369 }
1370
1371 // For most operations returning SDValue() will result in the node being
1372 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1373 // need to manually expand loads that may be legal in some address spaces and
1374 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1375 // compute shaders, since the data is sign extended when it is uploaded to the
1376 // buffer. However SEXT loads from other address spaces are not supported, so
1377 // we need to expand them here.
1378 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1379 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1380 SDValue NewLoad = DAG.getExtLoad(
1381 ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1382 LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags());
1383 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1384 DAG.getValueType(MemVT));
1385
1386 SDValue MergedValues[2] = { Res, Chain };
1387 return DAG.getMergeValues(MergedValues, DL);
1388 }
1389
1390 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1391 return SDValue();
1392 }
1393
1394 // DWORDADDR ISD marks already shifted address
1395 if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1396 assert(VT == MVT::i32);
1397 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1398 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1399 return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1400 }
1401 return SDValue();
1402}
1403
1404SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1405 SDValue Chain = Op.getOperand(0);
1406 SDValue Cond = Op.getOperand(1);
1407 SDValue Jump = Op.getOperand(2);
1408
1409 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1410 Chain, Jump, Cond);
1411}
1412
1413SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1414 SelectionDAG &DAG) const {
1415 MachineFunction &MF = DAG.getMachineFunction();
1416 const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1417
1418 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1419
1420 unsigned FrameIndex = FIN->getIndex();
1421 Register IgnoredFrameReg;
1422 StackOffset Offset =
1423 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
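// Each private "row" is StackWidth dwords wide; scale the fixed slot offset
// into a byte offset accordingly.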
1424 return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
1425 SDLoc(Op), Op.getValueType());
1426}
1427
1428CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
1429 bool IsVarArg) const {
1430 switch (CC) {
1431 case CallingConv::AMDGPU_KERNEL:
1432 case CallingConv::SPIR_KERNEL:
1433 case CallingConv::C:
1434 case CallingConv::Fast:
1435 case CallingConv::Cold:
1436 llvm_unreachable("kernels should not be handled here");
1437 case CallingConv::AMDGPU_VS:
1438 case CallingConv::AMDGPU_GS:
1439 case CallingConv::AMDGPU_PS:
1440 case CallingConv::AMDGPU_CS:
1441 case CallingConv::AMDGPU_HS:
1442 case CallingConv::AMDGPU_ES:
1443 case CallingConv::AMDGPU_LS:
1444 return CC_R600;
1445 default:
1446 report_fatal_error("Unsupported calling convention.");
1447 }
1448}
1449
1450/// XXX Only kernel functions are supported, so we can assume for now that
1451/// every function is a kernel function, but in the future we should use
1452/// separate calling conventions for kernel and non-kernel functions.
1453SDValue R600TargetLowering::LowerFormalArguments(
1454 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1455 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1456 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1457 SmallVector<CCValAssign, 16> ArgLocs;
1458 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1459 *DAG.getContext());
1460 MachineFunction &MF = DAG.getMachineFunction();
1462
1463 if (AMDGPU::isShader(CallConv)) {
1464 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
1465 } else {
1466 analyzeFormalArgumentsCompute(CCInfo, Ins);
1467 }
1468
1469 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1470 CCValAssign &VA = ArgLocs[i];
1471 const ISD::InputArg &In = Ins[i];
1472 EVT VT = In.VT;
1473 EVT MemVT = VA.getLocVT();
1474 if (!VT.isVector() && MemVT.isVector()) {
1475 // Get load source type if scalarized.
1476 MemVT = MemVT.getVectorElementType();
1477 }
1478
1479 if (AMDGPU::isShader(CallConv)) {
1480 Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
1481 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1482 InVals.push_back(Register);
1483 continue;
1484 }
1485
1486 // i64 isn't a legal type, so the register type used ends up as i32, which
1487 // isn't expected here. It attempts to create this sextload, but it ends up
1488 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1489 // for <1 x i64>.
1490
1491 // The first 36 bytes of the input buffer contain information about
1492 // thread group and global sizes.
1493 ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
1494 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1495 // FIXME: This should really check the extload type, but the handling of
1496 // extload vector parameters seems to be broken.
1497
1498 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1499 Ext = ISD::SEXTLOAD;
1500 }
1501
1502 // Compute the offset from the value.
1503 // XXX - I think PartOffset should give you this, but it seems to give the
1504 // size of the register which isn't useful.
1505
1506 unsigned PartOffset = VA.getLocMemOffset();
1507 Align Alignment = commonAlignment(Align(VT.getStoreSize()), PartOffset);
1508
1509 MachinePointerInfo PtrInfo(AMDGPUAS::PARAM_I_ADDRESS);
1510 SDValue Arg = DAG.getLoad(
1511 ISD::UNINDEXED, Ext, VT, DL, Chain,
1512 DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
1513 PtrInfo,
1514 MemVT, Alignment, MachineMemOperand::MONonTemporal |
1515 MachineMemOperand::MODereferenceable |
1516 MachineMemOperand::MOInvariant);
1517
1518 InVals.push_back(Arg);
1519 }
1520 return Chain;
1521}
1522
1523EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
1524 EVT VT) const {
1525 if (!VT.isVector())
1526 return MVT::i32;
1527 return VT.changeVectorElementTypeToInteger();
1528}
1529
1530bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
1531 const MachineFunction &MF) const {
1532 // Local and Private addresses do not handle vectors. Limit to i32
1533 if (AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS) {
1534 return (MemVT.getSizeInBits() <= 32);
1535 }
1536 return true;
1537}
1538
1539bool R600TargetLowering::allowsMisalignedMemoryAccesses(
1540 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1541 unsigned *IsFast) const {
1542 if (IsFast)
1543 *IsFast = 0;
1544
1545 if (!VT.isSimple() || VT == MVT::Other)
1546 return false;
1547
1548 if (VT.bitsLT(MVT::i32))
1549 return false;
1550
1551 // TODO: This is a rough estimate.
1552 if (IsFast)
1553 *IsFast = 1;
1554
1555 return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
1556}
1557
1558static SDValue CompactSwizzlableVector(
1559 SelectionDAG &DAG, SDValue VectorEntry,
1560 DenseMap<unsigned, unsigned> &RemapSwizzle) {
1561 assert(RemapSwizzle.empty());
1562
1563 SDLoc DL(VectorEntry);
1564 EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1565
1566 SDValue NewBldVec[4];
1567 for (unsigned i = 0; i < 4; i++)
1568 NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1569 DAG.getIntPtrConstant(i, DL));
1570
1571 for (unsigned i = 0; i < 4; i++) {
1572 if (NewBldVec[i].isUndef())
1573 // We mask the write here to teach later passes that the ith element of this
1574 // vector is undef. Thus we can use it to reduce 128-bit reg usage,
1575 // break false dependencies and additionally make assembly easier to read.
1576 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1577 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1578 if (C->isZero()) {
1579 RemapSwizzle[i] = 4; // SEL_0
1580 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1581 } else if (C->isExactlyValue(1.0)) {
1582 RemapSwizzle[i] = 5; // SEL_1
1583 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1584 }
1585 }
1586
1587 if (NewBldVec[i].isUndef())
1588 continue;
1589
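// If this element duplicates an earlier lane, point the swizzle at that lane
// and drop the redundant element.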
1590 for (unsigned j = 0; j < i; j++) {
1591 if (NewBldVec[i] == NewBldVec[j]) {
1592 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1593 RemapSwizzle[i] = j;
1594 break;
1595 }
1596 }
1597 }
1598
1599 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1600 NewBldVec);
1601}
1602
1603static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
1604 DenseMap<unsigned, unsigned> &RemapSwizzle) {
1605 assert(RemapSwizzle.empty());
1606
1607 SDLoc DL(VectorEntry);
1608 EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1609
1610 SDValue NewBldVec[4];
1611 bool isUnmovable[4] = {false, false, false, false};
1612 for (unsigned i = 0; i < 4; i++)
1613 NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1614 DAG.getIntPtrConstant(i, DL));
1615
1616 for (unsigned i = 0; i < 4; i++) {
1617 RemapSwizzle[i] = i;
1618 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1619 unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
1620 if (i == Idx)
1621 isUnmovable[Idx] = true;
1622 }
1623 }
1624
1625 for (unsigned i = 0; i < 4; i++) {
1626 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1627 unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
1628 if (isUnmovable[Idx])
1629 continue;
1630 // Swap i and Idx
1631 std::swap(NewBldVec[Idx], NewBldVec[i]);
1632 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1633 break;
1634 }
1635 }
1636
1637 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1638 NewBldVec);
1639}
1640
1641SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
1642 SelectionDAG &DAG,
1643 const SDLoc &DL) const {
1644 // Old -> New swizzle values
1645 DenseMap<unsigned, unsigned> SwizzleRemap;
1646
1647 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1648 for (unsigned i = 0; i < 4; i++) {
1649 unsigned Idx = Swz[i]->getAsZExtVal();
1650 if (SwizzleRemap.contains(Idx))
1651 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1652 }
1653
1654 SwizzleRemap.clear();
1655 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1656 for (unsigned i = 0; i < 4; i++) {
1657 unsigned Idx = Swz[i]->getAsZExtVal();
1658 if (SwizzleRemap.contains(Idx))
1659 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1660 }
1661
1662 return BuildVector;
1663}
1664
1665SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
1666 SelectionDAG &DAG) const {
1667 SDLoc DL(LoadNode);
1668 EVT VT = LoadNode->getValueType(0);
1669 SDValue Chain = LoadNode->getChain();
1670 SDValue Ptr = LoadNode->getBasePtr();
1671 assert(isa<ConstantSDNode>(Ptr));
1672
1673 //TODO: Support smaller loads
1674 if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1675 return SDValue();
1676
1677 if (LoadNode->getAlign() < Align(4))
1678 return SDValue();
1679
1680 int ConstantBlock = ConstantAddressBlock(Block);
1681
1682 SDValue Slots[4];
1683 for (unsigned i = 0; i < 4; i++) {
1684 // We want Const position encoded with the following formula :
1685 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1686 // const_index is Ptr computed by llvm using an alignment of 16.
1687 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1688 // then div by 4 at the ISel step
1689 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1690 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1691 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1692 }
1693 EVT NewVT = MVT::v4i32;
1694 unsigned NumElements = 4;
1695 if (VT.isVector()) {
1696 NewVT = VT;
1697 NumElements = VT.getVectorNumElements();
1698 }
1699 SDValue Result = DAG.getBuildVector(NewVT, DL, ArrayRef(Slots, NumElements));
1700 if (!VT.isVector()) {
1701 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1702 DAG.getConstant(0, DL, MVT::i32));
1703 }
1704 SDValue MergedValues[2] = {
1705 Result,
1706 Chain
1707 };
1708 return DAG.getMergeValues(MergedValues, DL);
1709}
1710
1711//===----------------------------------------------------------------------===//
1712// Custom DAG Optimizations
1713//===----------------------------------------------------------------------===//
1714
1715SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
1716 DAGCombinerInfo &DCI) const {
1717 SelectionDAG &DAG = DCI.DAG;
1718 SDLoc DL(N);
1719
1720 switch (N->getOpcode()) {
1721 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1722 case ISD::FP_ROUND: {
1723 SDValue Arg = N->getOperand(0);
1724 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1725 return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1726 Arg.getOperand(0));
1727 }
1728 break;
1729 }
1730
1731 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1732 // (i32 select_cc f32, f32, -1, 0 cc)
1733 //
1734 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1735 // this to one of the SET*_DX10 instructions.
1736 case ISD::FP_TO_SINT: {
1737 SDValue FNeg = N->getOperand(0);
1738 if (FNeg.getOpcode() != ISD::FNEG) {
1739 return SDValue();
1740 }
1741 SDValue SelectCC = FNeg.getOperand(0);
1742 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1743 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1744 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1745 !isHWTrueValue(SelectCC.getOperand(2)) ||
1746 !isHWFalseValue(SelectCC.getOperand(3))) {
1747 return SDValue();
1748 }
1749
1750 return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1751 SelectCC.getOperand(0), // LHS
1752 SelectCC.getOperand(1), // RHS
1753 DAG.getConstant(-1, DL, MVT::i32), // True
1754 DAG.getConstant(0, DL, MVT::i32), // False
1755 SelectCC.getOperand(4)); // CC
1756 }
1757
1758 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1759 // => build_vector elt0, ... , NewEltIdx, ... , eltN
1760 case ISD::INSERT_VECTOR_ELT: {
1761 SDValue InVec = N->getOperand(0);
1762 SDValue InVal = N->getOperand(1);
1763 SDValue EltNo = N->getOperand(2);
1764
1765 // If the inserted element is an UNDEF, just use the input vector.
1766 if (InVal.isUndef())
1767 return InVec;
1768
1769 EVT VT = InVec.getValueType();
1770
1771 // If we can't generate a legal BUILD_VECTOR, exit
1772 if (!isTypeLegal(VT))
1773 return SDValue();
1774
1775 // Check that we know which element is being inserted
1776 if (!isa<ConstantSDNode>(EltNo))
1777 return SDValue();
1778 unsigned Elt = EltNo->getAsZExtVal();
1779
1780 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1781 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1782 // vector elements.
1783 SmallVector<SDValue, 8> Ops;
1784 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1785 Ops.append(InVec.getNode()->op_begin(),
1786 InVec.getNode()->op_end());
1787 } else if (InVec.isUndef()) {
1788 unsigned NElts = VT.getVectorNumElements();
1789 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1790 } else {
1791 return SDValue();
1792 }
1793
1794 // Insert the element
1795 if (Elt < Ops.size()) {
1796 // All the operands of BUILD_VECTOR must have the same type;
1797 // we enforce that here.
1798 EVT OpVT = Ops[0].getValueType();
1799 if (InVal.getValueType() != OpVT)
1800 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1801 DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1802 DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1803 Ops[Elt] = InVal;
1804 }
1805
1806 // Return the new vector
1807 return DAG.getBuildVector(VT, DL, Ops);
1808 }
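// For example:
//   (insert_vector_elt (build_vector a, b, c, d), x, 2)
//     --> (build_vector a, b, x, d)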
1809
1810 // An extract_vector_elt of a build_vector generated by custom lowering
1811 // also needs custom combining
1812 case ISD::EXTRACT_VECTOR_ELT: {
1813 SDValue Arg = N->getOperand(0);
1814 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1815 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1816 unsigned Element = Const->getZExtValue();
1817 return Arg->getOperand(Element);
1818 }
1819 }
1820 if (Arg.getOpcode() == ISD::BITCAST &&
1821 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1822 (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1823 Arg.getValueType().getVectorNumElements())) {
1824 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1825 unsigned Element = Const->getZExtValue();
1826 return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1827 Arg->getOperand(0).getOperand(Element));
1828 }
1829 }
1830 break;
1831 }
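// For example, (extract_vector_elt (build_vector a, b, c, d), 1) folds to b;
// with an intervening element-count-preserving bitcast, the extract is
// pushed through and the chosen build_vector operand is bitcast instead.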
1832
1833 case ISD::SELECT_CC: {
1834 // Try common optimizations
1835 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1836 return Ret;
1837
1838 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1839 // selectcc x, y, a, b, inv(cc)
1840 //
1841 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1842 // selectcc x, y, a, b, cc
1843 SDValue LHS = N->getOperand(0);
1844 if (LHS.getOpcode() != ISD::SELECT_CC) {
1845 return SDValue();
1846 }
1847
1848 SDValue RHS = N->getOperand(1);
1849 SDValue True = N->getOperand(2);
1850 SDValue False = N->getOperand(3);
1851 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1852
1853 if (LHS.getOperand(2).getNode() != True.getNode() ||
1854 LHS.getOperand(3).getNode() != False.getNode() ||
1855 RHS.getNode() != False.getNode()) {
1856 return SDValue();
1857 }
1858
1859 switch (NCC) {
1860 default: return SDValue();
1861 case ISD::SETNE: return LHS;
1862 case ISD::SETEQ: {
1863 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1864 LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
1865 if (DCI.isBeforeLegalizeOps() ||
1866 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1867 return DAG.getSelectCC(DL,
1868 LHS.getOperand(0),
1869 LHS.getOperand(1),
1870 LHS.getOperand(2),
1871 LHS.getOperand(3),
1872 LHSCC);
1873 break;
1874 }
1875 }
1876 return SDValue();
1877 }
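// Worked example for the seteq fold, with an integer condition code:
//   (select_cc (select_cc x, y, a, b, setlt), b, a, b, seteq)
// The outer node yields 'a' exactly when the inner select produced 'b',
// i.e. when (x setlt y) was false, so the whole tree collapses to
//   (select_cc x, y, a, b, setge)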
1878
1879 case AMDGPUISD::R600_EXPORT: {
1880 SDValue Arg = N->getOperand(1);
1881 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1882 break;
1883
1884 SDValue NewArgs[8] = {
1885 N->getOperand(0), // Chain
1886 SDValue(),
1887 N->getOperand(2), // ArrayBase
1888 N->getOperand(3), // Type
1889 N->getOperand(4), // SWZ_X
1890 N->getOperand(5), // SWZ_Y
1891 N->getOperand(6), // SWZ_Z
1892 N->getOperand(7) // SWZ_W
1893 };
1894 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1895 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1896 }
1897 case AMDGPUISD::TEXTURE_FETCH: {
1898 SDValue Arg = N->getOperand(1);
1899 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1900 break;
1901
1902 SDValue NewArgs[19] = {
1903 N->getOperand(0),
1904 N->getOperand(1),
1905 N->getOperand(2),
1906 N->getOperand(3),
1907 N->getOperand(4),
1908 N->getOperand(5),
1909 N->getOperand(6),
1910 N->getOperand(7),
1911 N->getOperand(8),
1912 N->getOperand(9),
1913 N->getOperand(10),
1914 N->getOperand(11),
1915 N->getOperand(12),
1916 N->getOperand(13),
1917 N->getOperand(14),
1918 N->getOperand(15),
1919 N->getOperand(16),
1920 N->getOperand(17),
1921 N->getOperand(18),
1922 };
1923 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1924 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1925 }
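// In both the export and texture-fetch cases above, OptimizeSwizzle returns
// a rebuilt vector operand and updates the swizzle selector operands in
// place through the pointer it is handed (&NewArgs[4] for SWZ_X..SWZ_W of an
// export, &NewArgs[2] for a texture fetch), so the two must stay consistent.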
1926
1927 case ISD::LOAD: {
1928 LoadSDNode *LoadNode = cast<LoadSDNode>(N);
1929 SDValue Ptr = LoadNode->getBasePtr();
1930 if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1931 isa<ConstantSDNode>(Ptr))
1932 return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1933 break;
1934 }
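// PARAM_I_ADDRESS loads are kernel-argument reads that would otherwise go
// through a vertex fetch (VTX1); when the pointer is a compile-time
// constant, constBufferLoad() re-expresses the load as a CONSTANT_BUFFER_0
// read instead.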
1935
1936 default: break;
1937 }
1938
1939 return SDValue();
1940}
1941
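// FoldOperand tries to absorb the machine node feeding operand SrcIdx of
// ParentNode into that operand's slots: FNEG_R600/FABS_R600 become the neg
// and abs modifier bits, CONST_COPY becomes an ALU_CONST register plus a sel
// index (subject to fitsConstReadLimitations across the whole instruction),
// and MOV_IMM_* becomes an inline constant register (ZERO, ONE, HALF, ...)
// or the single ALU_LITERAL_X immediate slot. It returns true when the
// operand list was changed and the node should be rebuilt.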
1942bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1943 SDValue &Src, SDValue &Neg, SDValue &Abs,
1944 SDValue &Sel, SDValue &Imm,
1945 SelectionDAG &DAG) const {
1946 const R600InstrInfo *TII = Subtarget->getInstrInfo();
1947 if (!Src.isMachineOpcode())
1948 return false;
1949
1950 switch (Src.getMachineOpcode()) {
1951 case R600::FNEG_R600:
1952 if (!Neg.getNode())
1953 return false;
1954 Src = Src.getOperand(0);
1955 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1956 return true;
1957 case R600::FABS_R600:
1958 if (!Abs.getNode())
1959 return false;
1960 Src = Src.getOperand(0);
1961 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1962 return true;
1963 case R600::CONST_COPY: {
1964 unsigned Opcode = ParentNode->getMachineOpcode();
1965 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
1966
1967 if (!Sel.getNode())
1968 return false;
1969
1970 SDValue CstOffset = Src.getOperand(0);
1971 if (ParentNode->getValueType(0).isVector())
1972 return false;
1973
1974 // Gather the constant values already used by the other source operands
1975 int SrcIndices[] = {
1976 TII->getOperandIdx(Opcode, R600::OpName::src0),
1977 TII->getOperandIdx(Opcode, R600::OpName::src1),
1978 TII->getOperandIdx(Opcode, R600::OpName::src2),
1979 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
1980 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
1981 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
1982 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
1983 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
1984 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
1985 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
1986 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
1987 };
1988 std::vector<unsigned> Consts;
1989 for (int OtherSrcIdx : SrcIndices) {
1990 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1991 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1992 continue;
1993 if (HasDst) {
1994 OtherSrcIdx--;
1995 OtherSelIdx--;
1996 }
1997 if (RegisterSDNode *Reg =
1998 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
1999 if (Reg->getReg() == R600::ALU_CONST) {
2000 Consts.push_back(ParentNode->getConstantOperandVal(OtherSelIdx));
2001 }
2002 }
2003 }
2004
2005 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2006 Consts.push_back(Cst->getZExtValue());
2007 if (!TII->fitsConstReadLimitations(Consts)) {
2008 return false;
2009 }
2010
2011 Sel = CstOffset;
2012 Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2013 return true;
2014 }
2015 case R600::MOV_IMM_GLOBAL_ADDR:
2016 // Check if the Imm slot is used. Taken from below.
2017 if (Imm->getAsZExtVal())
2018 return false;
2019 Imm = Src.getOperand(0);
2020 Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2021 return true;
2022 case R600::MOV_IMM_I32:
2023 case R600::MOV_IMM_F32: {
2024 unsigned ImmReg = R600::ALU_LITERAL_X;
2025 uint64_t ImmValue = 0;
2026
2027 if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2028 ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2029 float FloatValue = FPC->getValueAPF().convertToFloat();
2030 if (FloatValue == 0.0) {
2031 ImmReg = R600::ZERO;
2032 } else if (FloatValue == 0.5) {
2033 ImmReg = R600::HALF;
2034 } else if (FloatValue == 1.0) {
2035 ImmReg = R600::ONE;
2036 } else {
2037 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2038 }
2039 } else {
2040 uint64_t Value = Src.getConstantOperandVal(0);
2041 if (Value == 0) {
2042 ImmReg = R600::ZERO;
2043 } else if (Value == 1) {
2044 ImmReg = R600::ONE_INT;
2045 } else {
2046 ImmValue = Value;
2047 }
2048 }
2049
2050 // Check that we aren't already using an immediate.
2051 // XXX: It's possible for an instruction to have more than one
2052 // immediate operand, but this is not supported yet.
2053 if (ImmReg == R600::ALU_LITERAL_X) {
2054 if (!Imm.getNode())
2055 return false;
2056 ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2057 if (C->getZExtValue())
2058 return false;
2059 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2060 }
2061 Src = DAG.getRegister(ImmReg, MVT::i32);
2062 return true;
2063 }
2064 default:
2065 return false;
2066 }
2067}
2068
2069/// Fold the instructions after selecting them
2070SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2071 SelectionDAG &DAG) const {
2072 const R600InstrInfo *TII = Subtarget->getInstrInfo();
2073 if (!Node->isMachineOpcode())
2074 return Node;
2075
2076 unsigned Opcode = Node->getMachineOpcode();
2077 SDValue FakeOp;
2078
2079 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
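// Note: indices returned by getOperandIdx() count the MachineInstr's dst
// operand, while Ops holds only this node's uses; hence the "- 1" when
// indexing Ops below, and the extra SelIdx/ImmIdx decrements when the
// instruction has a dst.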
2080
2081 if (Opcode == R600::DOT_4) {
2082 int OperandIdx[] = {
2083 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2084 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2085 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2086 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2087 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2088 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2089 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2090 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2091 };
2092 int NegIdx[] = {
2093 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2094 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2095 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2096 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2097 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2098 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2099 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2100 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2101 };
2102 int AbsIdx[] = {
2103 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2104 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2105 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2106 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2107 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2108 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2109 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2110 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2111 };
2112 for (unsigned i = 0; i < 8; i++) {
2113 if (OperandIdx[i] < 0)
2114 return Node;
2115 SDValue &Src = Ops[OperandIdx[i] - 1];
2116 SDValue &Neg = Ops[NegIdx[i] - 1];
2117 SDValue &Abs = Ops[AbsIdx[i] - 1];
2118 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2119 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2120 if (HasDst)
2121 SelIdx--;
2122 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2123 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2124 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2125 }
2126 } else if (Opcode == R600::REG_SEQUENCE) {
2127 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2128 SDValue &Src = Ops[i];
2129 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2130 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2131 }
2132 } else {
2133 if (!TII->hasInstrModifiers(Opcode))
2134 return Node;
2135 int OperandIdx[] = {
2136 TII->getOperandIdx(Opcode, R600::OpName::src0),
2137 TII->getOperandIdx(Opcode, R600::OpName::src1),
2138 TII->getOperandIdx(Opcode, R600::OpName::src2)
2139 };
2140 int NegIdx[] = {
2141 TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2142 TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2143 TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2144 };
2145 int AbsIdx[] = {
2146 TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2147 TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2148 -1
2149 };
2150 for (unsigned i = 0; i < 3; i++) {
2151 if (OperandIdx[i] < 0)
2152 return Node;
2153 SDValue &Src = Ops[OperandIdx[i] - 1];
2154 SDValue &Neg = Ops[NegIdx[i] - 1];
2155 SDValue FakeAbs;
2156 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2157 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2158 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2159 int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2160 if (HasDst) {
2161 SelIdx--;
2162 ImmIdx--;
2163 }
2164 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2165 SDValue &Imm = Ops[ImmIdx];
2166 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2167 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2168 }
2169 }
2170
2171 return Node;
2172}
2173
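// Choose how each atomicrmw is legalized on R600: operations with no native
// read-modify-write instruction (nand, the FP variants, wrapping inc/dec)
// are expanded by AtomicExpandPass into a cmpxchg loop, while 32/64-bit
// integer operations and xchg are left for normal selection.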
2174TargetLowering::AtomicExpansionKind
2175R600TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
2176 switch (RMW->getOperation()) {
2177 case AtomicRMWInst::Nand:
2178 case AtomicRMWInst::FAdd:
2179 case AtomicRMWInst::FSub:
2180 case AtomicRMWInst::FMax:
2181 case AtomicRMWInst::FMin:
2182 return AtomicExpansionKind::CmpXChg;
2183 case AtomicRMWInst::UIncWrap:
2184 case AtomicRMWInst::UDecWrap:
2185 // FIXME: Cayman at least appears to have instructions for this, but the
2186 // instruction definitions appear to be missing.
2187 return AtomicExpansionKind::CmpXChg;
2188 case AtomicRMWInst::Xchg: {
2189 const DataLayout &DL = RMW->getFunction()->getDataLayout();
2190 unsigned ValSize = DL.getTypeSizeInBits(RMW->getType());
2191 if (ValSize == 32 || ValSize == 64)
2192 return AtomicExpansionKind::None;
2193 return AtomicExpansionKind::CmpXChg;
2194 }
2195 default:
2196 if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) {
2197 unsigned Size = IntTy->getBitWidth();
2198 if (Size == 32 || Size == 64)
2199 return AtomicExpansionKind::None;
2200 }
2201
2202 return AtomicExpansionKind::CmpXChg;
2203 }
2204
2205 llvm_unreachable("covered atomicrmw op switch");
2206}