//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// Custom DAG lowering for R600
//
//===----------------------------------------------------------------------===//

#include "R600ISelLowering.h"
#include "AMDGPU.h"
#include "MCTargetDesc/R600MCTargetDesc.h"
#include "R600Defines.h"
#include "R600MachineFunctionInfo.h"
#include "R600Subtarget.h"
#include "R600TargetMachine.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsR600.h"

using namespace llvm;

#include "R600GenCallingConv.inc"

R600TargetLowering::R600TargetLowering(const TargetMachine &TM,
                                       const R600Subtarget &STI)
    : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
  addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
  addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
  addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
  addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);

  setBooleanContents(ZeroOrNegativeOneBooleanContent);
  setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);

  computeRegisterProperties(Subtarget->getRegisterInfo());

  // Legalize loads and stores to the private address space.
  setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);

  // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
  // spaces, so it is custom lowered to handle those where it isn't.
  for (auto Op : {ISD::SEXTLOAD, ISD::ZEXTLOAD, ISD::EXTLOAD})
    for (MVT VT : MVT::integer_valuetypes()) {
      setLoadExtAction(Op, VT, MVT::i1, Promote);
      setLoadExtAction(Op, VT, MVT::i8, Custom);
      setLoadExtAction(Op, VT, MVT::i16, Custom);
    }

  // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v2i32,
                   MVT::v2i1, Expand);

  setLoadExtAction({ISD::EXTLOAD, ISD::SEXTLOAD, ISD::ZEXTLOAD}, MVT::v4i32,
                   MVT::v4i1, Expand);

  setOperationAction(ISD::STORE, {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32},
                     Custom);

  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
  // We need to include these since trunc STORES to PRIVATE need
  // special handling to accommodate RMW
  setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
  setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
  setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
  setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
  setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
  setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);

  // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
  setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
  setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);

  // Set condition code actions
  setCondCodeAction({ISD::SETO, ISD::SETUO, ISD::SETLT, ISD::SETLE, ISD::SETOLT,
                     ISD::SETOLE, ISD::SETONE, ISD::SETUEQ, ISD::SETUGE,
                     ISD::SETUGT, ISD::SETULT, ISD::SETULE},
                    MVT::f32, Expand);

  setCondCodeAction({ISD::SETLE, ISD::SETLT, ISD::SETULE, ISD::SETULT},
                    MVT::i32, Expand);

  setOperationAction({ISD::FCOS, ISD::FSIN}, MVT::f32, Custom);

  setOperationAction(ISD::SETCC, {MVT::v4i32, MVT::v2i32}, Expand);

  setOperationAction(ISD::BR_CC, {MVT::i32, MVT::f32}, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Custom);

  setOperationAction(ISD::FSUB, MVT::f32, Expand);

  setOperationAction({ISD::FCEIL, ISD::FTRUNC, ISD::FROUNDEVEN, ISD::FFLOOR},
                     MVT::f64, Custom);

  setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom);

  setOperationAction(ISD::SETCC, {MVT::i32, MVT::f32}, Expand);
  setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT}, {MVT::i1, MVT::i64},
                     Custom);

  setOperationAction(ISD::SELECT, {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32},
                     Expand);

  // ADD, SUB overflow.
  // TODO: turn these into Legal?
  if (Subtarget->hasCARRY())
    setOperationAction(ISD::UADDO, MVT::i32, Custom);

  if (Subtarget->hasBORROW())
    setOperationAction(ISD::USUBO, MVT::i32, Custom);

  // Expand sign extension of vectors
  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i1, MVT::v4i1}, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i8, MVT::v4i8}, Expand);

  if (!Subtarget->hasBFE())
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
  setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v4i16}, Expand);

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
  setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i32, MVT::v4i32}, Expand);

  setOperationAction(ISD::FrameIndex, MVT::i32, Custom);

  setOperationAction(ISD::ADDRSPACECAST, MVT::i32, Custom);

  setOperationAction(ISD::EXTRACT_VECTOR_ELT,
                     {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);

  setOperationAction(ISD::INSERT_VECTOR_ELT,
                     {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);

  // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
  // to be Legal/Custom in order to avoid library calls.
  setOperationAction({ISD::SHL_PARTS, ISD::SRL_PARTS, ISD::SRA_PARTS}, MVT::i32,
                     Custom);

  if (!Subtarget->hasFMA())
    setOperationAction(ISD::FMA, {MVT::f32, MVT::f64}, Expand);

  // FIXME: May need no denormals check
  setOperationAction(ISD::FMAD, MVT::f32, Legal);

  if (!Subtarget->hasBFI())
    // fcopysign can be done in a single instruction with BFI.
    setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);

  if (!Subtarget->hasBCNT(32))
    setOperationAction(ISD::CTPOP, MVT::i32, Expand);

  if (!Subtarget->hasBCNT(64))
    setOperationAction(ISD::CTPOP, MVT::i64, Expand);

  if (Subtarget->hasFFBH())
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, MVT::i32, Custom);

  if (Subtarget->hasFFBL())
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Custom);

  // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
  // need it for R600.
  if (Subtarget->hasBFE())
    setHasExtractBitsInsn(true);

  setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);

  const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
  for (MVT VT : ScalarIntVTs)
    setOperationAction({ISD::ADDC, ISD::SUBC, ISD::ADDE, ISD::SUBE}, VT,
                       Expand);

  // LLVM will expand these to atomic_cmp_swap(0)
  // and atomic_swap, respectively.
  setOperationAction({ISD::ATOMIC_LOAD, ISD::ATOMIC_STORE}, MVT::i32, Expand);

  // We need to custom lower some of the intrinsics
  setOperationAction({ISD::INTRINSIC_VOID, ISD::INTRINSIC_WO_CHAIN}, MVT::Other,
                     Custom);

  setSchedulingPreference(Sched::Source);

  setTargetDAGCombine({ISD::FP_ROUND, ISD::FP_TO_SINT, ISD::EXTRACT_VECTOR_ELT,
                       ISD::SELECT_CC, ISD::INSERT_VECTOR_ELT, ISD::LOAD});
}

static bool isEOP(MachineBasicBlock::iterator I) {
  if (std::next(I) == I->getParent()->end())
    return false;
  return std::next(I)->getOpcode() == R600::RETURN;
}

MachineBasicBlock *
R600TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                                MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineBasicBlock::iterator I = MI;
  const R600InstrInfo *TII = Subtarget->getInstrInfo();

  switch (MI.getOpcode()) {
  default:
    // Replace LDS_*_RET instruction that don't have any uses with the
    // equivalent LDS_*_NORET instruction.
    if (TII->isLDSRetInstr(MI.getOpcode())) {
      int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
      assert(DstIdx != -1);
      MachineInstrBuilder NewMI;
      // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
      //        LDS_1A2D support and remove this special case.
      if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
          MI.getOpcode() == R600::LDS_CMPST_RET)
        return BB;

      NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
                      TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
      for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
        NewMI.add(MO);
    } else {
      return AMDGPUTargetLowering::EmitInstrWithCustomInserter(MI, BB);
    }
    break;

  case R600::FABS_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
    break;
  }

  case R600::FNEG_R600: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, I, R600::MOV, MI.getOperand(0).getReg(),
        MI.getOperand(1).getReg());
    TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
    break;
  }

  case R600::MASK_WRITE: {
    Register maskedRegister = MI.getOperand(0).getReg();
    assert(maskedRegister.isVirtual());
    MachineInstr *defInstr = MRI.getVRegDef(maskedRegister);
    TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
    break;
  }

  case R600::MOV_IMM_F32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
        .getFPImm()
        ->getValueAPF()
        .bitcastToAPInt()
        .getZExtValue());
    break;

  case R600::MOV_IMM_I32:
    TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
                     MI.getOperand(1).getImm());
    break;

  case R600::MOV_IMM_GLOBAL_ADDR: {
    //TODO: Perhaps combine this instruction with the next if possible
    auto MIB = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
    int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
    //TODO: Ugh this is rather ugly
    const MachineOperand &MO = MI.getOperand(1);
    MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
                                    MO.getTargetFlags());
    break;
  }

  case R600::CONST_COPY: {
    MachineInstr *NewMI = TII->buildDefaultInstruction(
        *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
    TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
                       MI.getOperand(1).getImm());
    break;
  }

  case R600::RAT_WRITE_CACHELESS_32_eg:
  case R600::RAT_WRITE_CACHELESS_64_eg:
  case R600::RAT_WRITE_CACHELESS_128_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::RAT_STORE_TYPED_eg:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(isEOP(I)); // Set End of program bit
    break;

  case R600::BRANCH:
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
        .add(MI.getOperand(0));
    break;

  case R600::BRANCH_COND_f32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::BRANCH_COND_i32: {
    MachineInstr *NewMI =
        BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
                R600::PREDICATE_BIT)
            .add(MI.getOperand(1))
            .addImm(R600::PRED_SETNE_INT)
            .addImm(0); // Flags
    TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
        .add(MI.getOperand(0))
        .addReg(R600::PREDICATE_BIT, RegState::Kill);
    break;
  }

  case R600::EG_ExportSwz:
  case R600::R600_ExportSwz: {
    // Instruction is left unmodified if it's not the last one of its type
    bool isLastInstructionOfItsType = true;
    unsigned InstExportType = MI.getOperand(1).getImm();
    for (MachineBasicBlock::iterator NextExportInst = std::next(I),
         EndBlock = BB->end(); NextExportInst != EndBlock;
         NextExportInst = std::next(NextExportInst)) {
      if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
          NextExportInst->getOpcode() == R600::R600_ExportSwz) {
        unsigned CurrentInstExportType = NextExportInst->getOperand(1)
            .getImm();
        if (CurrentInstExportType == InstExportType) {
          isLastInstructionOfItsType = false;
          break;
        }
      }
    }
    bool EOP = isEOP(I);
    if (!EOP && !isLastInstructionOfItsType)
      return BB;
    unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
    BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .add(MI.getOperand(3))
        .add(MI.getOperand(4))
        .add(MI.getOperand(5))
        .add(MI.getOperand(6))
        .addImm(CfInst)
        .addImm(EOP);
    break;
  }
  case R600::RETURN: {
    return BB;
  }
  }

  MI.eraseFromParent();
  return BB;
}

//===----------------------------------------------------------------------===//
// Custom DAG Lowering Operations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();
  switch (Op.getOpcode()) {
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
  case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::SHL_PARTS:
  case ISD::SRA_PARTS:
  case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
  case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
  case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
  case ISD::FCOS:
  case ISD::FSIN: return LowerTrig(Op, DAG);
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
  case ISD::STORE: return LowerSTORE(Op, DAG);
  case ISD::LOAD: {
    SDValue Result = LowerLOAD(Op, DAG);
    assert((!Result.getNode() ||
            Result.getNode()->getNumValues() == 2) &&
           "Load should return a value and a chain");
    return Result;
  }

  case ISD::BRCOND: return LowerBRCOND(Op, DAG);
  case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
  case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
  case ISD::ADDRSPACECAST:
    return lowerADDRSPACECAST(Op, DAG);
  case ISD::INTRINSIC_VOID: {
    SDValue Chain = Op.getOperand(0);
    unsigned IntrinsicID = Op.getConstantOperandVal(1);
    switch (IntrinsicID) {
    case Intrinsic::r600_store_swizzle: {
      SDLoc DL(Op);
      const SDValue Args[8] = {
        Chain,
        Op.getOperand(2), // Export Value
        Op.getOperand(3), // ArrayBase
        Op.getOperand(4), // Type
        DAG.getConstant(0, DL, MVT::i32), // SWZ_X
        DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
        DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
        DAG.getConstant(3, DL, MVT::i32) // SWZ_W
      };
      return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
    }

    // default for switch(IntrinsicID)
    default: break;
    }
    // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
    break;
  }
  case ISD::INTRINSIC_WO_CHAIN: {
    unsigned IntrinsicID = Op.getConstantOperandVal(0);
    EVT VT = Op.getValueType();
    SDLoc DL(Op);
    switch (IntrinsicID) {
    case Intrinsic::r600_tex:
    case Intrinsic::r600_texc: {
      unsigned TextureOp;
      switch (IntrinsicID) {
      case Intrinsic::r600_tex:
        TextureOp = 0;
        break;
      case Intrinsic::r600_texc:
        TextureOp = 1;
        break;
      default:
        llvm_unreachable("unhandled texture operation");
      }

      SDValue TexArgs[19] = {
        DAG.getConstant(TextureOp, DL, MVT::i32),
        Op.getOperand(1),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(2),
        Op.getOperand(3),
        Op.getOperand(4),
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(1, DL, MVT::i32),
        DAG.getConstant(2, DL, MVT::i32),
        DAG.getConstant(3, DL, MVT::i32),
        Op.getOperand(5),
        Op.getOperand(6),
        Op.getOperand(7),
        Op.getOperand(8),
        Op.getOperand(9),
        Op.getOperand(10)
      };
      return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
    }
    case Intrinsic::r600_dot4: {
      SDValue Args[8] = {
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                      DAG.getConstant(0, DL, MVT::i32)),
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                      DAG.getConstant(0, DL, MVT::i32)),
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                      DAG.getConstant(1, DL, MVT::i32)),
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                      DAG.getConstant(1, DL, MVT::i32)),
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                      DAG.getConstant(2, DL, MVT::i32)),
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                      DAG.getConstant(2, DL, MVT::i32)),
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
                      DAG.getConstant(3, DL, MVT::i32)),
          DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
                      DAG.getConstant(3, DL, MVT::i32))
      };
      return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
    }

    case Intrinsic::r600_implicitarg_ptr: {
      MVT PtrVT = getPointerTy(DAG.getDataLayout(), AMDGPUAS::PARAM_I_ADDRESS);
      uint32_t ByteOffset = getImplicitParameterOffset(MF, FIRST_IMPLICIT);
      return DAG.getConstant(ByteOffset, DL, PtrVT);
    }
    case Intrinsic::r600_read_ngroups_x:
      return LowerImplicitParameter(DAG, VT, DL, 0);
    case Intrinsic::r600_read_ngroups_y:
      return LowerImplicitParameter(DAG, VT, DL, 1);
    case Intrinsic::r600_read_ngroups_z:
      return LowerImplicitParameter(DAG, VT, DL, 2);
    case Intrinsic::r600_read_global_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 3);
    case Intrinsic::r600_read_global_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 4);
    case Intrinsic::r600_read_global_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 5);
    case Intrinsic::r600_read_local_size_x:
      return LowerImplicitParameter(DAG, VT, DL, 6);
    case Intrinsic::r600_read_local_size_y:
      return LowerImplicitParameter(DAG, VT, DL, 7);
    case Intrinsic::r600_read_local_size_z:
      return LowerImplicitParameter(DAG, VT, DL, 8);

    case Intrinsic::r600_read_tgid_x:
    case Intrinsic::amdgcn_workgroup_id_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_X, VT);
    case Intrinsic::r600_read_tgid_y:
    case Intrinsic::amdgcn_workgroup_id_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Y, VT);
    case Intrinsic::r600_read_tgid_z:
    case Intrinsic::amdgcn_workgroup_id_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T1_Z, VT);
    case Intrinsic::r600_read_tidig_x:
    case Intrinsic::amdgcn_workitem_id_x:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_X, VT);
    case Intrinsic::r600_read_tidig_y:
    case Intrinsic::amdgcn_workitem_id_y:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Y, VT);
    case Intrinsic::r600_read_tidig_z:
    case Intrinsic::amdgcn_workitem_id_z:
      return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
                                     R600::T0_Z, VT);

    case Intrinsic::r600_recipsqrt_ieee:
      return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));

    case Intrinsic::r600_recipsqrt_clamped:
      return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
    default:
      return Op;
    }

    // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
    break;
  }
  } // end switch(Op.getOpcode())
  return SDValue();
}

void R600TargetLowering::ReplaceNodeResults(SDNode *N,
                                            SmallVectorImpl<SDValue> &Results,
                                            SelectionDAG &DAG) const {
  switch (N->getOpcode()) {
  default:
    AMDGPUTargetLowering::ReplaceNodeResults(N, Results, DAG);
    return;
  case ISD::FP_TO_UINT:
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
      return;
    }
    // Since we don't care about out of bounds values we can use FP_TO_SINT for
    // uints too. The DAGLegalizer code for uint considers some extra cases
    // which are not necessary here.
    [[fallthrough]];
  case ISD::FP_TO_SINT: {
    if (N->getValueType(0) == MVT::i1) {
      Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
      return;
    }

    SDValue Result;
    if (expandFP_TO_SINT(N, Result, DAG))
      Results.push_back(Result);
    return;
  }
  case ISD::SDIVREM: {
    SDValue Op = SDValue(N, 1);
    SDValue RES = LowerSDIVREM(Op, DAG);
    Results.push_back(RES);
    Results.push_back(RES.getValue(1));
    break;
  }
  case ISD::UDIVREM: {
    SDValue Op = SDValue(N, 0);
    LowerUDIVREM64(Op, DAG, Results);
    break;
  }
  }
}

SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
                                                   SDValue Vector) const {
  SDLoc DL(Vector);
  EVT VecVT = Vector.getValueType();
  EVT EltVT = VecVT.getVectorElementType();
  SmallVector<SDValue, 8> Args;

  for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
    Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
                               DAG.getVectorIdxConstant(i, DL)));
  }

  return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
}

SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
                                                    SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Index = Op.getOperand(1);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
                     Vector, Index);
}

SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  SDValue Vector = Op.getOperand(0);
  SDValue Value = Op.getOperand(1);
  SDValue Index = Op.getOperand(2);

  if (isa<ConstantSDNode>(Index) ||
      Vector.getOpcode() == AMDGPUISD::BUILD_VERTICAL_VECTOR)
    return Op;

  Vector = vectorToVerticalVector(DAG, Vector);
  SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
                               Vector, Value, Index);
  return vectorToVerticalVector(DAG, Insert);
}

SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
                                               SDValue Op,
                                               SelectionDAG &DAG) const {
  GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
  if (GSD->getAddressSpace() != AMDGPUAS::CONSTANT_ADDRESS)
    return AMDGPUTargetLowering::LowerGlobalAddress(MFI, Op, DAG);

  const DataLayout &DL = DAG.getDataLayout();
  const GlobalValue *GV = GSD->getGlobal();
  MVT ConstPtrVT = getPointerTy(DL, AMDGPUAS::CONSTANT_ADDRESS);

  SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
  return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
}

SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
  // On hw >= R700, COS/SIN input must be between -1. and 1.
  // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
  EVT VT = Op.getValueType();
  SDValue Arg = Op.getOperand(0);
  SDLoc DL(Op);

  // TODO: Should this propagate fast-math-flags?
  SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT,
          DAG.getNode(ISD::FMUL, DL, VT, Arg,
              DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
          DAG.getConstantFP(0.5, DL, MVT::f32)));
  unsigned TrigNode;
  switch (Op.getOpcode()) {
  case ISD::FCOS:
    TrigNode = AMDGPUISD::COS_HW;
    break;
  case ISD::FSIN:
    TrigNode = AMDGPUISD::SIN_HW;
    break;
  default:
    llvm_unreachable("Wrong trig opcode");
  }
  SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
      DAG.getNode(ISD::FADD, DL, VT, FractPart,
          DAG.getConstantFP(-0.5, DL, MVT::f32)));
  if (Gen >= AMDGPUSubtarget::R700)
    return TrigVal;
  // On R600 hw, COS/SIN input must be between -Pi and Pi.
  return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
                     DAG.getConstantFP(numbers::pif, DL, MVT::f32));
}
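
// Worked example (editor's illustration, not part of the original source):
// for FSIN with x = 3*pi, the sequence above evaluates to
//   FRACT(3*pi * 0.15915494309 + 0.5) - 0.5 = FRACT(2.0) - 0.5 = -0.5
// i.e. half a period in normalized units, which is consistent with
// sin(3*pi) = sin(-pi) = 0. On pre-R700 parts the extra FMUL rescales this
// normalized value by pi before it reaches the trig unit.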

SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
                                            SelectionDAG &DAG) const {
  SDValue Lo, Hi;
  expandShiftParts(Op.getNode(), Lo, Hi, DAG);
  return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
}

SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
                                          unsigned mainop, unsigned ovf) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue Lo = Op.getOperand(0);
  SDValue Hi = Op.getOperand(1);

  SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
  // Extend sign.
  OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
                    DAG.getValueType(MVT::i1));

  SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);

  return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
}
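
// Worked example (editor's illustration, not part of the original source):
// for ISD::UADDO with a = 0xffffffff and b = 1 this produces
//   Res = ADD(a, b)   -> 0x00000000
//   OVF = CARRY(a, b) -> 1, sign-extended from i1 to 0xffffffff
// so the overflow value follows the target's all-ones boolean convention.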

SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  return DAG.getNode(
      ISD::SETCC,
      DL,
      MVT::i1,
      Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
      DAG.getCondCode(ISD::SETEQ));
}

SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
                                                   const SDLoc &DL,
                                                   unsigned DwordOffset) const {
  unsigned ByteOffset = DwordOffset * 4;
  PointerType *PtrType =
      PointerType::get(*DAG.getContext(), AMDGPUAS::PARAM_I_ADDRESS);

  // We shouldn't be using an offset wider than 16-bits for implicit parameters.
  assert(isInt<16>(ByteOffset));

  return DAG.getLoad(VT, DL, DAG.getEntryNode(),
                     DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
                     MachinePointerInfo(ConstantPointerNull::get(PtrType)));
}
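
// Worked example (editor's illustration, not part of the original source):
// implicit parameters occupy consecutive dwords of the PARAM_I address space,
// so the r600_read_ngroups_y lowering above passes DwordOffset = 1, which
// becomes ByteOffset = 4 and loads the second 32-bit implicit parameter.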

bool R600TargetLowering::isZero(SDValue Op) const {
  if (ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op))
    return Cst->isZero();
  if (ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op))
    return CstFP->isZero();
  return false;
}

bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->isExactlyValue(1.0);
  }
  return isAllOnesConstant(Op);
}

bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Op)) {
    return CFP->getValueAPF().isZero();
  }
  return isNullConstant(Op);
}

SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();

  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue True = Op.getOperand(2);
  SDValue False = Op.getOperand(3);
  SDValue CC = Op.getOperand(4);
  SDValue Temp;

  if (VT == MVT::f32) {
    DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
    SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
    if (MinMax)
      return MinMax;
  }

  // LHS and RHS are guaranteed to be the same value type
  EVT CompareVT = LHS.getValueType();

  // Check if we can lower this to a native operation.

  // Try to lower to a SET* instruction:
  //
  // SET* can match the following patterns:
  //
  // select_cc f32, f32, -1, 0, cc_supported
  // select_cc f32, f32, 1.0f, 0.0f, cc_supported
  // select_cc i32, i32, -1, 0, cc_supported
  //

  // Move hardware True/False values to the correct operand.
  if (isHWTrueValue(False) && isHWFalseValue(True)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
    if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
      std::swap(False, True);
      CC = DAG.getCondCode(InverseCC);
    } else {
      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
      if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
        std::swap(False, True);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(SwapInvCC);
      }
    }
  }

  if (isHWTrueValue(True) && isHWFalseValue(False) &&
      (CompareVT == VT || VT == MVT::i32)) {
    // This can be matched by a SET* instruction.
    return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
  }

  // Try to lower to a CND* instruction:
  //
  // CND* can match the following patterns:
  //
  // select_cc f32, 0.0, f32, f32, cc_supported
  // select_cc f32, 0.0, i32, i32, cc_supported
  // select_cc i32, 0, f32, f32, cc_supported
  // select_cc i32, 0, i32, i32, cc_supported
  //

  // Try to move the zero value to the RHS
  if (isZero(LHS)) {
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    // Try swapping the operands
    ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
    if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
      std::swap(LHS, RHS);
      CC = DAG.getCondCode(CCSwapped);
    } else {
      // Try inverting the condition and then swapping the operands
      ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
      CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
      if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
        std::swap(True, False);
        std::swap(LHS, RHS);
        CC = DAG.getCondCode(CCSwapped);
      }
    }
  }
  if (isZero(RHS)) {
    SDValue Cond = LHS;
    SDValue Zero = RHS;
    ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
    if (CompareVT != VT) {
      // Bitcast True / False to the correct types. This will end up being
      // a nop, but it allows us to define only a single pattern in the
      // .TD files for each CND* instruction rather than having to have
      // one pattern for integer True/False and one for fp True/False
      True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
      False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
    }

    switch (CCOpcode) {
    case ISD::SETONE:
    case ISD::SETUNE:
    case ISD::SETNE:
      CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
      Temp = True;
      True = False;
      False = Temp;
      break;
    default:
      break;
    }
    SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
                                     Cond, Zero,
                                     True, False,
                                     DAG.getCondCode(CCOpcode));
    return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
  }

  // If we make it this far, it means we have no native instructions to handle
  // this SELECT_CC, so we must lower it.
  SDValue HWTrue, HWFalse;

  if (CompareVT == MVT::f32) {
    HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
    HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
  } else if (CompareVT == MVT::i32) {
    HWTrue = DAG.getAllOnesConstant(DL, CompareVT);
    HWFalse = DAG.getConstant(0, DL, CompareVT);
  } else {
    llvm_unreachable("Unhandled value type in LowerSELECT_CC");
  }

  // Lower this unsupported SELECT_CC into a combination of two supported
  // SELECT_CC operations.
  SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue,
                             HWFalse, CC);

  return DAG.getNode(ISD::SELECT_CC, DL, VT,
                     Cond, HWFalse,
                     True, False,
                     DAG.getCondCode(ISD::SETNE));
}

SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
                                               SelectionDAG &DAG) const {
  SDLoc SL(Op);
  EVT VT = Op.getValueType();

  const R600TargetMachine &TM =
      static_cast<const R600TargetMachine &>(getTargetMachine());

  const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
  unsigned SrcAS = ASC->getSrcAddressSpace();
  unsigned DestAS = ASC->getDestAddressSpace();

  if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS)
    return DAG.getSignedConstant(TM.getNullPointerValue(DestAS), SL, VT);

  return Op;
}

/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
/// convert these pointers to a register index. Each register holds
/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
/// for indirect addressing.
SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
                                               unsigned StackWidth,
                                               SelectionDAG &DAG) const {
  unsigned SRLPad;
  switch (StackWidth) {
  case 1:
    SRLPad = 2;
    break;
  case 2:
    SRLPad = 3;
    break;
  case 4:
    SRLPad = 4;
    break;
  default: llvm_unreachable("Invalid stack width");
  }

  SDLoc DL(Ptr);
  return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
                     DAG.getConstant(SRLPad, DL, MVT::i32));
}
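
// Worked example (editor's illustration, not part of the original source):
// with StackWidth == 1 each stack slot occupies one 4-byte channel, so a byte
// pointer of 12 becomes register index 12 >> 2 = 3; with StackWidth == 2
// (8 bytes per slot) the same pointer becomes 12 >> 3 = 1.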

void R600TargetLowering::getStackAddress(unsigned StackWidth,
                                         unsigned ElemIdx,
                                         unsigned &Channel,
                                         unsigned &PtrIncr) const {
  switch (StackWidth) {
  default:
  case 1:
    Channel = 0;
    if (ElemIdx > 0) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 2:
    Channel = ElemIdx % 2;
    if (ElemIdx == 2) {
      PtrIncr = 1;
    } else {
      PtrIncr = 0;
    }
    break;
  case 4:
    Channel = ElemIdx;
    PtrIncr = 0;
    break;
  }
}
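
// Worked example (editor's illustration, not part of the original source):
// for StackWidth == 2 the four vector elements map to (Channel, PtrIncr)
//   ElemIdx 0 -> (0, 0), 1 -> (1, 0), 2 -> (0, 1), 3 -> (1, 0)
// i.e. the register pointer is bumped once when crossing from the first
// two-channel register to the next.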

SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Store);
  //TODO: Who creates the i8 stores?
  assert(Store->isTruncatingStore()
         || Store->getValue().getValueType() == MVT::i8);
  assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);

  SDValue Mask;
  if (Store->getMemoryVT() == MVT::i8) {
    assert(Store->getAlign() >= 1);
    Mask = DAG.getConstant(0xff, DL, MVT::i32);
  } else if (Store->getMemoryVT() == MVT::i16) {
    assert(Store->getAlign() >= 2);
    Mask = DAG.getConstant(0xffff, DL, MVT::i32);
  } else {
    llvm_unreachable("Unsupported private trunc store");
  }

  SDValue OldChain = Store->getChain();
  bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
  // Skip dummy
  SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
  SDValue BasePtr = Store->getBasePtr();
  SDValue Offset = Store->getOffset();
  EVT MemVT = Store->getMemoryVT();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // TODO: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
  SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  Chain = Dst.getValue(1);

  // Get offset in dword
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                                DAG.getConstant(0x3, DL, MVT::i32));

  // Convert byte offset to bit shift
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // TODO: Contrary to the name of the function,
  // it also handles sub i32 non-truncating stores (like i1)
  SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
                                  Store->getValue());

  // Mask the value to the right type
  SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);

  // Shift the value in place
  SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
                                     MaskedValue, ShiftAmt);

  // Shift the mask in place
  SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);

  // Invert the mask. NOTE: if we had native ROL instructions we could
  // use inverted mask
  DstMask = DAG.getNOT(DL, DstMask, MVT::i32);

  // Cleanup the target bits
  Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);

  // Add the new bits
  SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);

  // Store dword
  // TODO: Can we be smarter about MachinePointerInfo?
  SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);

  // If we are part of expanded vector, make our neighbors depend on this store
  if (VectorTrunc) {
    // Make all other vector elements depend on this store
    Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
    DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
  }
  return NewStore;
}
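
// Worked example (editor's illustration, not part of the original source):
// storing the i8 value 0xAB to private byte address 6 becomes a
// read-modify-write of the dword at address 4 (6 & ~3):
//   ByteIdx  = 6 & 3         = 2
//   ShiftAmt = 2 << 3        = 16
//   DstMask  = ~(0xff << 16) = 0xff00ffff
//   Value    = (Dst & DstMask) | (0xAB << 16)
// which replaces only byte 2 of the loaded dword before storing it back.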

SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
  StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
  unsigned AS = StoreNode->getAddressSpace();

  SDValue Chain = StoreNode->getChain();
  SDValue Ptr = StoreNode->getBasePtr();
  SDValue Value = StoreNode->getValue();

  EVT VT = Value.getValueType();
  EVT MemVT = StoreNode->getMemoryVT();
  EVT PtrVT = Ptr.getValueType();

  SDLoc DL(Op);

  const bool TruncatingStore = StoreNode->isTruncatingStore();

  // Neither LOCAL nor PRIVATE can do vectors at the moment
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS ||
       TruncatingStore) &&
      VT.isVector()) {
    if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
      // Add an extra level of chain to isolate this vector
      SDValue NewChain =
          DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
      // TODO: can the chain be replaced without creating a new store?
      SDValue NewStore = DAG.getTruncStore(
          NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT,
          StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(),
          StoreNode->getAAInfo());
      StoreNode = cast<StoreSDNode>(NewStore);
    }

    return scalarizeVectorStore(StoreNode, DAG);
  }

  Align Alignment = StoreNode->getAlign();
  if (Alignment < MemVT.getStoreSize() &&
      !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
                                      StoreNode->getMemOperand()->getFlags(),
                                      nullptr)) {
    return expandUnalignedStore(StoreNode, DAG);
  }

  SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
                                  DAG.getConstant(2, DL, PtrVT));

  if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
    // It is beneficial to create MSKOR here instead of combiner to avoid
    // artificial dependencies introduced by RMW
    if (TruncatingStore) {
      assert(VT.bitsLE(MVT::i32));
      SDValue MaskConstant;
      if (MemVT == MVT::i8) {
        MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
      } else {
        assert(MemVT == MVT::i16);
        assert(StoreNode->getAlign() >= 2);
        MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
      }

      SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
                                      DAG.getConstant(0x00000003, DL, PtrVT));
      SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
                                     DAG.getConstant(3, DL, VT));

      // Put the mask in correct place
      SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);

      // Put the value bits in correct place
      SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
      SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue,
                                         BitShift);

      // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
      // vector instead.
      SDValue Src[4] = {
        ShiftedValue,
        DAG.getConstant(0, DL, MVT::i32),
        DAG.getConstant(0, DL, MVT::i32),
        Mask
      };
      SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
      SDValue Args[3] = { Chain, Input, DWordAddr };
      return DAG.getMemIntrinsicNode(AMDGPUISD::STORE_MSKOR, DL,
                                     Op->getVTList(), Args, MemVT,
                                     StoreNode->getMemOperand());
    }
    if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
      // Convert pointer from byte address to dword address.
      Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);

      if (StoreNode->isIndexed()) {
        llvm_unreachable("Indexed stores not supported yet");
      } else {
        Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
      }
      return Chain;
    }
  }

  // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
  if (AS != AMDGPUAS::PRIVATE_ADDRESS)
    return SDValue();

  if (MemVT.bitsLT(MVT::i32))
    return lowerPrivateTruncStore(StoreNode, DAG);

  // Standard i32+ store, tag it with DWORDADDR to note that the address
  // has been shifted
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
    return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
  }

  // Tagged i32+ stores will be matched by patterns
  return SDValue();
}

// return (512 + (kc_bank << 12))
static int
ConstantAddressBlock(unsigned AddressSpace) {
  switch (AddressSpace) {
  case AMDGPUAS::CONSTANT_BUFFER_0:
    return 512;
  case AMDGPUAS::CONSTANT_BUFFER_1:
    return 512 + 4096;
  case AMDGPUAS::CONSTANT_BUFFER_2:
    return 512 + 4096 * 2;
  case AMDGPUAS::CONSTANT_BUFFER_3:
    return 512 + 4096 * 3;
  case AMDGPUAS::CONSTANT_BUFFER_4:
    return 512 + 4096 * 4;
  case AMDGPUAS::CONSTANT_BUFFER_5:
    return 512 + 4096 * 5;
  case AMDGPUAS::CONSTANT_BUFFER_6:
    return 512 + 4096 * 6;
  case AMDGPUAS::CONSTANT_BUFFER_7:
    return 512 + 4096 * 7;
  case AMDGPUAS::CONSTANT_BUFFER_8:
    return 512 + 4096 * 8;
  case AMDGPUAS::CONSTANT_BUFFER_9:
    return 512 + 4096 * 9;
  case AMDGPUAS::CONSTANT_BUFFER_10:
    return 512 + 4096 * 10;
  case AMDGPUAS::CONSTANT_BUFFER_11:
    return 512 + 4096 * 11;
  case AMDGPUAS::CONSTANT_BUFFER_12:
    return 512 + 4096 * 12;
  case AMDGPUAS::CONSTANT_BUFFER_13:
    return 512 + 4096 * 13;
  case AMDGPUAS::CONSTANT_BUFFER_14:
    return 512 + 4096 * 14;
  case AMDGPUAS::CONSTANT_BUFFER_15:
    return 512 + 4096 * 15;
  default:
    return -1;
  }
}

SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
                                                SelectionDAG &DAG) const {
  SDLoc DL(Op);
  LoadSDNode *Load = cast<LoadSDNode>(Op);
  ISD::LoadExtType ExtType = Load->getExtensionType();
  EVT MemVT = Load->getMemoryVT();
  assert(Load->getAlign() >= MemVT.getStoreSize());

  SDValue BasePtr = Load->getBasePtr();
  SDValue Chain = Load->getChain();
  SDValue Offset = Load->getOffset();

  SDValue LoadPtr = BasePtr;
  if (!Offset.isUndef()) {
    LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
  }

  // Get dword location
  // NOTE: this should be eliminated by the future SHR ptr, 2
  SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
                            DAG.getConstant(0xfffffffc, DL, MVT::i32));

  // Load dword
  // TODO: can we be smarter about machine pointer info?
  MachinePointerInfo PtrInfo(AMDGPUAS::PRIVATE_ADDRESS);
  SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);

  // Get offset within the register.
  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
                                LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));

  // Bit offset of target byte (byteIdx * 8).
  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
                                 DAG.getConstant(3, DL, MVT::i32));

  // Shift to the right.
  SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);

  // Eliminate the upper bits by setting them to ...
  EVT MemEltVT = MemVT.getScalarType();

  if (ExtType == ISD::SEXTLOAD) { // ... ones.
    SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
    Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
  } else { // ... or zeros.
    Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
  }

  SDValue Ops[] = {
    Ret,
    Read.getValue(1) // This should be our output chain
  };

  return DAG.getMergeValues(Ops, DL);
}
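
// Worked example (editor's illustration, not part of the original source):
// a zero-extending i8 load from private byte address 5 loads the dword at
// address 4, shifts it right by (5 & 3) * 8 = 8 bits, and then clears
// everything above bit 7, leaving the requested byte in the low bits of an
// i32 result.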

SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
  LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
  unsigned AS = LoadNode->getAddressSpace();
  EVT MemVT = LoadNode->getMemoryVT();
  ISD::LoadExtType ExtType = LoadNode->getExtensionType();

  if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
      ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
    return lowerPrivateExtLoad(Op, DAG);
  }

  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();

  if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
       LoadNode->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
      VT.isVector()) {
    SDValue Ops[2];
    std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
    return DAG.getMergeValues(Ops, DL);
  }

  // This is still used for explicit load from addrspace(8)
  int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
  if (ConstantBlock > -1 &&
      ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
       (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
    SDValue Result;
    if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
        isa<ConstantSDNode>(Ptr)) {
      return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
    }
    // TODO: Does this even work?
    // non-constant ptr can't be folded, keeps it as a v4f32 load
    Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
                         DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                                     DAG.getConstant(4, DL, MVT::i32)),
                         DAG.getConstant(LoadNode->getAddressSpace() -
                                             AMDGPUAS::CONSTANT_BUFFER_0,
                                         DL, MVT::i32));

    if (!VT.isVector()) {
      Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                           DAG.getConstant(0, DL, MVT::i32));
    }

    SDValue MergedValues[2] = {
      Result,
      Chain
    };
    return DAG.getMergeValues(MergedValues, DL);
  }

  // For most operations returning SDValue() will result in the node being
  // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
  // need to manually expand loads that may be legal in some address spaces and
  // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
  // compute shaders, since the data is sign extended when it is uploaded to the
  // buffer. However SEXT loads from other address spaces are not supported, so
  // we need to expand them here.
  if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
    assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
    SDValue NewLoad = DAG.getExtLoad(
        ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
        LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags());
    SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
                              DAG.getValueType(MemVT));

    SDValue MergedValues[2] = { Res, Chain };
    return DAG.getMergeValues(MergedValues, DL);
  }

  if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
    return SDValue();
  }

  // DWORDADDR ISD marks already shifted address
  if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
    assert(VT == MVT::i32);
    Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
                      DAG.getConstant(2, DL, MVT::i32));
    Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
    return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
  }
  return SDValue();
}

SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  SDValue Cond = Op.getOperand(1);
  SDValue Jump = Op.getOperand(2);

  return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
                     Chain, Jump, Cond);
}

SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
                                            SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  const R600FrameLowering *TFL = Subtarget->getFrameLowering();

  FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);

  unsigned FrameIndex = FIN->getIndex();
  Register IgnoredFrameReg;
  StackOffset Offset =
      TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
  return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
                         SDLoc(Op), Op.getValueType());
}

CCAssignFn *R600TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
                                                  bool IsVarArg) const {
  switch (CC) {
  case CallingConv::AMDGPU_KERNEL:
  case CallingConv::SPIR_KERNEL:
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    llvm_unreachable("kernels should not be handled here");
  case CallingConv::AMDGPU_VS:
  case CallingConv::AMDGPU_GS:
  case CallingConv::AMDGPU_PS:
  case CallingConv::AMDGPU_CS:
  case CallingConv::AMDGPU_HS:
  case CallingConv::AMDGPU_ES:
  case CallingConv::AMDGPU_LS:
    return CC_R600;
  default:
    report_fatal_error("Unsupported calling convention.");
  }
}

/// XXX Only kernel functions are supported, so we can assume for now that
/// every function is a kernel function, but in the future we should use
/// separate calling conventions for kernel and non-kernel functions.
SDValue R600TargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
                 *DAG.getContext());
  MachineFunction &MF = DAG.getMachineFunction();

  if (AMDGPU::isShader(CallConv)) {
    CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
  } else {
    analyzeFormalArgumentsCompute(CCInfo, Ins);
  }

  for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
    CCValAssign &VA = ArgLocs[i];
    const ISD::InputArg &In = Ins[i];
    EVT VT = In.VT;
    EVT MemVT = VA.getLocVT();
    if (!VT.isVector() && MemVT.isVector()) {
      // Get load source type if scalarized.
      MemVT = MemVT.getVectorElementType();
    }

    if (AMDGPU::isShader(CallConv)) {
      Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
      SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
      InVals.push_back(Register);
      continue;
    }

    // i64 isn't a legal type, so the register type used ends up as i32, which
    // isn't expected here. It attempts to create this sextload, but it ends up
    // being invalid. Somehow this seems to work with i64 arguments, but breaks
    // for <1 x i64>.

    // The first 36 bytes of the input buffer contains information about
    // thread group and global sizes.
    ISD::LoadExtType Ext = ISD::NON_EXTLOAD;
    if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
      // FIXME: This should really check the extload type, but the handling of
      // extload vector parameters seems to be broken.

      // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
      Ext = ISD::SEXTLOAD;
    }

    // Compute the offset from the value.
    // XXX - I think PartOffset should give you this, but it seems to give the
    // size of the register which isn't useful.

    unsigned PartOffset = VA.getLocMemOffset();
    Align Alignment = commonAlignment(Align(VT.getStoreSize()), PartOffset);

    MachinePointerInfo PtrInfo(AMDGPUAS::PARAM_I_ADDRESS);
    SDValue Arg = DAG.getLoad(
        ISD::UNINDEXED, Ext, VT, DL, Chain,
        DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
        PtrInfo,
        MemVT, Alignment, MachineMemOperand::MONonTemporal |
                              MachineMemOperand::MODereferenceable |
                              MachineMemOperand::MOInvariant);

    InVals.push_back(Arg);
  }
  return Chain;
}

EVT R600TargetLowering::getSetCCResultType(const DataLayout &DL, LLVMContext &,
                                           EVT VT) const {
  if (!VT.isVector())
    return MVT::i32;
  return VT.changeVectorElementTypeToInteger();
}

bool R600TargetLowering::canMergeStoresTo(unsigned AS, EVT MemVT,
                                          const MachineFunction &MF) const {
  // Local and Private addresses do not handle vectors. Limit to i32
  if ((AS == AMDGPUAS::LOCAL_ADDRESS || AS == AMDGPUAS::PRIVATE_ADDRESS)) {
    return (MemVT.getSizeInBits() <= 32);
  }
  return true;
}

bool R600TargetLowering::allowsMisalignedMemoryAccesses(
    EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
    unsigned *IsFast) const {
  if (IsFast)
    *IsFast = 0;

  if (!VT.isSimple() || VT == MVT::Other)
    return false;

  if (VT.bitsLT(MVT::i32))
    return false;

  // TODO: This is a rough estimate.
  if (IsFast)
    *IsFast = 1;

  return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
}

static SDValue CompactSwizzlableVector(
    SelectionDAG &DAG, SDValue VectorEntry,
    DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].isUndef())
      // We mask write here to teach later passes that the ith element of this
      // vector is undef. Thus we can use it to reduce 128-bit register usage,
      // break false dependencies and additionally make assembly easier to read.
      RemapSwizzle[i] = 7; // SEL_MASK_WRITE
    if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
      if (C->isZero()) {
        RemapSwizzle[i] = 4; // SEL_0
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      } else if (C->isExactlyValue(1.0)) {
        RemapSwizzle[i] = 5; // SEL_1
        NewBldVec[i] = DAG.getUNDEF(MVT::f32);
      }
    }

    if (NewBldVec[i].isUndef())
      continue;

    for (unsigned j = 0; j < i; j++) {
      if (NewBldVec[i] == NewBldVec[j]) {
        NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
        RemapSwizzle[i] = j;
        break;
      }
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}

static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry,
                                DenseMap<unsigned, unsigned> &RemapSwizzle) {
  assert(RemapSwizzle.empty());

  SDLoc DL(VectorEntry);
  EVT EltTy = VectorEntry.getValueType().getVectorElementType();

  SDValue NewBldVec[4];
  bool isUnmovable[4] = {false, false, false, false};
  for (unsigned i = 0; i < 4; i++)
    NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
                               DAG.getIntPtrConstant(i, DL));

  for (unsigned i = 0; i < 4; i++) {
    RemapSwizzle[i] = i;
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
      if (i == Idx)
        isUnmovable[Idx] = true;
    }
  }

  for (unsigned i = 0; i < 4; i++) {
    if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
      unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
      if (isUnmovable[Idx])
        continue;
      // Swap i and Idx
      std::swap(NewBldVec[Idx], NewBldVec[i]);
      std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
      break;
    }
  }

  return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
                            NewBldVec);
}
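
// Worked example (editor's illustration, not part of the original source):
// given a build_vector whose lane 0 is extract_vector_elt(V, 1) and lane 1 is
// extract_vector_elt(V, 0), the second loop above swaps the two lanes so each
// element sits at its source position, and records RemapSwizzle = {0 -> 1,
// 1 -> 0} so the caller can patch the swizzle operands accordingly.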

SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
                                            SelectionDAG &DAG,
                                            const SDLoc &DL) const {
  // Old -> New swizzle values
  DenseMap<unsigned, unsigned> SwizzleRemap;

  BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = Swz[i]->getAsZExtVal();
    auto It = SwizzleRemap.find(Idx);
    if (It != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(It->second, DL, MVT::i32);
  }

  SwizzleRemap.clear();
  BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
  for (unsigned i = 0; i < 4; i++) {
    unsigned Idx = Swz[i]->getAsZExtVal();
    auto It = SwizzleRemap.find(Idx);
    if (It != SwizzleRemap.end())
      Swz[i] = DAG.getConstant(It->second, DL, MVT::i32);
  }

  return BuildVector;
}

SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
                                            SelectionDAG &DAG) const {
  SDLoc DL(LoadNode);
  EVT VT = LoadNode->getValueType(0);
  SDValue Chain = LoadNode->getChain();
  SDValue Ptr = LoadNode->getBasePtr();
  assert(isa<ConstantSDNode>(Ptr));

  //TODO: Support smaller loads
  if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 ||
      !ISD::isNON_EXTLoad(LoadNode))
    return SDValue();

  if (LoadNode->getAlign() < Align(4))
    return SDValue();

  int ConstantBlock = ConstantAddressBlock(Block);

  SDValue Slots[4];
  for (unsigned i = 0; i < 4; i++) {
    // We want Const position encoded with the following formula :
    // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
    // const_index is Ptr computed by llvm using an alignment of 16.
    // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
    // then div by 4 at the ISel step
    SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
        DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
    Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
  }
  EVT NewVT = MVT::v4i32;
  unsigned NumElements = 4;
  if (VT.isVector()) {
    NewVT = VT;
    NumElements = VT.getVectorNumElements();
  }
  SDValue Result = DAG.getBuildVector(NewVT, DL, ArrayRef(Slots, NumElements));
  if (!VT.isVector()) {
    Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
                         DAG.getConstant(0, DL, MVT::i32));
  }
  SDValue MergedValues[2] = {
    Result,
    Chain
  };
  return DAG.getMergeValues(MergedValues, DL);
}
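
// Worked example (editor's illustration, not part of the original source):
// for a 16-byte-aligned load at byte offset 32 of CONSTANT_BUFFER_0
// (ConstantBlock = 512), the x channel above is addressed as
// 32 + 4*0 + 512 * 16 = 8224, which ISel later divides by 4 to get 2056 =
// ((512 + 2) << 2) + 0, i.e. the encoding described in the comment above
// with const_index = 32 / 16 = 2 and chan = 0.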

//===----------------------------------------------------------------------===//
// Custom DAG Optimizations
//===----------------------------------------------------------------------===//

SDValue R600TargetLowering::PerformDAGCombine(SDNode *N,
                                              DAGCombinerInfo &DCI) const {
  SelectionDAG &DAG = DCI.DAG;
  SDLoc DL(N);

  switch (N->getOpcode()) {
  // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
  case ISD::FP_ROUND: {
    SDValue Arg = N->getOperand(0);
    if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
      return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
                         Arg.getOperand(0));
    }
    break;
  }

  // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
  // (i32 select_cc f32, f32, -1, 0 cc)
  //
  // Mesa's GLSL frontend generates the above pattern a lot and we can lower
  // this to one of the SET*_DX10 instructions.
  case ISD::FP_TO_SINT: {
    SDValue FNeg = N->getOperand(0);
    if (FNeg.getOpcode() != ISD::FNEG) {
      return SDValue();
    }
    SDValue SelectCC = FNeg.getOperand(0);
    if (SelectCC.getOpcode() != ISD::SELECT_CC ||
        SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
        SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
        !isHWTrueValue(SelectCC.getOperand(2)) ||
        !isHWFalseValue(SelectCC.getOperand(3))) {
      return SDValue();
    }

    return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
                       SelectCC.getOperand(0), // LHS
                       SelectCC.getOperand(1), // RHS
                       DAG.getAllOnesConstant(DL, MVT::i32), // True
                       DAG.getConstant(0, DL, MVT::i32), // False
                       SelectCC.getOperand(4)); // CC
  }

  // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
  // => build_vector elt0, ... , NewEltIdx, ... , eltN
  case ISD::INSERT_VECTOR_ELT: {
    SDValue InVec = N->getOperand(0);
    SDValue InVal = N->getOperand(1);
    SDValue EltNo = N->getOperand(2);

    // If the inserted element is an UNDEF, just use the input vector.
    if (InVal.isUndef())
      return InVec;

    EVT VT = InVec.getValueType();

    // If we can't generate a legal BUILD_VECTOR, exit
    if (!isOperationLegal(ISD::BUILD_VECTOR, VT))
      return SDValue();

    // Check that we know which element is being inserted
    if (!isa<ConstantSDNode>(EltNo))
      return SDValue();
    unsigned Elt = EltNo->getAsZExtVal();

    // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
    // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
    // vector elements.
    SmallVector<SDValue, 8> Ops;
    if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1787 Ops.append(InVec.getNode()->op_begin(),
1788 InVec.getNode()->op_end());
1789 } else if (InVec.isUndef()) {
1790 unsigned NElts = VT.getVectorNumElements();
1791 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1792 } else {
1793 return SDValue();
1794 }
1795
1796 // Insert the element
1797 if (Elt < Ops.size()) {
1798 // All the operands of BUILD_VECTOR must have the same type;
1799 // we enforce that here.
1800 EVT OpVT = Ops[0].getValueType();
1801 if (InVal.getValueType() != OpVT)
1802 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1803 DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1804 DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1805 Ops[Elt] = InVal;
1806 }
1807
1808 // Return the new vector
1809 return DAG.getBuildVector(VT, DL, Ops);
1810 }
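// Illustrative sketch, not part of the original source: with a constant
// index, the combine above folds
//   (insert_vector_elt (build_vector a, b, c, d), x, 2)
// into
//   (build_vector a, b, x, d)
// any-extending or truncating x first if its type differs from that of the
// other build_vector operands.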
1811
1812 // Extract_vec (Build_vector) generated by custom lowering
1813 // also needs to be custom combined here.
1814 case ISD::EXTRACT_VECTOR_ELT: {
1815 SDValue Arg = N->getOperand(0);
1816 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1817 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1818 unsigned Element = Const->getZExtValue();
1819 return Arg->getOperand(Element);
1820 }
1821 }
1822 if (Arg.getOpcode() == ISD::BITCAST &&
1823 Arg.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
1824 (Arg.getOperand(0).getValueType().getVectorNumElements() ==
1825 Arg.getValueType().getVectorNumElements())) {
1826 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1827 unsigned Element = Const->getZExtValue();
1828 return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1829 Arg->getOperand(0).getOperand(Element));
1830 }
1831 }
1832 break;
1833 }
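// Illustrative sketch, not part of the original source: the two patterns
// handled above are
//   (extract_vector_elt (build_vector a, b, c, d), 1)           -> b
//   (extract_vector_elt (bitcast (build_vector a, b, c, d)), 1) -> bitcast(b)
// where the bitcast form is only folded when the cast preserves the element
// count, so element 1 of the result still corresponds to operand 1 of the
// build_vector.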
1834
1835 case ISD::SELECT_CC: {
1836 // Try common optimizations
1837 if (SDValue Ret = AMDGPUTargetLowering::PerformDAGCombine(N, DCI))
1838 return Ret;
1839
1840 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1841 // selectcc x, y, a, b, inv(cc)
1842 //
1843 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1844 // selectcc x, y, a, b, cc
1845 SDValue LHS = N->getOperand(0);
1846 if (LHS.getOpcode() != ISD::SELECT_CC) {
1847 return SDValue();
1848 }
1849
1850 SDValue RHS = N->getOperand(1);
1851 SDValue True = N->getOperand(2);
1852 SDValue False = N->getOperand(3);
1853 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1854
1855 if (LHS.getOperand(2).getNode() != True.getNode() ||
1856 LHS.getOperand(3).getNode() != False.getNode() ||
1857 RHS.getNode() != False.getNode()) {
1858 return SDValue();
1859 }
1860
1861 switch (NCC) {
1862 default: return SDValue();
1863 case ISD::SETNE: return LHS;
1864 case ISD::SETEQ: {
1865 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1866 LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
1867 if (DCI.isBeforeLegalizeOps() ||
1868 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1869 return DAG.getSelectCC(DL,
1870 LHS.getOperand(0),
1871 LHS.getOperand(1),
1872 LHS.getOperand(2),
1873 LHS.getOperand(3),
1874 LHSCC);
1875 break;
1876 }
1877 }
1878 return SDValue();
1879 }
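// Illustrative sketch, not part of the original source: for the seteq case,
// let Inner = (select_cc x, y, a, b, setgt). The node
//   (select_cc Inner, b, a, b, seteq)
// asks "did Inner produce b?", i.e. "was setgt false?", so it folds to
//   (select_cc x, y, a, b, setle)
// using the inverted condition, provided the inverse is legal or we are
// still before legalization.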
1880
1881 case AMDGPUISD::R600_EXPORT: {
1882 SDValue Arg = N->getOperand(1);
1883 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1884 break;
1885
1886 SDValue NewArgs[8] = {
1887 N->getOperand(0), // Chain
1888 SDValue(),
1889 N->getOperand(2), // ArrayBase
1890 N->getOperand(3), // Type
1891 N->getOperand(4), // SWZ_X
1892 N->getOperand(5), // SWZ_Y
1893 N->getOperand(6), // SWZ_Z
1894 N->getOperand(7) // SWZ_W
1895 };
1896 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1897 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1898 }
1899 case AMDGPUISD::TEXTURE_FETCH: {
1900 SDValue Arg = N->getOperand(1);
1901 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1902 break;
1903
1904 SDValue NewArgs[19] = {
1905 N->getOperand(0),
1906 N->getOperand(1),
1907 N->getOperand(2),
1908 N->getOperand(3),
1909 N->getOperand(4),
1910 N->getOperand(5),
1911 N->getOperand(6),
1912 N->getOperand(7),
1913 N->getOperand(8),
1914 N->getOperand(9),
1915 N->getOperand(10),
1916 N->getOperand(11),
1917 N->getOperand(12),
1918 N->getOperand(13),
1919 N->getOperand(14),
1920 N->getOperand(15),
1921 N->getOperand(16),
1922 N->getOperand(17),
1923 N->getOperand(18),
1924 };
1925 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1926 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1927 }
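// Illustrative sketch, not part of the original source: for both R600_EXPORT
// and TEXTURE_FETCH the BUILD_VECTOR operand carries the per-channel values,
// and OptimizeSwizzle rewrites the accompanying swizzle selector operands so
// that duplicated or constant (0.0/1.0) channels are encoded in the swizzle
// rather than occupying additional register lanes.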
1928
1929 case ISD::LOAD: {
1930 LoadSDNode *LoadNode = cast<LoadSDNode>(N);
1931 SDValue Ptr = LoadNode->getBasePtr();
1932 if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1933 isa<ConstantSDNode>(Ptr))
1934 return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1935 break;
1936 }
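// Illustrative sketch, not part of the original source: this matches kernel
// parameter reads with a compile-time-constant address, e.g.
//   (i32 load (Constant:i32<16>) from AddrSpace PARAM_I_ADDRESS)
// and rewrites them as reads from constant buffer 0, which the hardware can
// service through its constant-fetch path.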
1937
1938 default: break;
1939 }
1940
1941 return SDValue();
1942}
1943
1944bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1945 SDValue &Src, SDValue &Neg, SDValue &Abs,
1946 SDValue &Sel, SDValue &Imm,
1947 SelectionDAG &DAG) const {
1948 const R600InstrInfo *TII = Subtarget->getInstrInfo();
1949 if (!Src.isMachineOpcode())
1950 return false;
1951
1952 switch (Src.getMachineOpcode()) {
1953 case R600::FNEG_R600:
1954 if (!Neg.getNode())
1955 return false;
1956 Src = Src.getOperand(0);
1957 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1958 return true;
1959 case R600::FABS_R600:
1960 if (!Abs.getNode())
1961 return false;
1962 Src = Src.getOperand(0);
1963 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1964 return true;
1965 case R600::CONST_COPY: {
1966 unsigned Opcode = ParentNode->getMachineOpcode();
1967 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
1968
1969 if (!Sel.getNode())
1970 return false;
1971
1972 SDValue CstOffset = Src.getOperand(0);
1973 if (ParentNode->getValueType(0).isVector())
1974 return false;
1975
1976 // Gather the constant values already used by this instruction's sources.
1977 int SrcIndices[] = {
1978 TII->getOperandIdx(Opcode, R600::OpName::src0),
1979 TII->getOperandIdx(Opcode, R600::OpName::src1),
1980 TII->getOperandIdx(Opcode, R600::OpName::src2),
1981 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
1982 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
1983 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
1984 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
1985 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
1986 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
1987 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
1988 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
1989 };
1990 std::vector<unsigned> Consts;
1991 for (int OtherSrcIdx : SrcIndices) {
1992 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1993 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1994 continue;
1995 if (HasDst) {
1996 OtherSrcIdx--;
1997 OtherSelIdx--;
1998 }
1999 if (RegisterSDNode *Reg =
2000 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2001 if (Reg->getReg() == R600::ALU_CONST) {
2002 Consts.push_back(ParentNode->getConstantOperandVal(OtherSelIdx));
2003 }
2004 }
2005 }
2006
2007 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2008 Consts.push_back(Cst->getZExtValue());
2009 if (!TII->fitsConstReadLimitations(Consts)) {
2010 return false;
2011 }
2012
2013 Sel = CstOffset;
2014 Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2015 return true;
2016 }
2017 case R600::MOV_IMM_GLOBAL_ADDR:
2018 // Check if the Imm slot is already used; mirrors the check in the MOV_IMM cases below.
2019 if (Imm->getAsZExtVal())
2020 return false;
2021 Imm = Src.getOperand(0);
2022 Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2023 return true;
2024 case R600::MOV_IMM_I32:
2025 case R600::MOV_IMM_F32: {
2026 unsigned ImmReg = R600::ALU_LITERAL_X;
2027 uint64_t ImmValue = 0;
2028
2029 if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2030 ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2031 float FloatValue = FPC->getValueAPF().convertToFloat();
2032 if (FloatValue == 0.0) {
2033 ImmReg = R600::ZERO;
2034 } else if (FloatValue == 0.5) {
2035 ImmReg = R600::HALF;
2036 } else if (FloatValue == 1.0) {
2037 ImmReg = R600::ONE;
2038 } else {
2039 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2040 }
2041 } else {
2042 uint64_t Value = Src.getConstantOperandVal(0);
2043 if (Value == 0) {
2044 ImmReg = R600::ZERO;
2045 } else if (Value == 1) {
2046 ImmReg = R600::ONE_INT;
2047 } else {
2048 ImmValue = Value;
2049 }
2050 }
2051
2052 // Check that we aren't already using an immediate.
2053 // XXX: It's possible for an instruction to have more than one
2054 // immediate operand, but this is not supported yet.
2055 if (ImmReg == R600::ALU_LITERAL_X) {
2056 if (!Imm.getNode())
2057 return false;
2058 ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2059 if (C->getZExtValue())
2060 return false;
2061 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2062 }
2063 Src = DAG.getRegister(ImmReg, MVT::i32);
2064 return true;
2065 }
2066 default:
2067 return false;
2068 }
2069}
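// Illustrative sketch, not part of the original source: given selected
// machine nodes such as
//   t5: f32 = FNEG_R600 t4
//   t6: f32 = MUL_IEEE t5, t2, ...
// FoldOperand replaces the use of t5 in t6 with t4 and sets t6's
// corresponding src neg modifier operand to 1, absorbing the negation into
// the consumer's input modifiers.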
2070
2071/// Fold the instructions after selecting them
2072SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2073 SelectionDAG &DAG) const {
2074 const R600InstrInfo *TII = Subtarget->getInstrInfo();
2075 if (!Node->isMachineOpcode())
2076 return Node;
2077
2078 unsigned Opcode = Node->getMachineOpcode();
2079 SDValue FakeOp;
2080
2081 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2082
2083 if (Opcode == R600::DOT_4) {
2084 int OperandIdx[] = {
2085 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2086 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2087 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2088 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2089 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2090 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2091 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2092 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2093 };
2094 int NegIdx[] = {
2095 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2096 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2097 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2098 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2099 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2100 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2101 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2102 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2103 };
2104 int AbsIdx[] = {
2105 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2106 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2107 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2108 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2109 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2110 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2111 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2112 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2113 };
2114 for (unsigned i = 0; i < 8; i++) {
2115 if (OperandIdx[i] < 0)
2116 return Node;
2117 SDValue &Src = Ops[OperandIdx[i] - 1];
2118 SDValue &Neg = Ops[NegIdx[i] - 1];
2119 SDValue &Abs = Ops[AbsIdx[i] - 1];
2120 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2121 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2122 if (HasDst)
2123 SelIdx--;
2124 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2125 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2126 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2127 }
2128 } else if (Opcode == R600::REG_SEQUENCE) {
2129 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2130 SDValue &Src = Ops[i];
2131 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2132 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2133 }
2134 } else {
2135 if (!TII->hasInstrModifiers(Opcode))
2136 return Node;
2137 int OperandIdx[] = {
2138 TII->getOperandIdx(Opcode, R600::OpName::src0),
2139 TII->getOperandIdx(Opcode, R600::OpName::src1),
2140 TII->getOperandIdx(Opcode, R600::OpName::src2)
2141 };
2142 int NegIdx[] = {
2143 TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2144 TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2145 TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2146 };
2147 int AbsIdx[] = {
2148 TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2149 TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2150 -1
2151 };
2152 for (unsigned i = 0; i < 3; i++) {
2153 if (OperandIdx[i] < 0)
2154 return Node;
2155 SDValue &Src = Ops[OperandIdx[i] - 1];
2156 SDValue &Neg = Ops[NegIdx[i] - 1];
2157 SDValue FakeAbs;
2158 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2159 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2160 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2161 int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2162 if (HasDst) {
2163 SelIdx--;
2164 ImmIdx--;
2165 }
2166 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2167 SDValue &Imm = Ops[ImmIdx];
2168 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2169 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2170 }
2171 }
2172
2173 return Node;
2174}
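// Illustrative note, not part of the original source: PostISelFolding visits
// the source operands of a selected node (the eight per-channel sources of a
// DOT_4, each value operand of a REG_SEQUENCE, or src0..src2 of an ordinary
// ALU instruction) and retries FoldOperand on each; the first successful
// fold rebuilds the machine node with the updated operand list.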
2175
2176TargetLowering::AtomicExpansionKind
2177R600TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
2178 switch (RMW->getOperation()) {
2179 case AtomicRMWInst::Nand:
2180 case AtomicRMWInst::FAdd:
2181 case AtomicRMWInst::FSub:
2182 case AtomicRMWInst::FMax:
2183 case AtomicRMWInst::FMin:
2184 return AtomicExpansionKind::CmpXChg;
2185 case AtomicRMWInst::UIncWrap:
2186 case AtomicRMWInst::UDecWrap:
2187 // FIXME: Cayman at least appears to have instructions for this, but the
2188 // instruction definitions appear to be missing.
2189 return AtomicExpansionKind::CmpXChg;
2190 case AtomicRMWInst::Xchg: {
2191 const DataLayout &DL = RMW->getFunction()->getDataLayout();
2192 unsigned ValSize = DL.getTypeSizeInBits(RMW->getType());
2193 if (ValSize == 32 || ValSize == 64)
2194 return AtomicExpansionKind::None;
2195 return AtomicExpansionKind::CmpXChg;
2196 }
2197 default:
2198 if (auto *IntTy = dyn_cast<IntegerType>(RMW->getType())) {
2199 unsigned Size = IntTy->getBitWidth();
2200 if (Size == 32 || Size == 64)
2201 return AtomicExpansionKind::None;
2202 }
2203
2204 return AtomicExpansionKind::CmpXChg;
2205 }
2206
2207 llvm_unreachable("covered atomicrmw op switch");
2208}
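// Illustrative sketch, not part of the original source: returning CmpXChg
// hands the operation to AtomicExpandPass, which lowers it into a
// compare-exchange loop. For example,
//   %old = atomicrmw nand ptr %p, i32 %v seq_cst
// becomes, roughly,
//   loop:
//     %cur = phi i32 [ %init, %entry ], [ %prev, %loop ]
//     %tmp = and i32 %cur, %v
//     %new = xor i32 %tmp, -1
//     %pair = cmpxchg ptr %p, i32 %cur, i32 %new seq_cst seq_cst
//     %prev = extractvalue { i32, i1 } %pair, 0
//     %ok = extractvalue { i32, i1 } %pair, 1
//     br i1 %ok, label %done, label %loop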