LLVM 19.0.0git
R600ISelLowering.cpp
Go to the documentation of this file.
1//===-- R600ISelLowering.cpp - R600 DAG Lowering Implementation -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// Custom DAG lowering for R600
11//
12//===----------------------------------------------------------------------===//
13
14#include "R600ISelLowering.h"
15#include "AMDGPU.h"
17#include "R600Defines.h"
18#include "R600InstrInfo.h"
20#include "R600Subtarget.h"
21#include "R600TargetMachine.h"
23#include "llvm/IR/IntrinsicsAMDGPU.h"
24#include "llvm/IR/IntrinsicsR600.h"
25
26using namespace llvm;
27
28#include "R600GenCallingConv.inc"
29
31 const R600Subtarget &STI)
32 : AMDGPUTargetLowering(TM, STI), Subtarget(&STI), Gen(STI.getGeneration()) {
33 addRegisterClass(MVT::f32, &R600::R600_Reg32RegClass);
34 addRegisterClass(MVT::i32, &R600::R600_Reg32RegClass);
35 addRegisterClass(MVT::v2f32, &R600::R600_Reg64RegClass);
36 addRegisterClass(MVT::v2i32, &R600::R600_Reg64RegClass);
37 addRegisterClass(MVT::v4f32, &R600::R600_Reg128RegClass);
38 addRegisterClass(MVT::v4i32, &R600::R600_Reg128RegClass);
39
42
44
45 // Legalize loads and stores to the private address space.
46 setOperationAction(ISD::LOAD, {MVT::i32, MVT::v2i32, MVT::v4i32}, Custom);
47
48 // EXTLOAD should be the same as ZEXTLOAD. It is legal for some address
49 // spaces, so it is custom lowered to handle those where it isn't.
51 for (MVT VT : MVT::integer_valuetypes()) {
52 setLoadExtAction(Op, VT, MVT::i1, Promote);
53 setLoadExtAction(Op, VT, MVT::i8, Custom);
54 setLoadExtAction(Op, VT, MVT::i16, Custom);
55 }
56
57 // Workaround for LegalizeDAG asserting on expansion of i1 vector loads.
59 MVT::v2i1, Expand);
60
62 MVT::v4i1, Expand);
63
64 setOperationAction(ISD::STORE, {MVT::i8, MVT::i32, MVT::v2i32, MVT::v4i32},
65 Custom);
66
67 setTruncStoreAction(MVT::i32, MVT::i8, Custom);
68 setTruncStoreAction(MVT::i32, MVT::i16, Custom);
69 // We need to include these since trunc STORES to PRIVATE need
70 // special handling to accommodate RMW
71 setTruncStoreAction(MVT::v2i32, MVT::v2i16, Custom);
72 setTruncStoreAction(MVT::v4i32, MVT::v4i16, Custom);
73 setTruncStoreAction(MVT::v8i32, MVT::v8i16, Custom);
74 setTruncStoreAction(MVT::v16i32, MVT::v16i16, Custom);
75 setTruncStoreAction(MVT::v32i32, MVT::v32i16, Custom);
76 setTruncStoreAction(MVT::v2i32, MVT::v2i8, Custom);
77 setTruncStoreAction(MVT::v4i32, MVT::v4i8, Custom);
78 setTruncStoreAction(MVT::v8i32, MVT::v8i8, Custom);
79 setTruncStoreAction(MVT::v16i32, MVT::v16i8, Custom);
80 setTruncStoreAction(MVT::v32i32, MVT::v32i8, Custom);
81
82 // Workaround for LegalizeDAG asserting on expansion of i1 vector stores.
83 setTruncStoreAction(MVT::v2i32, MVT::v2i1, Expand);
84 setTruncStoreAction(MVT::v4i32, MVT::v4i1, Expand);
85
86 // Set condition code actions
90 MVT::f32, Expand);
91
93 MVT::i32, Expand);
94
96
97 setOperationAction(ISD::SETCC, {MVT::v4i32, MVT::v2i32}, Expand);
98
99 setOperationAction(ISD::BR_CC, {MVT::i32, MVT::f32}, Expand);
101
103
105 MVT::f64, Custom);
106
107 setOperationAction(ISD::SELECT_CC, {MVT::f32, MVT::i32}, Custom);
108
109 setOperationAction(ISD::SETCC, {MVT::i32, MVT::f32}, Expand);
110 setOperationAction({ISD::FP_TO_UINT, ISD::FP_TO_SINT}, {MVT::i1, MVT::i64},
111 Custom);
112
113 setOperationAction(ISD::SELECT, {MVT::i32, MVT::f32, MVT::v2i32, MVT::v4i32},
114 Expand);
115
116 // ADD, SUB overflow.
117 // TODO: turn these into Legal?
118 if (Subtarget->hasCARRY())
120
121 if (Subtarget->hasBORROW())
123
124 // Expand sign extension of vectors
125 if (!Subtarget->hasBFE())
127
128 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i1, MVT::v4i1}, Expand);
129
130 if (!Subtarget->hasBFE())
132 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i8, MVT::v4i8}, Expand);
133
134 if (!Subtarget->hasBFE())
136 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i16, MVT::v4i16}, Expand);
137
139 setOperationAction(ISD::SIGN_EXTEND_INREG, {MVT::v2i32, MVT::v4i32}, Expand);
140
142
144
146 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
147
149 {MVT::v2i32, MVT::v2f32, MVT::v4i32, MVT::v4f32}, Custom);
150
151 // We don't have 64-bit shifts. Thus we need either SHX i64 or SHX_PARTS i32
152 // to be Legal/Custom in order to avoid library calls.
154 Custom);
155
156 if (!Subtarget->hasFMA())
157 setOperationAction(ISD::FMA, {MVT::f32, MVT::f64}, Expand);
158
159 // FIXME: May need no denormals check
161
162 if (!Subtarget->hasBFI())
163 // fcopysign can be done in a single instruction with BFI.
164 setOperationAction(ISD::FCOPYSIGN, {MVT::f32, MVT::f64}, Expand);
165
166 if (!Subtarget->hasBCNT(32))
168
169 if (!Subtarget->hasBCNT(64))
171
172 if (Subtarget->hasFFBH())
174
175 if (Subtarget->hasFFBL())
177
178 // FIXME: This was moved from AMDGPUTargetLowering, I'm not sure if we
179 // need it for R600.
180 if (Subtarget->hasBFE())
182
185
186 const MVT ScalarIntVTs[] = { MVT::i32, MVT::i64 };
187 for (MVT VT : ScalarIntVTs)
189 Expand);
190
191 // LLVM will expand these to atomic_cmp_swap(0)
192 // and atomic_swap, respectively.
194
195 // We need to custom lower some of the intrinsics
197 Custom);
198
200
203}
204
206 if (std::next(I) == I->getParent()->end())
207 return false;
208 return std::next(I)->getOpcode() == R600::RETURN;
209}
210
213 MachineBasicBlock *BB) const {
214 MachineFunction *MF = BB->getParent();
217 const R600InstrInfo *TII = Subtarget->getInstrInfo();
218
219 switch (MI.getOpcode()) {
220 default:
221 // Replace LDS_*_RET instruction that don't have any uses with the
222 // equivalent LDS_*_NORET instruction.
223 if (TII->isLDSRetInstr(MI.getOpcode())) {
224 int DstIdx = TII->getOperandIdx(MI.getOpcode(), R600::OpName::dst);
225 assert(DstIdx != -1);
227 // FIXME: getLDSNoRetOp method only handles LDS_1A1D LDS ops. Add
228 // LDS_1A2D support and remove this special case.
229 if (!MRI.use_empty(MI.getOperand(DstIdx).getReg()) ||
230 MI.getOpcode() == R600::LDS_CMPST_RET)
231 return BB;
232
233 NewMI = BuildMI(*BB, I, BB->findDebugLoc(I),
234 TII->get(R600::getLDSNoRetOp(MI.getOpcode())));
235 for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
236 NewMI.add(MO);
237 } else {
239 }
240 break;
241
242 case R600::FABS_R600: {
243 MachineInstr *NewMI = TII->buildDefaultInstruction(
244 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
245 MI.getOperand(1).getReg());
246 TII->addFlag(*NewMI, 0, MO_FLAG_ABS);
247 break;
248 }
249
250 case R600::FNEG_R600: {
251 MachineInstr *NewMI = TII->buildDefaultInstruction(
252 *BB, I, R600::MOV, MI.getOperand(0).getReg(),
253 MI.getOperand(1).getReg());
254 TII->addFlag(*NewMI, 0, MO_FLAG_NEG);
255 break;
256 }
257
258 case R600::MASK_WRITE: {
259 Register maskedRegister = MI.getOperand(0).getReg();
260 assert(maskedRegister.isVirtual());
261 MachineInstr * defInstr = MRI.getVRegDef(maskedRegister);
262 TII->addFlag(*defInstr, 0, MO_FLAG_MASK);
263 break;
264 }
265
266 case R600::MOV_IMM_F32:
267 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(), MI.getOperand(1)
268 .getFPImm()
269 ->getValueAPF()
270 .bitcastToAPInt()
271 .getZExtValue());
272 break;
273
274 case R600::MOV_IMM_I32:
275 TII->buildMovImm(*BB, I, MI.getOperand(0).getReg(),
276 MI.getOperand(1).getImm());
277 break;
278
279 case R600::MOV_IMM_GLOBAL_ADDR: {
280 //TODO: Perhaps combine this instruction with the next if possible
281 auto MIB = TII->buildDefaultInstruction(
282 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_LITERAL_X);
283 int Idx = TII->getOperandIdx(*MIB, R600::OpName::literal);
284 //TODO: Ugh this is rather ugly
285 const MachineOperand &MO = MI.getOperand(1);
286 MIB->getOperand(Idx).ChangeToGA(MO.getGlobal(), MO.getOffset(),
287 MO.getTargetFlags());
288 break;
289 }
290
291 case R600::CONST_COPY: {
292 MachineInstr *NewMI = TII->buildDefaultInstruction(
293 *BB, MI, R600::MOV, MI.getOperand(0).getReg(), R600::ALU_CONST);
294 TII->setImmOperand(*NewMI, R600::OpName::src0_sel,
295 MI.getOperand(1).getImm());
296 break;
297 }
298
299 case R600::RAT_WRITE_CACHELESS_32_eg:
300 case R600::RAT_WRITE_CACHELESS_64_eg:
301 case R600::RAT_WRITE_CACHELESS_128_eg:
302 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
303 .add(MI.getOperand(0))
304 .add(MI.getOperand(1))
305 .addImm(isEOP(I)); // Set End of program bit
306 break;
307
308 case R600::RAT_STORE_TYPED_eg:
309 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
310 .add(MI.getOperand(0))
311 .add(MI.getOperand(1))
312 .add(MI.getOperand(2))
313 .addImm(isEOP(I)); // Set End of program bit
314 break;
315
316 case R600::BRANCH:
317 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP))
318 .add(MI.getOperand(0));
319 break;
320
321 case R600::BRANCH_COND_f32: {
322 MachineInstr *NewMI =
323 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
324 R600::PREDICATE_BIT)
325 .add(MI.getOperand(1))
326 .addImm(R600::PRED_SETNE)
327 .addImm(0); // Flags
328 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
329 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
330 .add(MI.getOperand(0))
331 .addReg(R600::PREDICATE_BIT, RegState::Kill);
332 break;
333 }
334
335 case R600::BRANCH_COND_i32: {
336 MachineInstr *NewMI =
337 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::PRED_X),
338 R600::PREDICATE_BIT)
339 .add(MI.getOperand(1))
340 .addImm(R600::PRED_SETNE_INT)
341 .addImm(0); // Flags
342 TII->addFlag(*NewMI, 0, MO_FLAG_PUSH);
343 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(R600::JUMP_COND))
344 .add(MI.getOperand(0))
345 .addReg(R600::PREDICATE_BIT, RegState::Kill);
346 break;
347 }
348
349 case R600::EG_ExportSwz:
350 case R600::R600_ExportSwz: {
351 // Instruction is left unmodified if its not the last one of its type
352 bool isLastInstructionOfItsType = true;
353 unsigned InstExportType = MI.getOperand(1).getImm();
354 for (MachineBasicBlock::iterator NextExportInst = std::next(I),
355 EndBlock = BB->end(); NextExportInst != EndBlock;
356 NextExportInst = std::next(NextExportInst)) {
357 if (NextExportInst->getOpcode() == R600::EG_ExportSwz ||
358 NextExportInst->getOpcode() == R600::R600_ExportSwz) {
359 unsigned CurrentInstExportType = NextExportInst->getOperand(1)
360 .getImm();
361 if (CurrentInstExportType == InstExportType) {
362 isLastInstructionOfItsType = false;
363 break;
364 }
365 }
366 }
367 bool EOP = isEOP(I);
368 if (!EOP && !isLastInstructionOfItsType)
369 return BB;
370 unsigned CfInst = (MI.getOpcode() == R600::EG_ExportSwz) ? 84 : 40;
371 BuildMI(*BB, I, BB->findDebugLoc(I), TII->get(MI.getOpcode()))
372 .add(MI.getOperand(0))
373 .add(MI.getOperand(1))
374 .add(MI.getOperand(2))
375 .add(MI.getOperand(3))
376 .add(MI.getOperand(4))
377 .add(MI.getOperand(5))
378 .add(MI.getOperand(6))
379 .addImm(CfInst)
380 .addImm(EOP);
381 break;
382 }
383 case R600::RETURN: {
384 return BB;
385 }
386 }
387
388 MI.eraseFromParent();
389 return BB;
390}
391
392//===----------------------------------------------------------------------===//
393// Custom DAG Lowering Operations
394//===----------------------------------------------------------------------===//
395
399 switch (Op.getOpcode()) {
400 default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
401 case ISD::EXTRACT_VECTOR_ELT: return LowerEXTRACT_VECTOR_ELT(Op, DAG);
402 case ISD::INSERT_VECTOR_ELT: return LowerINSERT_VECTOR_ELT(Op, DAG);
403 case ISD::SHL_PARTS:
404 case ISD::SRA_PARTS:
405 case ISD::SRL_PARTS: return LowerShiftParts(Op, DAG);
406 case ISD::UADDO: return LowerUADDSUBO(Op, DAG, ISD::ADD, AMDGPUISD::CARRY);
407 case ISD::USUBO: return LowerUADDSUBO(Op, DAG, ISD::SUB, AMDGPUISD::BORROW);
408 case ISD::FCOS:
409 case ISD::FSIN: return LowerTrig(Op, DAG);
410 case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
411 case ISD::STORE: return LowerSTORE(Op, DAG);
412 case ISD::LOAD: {
413 SDValue Result = LowerLOAD(Op, DAG);
414 assert((!Result.getNode() ||
415 Result.getNode()->getNumValues() == 2) &&
416 "Load should return a value and a chain");
417 return Result;
418 }
419
420 case ISD::BRCOND: return LowerBRCOND(Op, DAG);
421 case ISD::GlobalAddress: return LowerGlobalAddress(MFI, Op, DAG);
422 case ISD::FrameIndex: return lowerFrameIndex(Op, DAG);
424 return lowerADDRSPACECAST(Op, DAG);
425 case ISD::INTRINSIC_VOID: {
426 SDValue Chain = Op.getOperand(0);
427 unsigned IntrinsicID = Op.getConstantOperandVal(1);
428 switch (IntrinsicID) {
429 case Intrinsic::r600_store_swizzle: {
430 SDLoc DL(Op);
431 const SDValue Args[8] = {
432 Chain,
433 Op.getOperand(2), // Export Value
434 Op.getOperand(3), // ArrayBase
435 Op.getOperand(4), // Type
436 DAG.getConstant(0, DL, MVT::i32), // SWZ_X
437 DAG.getConstant(1, DL, MVT::i32), // SWZ_Y
438 DAG.getConstant(2, DL, MVT::i32), // SWZ_Z
439 DAG.getConstant(3, DL, MVT::i32) // SWZ_W
440 };
441 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, Op.getValueType(), Args);
442 }
443
444 // default for switch(IntrinsicID)
445 default: break;
446 }
447 // break out of case ISD::INTRINSIC_VOID in switch(Op.getOpcode())
448 break;
449 }
451 unsigned IntrinsicID = Op.getConstantOperandVal(0);
452 EVT VT = Op.getValueType();
453 SDLoc DL(Op);
454 switch (IntrinsicID) {
455 case Intrinsic::r600_tex:
456 case Intrinsic::r600_texc: {
457 unsigned TextureOp;
458 switch (IntrinsicID) {
459 case Intrinsic::r600_tex:
460 TextureOp = 0;
461 break;
462 case Intrinsic::r600_texc:
463 TextureOp = 1;
464 break;
465 default:
466 llvm_unreachable("unhandled texture operation");
467 }
468
469 SDValue TexArgs[19] = {
470 DAG.getConstant(TextureOp, DL, MVT::i32),
471 Op.getOperand(1),
472 DAG.getConstant(0, DL, MVT::i32),
473 DAG.getConstant(1, DL, MVT::i32),
474 DAG.getConstant(2, DL, MVT::i32),
475 DAG.getConstant(3, DL, MVT::i32),
476 Op.getOperand(2),
477 Op.getOperand(3),
478 Op.getOperand(4),
479 DAG.getConstant(0, DL, MVT::i32),
480 DAG.getConstant(1, DL, MVT::i32),
481 DAG.getConstant(2, DL, MVT::i32),
482 DAG.getConstant(3, DL, MVT::i32),
483 Op.getOperand(5),
484 Op.getOperand(6),
485 Op.getOperand(7),
486 Op.getOperand(8),
487 Op.getOperand(9),
488 Op.getOperand(10)
489 };
490 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, MVT::v4f32, TexArgs);
491 }
492 case Intrinsic::r600_dot4: {
493 SDValue Args[8] = {
494 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
495 DAG.getConstant(0, DL, MVT::i32)),
496 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
497 DAG.getConstant(0, DL, MVT::i32)),
498 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
499 DAG.getConstant(1, DL, MVT::i32)),
500 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
501 DAG.getConstant(1, DL, MVT::i32)),
502 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
503 DAG.getConstant(2, DL, MVT::i32)),
504 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
505 DAG.getConstant(2, DL, MVT::i32)),
506 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(1),
507 DAG.getConstant(3, DL, MVT::i32)),
508 DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, Op.getOperand(2),
509 DAG.getConstant(3, DL, MVT::i32))
510 };
511 return DAG.getNode(AMDGPUISD::DOT4, DL, MVT::f32, Args);
512 }
513
514 case Intrinsic::r600_implicitarg_ptr: {
517 return DAG.getConstant(ByteOffset, DL, PtrVT);
518 }
519 case Intrinsic::r600_read_ngroups_x:
520 return LowerImplicitParameter(DAG, VT, DL, 0);
521 case Intrinsic::r600_read_ngroups_y:
522 return LowerImplicitParameter(DAG, VT, DL, 1);
523 case Intrinsic::r600_read_ngroups_z:
524 return LowerImplicitParameter(DAG, VT, DL, 2);
525 case Intrinsic::r600_read_global_size_x:
526 return LowerImplicitParameter(DAG, VT, DL, 3);
527 case Intrinsic::r600_read_global_size_y:
528 return LowerImplicitParameter(DAG, VT, DL, 4);
529 case Intrinsic::r600_read_global_size_z:
530 return LowerImplicitParameter(DAG, VT, DL, 5);
531 case Intrinsic::r600_read_local_size_x:
532 return LowerImplicitParameter(DAG, VT, DL, 6);
533 case Intrinsic::r600_read_local_size_y:
534 return LowerImplicitParameter(DAG, VT, DL, 7);
535 case Intrinsic::r600_read_local_size_z:
536 return LowerImplicitParameter(DAG, VT, DL, 8);
537
538 case Intrinsic::r600_read_tgid_x:
539 case Intrinsic::amdgcn_workgroup_id_x:
540 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
541 R600::T1_X, VT);
542 case Intrinsic::r600_read_tgid_y:
543 case Intrinsic::amdgcn_workgroup_id_y:
544 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
545 R600::T1_Y, VT);
546 case Intrinsic::r600_read_tgid_z:
547 case Intrinsic::amdgcn_workgroup_id_z:
548 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
549 R600::T1_Z, VT);
550 case Intrinsic::r600_read_tidig_x:
551 case Intrinsic::amdgcn_workitem_id_x:
552 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
553 R600::T0_X, VT);
554 case Intrinsic::r600_read_tidig_y:
555 case Intrinsic::amdgcn_workitem_id_y:
556 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
557 R600::T0_Y, VT);
558 case Intrinsic::r600_read_tidig_z:
559 case Intrinsic::amdgcn_workitem_id_z:
560 return CreateLiveInRegisterRaw(DAG, &R600::R600_TReg32RegClass,
561 R600::T0_Z, VT);
562
563 case Intrinsic::r600_recipsqrt_ieee:
564 return DAG.getNode(AMDGPUISD::RSQ, DL, VT, Op.getOperand(1));
565
566 case Intrinsic::r600_recipsqrt_clamped:
567 return DAG.getNode(AMDGPUISD::RSQ_CLAMP, DL, VT, Op.getOperand(1));
568 default:
569 return Op;
570 }
571
572 // break out of case ISD::INTRINSIC_WO_CHAIN in switch(Op.getOpcode())
573 break;
574 }
575 } // end switch(Op.getOpcode())
576 return SDValue();
577}
578
581 SelectionDAG &DAG) const {
582 switch (N->getOpcode()) {
583 default:
585 return;
586 case ISD::FP_TO_UINT:
587 if (N->getValueType(0) == MVT::i1) {
588 Results.push_back(lowerFP_TO_UINT(N->getOperand(0), DAG));
589 return;
590 }
591 // Since we don't care about out of bounds values we can use FP_TO_SINT for
592 // uints too. The DAGLegalizer code for uint considers some extra cases
593 // which are not necessary here.
594 [[fallthrough]];
595 case ISD::FP_TO_SINT: {
596 if (N->getValueType(0) == MVT::i1) {
597 Results.push_back(lowerFP_TO_SINT(N->getOperand(0), DAG));
598 return;
599 }
600
601 SDValue Result;
602 if (expandFP_TO_SINT(N, Result, DAG))
603 Results.push_back(Result);
604 return;
605 }
606 case ISD::SDIVREM: {
607 SDValue Op = SDValue(N, 1);
608 SDValue RES = LowerSDIVREM(Op, DAG);
609 Results.push_back(RES);
610 Results.push_back(RES.getValue(1));
611 break;
612 }
613 case ISD::UDIVREM: {
614 SDValue Op = SDValue(N, 0);
616 break;
617 }
618 }
619}
620
/// Rebuild \p Vector as an AMDGPUISD::BUILD_VERTICAL_VECTOR node.
///
/// Every element of \p Vector is pulled out with EXTRACT_VECTOR_ELT and the
/// extracted elements become the operands of the new node.
///
/// \returns the BUILD_VERTICAL_VECTOR node; it has the same value type as
/// \p Vector.
621SDValue R600TargetLowering::vectorToVerticalVector(SelectionDAG &DAG,
622 SDValue Vector) const {
623 SDLoc DL(Vector);
624 EVT VecVT = Vector.getValueType();
625 EVT EltVT = VecVT.getVectorElementType();
// NOTE(review): `Args` is used below but its declaration is not visible in
// this listing (the listing numbering skips 626) -- confirm against the
// upstream file.
627
// Collect one EXTRACT_VECTOR_ELT per element, in index order.
628 for (unsigned i = 0, e = VecVT.getVectorNumElements(); i != e; ++i) {
629 Args.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vector,
630 DAG.getVectorIdxConstant(i, DL)));
631 }
632
633 return DAG.getNode(AMDGPUISD::BUILD_VERTICAL_VECTOR, DL, VecVT, Args);
634}
635
/// Custom lowering for EXTRACT_VECTOR_ELT.
///
/// When the index operand is a constant the node is returned unchanged.
/// Otherwise the source vector is converted with vectorToVerticalVector()
/// and a new EXTRACT_VECTOR_ELT is emitted on the converted vector.
636SDValue R600TargetLowering::LowerEXTRACT_VECTOR_ELT(SDValue Op,
637 SelectionDAG &DAG) const {
638 SDLoc DL(Op);
639 SDValue Vector = Op.getOperand(0);
640 SDValue Index = Op.getOperand(1);
641
// NOTE(review): the right-hand operand of this `||` is not visible in this
// listing (the numbering skips 643) -- confirm the second early-out
// condition against the upstream file.
642 if (isa<ConstantSDNode>(Index) ||
644 return Op;
645
// Dynamic index: extract from the vertical form of the vector instead.
646 Vector = vectorToVerticalVector(DAG, Vector);
647 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, Op.getValueType(),
648 Vector, Index);
649}
650
/// Custom lowering for INSERT_VECTOR_ELT.
///
/// When the index operand is a constant the node is returned unchanged.
/// Otherwise the source vector is converted with vectorToVerticalVector(),
/// the insert is re-emitted on the converted vector, and the result of that
/// insert is passed through vectorToVerticalVector() once more.
651SDValue R600TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
652 SelectionDAG &DAG) const {
653 SDLoc DL(Op);
654 SDValue Vector = Op.getOperand(0);
655 SDValue Value = Op.getOperand(1);
656 SDValue Index = Op.getOperand(2);
657
// NOTE(review): the right-hand operand of this `||` is not visible in this
// listing (the numbering skips 659) -- confirm the second early-out
// condition against the upstream file.
658 if (isa<ConstantSDNode>(Index) ||
660 return Op;
661
// Dynamic index: perform the insert on the vertical form of the vector.
662 Vector = vectorToVerticalVector(DAG, Vector);
663 SDValue Insert = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, Op.getValueType(),
664 Vector, Value, Index);
665 return vectorToVerticalVector(DAG, Insert);
666}
667
/// Lower a GlobalAddress node.
///
/// In the visible path the global is wrapped in a target global address and
/// returned as the operand of an AMDGPUISD::CONST_DATA_PTR node.
///
/// NOTE(review): several lines of this function are not visible in this
/// listing (the numbering skips 672-673 and 677), so any earlier exit and
/// the definition of `ConstPtrVT` cannot be described from here -- confirm
/// against the upstream file.
668SDValue R600TargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI,
669 SDValue Op,
670 SelectionDAG &DAG) const {
671 GlobalAddressSDNode *GSD = cast<GlobalAddressSDNode>(Op);
674
675 const DataLayout &DL = DAG.getDataLayout();
676 const GlobalValue *GV = GSD->getGlobal();
678
// Both the target global address and the wrapping node use ConstPtrVT.
679 SDValue GA = DAG.getTargetGlobalAddress(GV, SDLoc(GSD), ConstPtrVT);
680 return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, SDLoc(GSD), ConstPtrVT, GA);
681}
682
683SDValue R600TargetLowering::LowerTrig(SDValue Op, SelectionDAG &DAG) const {
684 // On hw >= R700, COS/SIN input must be between -1. and 1.
685 // Thus we lower them to TRIG ( FRACT ( x / 2Pi + 0.5) - 0.5)
686 EVT VT = Op.getValueType();
687 SDValue Arg = Op.getOperand(0);
688 SDLoc DL(Op);
689
690 // TODO: Should this propagate fast-math-flags?
691 SDValue FractPart = DAG.getNode(AMDGPUISD::FRACT, DL, VT,
692 DAG.getNode(ISD::FADD, DL, VT,
693 DAG.getNode(ISD::FMUL, DL, VT, Arg,
694 DAG.getConstantFP(0.15915494309, DL, MVT::f32)),
695 DAG.getConstantFP(0.5, DL, MVT::f32)));
696 unsigned TrigNode;
697 switch (Op.getOpcode()) {
698 case ISD::FCOS:
699 TrigNode = AMDGPUISD::COS_HW;
700 break;
701 case ISD::FSIN:
702 TrigNode = AMDGPUISD::SIN_HW;
703 break;
704 default:
705 llvm_unreachable("Wrong trig opcode");
706 }
707 SDValue TrigVal = DAG.getNode(TrigNode, DL, VT,
708 DAG.getNode(ISD::FADD, DL, VT, FractPart,
709 DAG.getConstantFP(-0.5, DL, MVT::f32)));
710 if (Gen >= AMDGPUSubtarget::R700)
711 return TrigVal;
712 // On R600 hw, COS/SIN input must be between -Pi and Pi.
713 return DAG.getNode(ISD::FMUL, DL, VT, TrigVal,
714 DAG.getConstantFP(numbers::pif, DL, MVT::f32));
715}
716
717SDValue R600TargetLowering::LowerShiftParts(SDValue Op,
718 SelectionDAG &DAG) const {
719 SDValue Lo, Hi;
720 expandShiftParts(Op.getNode(), Lo, Hi, DAG);
721 return DAG.getMergeValues({Lo, Hi}, SDLoc(Op));
722}
723
724SDValue R600TargetLowering::LowerUADDSUBO(SDValue Op, SelectionDAG &DAG,
725 unsigned mainop, unsigned ovf) const {
726 SDLoc DL(Op);
727 EVT VT = Op.getValueType();
728
729 SDValue Lo = Op.getOperand(0);
730 SDValue Hi = Op.getOperand(1);
731
732 SDValue OVF = DAG.getNode(ovf, DL, VT, Lo, Hi);
733 // Extend sign.
734 OVF = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, OVF,
735 DAG.getValueType(MVT::i1));
736
737 SDValue Res = DAG.getNode(mainop, DL, VT, Lo, Hi);
738
739 return DAG.getNode(ISD::MERGE_VALUES, DL, DAG.getVTList(VT, VT), Res, OVF);
740}
741
/// Lower an FP_TO_UINT whose result type is i1.
///
/// Builds a single i1-typed node whose operands include \p Op and the f32
/// constant 1.0.
///
/// NOTE(review): the opcode argument and the trailing argument of the
/// getNode call are not visible in this listing (the numbering skips 745
/// and 749) -- confirm the node kind and remaining operand against the
/// upstream file.
742SDValue R600TargetLowering::lowerFP_TO_UINT(SDValue Op, SelectionDAG &DAG) const {
743 SDLoc DL(Op);
744 return DAG.getNode(
746 DL,
747 MVT::i1,
748 Op, DAG.getConstantFP(1.0f, DL, MVT::f32),
750}
751
/// Lower an FP_TO_SINT whose result type is i1.
///
/// Builds a single i1-typed node whose operands include \p Op and the f32
/// constant -1.0.
///
/// NOTE(review): the opcode argument and the trailing argument of the
/// getNode call are not visible in this listing (the numbering skips 755
/// and 759) -- confirm the node kind and remaining operand against the
/// upstream file.
752SDValue R600TargetLowering::lowerFP_TO_SINT(SDValue Op, SelectionDAG &DAG) const {
753 SDLoc DL(Op);
754 return DAG.getNode(
756 DL,
757 MVT::i1,
758 Op, DAG.getConstantFP(-1.0f, DL, MVT::f32),
760}
761
/// Emit a load of an implicit parameter.
///
/// \param VT          value type of the loaded parameter.
/// \param DL          debug location attached to the created nodes.
/// \param DwordOffset index of the parameter in 32-bit units; it is
///                    multiplied by 4 to form the byte offset that is used
///                    as the load address.
///
/// \returns a load of type \p VT, chained on the DAG entry node, from the
/// i32 constant address `DwordOffset * 4`.
762SDValue R600TargetLowering::LowerImplicitParameter(SelectionDAG &DAG, EVT VT,
763 const SDLoc &DL,
764 unsigned DwordOffset) const {
765 unsigned ByteOffset = DwordOffset * 4;
// NOTE(review): lines are missing from this listing here (the numbering
// skips 766-767) -- confirm against the upstream file.
768
769 // We shouldn't be using an offset wider than 16-bits for implicit parameters.
770 assert(isInt<16>(ByteOffset));
771
// NOTE(review): the final argument of this getLoad call is not visible in
// this listing (the numbering skips 774) -- confirm against the upstream
// file.
772 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
773 DAG.getConstant(ByteOffset, DL, MVT::i32), // PTR
775}
776
777bool R600TargetLowering::isZero(SDValue Op) const {
778 if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
779 return Cst->isZero();
780 } else if(ConstantFPSDNode *CstFP = dyn_cast<ConstantFPSDNode>(Op)){
781 return CstFP->isZero();
782 } else {
783 return false;
784 }
785}
786
787bool R600TargetLowering::isHWTrueValue(SDValue Op) const {
788 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
789 return CFP->isExactlyValue(1.0);
790 }
791 return isAllOnesConstant(Op);
792}
793
794bool R600TargetLowering::isHWFalseValue(SDValue Op) const {
795 if (ConstantFPSDNode * CFP = dyn_cast<ConstantFPSDNode>(Op)) {
796 return CFP->getValueAPF().isZero();
797 }
798 return isNullConstant(Op);
799}
800
/// Custom lowering for SELECT_CC.
///
/// Operands of \p Op are (LHS, RHS, True, False, CC). The function tries, in
/// order:
///  1. for an f32 result, combineFMinMaxLegacy();
///  2. a form with hardware true/false values as the selected operands
///     (SET* pattern), inverting and/or swapping the condition if needed;
///  3. a form that compares against zero on the RHS (CND* pattern),
///     bitcasting True/False to the compare type when the types differ;
///  4. otherwise, a pair of SELECT_CC nodes: one that materialises the
///     comparison as a hardware true/false value, and one that selects
///     True/False from that value.
801SDValue R600TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
802 SDLoc DL(Op);
803 EVT VT = Op.getValueType();
804
805 SDValue LHS = Op.getOperand(0);
806 SDValue RHS = Op.getOperand(1);
807 SDValue True = Op.getOperand(2);
808 SDValue False = Op.getOperand(3);
809 SDValue CC = Op.getOperand(4);
810 SDValue Temp;
811
// For an f32 result, give the legacy min/max combine the first chance; a
// non-null result is returned as-is.
812 if (VT == MVT::f32) {
813 DAGCombinerInfo DCI(DAG, AfterLegalizeVectorOps, true, nullptr);
814 SDValue MinMax = combineFMinMaxLegacy(DL, VT, LHS, RHS, True, False, CC, DCI);
815 if (MinMax)
816 return MinMax;
817 }
818
819 // LHS and RHS are guaranteed to be the same value type
820 EVT CompareVT = LHS.getValueType();
821
822 // Check if we can lower this to a native operation.
823
824 // Try to lower to a SET* instruction:
825 //
826 // SET* can match the following patterns:
827 //
828 // select_cc f32, f32, -1, 0, cc_supported
829 // select_cc f32, f32, 1.0f, 0.0f, cc_supported
830 // select_cc i32, i32, -1, 0, cc_supported
831 //
832
833 // Move hardware True/False values to the correct operand.
834 if (isHWTrueValue(False) && isHWFalseValue(True)) {
835 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
836 ISD::CondCode InverseCC = ISD::getSetCCInverse(CCOpcode, CompareVT);
837 if (isCondCodeLegal(InverseCC, CompareVT.getSimpleVT())) {
838 std::swap(False, True);
839 CC = DAG.getCondCode(InverseCC);
840 } else {
// The plain inverse is not legal; try the inverse with swapped compare
// operands. If that is not legal either, nothing is changed.
841 ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InverseCC);
842 if (isCondCodeLegal(SwapInvCC, CompareVT.getSimpleVT())) {
843 std::swap(False, True);
844 std::swap(LHS, RHS);
845 CC = DAG.getCondCode(SwapInvCC);
846 }
847 }
848 }
849
850 if (isHWTrueValue(True) && isHWFalseValue(False) &&
851 (CompareVT == VT || VT == MVT::i32)) {
852 // This can be matched by a SET* instruction.
853 return DAG.getNode(ISD::SELECT_CC, DL, VT, LHS, RHS, True, False, CC);
854 }
855
856 // Try to lower to a CND* instruction:
857 //
858 // CND* can match the following patterns:
859 //
860 // select_cc f32, 0.0, f32, f32, cc_supported
861 // select_cc f32, 0.0, i32, i32, cc_supported
862 // select_cc i32, 0, f32, f32, cc_supported
863 // select_cc i32, 0, i32, i32, cc_supported
864 //
865
866 // Try to move the zero value to the RHS
867 if (isZero(LHS)) {
868 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
869 // Try swapping the operands
870 ISD::CondCode CCSwapped = ISD::getSetCCSwappedOperands(CCOpcode);
871 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
872 std::swap(LHS, RHS);
873 CC = DAG.getCondCode(CCSwapped);
874 } else {
875 // Try inverting the condition and then swapping the operands
876 ISD::CondCode CCInv = ISD::getSetCCInverse(CCOpcode, CompareVT);
877 CCSwapped = ISD::getSetCCSwappedOperands(CCInv);
878 if (isCondCodeLegal(CCSwapped, CompareVT.getSimpleVT())) {
879 std::swap(True, False);
880 std::swap(LHS, RHS);
881 CC = DAG.getCondCode(CCSwapped);
882 }
883 }
884 }
885 if (isZero(RHS)) {
886 SDValue Cond = LHS;
887 SDValue Zero = RHS;
888 ISD::CondCode CCOpcode = cast<CondCodeSDNode>(CC)->get();
889 if (CompareVT != VT) {
890 // Bitcast True / False to the correct types. This will end up being
891 // a nop, but it allows us to define only a single pattern in the
892 // .TD files for each CND* instruction rather than having to have
893 // one pattern for integer True/False and one for fp True/False
894 True = DAG.getNode(ISD::BITCAST, DL, CompareVT, True);
895 False = DAG.getNode(ISD::BITCAST, DL, CompareVT, False);
896 }
897
// For the not-equal condition codes, invert the condition and exchange
// True/False so the emitted node carries the inverted code instead.
898 switch (CCOpcode) {
899 case ISD::SETONE:
900 case ISD::SETUNE:
901 case ISD::SETNE:
902 CCOpcode = ISD::getSetCCInverse(CCOpcode, CompareVT);
903 Temp = True;
904 True = False;
905 False = Temp;
906 break;
907 default:
908 break;
909 }
// The select is built in the compare type and bitcast back to VT.
910 SDValue SelectNode = DAG.getNode(ISD::SELECT_CC, DL, CompareVT,
911 Cond, Zero,
912 True, False,
913 DAG.getCondCode(CCOpcode));
914 return DAG.getNode(ISD::BITCAST, DL, VT, SelectNode);
915 }
916
917 // If we make it this far it means we have no native instructions to handle
918 // this SELECT_CC, so we must lower it.
919 SDValue HWTrue, HWFalse;
920
921 if (CompareVT == MVT::f32) {
922 HWTrue = DAG.getConstantFP(1.0f, DL, CompareVT);
923 HWFalse = DAG.getConstantFP(0.0f, DL, CompareVT);
924 } else if (CompareVT == MVT::i32) {
925 HWTrue = DAG.getConstant(-1, DL, CompareVT);
926 HWFalse = DAG.getConstant(0, DL, CompareVT);
927 }
928 else {
929 llvm_unreachable("Unhandled value type in LowerSELECT_CC");
930 }
931
932 // Lower this unsupported SELECT_CC into a combination of two supported
933 // SELECT_CC operations.
934 SDValue Cond = DAG.getNode(ISD::SELECT_CC, DL, CompareVT, LHS, RHS, HWTrue, HWFalse, CC);
935
// NOTE(review): the final argument of the getNode call below is not visible
// in this listing (the numbering skips 939); by analogy with the other
// SELECT_CC nodes built above it is presumably a condition code -- confirm
// against the upstream file.
936 return DAG.getNode(ISD::SELECT_CC, DL, VT,
937 Cond, HWFalse,
938 True, False,
940}
941
942SDValue R600TargetLowering::lowerADDRSPACECAST(SDValue Op,
943 SelectionDAG &DAG) const {
944 SDLoc SL(Op);
945 EVT VT = Op.getValueType();
946
947 const R600TargetMachine &TM =
948 static_cast<const R600TargetMachine &>(getTargetMachine());
949
950 const AddrSpaceCastSDNode *ASC = cast<AddrSpaceCastSDNode>(Op);
951 unsigned SrcAS = ASC->getSrcAddressSpace();
952 unsigned DestAS = ASC->getDestAddressSpace();
953
954 if (isNullConstant(Op.getOperand(0)) && SrcAS == AMDGPUAS::FLAT_ADDRESS)
955 return DAG.getConstant(TM.getNullPointerValue(DestAS), SL, VT);
956
957 return Op;
958}
959
960/// LLVM generates byte-addressed pointers. For indirect addressing, we need to
961/// convert these pointers to a register index. Each register holds
962/// 16 bytes, (4 x 32bit sub-register), but we need to take into account the
963/// \p StackWidth, which tells us how many of the 4 sub-registers will be used
964/// for indirect addressing.
965SDValue R600TargetLowering::stackPtrToRegIndex(SDValue Ptr,
966 unsigned StackWidth,
967 SelectionDAG &DAG) const {
968 unsigned SRLPad;
969 switch(StackWidth) {
970 case 1:
971 SRLPad = 2;
972 break;
973 case 2:
974 SRLPad = 3;
975 break;
976 case 4:
977 SRLPad = 4;
978 break;
979 default: llvm_unreachable("Invalid stack width");
980 }
981
982 SDLoc DL(Ptr);
983 return DAG.getNode(ISD::SRL, DL, Ptr.getValueType(), Ptr,
984 DAG.getConstant(SRLPad, DL, MVT::i32));
985}
986
987void R600TargetLowering::getStackAddress(unsigned StackWidth,
988 unsigned ElemIdx,
989 unsigned &Channel,
990 unsigned &PtrIncr) const {
991 switch (StackWidth) {
992 default:
993 case 1:
994 Channel = 0;
995 if (ElemIdx > 0) {
996 PtrIncr = 1;
997 } else {
998 PtrIncr = 0;
999 }
1000 break;
1001 case 2:
1002 Channel = ElemIdx % 2;
1003 if (ElemIdx == 2) {
1004 PtrIncr = 1;
1005 } else {
1006 PtrIncr = 0;
1007 }
1008 break;
1009 case 4:
1010 Channel = ElemIdx;
1011 PtrIncr = 0;
1012 break;
1013 }
1014}
1015
1016SDValue R600TargetLowering::lowerPrivateTruncStore(StoreSDNode *Store,
1017 SelectionDAG &DAG) const {
1018 SDLoc DL(Store);
1019 //TODO: Who creates the i8 stores?
1020 assert(Store->isTruncatingStore()
1021 || Store->getValue().getValueType() == MVT::i8);
1022 assert(Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS);
1023
1024 SDValue Mask;
1025 if (Store->getMemoryVT() == MVT::i8) {
1026 assert(Store->getAlign() >= 1);
1027 Mask = DAG.getConstant(0xff, DL, MVT::i32);
1028 } else if (Store->getMemoryVT() == MVT::i16) {
1029 assert(Store->getAlign() >= 2);
1030 Mask = DAG.getConstant(0xffff, DL, MVT::i32);
1031 } else {
1032 llvm_unreachable("Unsupported private trunc store");
1033 }
1034
1035 SDValue OldChain = Store->getChain();
1036 bool VectorTrunc = (OldChain.getOpcode() == AMDGPUISD::DUMMY_CHAIN);
1037 // Skip dummy
1038 SDValue Chain = VectorTrunc ? OldChain->getOperand(0) : OldChain;
1039 SDValue BasePtr = Store->getBasePtr();
1040 SDValue Offset = Store->getOffset();
1041 EVT MemVT = Store->getMemoryVT();
1042
1043 SDValue LoadPtr = BasePtr;
1044 if (!Offset.isUndef()) {
1045 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1046 }
1047
1048 // Get dword location
1049 // TODO: this should be eliminated by the future SHR ptr, 2
1050 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1051 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1052
1053 // Load dword
1054 // TODO: can we be smarter about machine pointer info?
1056 SDValue Dst = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1057
1058 Chain = Dst.getValue(1);
1059
1060 // Get offset in dword
1061 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1062 DAG.getConstant(0x3, DL, MVT::i32));
1063
1064 // Convert byte offset to bit shift
1065 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1066 DAG.getConstant(3, DL, MVT::i32));
1067
1068 // TODO: Contrary to the name of the function,
1069 // it also handles sub i32 non-truncating stores (like i1)
1070 SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
1071 Store->getValue());
1072
1073 // Mask the value to the right type
1074 SDValue MaskedValue = DAG.getZeroExtendInReg(SExtValue, DL, MemVT);
1075
1076 // Shift the value in place
1077 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
1078 MaskedValue, ShiftAmt);
1079
1080 // Shift the mask in place
1081 SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, Mask, ShiftAmt);
1082
1083 // Invert the mask. NOTE: if we had native ROL instructions we could
1084 // use inverted mask
1085 DstMask = DAG.getNOT(DL, DstMask, MVT::i32);
1086
1087 // Cleanup the target bits
1088 Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
1089
1090 // Add the new bits
1091 SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
1092
1093 // Store dword
1094 // TODO: Can we be smarter about MachinePointerInfo?
1095 SDValue NewStore = DAG.getStore(Chain, DL, Value, Ptr, PtrInfo);
1096
1097 // If we are part of expanded vector, make our neighbors depend on this store
1098 if (VectorTrunc) {
1099 // Make all other vector elements depend on this store
1100 Chain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, NewStore);
1101 DAG.ReplaceAllUsesOfValueWith(OldChain, Chain);
1102 }
1103 return NewStore;
1104}
1105
1106SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
1107 StoreSDNode *StoreNode = cast<StoreSDNode>(Op);
1108 unsigned AS = StoreNode->getAddressSpace();
1109
1110 SDValue Chain = StoreNode->getChain();
1111 SDValue Ptr = StoreNode->getBasePtr();
1112 SDValue Value = StoreNode->getValue();
1113
1114 EVT VT = Value.getValueType();
1115 EVT MemVT = StoreNode->getMemoryVT();
1116 EVT PtrVT = Ptr.getValueType();
1117
1118 SDLoc DL(Op);
1119
1120 const bool TruncatingStore = StoreNode->isTruncatingStore();
1121
1122 // Neither LOCAL nor PRIVATE can do vectors at the moment
1124 TruncatingStore) &&
1125 VT.isVector()) {
1126 if ((AS == AMDGPUAS::PRIVATE_ADDRESS) && TruncatingStore) {
1127 // Add an extra level of chain to isolate this vector
1128 SDValue NewChain = DAG.getNode(AMDGPUISD::DUMMY_CHAIN, DL, MVT::Other, Chain);
1129 // TODO: can the chain be replaced without creating a new store?
1130 SDValue NewStore = DAG.getTruncStore(
1131 NewChain, DL, Value, Ptr, StoreNode->getPointerInfo(), MemVT,
1132 StoreNode->getAlign(), StoreNode->getMemOperand()->getFlags(),
1133 StoreNode->getAAInfo());
1134 StoreNode = cast<StoreSDNode>(NewStore);
1135 }
1136
1137 return scalarizeVectorStore(StoreNode, DAG);
1138 }
1139
1140 Align Alignment = StoreNode->getAlign();
1141 if (Alignment < MemVT.getStoreSize() &&
1142 !allowsMisalignedMemoryAccesses(MemVT, AS, Alignment,
1143 StoreNode->getMemOperand()->getFlags(),
1144 nullptr)) {
1145 return expandUnalignedStore(StoreNode, DAG);
1146 }
1147
1148 SDValue DWordAddr = DAG.getNode(ISD::SRL, DL, PtrVT, Ptr,
1149 DAG.getConstant(2, DL, PtrVT));
1150
1151 if (AS == AMDGPUAS::GLOBAL_ADDRESS) {
1152 // It is beneficial to create MSKOR here instead of combiner to avoid
1153 // artificial dependencies introduced by RMW
1154 if (TruncatingStore) {
1155 assert(VT.bitsLE(MVT::i32));
1156 SDValue MaskConstant;
1157 if (MemVT == MVT::i8) {
1158 MaskConstant = DAG.getConstant(0xFF, DL, MVT::i32);
1159 } else {
1160 assert(MemVT == MVT::i16);
1161 assert(StoreNode->getAlign() >= 2);
1162 MaskConstant = DAG.getConstant(0xFFFF, DL, MVT::i32);
1163 }
1164
1165 SDValue ByteIndex = DAG.getNode(ISD::AND, DL, PtrVT, Ptr,
1166 DAG.getConstant(0x00000003, DL, PtrVT));
1167 SDValue BitShift = DAG.getNode(ISD::SHL, DL, VT, ByteIndex,
1168 DAG.getConstant(3, DL, VT));
1169
1170 // Put the mask in correct place
1171 SDValue Mask = DAG.getNode(ISD::SHL, DL, VT, MaskConstant, BitShift);
1172
1173 // Put the value bits in correct place
1174 SDValue TruncValue = DAG.getNode(ISD::AND, DL, VT, Value, MaskConstant);
1175 SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, VT, TruncValue, BitShift);
1176
1177 // XXX: If we add a 64-bit ZW register class, then we could use a 2 x i32
1178 // vector instead.
1179 SDValue Src[4] = {
1180 ShiftedValue,
1181 DAG.getConstant(0, DL, MVT::i32),
1182 DAG.getConstant(0, DL, MVT::i32),
1183 Mask
1184 };
1185 SDValue Input = DAG.getBuildVector(MVT::v4i32, DL, Src);
1186 SDValue Args[3] = { Chain, Input, DWordAddr };
1188 Op->getVTList(), Args, MemVT,
1189 StoreNode->getMemOperand());
1190 } else if (Ptr->getOpcode() != AMDGPUISD::DWORDADDR && VT.bitsGE(MVT::i32)) {
1191 // Convert pointer from byte address to dword address.
1192 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1193
1194 if (StoreNode->isIndexed()) {
1195 llvm_unreachable("Indexed stores not supported yet");
1196 } else {
1197 Chain = DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1198 }
1199 return Chain;
1200 }
1201 }
1202
1203 // GLOBAL_ADDRESS has been handled above, LOCAL_ADDRESS allows all sizes
1204 if (AS != AMDGPUAS::PRIVATE_ADDRESS)
1205 return SDValue();
1206
1207 if (MemVT.bitsLT(MVT::i32))
1208 return lowerPrivateTruncStore(StoreNode, DAG);
1209
1210 // Standard i32+ store, tag it with DWORDADDR to note that the address
1211 // has been shifted
1212 if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1213 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, PtrVT, DWordAddr);
1214 return DAG.getStore(Chain, DL, Value, Ptr, StoreNode->getMemOperand());
1215 }
1216
1217 // Tagged i32+ stores will be matched by patterns
1218 return SDValue();
1219}
1220
1221// return (512 + (kc_bank << 12)
1222static int
1224 switch (AddressSpace) {
1226 return 512;
1228 return 512 + 4096;
1230 return 512 + 4096 * 2;
1232 return 512 + 4096 * 3;
1234 return 512 + 4096 * 4;
1236 return 512 + 4096 * 5;
1238 return 512 + 4096 * 6;
1240 return 512 + 4096 * 7;
1242 return 512 + 4096 * 8;
1244 return 512 + 4096 * 9;
1246 return 512 + 4096 * 10;
1248 return 512 + 4096 * 11;
1250 return 512 + 4096 * 12;
1252 return 512 + 4096 * 13;
1254 return 512 + 4096 * 14;
1256 return 512 + 4096 * 15;
1257 default:
1258 return -1;
1259 }
1260}
1261
1262SDValue R600TargetLowering::lowerPrivateExtLoad(SDValue Op,
1263 SelectionDAG &DAG) const {
1264 SDLoc DL(Op);
1265 LoadSDNode *Load = cast<LoadSDNode>(Op);
1266 ISD::LoadExtType ExtType = Load->getExtensionType();
1267 EVT MemVT = Load->getMemoryVT();
1268 assert(Load->getAlign() >= MemVT.getStoreSize());
1269
1270 SDValue BasePtr = Load->getBasePtr();
1271 SDValue Chain = Load->getChain();
1272 SDValue Offset = Load->getOffset();
1273
1274 SDValue LoadPtr = BasePtr;
1275 if (!Offset.isUndef()) {
1276 LoadPtr = DAG.getNode(ISD::ADD, DL, MVT::i32, BasePtr, Offset);
1277 }
1278
1279 // Get dword location
1280 // NOTE: this should be eliminated by the future SHR ptr, 2
1281 SDValue Ptr = DAG.getNode(ISD::AND, DL, MVT::i32, LoadPtr,
1282 DAG.getConstant(0xfffffffc, DL, MVT::i32));
1283
1284 // Load dword
1285 // TODO: can we be smarter about machine pointer info?
1287 SDValue Read = DAG.getLoad(MVT::i32, DL, Chain, Ptr, PtrInfo);
1288
1289 // Get offset within the register.
1290 SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
1291 LoadPtr, DAG.getConstant(0x3, DL, MVT::i32));
1292
1293 // Bit offset of target byte (byteIdx * 8).
1294 SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
1295 DAG.getConstant(3, DL, MVT::i32));
1296
1297 // Shift to the right.
1298 SDValue Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Read, ShiftAmt);
1299
1300 // Eliminate the upper bits by setting them to ...
1301 EVT MemEltVT = MemVT.getScalarType();
1302
1303 if (ExtType == ISD::SEXTLOAD) { // ... ones.
1304 SDValue MemEltVTNode = DAG.getValueType(MemEltVT);
1305 Ret = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, MVT::i32, Ret, MemEltVTNode);
1306 } else { // ... or zeros.
1307 Ret = DAG.getZeroExtendInReg(Ret, DL, MemEltVT);
1308 }
1309
1310 SDValue Ops[] = {
1311 Ret,
1312 Read.getValue(1) // This should be our output chain
1313 };
1314
1315 return DAG.getMergeValues(Ops, DL);
1316}
1317
1318SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
1319 LoadSDNode *LoadNode = cast<LoadSDNode>(Op);
1320 unsigned AS = LoadNode->getAddressSpace();
1321 EVT MemVT = LoadNode->getMemoryVT();
1322 ISD::LoadExtType ExtType = LoadNode->getExtensionType();
1323
1324 if (AS == AMDGPUAS::PRIVATE_ADDRESS &&
1325 ExtType != ISD::NON_EXTLOAD && MemVT.bitsLT(MVT::i32)) {
1326 return lowerPrivateExtLoad(Op, DAG);
1327 }
1328
1329 SDLoc DL(Op);
1330 EVT VT = Op.getValueType();
1331 SDValue Chain = LoadNode->getChain();
1332 SDValue Ptr = LoadNode->getBasePtr();
1333
1334 if ((LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
1336 VT.isVector()) {
1337 SDValue Ops[2];
1338 std::tie(Ops[0], Ops[1]) = scalarizeVectorLoad(LoadNode, DAG);
1339 return DAG.getMergeValues(Ops, DL);
1340 }
1341
1342 // This is still used for explicit load from addrspace(8)
1343 int ConstantBlock = ConstantAddressBlock(LoadNode->getAddressSpace());
1344 if (ConstantBlock > -1 &&
1345 ((LoadNode->getExtensionType() == ISD::NON_EXTLOAD) ||
1346 (LoadNode->getExtensionType() == ISD::ZEXTLOAD))) {
1348 if (isa<Constant>(LoadNode->getMemOperand()->getValue()) ||
1349 isa<ConstantSDNode>(Ptr)) {
1350 return constBufferLoad(LoadNode, LoadNode->getAddressSpace(), DAG);
1351 } else {
1352 //TODO: Does this even work?
1353 // non-constant ptr can't be folded, keeps it as a v4f32 load
1354 Result = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::v4i32,
1355 DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr,
1356 DAG.getConstant(4, DL, MVT::i32)),
1357 DAG.getConstant(LoadNode->getAddressSpace() -
1359 );
1360 }
1361
1362 if (!VT.isVector()) {
1363 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1364 DAG.getConstant(0, DL, MVT::i32));
1365 }
1366
1367 SDValue MergedValues[2] = {
1368 Result,
1369 Chain
1370 };
1371 return DAG.getMergeValues(MergedValues, DL);
1372 }
1373
1374 // For most operations returning SDValue() will result in the node being
1375 // expanded by the DAG Legalizer. This is not the case for ISD::LOAD, so we
1376 // need to manually expand loads that may be legal in some address spaces and
1377 // illegal in others. SEXT loads from CONSTANT_BUFFER_0 are supported for
1378 // compute shaders, since the data is sign extended when it is uploaded to the
1379 // buffer. However SEXT loads from other address spaces are not supported, so
1380 // we need to expand them here.
1381 if (LoadNode->getExtensionType() == ISD::SEXTLOAD) {
1382 assert(!MemVT.isVector() && (MemVT == MVT::i16 || MemVT == MVT::i8));
1383 SDValue NewLoad = DAG.getExtLoad(
1384 ISD::EXTLOAD, DL, VT, Chain, Ptr, LoadNode->getPointerInfo(), MemVT,
1385 LoadNode->getAlign(), LoadNode->getMemOperand()->getFlags());
1386 SDValue Res = DAG.getNode(ISD::SIGN_EXTEND_INREG, DL, VT, NewLoad,
1387 DAG.getValueType(MemVT));
1388
1389 SDValue MergedValues[2] = { Res, Chain };
1390 return DAG.getMergeValues(MergedValues, DL);
1391 }
1392
1393 if (LoadNode->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
1394 return SDValue();
1395 }
1396
1397 // DWORDADDR ISD marks already shifted address
1398 if (Ptr.getOpcode() != AMDGPUISD::DWORDADDR) {
1399 assert(VT == MVT::i32);
1400 Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Ptr, DAG.getConstant(2, DL, MVT::i32));
1401 Ptr = DAG.getNode(AMDGPUISD::DWORDADDR, DL, MVT::i32, Ptr);
1402 return DAG.getLoad(MVT::i32, DL, Chain, Ptr, LoadNode->getMemOperand());
1403 }
1404 return SDValue();
1405}
1406
1407SDValue R600TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const {
1408 SDValue Chain = Op.getOperand(0);
1409 SDValue Cond = Op.getOperand(1);
1410 SDValue Jump = Op.getOperand(2);
1411
1412 return DAG.getNode(AMDGPUISD::BRANCH_COND, SDLoc(Op), Op.getValueType(),
1413 Chain, Jump, Cond);
1414}
1415
1416SDValue R600TargetLowering::lowerFrameIndex(SDValue Op,
1417 SelectionDAG &DAG) const {
1419 const R600FrameLowering *TFL = Subtarget->getFrameLowering();
1420
1421 FrameIndexSDNode *FIN = cast<FrameIndexSDNode>(Op);
1422
1423 unsigned FrameIndex = FIN->getIndex();
1424 Register IgnoredFrameReg;
1426 TFL->getFrameIndexReference(MF, FrameIndex, IgnoredFrameReg);
1427 return DAG.getConstant(Offset.getFixed() * 4 * TFL->getStackWidth(MF),
1428 SDLoc(Op), Op.getValueType());
1429}
1430
1432 bool IsVarArg) const {
1433 switch (CC) {
1436 case CallingConv::C:
1437 case CallingConv::Fast:
1438 case CallingConv::Cold:
1439 llvm_unreachable("kernels should not be handled here");
1447 return CC_R600;
1448 default:
1449 report_fatal_error("Unsupported calling convention.");
1450 }
1451}
1452
1453/// XXX Only kernel functions are supported, so we can assume for now that
1454/// every function is a kernel function, but in the future we should use
1455/// separate calling conventions for kernel and non-kernel functions.
1457 SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
1458 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
1459 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1461 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), ArgLocs,
1462 *DAG.getContext());
1465
1466 if (AMDGPU::isShader(CallConv)) {
1467 CCInfo.AnalyzeFormalArguments(Ins, CCAssignFnForCall(CallConv, isVarArg));
1468 } else {
1469 analyzeFormalArgumentsCompute(CCInfo, Ins);
1470 }
1471
1472 for (unsigned i = 0, e = Ins.size(); i < e; ++i) {
1473 CCValAssign &VA = ArgLocs[i];
1474 const ISD::InputArg &In = Ins[i];
1475 EVT VT = In.VT;
1476 EVT MemVT = VA.getLocVT();
1477 if (!VT.isVector() && MemVT.isVector()) {
1478 // Get load source type if scalarized.
1479 MemVT = MemVT.getVectorElementType();
1480 }
1481
1482 if (AMDGPU::isShader(CallConv)) {
1483 Register Reg = MF.addLiveIn(VA.getLocReg(), &R600::R600_Reg128RegClass);
1484 SDValue Register = DAG.getCopyFromReg(Chain, DL, Reg, VT);
1485 InVals.push_back(Register);
1486 continue;
1487 }
1488
1489 // i64 isn't a legal type, so the register type used ends up as i32, which
1490 // isn't expected here. It attempts to create this sextload, but it ends up
1491 // being invalid. Somehow this seems to work with i64 arguments, but breaks
1492 // for <1 x i64>.
1493
1494 // The first 36 bytes of the input buffer contains information about
1495 // thread group and global sizes.
1497 if (MemVT.getScalarSizeInBits() != VT.getScalarSizeInBits()) {
1498 // FIXME: This should really check the extload type, but the handling of
1499 // extload vector parameters seems to be broken.
1500
1501 // Ext = In.Flags.isSExt() ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
1502 Ext = ISD::SEXTLOAD;
1503 }
1504
1505 // Compute the offset from the value.
1506 // XXX - I think PartOffset should give you this, but it seems to give the
1507 // size of the register which isn't useful.
1508
1509 unsigned PartOffset = VA.getLocMemOffset();
1510 Align Alignment = commonAlignment(Align(VT.getStoreSize()), PartOffset);
1511
1513 SDValue Arg = DAG.getLoad(
1514 ISD::UNINDEXED, Ext, VT, DL, Chain,
1515 DAG.getConstant(PartOffset, DL, MVT::i32), DAG.getUNDEF(MVT::i32),
1516 PtrInfo,
1517 MemVT, Alignment, MachineMemOperand::MONonTemporal |
1520
1521 InVals.push_back(Arg);
1522 }
1523 return Chain;
1524}
1525
1527 EVT VT) const {
1528 if (!VT.isVector())
1529 return MVT::i32;
1531}
1532
1534 const MachineFunction &MF) const {
1535 // Local and Private addresses do not handle vectors. Limit to i32
1537 return (MemVT.getSizeInBits() <= 32);
1538 }
1539 return true;
1540}
1541
1543 EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags,
1544 unsigned *IsFast) const {
1545 if (IsFast)
1546 *IsFast = 0;
1547
1548 if (!VT.isSimple() || VT == MVT::Other)
1549 return false;
1550
1551 if (VT.bitsLT(MVT::i32))
1552 return false;
1553
1554 // TODO: This is a rough estimate.
1555 if (IsFast)
1556 *IsFast = 1;
1557
1558 return VT.bitsGT(MVT::i32) && Alignment >= Align(4);
1559}
1560
1562 SelectionDAG &DAG, SDValue VectorEntry,
1563 DenseMap<unsigned, unsigned> &RemapSwizzle) {
1564 assert(RemapSwizzle.empty());
1565
1566 SDLoc DL(VectorEntry);
1567 EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1568
1569 SDValue NewBldVec[4];
1570 for (unsigned i = 0; i < 4; i++)
1571 NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1572 DAG.getIntPtrConstant(i, DL));
1573
1574 for (unsigned i = 0; i < 4; i++) {
1575 if (NewBldVec[i].isUndef())
1576 // We mask write here to teach later passes that the ith element of this
1577 // vector is undef. Thus we can use it to reduce 128 bits reg usage,
1578 // break false dependencies and additionally make assembly easier to read.
1579 RemapSwizzle[i] = 7; // SEL_MASK_WRITE
1580 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(NewBldVec[i])) {
1581 if (C->isZero()) {
1582 RemapSwizzle[i] = 4; // SEL_0
1583 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1584 } else if (C->isExactlyValue(1.0)) {
1585 RemapSwizzle[i] = 5; // SEL_1
1586 NewBldVec[i] = DAG.getUNDEF(MVT::f32);
1587 }
1588 }
1589
1590 if (NewBldVec[i].isUndef())
1591 continue;
1592
1593 for (unsigned j = 0; j < i; j++) {
1594 if (NewBldVec[i] == NewBldVec[j]) {
1595 NewBldVec[i] = DAG.getUNDEF(NewBldVec[i].getValueType());
1596 RemapSwizzle[i] = j;
1597 break;
1598 }
1599 }
1600 }
1601
1602 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1603 NewBldVec);
1604}
1605
1607 DenseMap<unsigned, unsigned> &RemapSwizzle) {
1608 assert(RemapSwizzle.empty());
1609
1610 SDLoc DL(VectorEntry);
1611 EVT EltTy = VectorEntry.getValueType().getVectorElementType();
1612
1613 SDValue NewBldVec[4];
1614 bool isUnmovable[4] = {false, false, false, false};
1615 for (unsigned i = 0; i < 4; i++)
1616 NewBldVec[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltTy, VectorEntry,
1617 DAG.getIntPtrConstant(i, DL));
1618
1619 for (unsigned i = 0; i < 4; i++) {
1620 RemapSwizzle[i] = i;
1621 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1622 unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
1623 if (i == Idx)
1624 isUnmovable[Idx] = true;
1625 }
1626 }
1627
1628 for (unsigned i = 0; i < 4; i++) {
1629 if (NewBldVec[i].getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
1630 unsigned Idx = NewBldVec[i].getConstantOperandVal(1);
1631 if (isUnmovable[Idx])
1632 continue;
1633 // Swap i and Idx
1634 std::swap(NewBldVec[Idx], NewBldVec[i]);
1635 std::swap(RemapSwizzle[i], RemapSwizzle[Idx]);
1636 break;
1637 }
1638 }
1639
1640 return DAG.getBuildVector(VectorEntry.getValueType(), SDLoc(VectorEntry),
1641 NewBldVec);
1642}
1643
1644SDValue R600TargetLowering::OptimizeSwizzle(SDValue BuildVector, SDValue Swz[],
1645 SelectionDAG &DAG,
1646 const SDLoc &DL) const {
1647 // Old -> New swizzle values
1648 DenseMap<unsigned, unsigned> SwizzleRemap;
1649
1650 BuildVector = CompactSwizzlableVector(DAG, BuildVector, SwizzleRemap);
1651 for (unsigned i = 0; i < 4; i++) {
1652 unsigned Idx = Swz[i]->getAsZExtVal();
1653 if (SwizzleRemap.contains(Idx))
1654 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1655 }
1656
1657 SwizzleRemap.clear();
1658 BuildVector = ReorganizeVector(DAG, BuildVector, SwizzleRemap);
1659 for (unsigned i = 0; i < 4; i++) {
1660 unsigned Idx = Swz[i]->getAsZExtVal();
1661 if (SwizzleRemap.contains(Idx))
1662 Swz[i] = DAG.getConstant(SwizzleRemap[Idx], DL, MVT::i32);
1663 }
1664
1665 return BuildVector;
1666}
1667
1668SDValue R600TargetLowering::constBufferLoad(LoadSDNode *LoadNode, int Block,
1669 SelectionDAG &DAG) const {
1670 SDLoc DL(LoadNode);
1671 EVT VT = LoadNode->getValueType(0);
1672 SDValue Chain = LoadNode->getChain();
1673 SDValue Ptr = LoadNode->getBasePtr();
1674 assert (isa<ConstantSDNode>(Ptr));
1675
1676 //TODO: Support smaller loads
1677 if (LoadNode->getMemoryVT().getScalarType() != MVT::i32 || !ISD::isNON_EXTLoad(LoadNode))
1678 return SDValue();
1679
1680 if (LoadNode->getAlign() < Align(4))
1681 return SDValue();
1682
1683 int ConstantBlock = ConstantAddressBlock(Block);
1684
1685 SDValue Slots[4];
1686 for (unsigned i = 0; i < 4; i++) {
1687 // We want Const position encoded with the following formula :
1688 // (((512 + (kc_bank << 12) + const_index) << 2) + chan)
1689 // const_index is Ptr computed by llvm using an alignment of 16.
1690 // Thus we add (((512 + (kc_bank << 12)) + chan ) * 4 here and
1691 // then div by 4 at the ISel step
1692 SDValue NewPtr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
1693 DAG.getConstant(4 * i + ConstantBlock * 16, DL, MVT::i32));
1694 Slots[i] = DAG.getNode(AMDGPUISD::CONST_ADDRESS, DL, MVT::i32, NewPtr);
1695 }
1696 EVT NewVT = MVT::v4i32;
1697 unsigned NumElements = 4;
1698 if (VT.isVector()) {
1699 NewVT = VT;
1700 NumElements = VT.getVectorNumElements();
1701 }
1702 SDValue Result = DAG.getBuildVector(NewVT, DL, ArrayRef(Slots, NumElements));
1703 if (!VT.isVector()) {
1704 Result = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::i32, Result,
1705 DAG.getConstant(0, DL, MVT::i32));
1706 }
1707 SDValue MergedValues[2] = {
1708 Result,
1709 Chain
1710 };
1711 return DAG.getMergeValues(MergedValues, DL);
1712}
1713
1714//===----------------------------------------------------------------------===//
1715// Custom DAG Optimizations
1716//===----------------------------------------------------------------------===//
1717
1719 DAGCombinerInfo &DCI) const {
1720 SelectionDAG &DAG = DCI.DAG;
1721 SDLoc DL(N);
1722
1723 switch (N->getOpcode()) {
1724 // (f32 fp_round (f64 uint_to_fp a)) -> (f32 uint_to_fp a)
1725 case ISD::FP_ROUND: {
1726 SDValue Arg = N->getOperand(0);
1727 if (Arg.getOpcode() == ISD::UINT_TO_FP && Arg.getValueType() == MVT::f64) {
1728 return DAG.getNode(ISD::UINT_TO_FP, DL, N->getValueType(0),
1729 Arg.getOperand(0));
1730 }
1731 break;
1732 }
1733
1734 // (i32 fp_to_sint (fneg (select_cc f32, f32, 1.0, 0.0 cc))) ->
1735 // (i32 select_cc f32, f32, -1, 0 cc)
1736 //
1737 // Mesa's GLSL frontend generates the above pattern a lot and we can lower
1738 // this to one of the SET*_DX10 instructions.
1739 case ISD::FP_TO_SINT: {
1740 SDValue FNeg = N->getOperand(0);
1741 if (FNeg.getOpcode() != ISD::FNEG) {
1742 return SDValue();
1743 }
1744 SDValue SelectCC = FNeg.getOperand(0);
1745 if (SelectCC.getOpcode() != ISD::SELECT_CC ||
1746 SelectCC.getOperand(0).getValueType() != MVT::f32 || // LHS
1747 SelectCC.getOperand(2).getValueType() != MVT::f32 || // True
1748 !isHWTrueValue(SelectCC.getOperand(2)) ||
1749 !isHWFalseValue(SelectCC.getOperand(3))) {
1750 return SDValue();
1751 }
1752
1753 return DAG.getNode(ISD::SELECT_CC, DL, N->getValueType(0),
1754 SelectCC.getOperand(0), // LHS
1755 SelectCC.getOperand(1), // RHS
1756 DAG.getConstant(-1, DL, MVT::i32), // True
1757 DAG.getConstant(0, DL, MVT::i32), // False
1758 SelectCC.getOperand(4)); // CC
1759 }
1760
1761 // insert_vector_elt (build_vector elt0, ... , eltN), NewEltIdx, idx
1762 // => build_vector elt0, ... , NewEltIdx, ... , eltN
1764 SDValue InVec = N->getOperand(0);
1765 SDValue InVal = N->getOperand(1);
1766 SDValue EltNo = N->getOperand(2);
1767
1768 // If the inserted element is an UNDEF, just use the input vector.
1769 if (InVal.isUndef())
1770 return InVec;
1771
1772 EVT VT = InVec.getValueType();
1773
1774 // If we can't generate a legal BUILD_VECTOR, exit
1776 return SDValue();
1777
1778 // Check that we know which element is being inserted
1779 if (!isa<ConstantSDNode>(EltNo))
1780 return SDValue();
1781 unsigned Elt = EltNo->getAsZExtVal();
1782
1783 // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
1784 // be converted to a BUILD_VECTOR). Fill in the Ops vector with the
1785 // vector elements.
1787 if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
1788 Ops.append(InVec.getNode()->op_begin(),
1789 InVec.getNode()->op_end());
1790 } else if (InVec.isUndef()) {
1791 unsigned NElts = VT.getVectorNumElements();
1792 Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
1793 } else {
1794 return SDValue();
1795 }
1796
1797 // Insert the element
1798 if (Elt < Ops.size()) {
1799 // All the operands of BUILD_VECTOR must have the same type;
1800 // we enforce that here.
1801 EVT OpVT = Ops[0].getValueType();
1802 if (InVal.getValueType() != OpVT)
1803 InVal = OpVT.bitsGT(InVal.getValueType()) ?
1804 DAG.getNode(ISD::ANY_EXTEND, DL, OpVT, InVal) :
1805 DAG.getNode(ISD::TRUNCATE, DL, OpVT, InVal);
1806 Ops[Elt] = InVal;
1807 }
1808
1809 // Return the new vector
1810 return DAG.getBuildVector(VT, DL, Ops);
1811 }
1812
1813 // Extract_vec (Build_vector) generated by custom lowering
1814 // also needs to be customly combined
1816 SDValue Arg = N->getOperand(0);
1817 if (Arg.getOpcode() == ISD::BUILD_VECTOR) {
1818 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1819 unsigned Element = Const->getZExtValue();
1820 return Arg->getOperand(Element);
1821 }
1822 }
1823 if (Arg.getOpcode() == ISD::BITCAST &&
1827 if (ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
1828 unsigned Element = Const->getZExtValue();
1829 return DAG.getNode(ISD::BITCAST, DL, N->getVTList(),
1830 Arg->getOperand(0).getOperand(Element));
1831 }
1832 }
1833 break;
1834 }
1835
1836 case ISD::SELECT_CC: {
1837 // Try common optimizations
1839 return Ret;
1840
1841 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, seteq ->
1842 // selectcc x, y, a, b, inv(cc)
1843 //
1844 // fold selectcc (selectcc x, y, a, b, cc), b, a, b, setne ->
1845 // selectcc x, y, a, b, cc
1846 SDValue LHS = N->getOperand(0);
1847 if (LHS.getOpcode() != ISD::SELECT_CC) {
1848 return SDValue();
1849 }
1850
1851 SDValue RHS = N->getOperand(1);
1852 SDValue True = N->getOperand(2);
1853 SDValue False = N->getOperand(3);
1854 ISD::CondCode NCC = cast<CondCodeSDNode>(N->getOperand(4))->get();
1855
1856 if (LHS.getOperand(2).getNode() != True.getNode() ||
1857 LHS.getOperand(3).getNode() != False.getNode() ||
1858 RHS.getNode() != False.getNode()) {
1859 return SDValue();
1860 }
1861
1862 switch (NCC) {
1863 default: return SDValue();
1864 case ISD::SETNE: return LHS;
1865 case ISD::SETEQ: {
1866 ISD::CondCode LHSCC = cast<CondCodeSDNode>(LHS.getOperand(4))->get();
1867 LHSCC = ISD::getSetCCInverse(LHSCC, LHS.getOperand(0).getValueType());
1868 if (DCI.isBeforeLegalizeOps() ||
1869 isCondCodeLegal(LHSCC, LHS.getOperand(0).getSimpleValueType()))
1870 return DAG.getSelectCC(DL,
1871 LHS.getOperand(0),
1872 LHS.getOperand(1),
1873 LHS.getOperand(2),
1874 LHS.getOperand(3),
1875 LHSCC);
1876 break;
1877 }
1878 }
1879 return SDValue();
1880 }
1881
1883 SDValue Arg = N->getOperand(1);
1884 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1885 break;
1886
1887 SDValue NewArgs[8] = {
1888 N->getOperand(0), // Chain
1889 SDValue(),
1890 N->getOperand(2), // ArrayBase
1891 N->getOperand(3), // Type
1892 N->getOperand(4), // SWZ_X
1893 N->getOperand(5), // SWZ_Y
1894 N->getOperand(6), // SWZ_Z
1895 N->getOperand(7) // SWZ_W
1896 };
1897 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[4], DAG, DL);
1898 return DAG.getNode(AMDGPUISD::R600_EXPORT, DL, N->getVTList(), NewArgs);
1899 }
1901 SDValue Arg = N->getOperand(1);
1902 if (Arg.getOpcode() != ISD::BUILD_VECTOR)
1903 break;
1904
1905 SDValue NewArgs[19] = {
1906 N->getOperand(0),
1907 N->getOperand(1),
1908 N->getOperand(2),
1909 N->getOperand(3),
1910 N->getOperand(4),
1911 N->getOperand(5),
1912 N->getOperand(6),
1913 N->getOperand(7),
1914 N->getOperand(8),
1915 N->getOperand(9),
1916 N->getOperand(10),
1917 N->getOperand(11),
1918 N->getOperand(12),
1919 N->getOperand(13),
1920 N->getOperand(14),
1921 N->getOperand(15),
1922 N->getOperand(16),
1923 N->getOperand(17),
1924 N->getOperand(18),
1925 };
1926 NewArgs[1] = OptimizeSwizzle(N->getOperand(1), &NewArgs[2], DAG, DL);
1927 return DAG.getNode(AMDGPUISD::TEXTURE_FETCH, DL, N->getVTList(), NewArgs);
1928 }
1929
1930 case ISD::LOAD: {
1931 LoadSDNode *LoadNode = cast<LoadSDNode>(N);
1932 SDValue Ptr = LoadNode->getBasePtr();
1933 if (LoadNode->getAddressSpace() == AMDGPUAS::PARAM_I_ADDRESS &&
1934 isa<ConstantSDNode>(Ptr))
1935 return constBufferLoad(LoadNode, AMDGPUAS::CONSTANT_BUFFER_0, DAG);
1936 break;
1937 }
1938
1939 default: break;
1940 }
1941
1943}
1944
1945bool R600TargetLowering::FoldOperand(SDNode *ParentNode, unsigned SrcIdx,
1946 SDValue &Src, SDValue &Neg, SDValue &Abs,
1947 SDValue &Sel, SDValue &Imm,
1948 SelectionDAG &DAG) const {
1949 const R600InstrInfo *TII = Subtarget->getInstrInfo();
1950 if (!Src.isMachineOpcode())
1951 return false;
1952
1953 switch (Src.getMachineOpcode()) {
1954 case R600::FNEG_R600:
1955 if (!Neg.getNode())
1956 return false;
1957 Src = Src.getOperand(0);
1958 Neg = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1959 return true;
1960 case R600::FABS_R600:
1961 if (!Abs.getNode())
1962 return false;
1963 Src = Src.getOperand(0);
1964 Abs = DAG.getTargetConstant(1, SDLoc(ParentNode), MVT::i32);
1965 return true;
1966 case R600::CONST_COPY: {
1967 unsigned Opcode = ParentNode->getMachineOpcode();
1968 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
1969
1970 if (!Sel.getNode())
1971 return false;
1972
1973 SDValue CstOffset = Src.getOperand(0);
1974 if (ParentNode->getValueType(0).isVector())
1975 return false;
1976
1977 // Gather constants values
1978 int SrcIndices[] = {
1979 TII->getOperandIdx(Opcode, R600::OpName::src0),
1980 TII->getOperandIdx(Opcode, R600::OpName::src1),
1981 TII->getOperandIdx(Opcode, R600::OpName::src2),
1982 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
1983 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
1984 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
1985 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
1986 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
1987 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
1988 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
1989 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
1990 };
1991 std::vector<unsigned> Consts;
1992 for (int OtherSrcIdx : SrcIndices) {
1993 int OtherSelIdx = TII->getSelIdx(Opcode, OtherSrcIdx);
1994 if (OtherSrcIdx < 0 || OtherSelIdx < 0)
1995 continue;
1996 if (HasDst) {
1997 OtherSrcIdx--;
1998 OtherSelIdx--;
1999 }
2000 if (RegisterSDNode *Reg =
2001 dyn_cast<RegisterSDNode>(ParentNode->getOperand(OtherSrcIdx))) {
2002 if (Reg->getReg() == R600::ALU_CONST) {
2003 Consts.push_back(ParentNode->getConstantOperandVal(OtherSelIdx));
2004 }
2005 }
2006 }
2007
2008 ConstantSDNode *Cst = cast<ConstantSDNode>(CstOffset);
2009 Consts.push_back(Cst->getZExtValue());
2010 if (!TII->fitsConstReadLimitations(Consts)) {
2011 return false;
2012 }
2013
2014 Sel = CstOffset;
2015 Src = DAG.getRegister(R600::ALU_CONST, MVT::f32);
2016 return true;
2017 }
2018 case R600::MOV_IMM_GLOBAL_ADDR:
2019 // Check if the Imm slot is used. Taken from below.
2020 if (Imm->getAsZExtVal())
2021 return false;
2022 Imm = Src.getOperand(0);
2023 Src = DAG.getRegister(R600::ALU_LITERAL_X, MVT::i32);
2024 return true;
2025 case R600::MOV_IMM_I32:
2026 case R600::MOV_IMM_F32: {
2027 unsigned ImmReg = R600::ALU_LITERAL_X;
2028 uint64_t ImmValue = 0;
2029
2030 if (Src.getMachineOpcode() == R600::MOV_IMM_F32) {
2031 ConstantFPSDNode *FPC = cast<ConstantFPSDNode>(Src.getOperand(0));
2032 float FloatValue = FPC->getValueAPF().convertToFloat();
2033 if (FloatValue == 0.0) {
2034 ImmReg = R600::ZERO;
2035 } else if (FloatValue == 0.5) {
2036 ImmReg = R600::HALF;
2037 } else if (FloatValue == 1.0) {
2038 ImmReg = R600::ONE;
2039 } else {
2040 ImmValue = FPC->getValueAPF().bitcastToAPInt().getZExtValue();
2041 }
2042 } else {
2043 uint64_t Value = Src.getConstantOperandVal(0);
2044 if (Value == 0) {
2045 ImmReg = R600::ZERO;
2046 } else if (Value == 1) {
2047 ImmReg = R600::ONE_INT;
2048 } else {
2049 ImmValue = Value;
2050 }
2051 }
2052
2053 // Check that we aren't already using an immediate.
2054 // XXX: It's possible for an instruction to have more than one
2055 // immediate operand, but this is not supported yet.
2056 if (ImmReg == R600::ALU_LITERAL_X) {
2057 if (!Imm.getNode())
2058 return false;
2059 ConstantSDNode *C = cast<ConstantSDNode>(Imm);
2060 if (C->getZExtValue())
2061 return false;
2062 Imm = DAG.getTargetConstant(ImmValue, SDLoc(ParentNode), MVT::i32);
2063 }
2064 Src = DAG.getRegister(ImmReg, MVT::i32);
2065 return true;
2066 }
2067 default:
2068 return false;
2069 }
2070}
2071
2072/// Fold the instructions after selecting them
2073SDNode *R600TargetLowering::PostISelFolding(MachineSDNode *Node,
2074 SelectionDAG &DAG) const {
2075 const R600InstrInfo *TII = Subtarget->getInstrInfo();
2076 if (!Node->isMachineOpcode())
2077 return Node;
2078
2079 unsigned Opcode = Node->getMachineOpcode();
2080 SDValue FakeOp;
2081
2082 std::vector<SDValue> Ops(Node->op_begin(), Node->op_end());
2083
2084 if (Opcode == R600::DOT_4) {
2085 int OperandIdx[] = {
2086 TII->getOperandIdx(Opcode, R600::OpName::src0_X),
2087 TII->getOperandIdx(Opcode, R600::OpName::src0_Y),
2088 TII->getOperandIdx(Opcode, R600::OpName::src0_Z),
2089 TII->getOperandIdx(Opcode, R600::OpName::src0_W),
2090 TII->getOperandIdx(Opcode, R600::OpName::src1_X),
2091 TII->getOperandIdx(Opcode, R600::OpName::src1_Y),
2092 TII->getOperandIdx(Opcode, R600::OpName::src1_Z),
2093 TII->getOperandIdx(Opcode, R600::OpName::src1_W)
2094 };
2095 int NegIdx[] = {
2096 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_X),
2097 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Y),
2098 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_Z),
2099 TII->getOperandIdx(Opcode, R600::OpName::src0_neg_W),
2100 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_X),
2101 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Y),
2102 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_Z),
2103 TII->getOperandIdx(Opcode, R600::OpName::src1_neg_W)
2104 };
2105 int AbsIdx[] = {
2106 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_X),
2107 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Y),
2108 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_Z),
2109 TII->getOperandIdx(Opcode, R600::OpName::src0_abs_W),
2110 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_X),
2111 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Y),
2112 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_Z),
2113 TII->getOperandIdx(Opcode, R600::OpName::src1_abs_W)
2114 };
2115 for (unsigned i = 0; i < 8; i++) {
2116 if (OperandIdx[i] < 0)
2117 return Node;
2118 SDValue &Src = Ops[OperandIdx[i] - 1];
2119 SDValue &Neg = Ops[NegIdx[i] - 1];
2120 SDValue &Abs = Ops[AbsIdx[i] - 1];
2121 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2122 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2123 if (HasDst)
2124 SelIdx--;
2125 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2126 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, FakeOp, DAG))
2127 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2128 }
2129 } else if (Opcode == R600::REG_SEQUENCE) {
2130 for (unsigned i = 1, e = Node->getNumOperands(); i < e; i += 2) {
2131 SDValue &Src = Ops[i];
2132 if (FoldOperand(Node, i, Src, FakeOp, FakeOp, FakeOp, FakeOp, DAG))
2133 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2134 }
2135 } else {
2136 if (!TII->hasInstrModifiers(Opcode))
2137 return Node;
2138 int OperandIdx[] = {
2139 TII->getOperandIdx(Opcode, R600::OpName::src0),
2140 TII->getOperandIdx(Opcode, R600::OpName::src1),
2141 TII->getOperandIdx(Opcode, R600::OpName::src2)
2142 };
2143 int NegIdx[] = {
2144 TII->getOperandIdx(Opcode, R600::OpName::src0_neg),
2145 TII->getOperandIdx(Opcode, R600::OpName::src1_neg),
2146 TII->getOperandIdx(Opcode, R600::OpName::src2_neg)
2147 };
2148 int AbsIdx[] = {
2149 TII->getOperandIdx(Opcode, R600::OpName::src0_abs),
2150 TII->getOperandIdx(Opcode, R600::OpName::src1_abs),
2151 -1
2152 };
2153 for (unsigned i = 0; i < 3; i++) {
2154 if (OperandIdx[i] < 0)
2155 return Node;
2156 SDValue &Src = Ops[OperandIdx[i] - 1];
2157 SDValue &Neg = Ops[NegIdx[i] - 1];
2158 SDValue FakeAbs;
2159 SDValue &Abs = (AbsIdx[i] > -1) ? Ops[AbsIdx[i] - 1] : FakeAbs;
2160 bool HasDst = TII->getOperandIdx(Opcode, R600::OpName::dst) > -1;
2161 int SelIdx = TII->getSelIdx(Opcode, OperandIdx[i]);
2162 int ImmIdx = TII->getOperandIdx(Opcode, R600::OpName::literal);
2163 if (HasDst) {
2164 SelIdx--;
2165 ImmIdx--;
2166 }
2167 SDValue &Sel = (SelIdx > -1) ? Ops[SelIdx] : FakeOp;
2168 SDValue &Imm = Ops[ImmIdx];
2169 if (FoldOperand(Node, i, Src, Neg, Abs, Sel, Imm, DAG))
2170 return DAG.getMachineNode(Opcode, SDLoc(Node), Node->getVTList(), Ops);
2171 }
2172 }
2173
2174 return Node;
2175}
2176
2178R600TargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const {
2179 switch (RMW->getOperation()) {
2182 // FIXME: Cayman at least appears to have instructions for this, but the
2183 // instruction definitions appear to be missing.
2185 default:
2186 break;
2187 }
2188
2190}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Function Alias Analysis Results
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
const HexagonInstrInfo * TII
static bool isUndef(ArrayRef< int > Mask)
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition: MD5.cpp:58
const char LLVMTargetMachineRef TM
#define MO_FLAG_NEG
Definition: R600Defines.h:15
#define MO_FLAG_ABS
Definition: R600Defines.h:16
#define MO_FLAG_MASK
Definition: R600Defines.h:17
#define MO_FLAG_PUSH
Definition: R600Defines.h:18
static bool isEOP(MachineBasicBlock::iterator I)
static SDValue ReorganizeVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
static int ConstantAddressBlock(unsigned AddressSpace)
static SDValue CompactSwizzlableVector(SelectionDAG &DAG, SDValue VectorEntry, DenseMap< unsigned, unsigned > &RemapSwizzle)
R600 DAG Lowering interface definition.
Interface definition for R600InstrInfo.
Provides R600 specific target descriptions.
AMDGPU R600 specific subclass of TargetSubtarget.
The AMDGPU TargetMachine interface definition for hw codegen targets.
const SmallVectorImpl< MachineOperand > & Cond
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static std::optional< unsigned > getOpcode(ArrayRef< VPValue * > Values)
Returns the opcode of Values or ~0 if they do not all agree.
Definition: VPlanSLP.cpp:191
Value * RHS
Value * LHS
unsigned getStackWidth(const MachineFunction &MF) const
SDValue combineFMinMaxLegacy(const SDLoc &DL, EVT VT, SDValue LHS, SDValue RHS, SDValue True, SDValue False, SDValue CC, DAGCombinerInfo &DCI) const
Generate Min/Max node.
void analyzeFormalArgumentsCompute(CCState &State, const SmallVectorImpl< ISD::InputArg > &Ins) const
The SelectionDAGBuilder will automatically promote function arguments with illegal types.
AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
SDValue LowerSDIVREM(SDValue Op, SelectionDAG &DAG) const
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
uint32_t getImplicitParameterOffset(const MachineFunction &MF, const ImplicitParameter Param) const
Helper function that returns the byte offset of the given type of implicit parameter.
virtual SDValue LowerGlobalAddress(AMDGPUMachineFunction *MFI, SDValue Op, SelectionDAG &DAG) const
SDValue CreateLiveInRegisterRaw(SelectionDAG &DAG, const TargetRegisterClass *RC, Register Reg, EVT VT) const
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
void LowerUDIVREM64(SDValue Op, SelectionDAG &DAG, SmallVectorImpl< SDValue > &Results) const
float convertToFloat() const
Converts this APFloat to host float value.
Definition: APFloat.cpp:5268
APInt bitcastToAPInt() const
Definition: APFloat.h:1210
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1485
unsigned getSrcAddressSpace() const
unsigned getDestAddressSpace() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:727
@ UIncWrap
Increment one up to a maximum value.
Definition: Instructions.h:779
@ UDecWrap
Decrement one until a minimum value or zero.
Definition: Instructions.h:783
BinOp getOperation() const
Definition: Instructions.h:821
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
Register getLocReg() const
int64_t getLocMemOffset() const
const APFloat & getValueAPF() const
static ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
Definition: Constants.cpp:1691
uint64_t getZExtValue() const
This class represents an Operation in the Expression.
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
bool empty() const
Definition: DenseMap.h:98
bool contains(const_arg_type_t< KeyT > Val) const
Return true if the specified key is in the map, false otherwise.
Definition: DenseMap.h:145
unsigned getAddressSpace() const
const GlobalValue * getGlobal() const
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
bool isIndexed() const
Return true if this is a pre/post inc/dec load/store.
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
ISD::LoadExtType getExtensionType() const
Return whether this is a plain node, or one of the varieties of value-extending loads.
Machine Value Type.
static auto integer_valuetypes()
DebugLoc findDebugLoc(instr_iterator MBBI)
Find the next valid DebugLoc starting at MBBI, skipping any debug instructions.
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
Representation of each machine instruction.
Definition: MachineInstr.h:68
Flags
Flags values. These may be or'd together.
@ MODereferenceable
The memory access is dereferenceable (i.e., doesn't trap).
@ MONonTemporal
The memory access is non-temporal.
@ MOInvariant
The memory access always returns the same value (or traps).
Flags getFlags() const
Return the raw flags of the source value.
const Value * getValue() const
Return the base address of the memory access.
MachineOperand class - Representation of each machine instruction operand.
const GlobalValue * getGlobal() const
unsigned getTargetFlags() const
int64_t getOffset() const
Return the offset from the symbol in this operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
An SDNode that represents everything that will be needed to construct a MachineInstr.
unsigned getAddressSpace() const
Return the address space for the associated pointer.
Align getAlign() const
AAMDNodes getAAInfo() const
Returns the AA info that describes the dereference.
MachineMemOperand * getMemOperand() const
Return a MachineMemOperand object describing the memory reference performed by operation.
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
static PointerType * get(Type *ElementType, unsigned AddressSpace)
This constructs a pointer to an object of the specified type in a numbered address space.
StackOffset getFrameIndexReference(const MachineFunction &MF, int FI, Register &FrameReg) const override
bool hasFFBH() const
bool hasFMA() const
bool hasBFI() const
Definition: R600Subtarget.h:88
const R600FrameLowering * getFrameLowering() const override
Definition: R600Subtarget.h:52
bool hasCARRY() const
const R600RegisterInfo * getRegisterInfo() const override
Definition: R600Subtarget.h:60
const R600InstrInfo * getInstrInfo() const override
Definition: R600Subtarget.h:50
bool hasBCNT(unsigned Size) const
Definition: R600Subtarget.h:92
bool hasBORROW() const
Definition: R600Subtarget.h:99
bool hasFFBL() const
bool hasBFE() const
Definition: R600Subtarget.h:84
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
This method will be invoked for all target nodes and for any target-independent nodes that the target...
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *BB) const override
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
bool canMergeStoresTo(unsigned AS, EVT MemVT, const MachineFunction &MF) const override
Returns if it's reasonable to merge stores to MemVT size.
R600TargetLowering(const TargetMachine &TM, const R600Subtarget &STI)
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &, EVT VT) const override
Return the ValueType of the result of SETCC operations.
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
This callback is invoked when a node result type is illegal for the target, and the operation was reg...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *IsFast=nullptr) const override
Determine if the target supports unaligned memory accesses.
CCAssignFn * CCAssignFnForCall(CallingConv::ID CC, bool IsVarArg) const
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &DL, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
XXX Only kernel functions are supported, so we can assume for now that every function is a kernel fun...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
uint64_t getAsZExtVal() const
Helper method returns the zero-extended integer value of a ConstantSDNode.
unsigned getMachineOpcode() const
This may only be called if isMachineOpcode returns true.
const SDValue & getOperand(unsigned Num) const
uint64_t getConstantOperandVal(unsigned Num) const
Helper method returns the integer value of a ConstantSDNode operand.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
op_iterator op_end() const
op_iterator op_begin() const
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
uint64_t getConstantOperandVal(unsigned i) const
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getExtLoad(ISD::LoadExtType ExtType, const SDLoc &dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:722
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getConstantFP(double Val, const SDLoc &DL, EVT VT, bool isTarget=false)
Create a ConstantFPSDNode wrapping a constant value.
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getNOT(const SDLoc &DL, SDValue Val, EVT VT)
Create a bitwise NOT operation as (XOR Val, -1).
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getBuildVector(EVT VT, const SDLoc &DL, ArrayRef< SDValue > Ops)
Return an ISD::BUILD_VECTOR node.
Definition: SelectionDAG.h:828
SDValue getZeroExtendInReg(SDValue Op, const SDLoc &DL, EVT VT)
Return the expression required to zero extend the Op value assuming it was the smaller SrcTy value.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getTruncStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getRegister(unsigned Reg, EVT VT)
SDValue getSelectCC(const SDLoc &DL, SDValue LHS, SDValue RHS, SDValue True, SDValue False, ISD::CondCode Cond)
Helper function to make it easier to build SelectCC's if you just have an ISD::CondCode instead of an...
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, uint64_t Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:676
SDValue getVectorIdxConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
void ReplaceAllUsesOfValueWith(SDValue From, SDValue To)
Replace any uses of From with To, leaving uses of other values produced by From.getNode() alone.
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getCondCode(ISD::CondCode Cond)
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:586
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
Definition: SmallVector.h:696
void push_back(const T &Elt)
Definition: SmallVector.h:426
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1209
StackOffset holds a fixed and a scalable offset in bytes.
Definition: TypeSize.h:33
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getValue() const
bool isTruncatingStore() const
Return true if the op does a truncation before store.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
const TargetMachine & getTargetMachine() const
void setHasExtractBitsInsn(bool hasExtractInsn=true)
Tells the code generator that the target has BitExtract instructions.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const
Return true if the specified condition code is legal on this target.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
bool isOperationLegal(unsigned Op, EVT VT) const
Return true if the specified operation is legal on this target.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setCondCodeAction(ArrayRef< ISD::CondCode > CCs, MVT VT, LegalizeAction Action)
Indicate that the specified condition code is or isn't supported on the target and indicate what to d...
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
void setSchedulingPreference(Sched::Preference Pref)
Specify the target scheduling preference.
SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const
SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const
Expands an unaligned store to 2 half-size stores for integer values, and possibly more for vectors.
bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const
Expand float(f32) to SINT(i64) conversion.
std::pair< SDValue, SDValue > scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const
Turn load of vector type into a load of the individual elements.
virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const
This method should be implemented by targets that mark instructions with the 'usesCustomInserter' fla...
void expandShiftParts(SDNode *N, SDValue &Lo, SDValue &Hi, SelectionDAG &DAG) const
Expand shift-by-parts.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
LLVM Value Representation.
Definition: Value.h:74
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ PARAM_I_ADDRESS
Address space for indirect addressable parameter memory (VTX1).
@ CONSTANT_ADDRESS
Address space for constant memory (VTX2).
@ FLAT_ADDRESS
Address space for flat memory.
@ GLOBAL_ADDRESS
Address space for global memory (RAT0, VTX0).
@ PRIVATE_ADDRESS
Address space for private memory.
@ BUILD_VERTICAL_VECTOR
This node is for VLIW targets and it is used to represent a vector that is stored in consecutive regi...
@ CONST_DATA_PTR
Pointer to the start of the shader's constant data.
constexpr char Args[]
Key for Kernel::Metadata::mArgs.
bool isShader(CallingConv::ID cc)
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
@ AMDGPU_CS
Used for Mesa/AMDPAL compute shaders.
Definition: CallingConv.h:197
@ AMDGPU_VS
Used for Mesa vertex shaders, or AMDPAL last shader stage before rasterization (vertex shader if tess...
Definition: CallingConv.h:188
@ AMDGPU_KERNEL
Used for AMDGPU code object kernels.
Definition: CallingConv.h:200
@ AMDGPU_HS
Used for Mesa/AMDPAL hull shaders (= tessellation control shaders).
Definition: CallingConv.h:206
@ AMDGPU_GS
Used for Mesa/AMDPAL geometry shaders.
Definition: CallingConv.h:191
@ AMDGPU_PS
Used for Mesa/AMDPAL pixel shaders.
Definition: CallingConv.h:194
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ SPIR_KERNEL
Used for SPIR kernel functions.
Definition: CallingConv.h:144
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ AMDGPU_ES
Used for AMDPAL shader stage before geometry shader if geometry is in use.
Definition: CallingConv.h:218
@ AMDGPU_LS
Used for AMDPAL vertex shader if tessellation is in use.
Definition: CallingConv.h:213
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
bool isNON_EXTLoad(const SDNode *N)
Returns true if the specified node is a non-extending load.
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:750
@ MERGE_VALUES
MERGE_VALUES - This node takes multiple discrete operands and returns them all as its individual resu...
Definition: ISDOpcodes.h:236
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:723
@ ATOMIC_STORE
OUTCHAIN = ATOMIC_STORE(INCHAIN, ptr, val) This corresponds to "store atomic" instruction.
Definition: ISDOpcodes.h:1239
@ ADDC
Carry-setting nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:269
@ FMAD
FMAD - Perform a * b + c, while getting the same result as the separately rounded operations.
Definition: ISDOpcodes.h:487
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:1029
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:783
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:483
@ INTRINSIC_VOID
OUTCHAIN = INTRINSIC_VOID(INCHAIN, INTRINSICID, arg1, arg2, ...) This node represents a target intrin...
Definition: ISDOpcodes.h:199
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ FADD
Simple binary floating point operators.
Definition: ISDOpcodes.h:390
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:903
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:774
@ CTTZ_ZERO_UNDEF
Bit counting operators with an undefined result for zero inputs.
Definition: ISDOpcodes.h:722
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:928
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1075
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:727
@ ATOMIC_LOAD
Val, OUTCHAIN = ATOMIC_LOAD(INCHAIN, ptr) This corresponds to "load atomic" instruction.
Definition: ISDOpcodes.h:1235
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:705
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:535
@ SELECT_CC
Select with condition operator - This selects between a true value and a false value (ops #2 and #3) ...
Definition: ISDOpcodes.h:742
@ SIGN_EXTEND_INREG
SIGN_EXTEND_INREG - This operator atomically performs a SHL/SRA pair to sign extend a small value in ...
Definition: ISDOpcodes.h:798
@ FP_TO_SINT
FP_TO_[US]INT - Convert a floating point value to a signed or unsigned integer.
Definition: ISDOpcodes.h:836
@ AND
Bitwise operators - logical and, logical or, logical xor.
Definition: ISDOpcodes.h:680
@ INTRINSIC_WO_CHAIN
RESULT = INTRINSIC_WO_CHAIN(INTRINSICID, arg1, arg2, ...) This node represents a target intrinsic fun...
Definition: ISDOpcodes.h:184
@ ADDE
Carry-using nodes for multiple precision addition and subtraction.
Definition: ISDOpcodes.h:279
@ INSERT_VECTOR_ELT
INSERT_VECTOR_ELT(VECTOR, VAL, IDX) - Returns VECTOR with the element at IDX replaced with VAL.
Definition: ISDOpcodes.h:524
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:869
@ ADDRSPACECAST
ADDRSPACECAST - This operator converts between pointers of different address spaces.
Definition: ISDOpcodes.h:907
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:786
@ BRCOND
BRCOND - Conditional branch.
Definition: ISDOpcodes.h:1068
@ SHL_PARTS
SHL_PARTS/SRA_PARTS/SRL_PARTS - These operators are used for expanded integer shift operations.
Definition: ISDOpcodes.h:763
@ FCOPYSIGN
FCOPYSIGN(X, Y) - Return the value of X with the sign of Y.
Definition: ISDOpcodes.h:493
@ BUILD_VECTOR
BUILD_VECTOR(ELT0, ELT1, ELT2, ELT3,...) - Return a fixed-width vector with the specified,...
Definition: ISDOpcodes.h:515
CondCode getSetCCInverse(CondCode Operation, EVT Type)
Return the operation corresponding to !(X op Y), where 'op' is a valid SetCC operation.
CondCode getSetCCSwappedOperands(CondCode Operation)
Return the operation corresponding to (Y op X) when given the operation for (X op Y).
CondCode
ISD::CondCode enum - These are ordered carefully to make the bitfields below work out,...
Definition: ISDOpcodes.h:1512
LoadExtType
LoadExtType enum - This enum defines the three variants of LOADEXT (load with extension).
Definition: ISDOpcodes.h:1492
int getLDSNoRetOp(uint16_t Opcode)
@ Kill
The last use of a register.
Reg
All possible values of the reg field in the ModR/M byte.
constexpr float pif
Definition: MathExtras.h:52
constexpr double e
Definition: MathExtras.h:31
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:329
@ Offset
Definition: DWP.cpp:456
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
bool isNullConstant(SDValue V)
Returns true if V is a constant integer zero.
AddressSpace
Definition: NVPTXBaseInfo.h:21
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
bool CCAssignFn(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
CCAssignFn - This function assigns a location for Val, updating State to reflect the change.
@ AfterLegalizeVectorOps
Definition: DAGCombine.h:18
DWARFExpression::Operation Op
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
bool isAllOnesConstant(SDValue V)
Returns true if V is an integer constant with all bits set.
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:860
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted to an integer type with the same bitwidth.
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:373
bool isSimple() const
Test if the given EVT is simple (as opposed to being extended).
Definition: ValueTypes.h:129
bool bitsGT(EVT VT) const
Return true if this has more bits than VT.
Definition: ValueTypes.h:267
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition: ValueTypes.h:283
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:351
uint64_t getScalarSizeInBits() const
Definition: ValueTypes.h:363
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:299
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:160
EVT getScalarType() const
If this is a vector type, return the element type, otherwise return this.
Definition: ValueTypes.h:306
bool bitsGE(EVT VT) const
Return true if this has no fewer bits than VT.
Definition: ValueTypes.h:275
Type * getTypeForEVT(LLVMContext &Context) const
This method returns an LLVM type corresponding to the specified EVT.
Definition: ValueTypes.cpp:202
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:311
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:319
bool bitsLE(EVT VT) const
Return true if this has no more bits than VT.
Definition: ValueTypes.h:291
InputArg - This struct carries flags and type information about a single incoming (formal) argument or incoming (from the perspective of the caller) return value virtual register.
This class contains a discriminated union of information about pointers in memory operands,...