LLVM 17.0.0git
VEISelLowering.cpp
Go to the documentation of this file.
1//===-- VEISelLowering.cpp - VE DAG Lowering Implementation ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the interfaces that VE uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "VEISelLowering.h"
16#include "VECustomDAG.h"
17#include "VEInstrBuilder.h"
19#include "VERegisterInfo.h"
20#include "VETargetMachine.h"
32#include "llvm/IR/Function.h"
33#include "llvm/IR/IRBuilder.h"
34#include "llvm/IR/Module.h"
37using namespace llvm;
38
39#define DEBUG_TYPE "ve-lower"
40
41//===----------------------------------------------------------------------===//
42// Calling Convention Implementation
43//===----------------------------------------------------------------------===//
44
45#include "VEGenCallingConv.inc"
46
48 switch (CallConv) {
49 default:
50 return RetCC_VE_C;
52 return RetCC_VE_Fast;
53 }
54}
55
/// Select the calling-convention assignment function used for parameters.
/// Variadic calls (IsVarArg) always get CC_VE2; otherwise the choice is made
/// by switching on CallConv, with CC_VE_C as the default.
56CCAssignFn *getParamCC(CallingConv::ID CallConv, bool IsVarArg) {
57 if (IsVarArg)
58 return CC_VE2;
59 switch (CallConv) {
60 default:
61 return CC_VE_C;
// NOTE(review): listing line 62 (the case label guarding the return below) is
// absent from this extract; presumably CallingConv::Fast -- confirm upstream.
63 return CC_VE_Fast;
64 }
65}
66
68 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
69 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
70 CCAssignFn *RetCC = getReturnCC(CallConv);
72 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
73 return CCInfo.CheckReturn(Outs, RetCC);
74}
75
78
80
82
/// Register the value types handled by this lowering together with the
/// register class that holds each of them. The scalar types i32, i64, f32,
/// f64 and f128 are always registered; the vector and mask types are
/// registered only when the subtarget reports enableVPU().
83void VETargetLowering::initRegisterClasses() {
84 // Set up the scalar register classes.
85 addRegisterClass(MVT::i32, &VE::I32RegClass);
86 addRegisterClass(MVT::i64, &VE::I64RegClass);
87 addRegisterClass(MVT::f32, &VE::F32RegClass);
88 addRegisterClass(MVT::f64, &VE::I64RegClass); // f64 uses the same class as i64.
89 addRegisterClass(MVT::f128, &VE::F128RegClass);
90
91 if (Subtarget->enableVPU()) {
// Every type in AllVectorVTs maps to V64RegClass.
92 for (MVT VecVT : AllVectorVTs)
93 addRegisterClass(VecVT, &VE::V64RegClass);
// The two mask types get their own register classes.
94 addRegisterClass(MVT::v256i1, &VE::VMRegClass);
95 addRegisterClass(MVT::v512i1, &VE::VM512RegClass);
96 }
97}
98
/// Configure the legalization actions grouped below by topic: load/store,
/// address nodes, VAARG, stack, branch, integer ops, conversions,
/// floating-point ops and math functions, atomics, SJLJ and intrinsics.
/// NOTE(review): "SPU" presumably refers to the scalar unit -- confirm.
/// NOTE(review): many statement lines are absent from this listing extract,
/// so several loops and sections below appear empty; only the surviving
/// lines are described here.
99void VETargetLowering::initSPUActions() {
100 const auto &TM = getTargetMachine();
101 /// Load & Store {
102
103 // VE doesn't have i1 sign extending load.
104 for (MVT VT : MVT::integer_valuetypes()) {
109 }
110
111 // VE doesn't have floating point extload/truncstore, so expand them.
112 for (MVT FPVT : MVT::fp_valuetypes()) {
113 for (MVT OtherFPVT : MVT::fp_valuetypes()) {
114 setLoadExtAction(ISD::EXTLOAD, FPVT, OtherFPVT, Expand);
115 setTruncStoreAction(FPVT, OtherFPVT, Expand);
116 }
117 }
118
119 // VE doesn't have fp128 load/store, so expand them in custom lower.
122
123 /// } Load & Store
124
125 // Custom legalize address nodes into LO/HI parts.
// PtrVT is the integer type as wide as a pointer in address space 0.
126 MVT PtrVT = MVT::getIntegerVT(TM.getPointerSizeInBits(0));
132
133 /// VAARG handling {
135 // VAARG needs to be lowered to access with 8 bytes alignment.
137 // Use the default implementation.
140 /// } VAARG handling
141
142 /// Stack {
145
146 // Use the default implementation.
149 /// } Stack
150
151 /// Branch {
152
153 // VE doesn't have BRCOND
155
156 // BR_JT is not implemented yet.
158
159 /// } Branch
160
161 /// Int Ops {
162 for (MVT IntVT : {MVT::i32, MVT::i64}) {
163 // VE has no REM or DIVREM operations.
168
169 // VE has no SHL_PARTS/SRA_PARTS/SRL_PARTS operations.
173
174 // VE has no MULHU/S or U/SMUL_LOHI operations.
175 // TODO: Use MPD instruction to implement SMUL_LOHI for i32 type.
180
181 // VE has no CTTZ, ROTL, ROTR operations.
185
186 // VE has a 64-bit instruction which works as the i64 BSWAP operation. This
187 // instruction also works as the i32 BSWAP operation with an additional
188 // parameter. Use isel patterns to lower BSWAP.
190
191 // VE has only 64-bit instructions which work as i64 BITREVERSE/CTLZ/CTPOP
192 // operations. Use isel patterns for i64, promote for i32.
// Act is Promote for i32 and Legal for i64; it is reused for the bit-count
// and the bitwise logic operations below.
193 LegalizeAction Act = (IntVT == MVT::i32) ? Promote : Legal;
195 setOperationAction(ISD::CTLZ, IntVT, Act);
197 setOperationAction(ISD::CTPOP, IntVT, Act);
198
199 // VE has only 64-bit instructions which work as i64 AND/OR/XOR operations.
200 // Use isel patterns for i64, promote for i32.
201 setOperationAction(ISD::AND, IntVT, Act);
202 setOperationAction(ISD::OR, IntVT, Act);
203 setOperationAction(ISD::XOR, IntVT, Act);
204
205 // Legal smax and smin
208 }
209 /// } Int Ops
210
211 /// Conversion {
212 // VE doesn't have instructions for fp<->uint, so expand them by llvm
217
218 // fp16 not supported
219 for (MVT FPVT : MVT::fp_valuetypes()) {
222 }
223 /// } Conversion
224
225 /// Floating-point Ops {
226 /// Note: Floating-point operations are fneg, fadd, fsub, fmul, fdiv, frem,
227 /// and fcmp.
228
229 // VE doesn't have the following floating point operations.
230 for (MVT VT : MVT::fp_valuetypes()) {
233 }
234
235 // VE doesn't have fdiv of f128.
237
238 for (MVT FPVT : {MVT::f32, MVT::f64}) {
239 // f32 and f64 use ConstantFP. f128 uses ConstantPool.
241 }
242 /// } Floating-point Ops
243
244 /// Floating-point math functions {
245
246 // VE doesn't have the following floating point math functions.
247 for (MVT VT : MVT::fp_valuetypes()) {
255 }
256
257 // VE has single and double FMINNUM and FMAXNUM
258 for (MVT VT : {MVT::f32, MVT::f64}) {
260 }
261
262 /// } Floating-point math functions
263
264 /// Atomic instructions {
265
269
270 // Use custom inserter for ATOMIC_FENCE.
272
273 // Other atomic instructions.
274 for (MVT VT : MVT::integer_valuetypes()) {
275 // Support i8/i16 atomic swap.
277
278 // FIXME: Support "atmam" instructions.
283
284 // VE doesn't have the following instructions.
293 }
294
295 /// } Atomic instructions
296
297 /// SJLJ instructions {
// When the exception model is SjLj, the UNWIND_RESUME libcall is renamed to
// "_Unwind_SjLj_Resume".
301 if (TM.Options.ExceptionModel == ExceptionHandling::SjLj)
302 setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
303 /// } SJLJ instructions
304
305 // Intrinsic instructions
307}
308
/// Configure legalization actions for vector and mask value types. The
/// surviving lines mark operations as Custom for the types in AllVectorVTs
/// and for every vector type whose elements are 32 or 64 bits wide.
/// NOTE(review): several statement lines are absent from this listing
/// extract (e.g. the bodies of the first two loops and the contents of
/// IntReductionOCs); only the visible lines are described here.
309void VETargetLowering::initVPUActions() {
310 for (MVT LegalMaskVT : AllMaskVTs)
312
313 for (unsigned Opc : {ISD::AND, ISD::OR, ISD::XOR})
315
316 for (MVT LegalVecVT : AllVectorVTs) {
320 // Translate all vector instructions with legal element types to VVP_*
321 // nodes.
322 // TODO We will custom-widen into VVP_* nodes in the future. While we are
323 // building the infrastructure for this, we only do this for legal vector
324 // VTs.
// Each HANDLE_VP_TO_VVP / ADD_VVP_OP use expanded while including
// VVPNodes.def below becomes a Custom setOperationAction for LegalVecVT.
325#define HANDLE_VP_TO_VVP(VP_OPC, VVP_NAME) \
326 setOperationAction(ISD::VP_OPC, LegalVecVT, Custom);
327#define ADD_VVP_OP(VVP_NAME, ISD_NAME) \
328 setOperationAction(ISD::ISD_NAME, LegalVecVT, Custom);
329 setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_LOAD, LegalVecVT, Custom);
330 setOperationAction(ISD::EXPERIMENTAL_VP_STRIDED_STORE, LegalVecVT, Custom);
331#include "VVPNodes.def"
332 }
333
334 for (MVT LegalPackedVT : AllPackedVTs) {
337 }
338
339 // vNt32, vNt64 ops (legal element types)
340 for (MVT VT : MVT::vector_valuetypes()) {
341 MVT ElemVT = VT.getVectorElementType();
342 unsigned ElemBits = ElemVT.getScalarSizeInBits();
// Skip vector types whose elements are neither 32 nor 64 bits wide.
343 if (ElemBits != 32 && ElemBits != 64)
344 continue;
345
// Masked and plain loads/stores of these types are custom lowered.
346 for (unsigned MemOpc : {ISD::MLOAD, ISD::MSTORE, ISD::LOAD, ISD::STORE})
347 setOperationAction(MemOpc, VT, Custom);
348
349 const ISD::NodeType IntReductionOCs[] = {
353
// Every opcode listed in IntReductionOCs is custom lowered as well.
354 for (unsigned IntRedOpc : IntReductionOCs)
355 setOperationAction(IntRedOpc, VT, Custom);
356 }
357
358 // v256i1 and v512i1 ops
359 for (MVT MaskVT : AllMaskVTs) {
360 // Custom lower mask ops
363 }
364}
365
368 bool IsVarArg,
370 const SmallVectorImpl<SDValue> &OutVals,
371 const SDLoc &DL, SelectionDAG &DAG) const {
372 // CCValAssign - represent the assignment of the return value to locations.
374
375 // CCState - Info about the registers and stack slot.
376 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), RVLocs,
377 *DAG.getContext());
378
379 // Analyze return values.
380 CCInfo.AnalyzeReturn(Outs, getReturnCC(CallConv));
381
382 SDValue Flag;
383 SmallVector<SDValue, 4> RetOps(1, Chain);
384
385 // Copy the result values into the output registers.
386 for (unsigned i = 0; i != RVLocs.size(); ++i) {
387 CCValAssign &VA = RVLocs[i];
388 assert(VA.isRegLoc() && "Can only return in registers!");
389 assert(!VA.needsCustom() && "Unexpected custom lowering");
390 SDValue OutVal = OutVals[i];
391
392 // Integer return values must be sign or zero extended by the callee.
393 switch (VA.getLocInfo()) {
395 break;
397 OutVal = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), OutVal);
398 break;
400 OutVal = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), OutVal);
401 break;
403 OutVal = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), OutVal);
404 break;
405 case CCValAssign::BCvt: {
406 // Convert a float return value to i64 with padding.
407 // 63 31 0
408 // +------+------+
409 // | float| 0 |
410 // +------+------+
411 assert(VA.getLocVT() == MVT::i64);
412 assert(VA.getValVT() == MVT::f32);
413 SDValue Undef = SDValue(
414 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
415 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
416 OutVal = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
417 MVT::i64, Undef, OutVal, Sub_f32),
418 0);
419 break;
420 }
421 default:
422 llvm_unreachable("Unknown loc info!");
423 }
424
425 Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVal, Flag);
426
427 // Guarantee that all emitted copies are stuck together with flags.
428 Flag = Chain.getValue(1);
429 RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
430 }
431
432 RetOps[0] = Chain; // Update chain.
433
434 // Add the flag if we have it.
435 if (Flag.getNode())
436 RetOps.push_back(Flag);
437
438 return DAG.getNode(VEISD::RET_FLAG, DL, MVT::Other, RetOps);
439}
440
442 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
443 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
444 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
446
447 // Get the base offset of the incoming arguments stack space.
448 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
449 // Get the size of the preserved arguments area
450 unsigned ArgsPreserved = 64;
451
452 // Analyze arguments according to CC_VE.
454 CCState CCInfo(CallConv, IsVarArg, DAG.getMachineFunction(), ArgLocs,
455 *DAG.getContext());
456 // Allocate the preserved area first.
457 CCInfo.AllocateStack(ArgsPreserved, Align(8));
458 // We already allocated the preserved area, so the stack offset computed
459 // by CC_VE would be correct now.
460 CCInfo.AnalyzeFormalArguments(Ins, getParamCC(CallConv, false));
461
462 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
463 CCValAssign &VA = ArgLocs[i];
464 assert(!VA.needsCustom() && "Unexpected custom lowering");
465 if (VA.isRegLoc()) {
466 // This argument is passed in a register.
467 // All integer register arguments are promoted by the caller to i64.
468
469 // Create a virtual register for the promoted live-in value.
470 Register VReg =
472 SDValue Arg = DAG.getCopyFromReg(Chain, DL, VReg, VA.getLocVT());
473
474 // The caller promoted the argument, so insert an Assert?ext SDNode so we
475 // won't promote the value again in this function.
476 switch (VA.getLocInfo()) {
479 DAG.getValueType(VA.getValVT()));
480 break;
483 DAG.getValueType(VA.getValVT()));
484 break;
485 case CCValAssign::BCvt: {
486 // Extract a float argument from i64 with padding.
487 // 63 31 0
488 // +------+------+
489 // | float| 0 |
490 // +------+------+
491 assert(VA.getLocVT() == MVT::i64);
492 assert(VA.getValVT() == MVT::f32);
493 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
494 Arg = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
495 MVT::f32, Arg, Sub_f32),
496 0);
497 break;
498 }
499 default:
500 break;
501 }
502
503 // Truncate the register down to the argument type.
504 if (VA.isExtInLoc())
505 Arg = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), Arg);
506
507 InVals.push_back(Arg);
508 continue;
509 }
510
511 // The registers are exhausted. This argument was passed on the stack.
512 assert(VA.isMemLoc());
513 // The CC_VE_Full/Half functions compute stack offsets relative to the
514 // beginning of the arguments area at %fp + the size of reserved area.
515 unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
516 unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
517
518 // Adjust offset for a float argument by adding 4 since the argument is
519 // stored in 8 bytes buffer with offset like below. LLVM generates
520 // 4 bytes load instruction, so need to adjust offset here. This
521 // adjustment is required in only LowerFormalArguments. In LowerCall,
522 // a float argument is converted to i64 first, and stored as 8 bytes
523 // data, which is required by ABI, so no need for adjustment.
524 // 0 4
525 // +------+------+
526 // | empty| float|
527 // +------+------+
528 if (VA.getValVT() == MVT::f32)
529 Offset += 4;
530
531 int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
532 InVals.push_back(
533 DAG.getLoad(VA.getValVT(), DL, Chain,
536 }
537
538 if (!IsVarArg)
539 return Chain;
540
541 // This function takes variable arguments, some of which may have been passed
542 // in registers %s0-%s8.
543 //
544 // The va_start intrinsic needs to know the offset to the first variable
545 // argument.
546 // TODO: need to calculate offset correctly once we support f128.
547 unsigned ArgOffset = ArgLocs.size() * 8;
549 // Skip the reserved area at the top of stack.
550 FuncInfo->setVarArgsFrameOffset(ArgOffset + ArgsBaseOffset);
551
552 return Chain;
553}
554
555// FIXME? Maybe this could be a TableGen attribute on some registers and
556// this table could be generated automatically from RegInfo.
558 const MachineFunction &MF) const {
560 .Case("sp", VE::SX11) // Stack pointer
561 .Case("fp", VE::SX9) // Frame pointer
562 .Case("sl", VE::SX8) // Stack limit
563 .Case("lr", VE::SX10) // Link register
564 .Case("tp", VE::SX14) // Thread pointer
565 .Case("outer", VE::SX12) // Outer regiser
566 .Case("info", VE::SX17) // Info area register
567 .Case("got", VE::SX15) // Global offset table register
568 .Case("plt", VE::SX16) // Procedure linkage table register
569 .Default(0);
570
571 if (Reg)
572 return Reg;
573
574 report_fatal_error("Invalid register name global variable");
575}
576
577//===----------------------------------------------------------------------===//
578// TargetLowering Implementation
579//===----------------------------------------------------------------------===//
580
582 SmallVectorImpl<SDValue> &InVals) const {
583 SelectionDAG &DAG = CLI.DAG;
584 SDLoc DL = CLI.DL;
585 SDValue Chain = CLI.Chain;
586 auto PtrVT = getPointerTy(DAG.getDataLayout());
587
588 // VE target does not yet support tail call optimization.
589 CLI.IsTailCall = false;
590
591 // Get the base offset of the outgoing arguments stack space.
592 unsigned ArgsBaseOffset = Subtarget->getRsaSize();
593 // Get the size of the preserved arguments area
594 unsigned ArgsPreserved = 8 * 8u;
595
596 // Analyze operands of the call, assigning locations to each operand.
598 CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
599 *DAG.getContext());
600 // Allocate the preserved area first.
601 CCInfo.AllocateStack(ArgsPreserved, Align(8));
602 // We already allocated the preserved area, so the stack offset computed
603 // by CC_VE would be correct now.
604 CCInfo.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, false));
605
606 // VE requires passing arguments both in registers and on the stack for
607 // varargs or non-prototyped functions.
608 bool UseBoth = CLI.IsVarArg;
609
610 // Analyze operands again if it is required to store BOTH.
612 CCState CCInfo2(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(),
613 ArgLocs2, *DAG.getContext());
614 if (UseBoth)
615 CCInfo2.AnalyzeCallOperands(CLI.Outs, getParamCC(CLI.CallConv, true));
616
617 // Get the size of the outgoing arguments stack space requirement.
618 unsigned ArgsSize = CCInfo.getNextStackOffset();
619
620 // Keep stack frames 16-byte aligned.
621 ArgsSize = alignTo(ArgsSize, 16);
622
623 // Adjust the stack pointer to make room for the arguments.
624 // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
625 // with more than 6 arguments.
626 Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
627
628 // Collect the set of registers to pass to the function and their values.
629 // This will be emitted as a sequence of CopyToReg nodes glued to the call
630 // instruction.
632
633 // Collect chains from all the memory operations that copy arguments to the
634 // stack. They must follow the stack pointer adjustment above and precede the
635 // call instruction itself.
636 SmallVector<SDValue, 8> MemOpChains;
637
638 // VE needs to get address of callee function in a register
639 // So, prepare to copy it to SX12 here.
640
641 // If the callee is a GlobalAddress node (quite common, every direct call is)
642 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
643 // Likewise ExternalSymbol -> TargetExternalSymbol.
644 SDValue Callee = CLI.Callee;
645
646 bool IsPICCall = isPositionIndependent();
647
648 // PC-relative references to external symbols should go through $stub.
649 // If so, we need to prepare GlobalBaseReg first.
650 const TargetMachine &TM = DAG.getTarget();
652 const GlobalValue *GV = nullptr;
653 auto *CalleeG = dyn_cast<GlobalAddressSDNode>(Callee);
654 if (CalleeG)
655 GV = CalleeG->getGlobal();
656 bool Local = TM.shouldAssumeDSOLocal(*Mod, GV);
657 bool UsePlt = !Local;
659
660 // Turn GlobalAddress/ExternalSymbol node into a value node
661 // containing the address of them here.
662 if (CalleeG) {
663 if (IsPICCall) {
664 if (UsePlt)
665 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
666 Callee = DAG.getTargetGlobalAddress(GV, DL, PtrVT, 0, 0);
667 Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
668 } else {
669 Callee =
671 }
672 } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
673 if (IsPICCall) {
674 if (UsePlt)
675 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
676 Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
677 Callee = DAG.getNode(VEISD::GETFUNPLT, DL, PtrVT, Callee);
678 } else {
679 Callee =
681 }
682 }
683
684 RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
685
686 for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
687 CCValAssign &VA = ArgLocs[i];
688 SDValue Arg = CLI.OutVals[i];
689
690 // Promote the value if needed.
691 switch (VA.getLocInfo()) {
692 default:
693 llvm_unreachable("Unknown location info!");
695 break;
697 Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
698 break;
700 Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
701 break;
703 Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
704 break;
705 case CCValAssign::BCvt: {
706 // Convert a float argument to i64 with padding.
707 // 63 31 0
708 // +------+------+
709 // | float| 0 |
710 // +------+------+
711 assert(VA.getLocVT() == MVT::i64);
712 assert(VA.getValVT() == MVT::f32);
713 SDValue Undef = SDValue(
714 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::i64), 0);
715 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
716 Arg = SDValue(DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL,
717 MVT::i64, Undef, Arg, Sub_f32),
718 0);
719 break;
720 }
721 }
722
723 if (VA.isRegLoc()) {
724 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
725 if (!UseBoth)
726 continue;
727 VA = ArgLocs2[i];
728 }
729
730 assert(VA.isMemLoc());
731
732 // Create a store off the stack pointer for this argument.
733 SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
734 // The argument area starts at %fp/%sp + the size of reserved area.
735 SDValue PtrOff =
736 DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
737 PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
738 MemOpChains.push_back(
739 DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
740 }
741
742 // Emit all stores, make sure they occur before the call.
743 if (!MemOpChains.empty())
744 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
745
746 // Build a sequence of CopyToReg nodes glued together with token chain and
747 // glue operands which copy the outgoing args into registers. The InGlue is
748 // necessary since all emitted instructions must be stuck together in order
749 // to pass the live physical registers.
750 SDValue InGlue;
751 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
752 Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
753 RegsToPass[i].second, InGlue);
754 InGlue = Chain.getValue(1);
755 }
756
757 // Build the operands for the call instruction itself.
759 Ops.push_back(Chain);
760 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
761 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
762 RegsToPass[i].second.getValueType()));
763
764 // Add a register mask operand representing the call-preserved registers.
765 const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
766 const uint32_t *Mask =
767 TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
768 assert(Mask && "Missing call preserved mask for calling convention");
769 Ops.push_back(DAG.getRegisterMask(Mask));
770
771 // Make sure the CopyToReg nodes are glued to the call instruction which
772 // consumes the registers.
773 if (InGlue.getNode())
774 Ops.push_back(InGlue);
775
776 // Now the call itself.
777 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
778 Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
779 InGlue = Chain.getValue(1);
780
781 // Revert the stack pointer immediately after the call.
782 Chain = DAG.getCALLSEQ_END(Chain, ArgsSize, 0, InGlue, DL);
783 InGlue = Chain.getValue(1);
784
785 // Now extract the return values. This is more or less the same as
786 // LowerFormalArguments.
787
788 // Assign locations to each value returned by this call.
790 CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
791 *DAG.getContext());
792
793 // Set inreg flag manually for codegen generated library calls that
794 // return float.
795 if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CB)
796 CLI.Ins[0].Flags.setInReg();
797
798 RVInfo.AnalyzeCallResult(CLI.Ins, getReturnCC(CLI.CallConv));
799
800 // Copy all of the result registers out of their specified physreg.
801 for (unsigned i = 0; i != RVLocs.size(); ++i) {
802 CCValAssign &VA = RVLocs[i];
803 assert(!VA.needsCustom() && "Unexpected custom lowering");
804 Register Reg = VA.getLocReg();
805
806 // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
807 // reside in the same register in the high and low bits. Reuse the
808 // CopyFromReg previous node to avoid duplicate copies.
809 SDValue RV;
810 if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
811 if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
812 RV = Chain.getValue(0);
813
814 // But usually we'll create a new CopyFromReg for a different register.
815 if (!RV.getNode()) {
816 RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
817 Chain = RV.getValue(1);
818 InGlue = Chain.getValue(2);
819 }
820
821 // The callee promoted the return value, so insert an Assert?ext SDNode so
822 // we won't promote the value again in this function.
823 switch (VA.getLocInfo()) {
825 RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
826 DAG.getValueType(VA.getValVT()));
827 break;
829 RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
830 DAG.getValueType(VA.getValVT()));
831 break;
832 case CCValAssign::BCvt: {
833 // Extract a float return value from i64 with padding.
834 // 63 31 0
835 // +------+------+
836 // | float| 0 |
837 // +------+------+
838 assert(VA.getLocVT() == MVT::i64);
839 assert(VA.getValVT() == MVT::f32);
840 SDValue Sub_f32 = DAG.getTargetConstant(VE::sub_f32, DL, MVT::i32);
841 RV = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
842 MVT::f32, RV, Sub_f32),
843 0);
844 break;
845 }
846 default:
847 break;
848 }
849
850 // Truncate the register down to the return value type.
851 if (VA.isExtInLoc())
852 RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
853
854 InVals.push_back(RV);
855 }
856
857 return Chain;
858}
859
861 const GlobalAddressSDNode *GA) const {
862 // VE uses 64 bit addressing, so we need multiple instructions to generate
863 // an address. Folding address with offset increases the number of
864 // instructions, so we disable it here. Offsets will be folded in
865 // the DAG combine later if it is worth doing so.
866 return false;
867}
868
869/// isFPImmLegal - Returns true if the target can instruction select the
870/// specified FP immediate natively. If false, the legalizer will
871/// materialize the FP immediate as a load from a constant pool.
873 bool ForCodeSize) const {
874 return VT == MVT::f32 || VT == MVT::f64;
875}
876
877/// Determine if the target supports unaligned memory accesses.
878///
879/// This function returns true if the target allows unaligned memory accesses
880/// of the specified type in the given address space. If true, it also returns
881/// whether the unaligned memory access is "fast" in the last argument by
882/// reference. This is used, for example, in situations where an array
883/// copy/move/set is converted to a sequence of store operations. Its use
884/// helps to ensure that such replacements don't generate code that causes an
885/// alignment error (trap) on the target machine.
887 unsigned AddrSpace,
888 Align A,
890 unsigned *Fast) const {
891 if (Fast) {
892 // It's fast anytime on VE
893 *Fast = 1;
894 }
895 return true;
896}
897
899 const VESubtarget &STI)
900 : TargetLowering(TM), Subtarget(&STI) {
901 // Instructions which use registers as conditionals examine all the
902 // bits (as does the pseudo SELECT_CC expansion). I don't think it
903 // matters much whether it's ZeroOrOneBooleanContent, or
904 // ZeroOrNegativeOneBooleanContent, so, arbitrarily choose the
905 // former.
908
909 initRegisterClasses();
910 initSPUActions();
911 initVPUActions();
912
914
915 // We have target-specific dag combine patterns for the following nodes:
919
920 // Set function alignment to 16 bytes
922
923 // VE stores all arguments with 8-byte alignment
925
927}
928
/// Return a printable name of the form "VEISD::<NAME>" for the VE-specific
/// node opcodes listed in the switch below, or nullptr when control leaves
/// the switch without returning.
/// NOTE(review): listing lines 934 and 949-950 are absent from this extract,
/// so the label preceding the `break` and two TARGET_NODE_CASE entries are
/// not visible here.
929const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
// TARGET_NODE_CASE(NAME) expands to a case label for VEISD::NAME that returns
// the stringified, "VEISD::"-prefixed name.
930#define TARGET_NODE_CASE(NAME) \
931 case VEISD::NAME: \
932 return "VEISD::" #NAME;
933 switch ((VEISD::NodeType)Opcode) {
935 break;
936 TARGET_NODE_CASE(CMPI)
937 TARGET_NODE_CASE(CMPU)
938 TARGET_NODE_CASE(CMPF)
939 TARGET_NODE_CASE(CMPQ)
940 TARGET_NODE_CASE(CMOV)
941 TARGET_NODE_CASE(CALL)
942 TARGET_NODE_CASE(EH_SJLJ_LONGJMP)
943 TARGET_NODE_CASE(EH_SJLJ_SETJMP)
944 TARGET_NODE_CASE(EH_SJLJ_SETUP_DISPATCH)
945 TARGET_NODE_CASE(GETFUNPLT)
946 TARGET_NODE_CASE(GETSTACKTOP)
947 TARGET_NODE_CASE(GETTLSADDR)
948 TARGET_NODE_CASE(GLOBAL_BASE_REG)
951 TARGET_NODE_CASE(RET_FLAG)
952 TARGET_NODE_CASE(TS1AM)
953 TARGET_NODE_CASE(VEC_UNPACK_LO)
954 TARGET_NODE_CASE(VEC_UNPACK_HI)
955 TARGET_NODE_CASE(VEC_PACK)
956 TARGET_NODE_CASE(VEC_BROADCAST)
957 TARGET_NODE_CASE(REPL_I32)
958 TARGET_NODE_CASE(REPL_F32)
959
960 TARGET_NODE_CASE(LEGALAVL)
961
962 // Register the VVP_* SDNodes: each ADD_VVP_OP use expanded while including
// VVPNodes.def becomes a TARGET_NODE_CASE for its first argument.
963#define ADD_VVP_OP(VVP_NAME, ...) TARGET_NODE_CASE(VVP_NAME)
964#include "VVPNodes.def"
965 }
966#undef TARGET_NODE_CASE
967 return nullptr;
968}
969
971 EVT VT) const {
972 return MVT::i32;
973}
974
975// Convert to a target node and set target flags.
977 SelectionDAG &DAG) const {
978 if (const GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op))
979 return DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(GA),
980 GA->getValueType(0), GA->getOffset(), TF);
981
982 if (const BlockAddressSDNode *BA = dyn_cast<BlockAddressSDNode>(Op))
983 return DAG.getTargetBlockAddress(BA->getBlockAddress(), Op.getValueType(),
984 0, TF);
985
986 if (const ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(Op))
987 return DAG.getTargetConstantPool(CP->getConstVal(), CP->getValueType(0),
988 CP->getAlign(), CP->getOffset(), TF);
989
990 if (const ExternalSymbolSDNode *ES = dyn_cast<ExternalSymbolSDNode>(Op))
991 return DAG.getTargetExternalSymbol(ES->getSymbol(), ES->getValueType(0),
992 TF);
993
994 if (const JumpTableSDNode *JT = dyn_cast<JumpTableSDNode>(Op))
995 return DAG.getTargetJumpTable(JT->getIndex(), JT->getValueType(0), TF);
996
997 llvm_unreachable("Unhandled address SDNode");
998}
999
1000// Split Op into high and low parts according to HiTF and LoTF.
1001// Return an ADD node combining the parts.
// Each part wraps withTargetFlags(Op, <flag>, DAG) in a VEISD::Hi or VEISD::Lo
// node; all three nodes use Op's value type and Op's debug location.
1002SDValue VETargetLowering::makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF,
1003 SelectionDAG &DAG) const {
1004 SDLoc DL(Op);
1005 EVT VT = Op.getValueType();
// High part, tagged with the HiTF target flag.
1006 SDValue Hi = DAG.getNode(VEISD::Hi, DL, VT, withTargetFlags(Op, HiTF, DAG));
// Low part, tagged with the LoTF target flag.
1007 SDValue Lo = DAG.getNode(VEISD::Lo, DL, VT, withTargetFlags(Op, LoTF, DAG));
1008 return DAG.getNode(ISD::ADD, DL, VT, Hi, Lo);
1009}
1010
1011// Build SDNodes for producing an address from a GlobalAddress, ConstantPool,
1012// or ExternalSymbol SDNode.
1014 SDLoc DL(Op);
1015 EVT PtrVT = Op.getValueType();
1016
1017 // Handle PIC mode first. VE needs a got load for every variable!
1018 if (isPositionIndependent()) {
1019 auto GlobalN = dyn_cast<GlobalAddressSDNode>(Op);
1020
1021 if (isa<ConstantPoolSDNode>(Op) || isa<JumpTableSDNode>(Op) ||
1022 (GlobalN && GlobalN->getGlobal()->hasLocalLinkage())) {
1023 // Create following instructions for local linkage PIC code.
1024 // lea %reg, label@gotoff_lo
1025 // and %reg, %reg, (32)0
1026 // lea.sl %reg, label@gotoff_hi(%reg, %got)
1029 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
1030 return DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
1031 }
1032 // Create following instructions for not local linkage PIC code.
1033 // lea %reg, label@got_lo
1034 // and %reg, %reg, (32)0
1035 // lea.sl %reg, label@got_hi(%reg)
1036 // ld %reg, (%reg, %got)
1039 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrVT);
1040 SDValue AbsAddr = DAG.getNode(ISD::ADD, DL, PtrVT, GlobalBase, HiLo);
1041 return DAG.getLoad(PtrVT, DL, DAG.getEntryNode(), AbsAddr,
1043 }
1044
1045 // This is one of the absolute code models.
1046 switch (getTargetMachine().getCodeModel()) {
1047 default:
1048 llvm_unreachable("Unsupported absolute code model");
1049 case CodeModel::Small:
1050 case CodeModel::Medium:
1051 case CodeModel::Large:
1052 // abs64.
1054 }
1055}
1056
1057/// Custom Lower {
1058
1059// The mappings for emitLeading/TrailingFence for VE is designed by following
1060// http://www.cl.cam.ac.uk/~pes20/cpp/cpp0xmappings.html
1062 Instruction *Inst,
1063 AtomicOrdering Ord) const {
1064 switch (Ord) {
1067 llvm_unreachable("Invalid fence: unordered/non-atomic");
1070 return nullptr; // Nothing to do
1073 return Builder.CreateFence(AtomicOrdering::Release);
1075 if (!Inst->hasAtomicStore())
1076 return nullptr; // Nothing to do
1078 }
1079 llvm_unreachable("Unknown fence ordering in emitLeadingFence");
1080}
1081
1083 Instruction *Inst,
1084 AtomicOrdering Ord) const {
1085 switch (Ord) {
1088 llvm_unreachable("Invalid fence: unordered/not-atomic");
1091 return nullptr; // Nothing to do
1094 return Builder.CreateFence(AtomicOrdering::Acquire);
1097 }
1098 llvm_unreachable("Unknown fence ordering in emitTrailingFence");
1099}
1100
// Lower an ATOMIC_FENCE node. Operand 0 is the chain, operand 1 the atomic
// ordering and operand 2 the synchronization scope (both read as constants).
// For SyncScope::System the node becomes a VE::FENCEM machine node whose
// immediate (1, 2 or 3) depends on the ordering; anything that falls out of
// the switch, and every other scope, becomes an ISD::MEMBARRIER on the chain.
// NOTE(review): the first signature line and the case labels are missing from
// this listing.
1102 SelectionDAG &DAG) const {
1103 SDLoc DL(Op);
1104 AtomicOrdering FenceOrdering = static_cast<AtomicOrdering>(
1105 cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue());
1106 SyncScope::ID FenceSSID = static_cast<SyncScope::ID>(
1107 cast<ConstantSDNode>(Op.getOperand(2))->getZExtValue());
1108
1109 // VE uses Release consistency, so need a fence instruction if it is a
1110 // cross-thread fence.
1111 if (FenceSSID == SyncScope::System) {
1112 switch (FenceOrdering) {
1116 // No need to generate fencem instruction here.
1117 break;
1119 // Generate "fencem 2" as acquire fence.
1120 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1121 DAG.getTargetConstant(2, DL, MVT::i32),
1122 Op.getOperand(0)),
1123 0);
1125 // Generate "fencem 1" as release fence.
1126 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1127 DAG.getTargetConstant(1, DL, MVT::i32),
1128 Op.getOperand(0)),
1129 0);
1132 // Generate "fencem 3" as acq_rel and seq_cst fence.
1133 // FIXME: "fencem 3" doesn't wait for PCIe device accesses,
1134 // so seq_cst may require more instructions for them.
1135 return SDValue(DAG.getMachineNode(VE::FENCEM, DL, MVT::Other,
1136 DAG.getTargetConstant(3, DL, MVT::i32),
1137 Op.getOperand(0)),
1138 0);
1139 }
1140 }
1141
1142 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
1143 return DAG.getNode(ISD::MEMBARRIER, DL, MVT::Other, Op.getOperand(0));
1144}
1145
// Decide how an atomicrmw instruction AI should be expanded. Xchg gets its own
// branch; every other operation takes the fall-through path described below.
// NOTE(review): the signature and both return statements are missing from this
// listing, so the concrete expansion kinds returned cannot be confirmed here.
1148 // We have TS1AM implementation for i8/i16/i32/i64, so use it.
1149 if (AI->getOperation() == AtomicRMWInst::Xchg) {
1151 }
1152 // FIXME: Support "ATMAM" instruction for LOAD_ADD/SUB/AND/OR.
1153
1154 // Otherwise, expand it using compare and exchange instruction to not call
1155 // __sync_fetch_and_* functions.
1157}
1158
// Build the operands for a sub-word (i8 or i16) swap done with TS1AM on the
// containing 4-byte word. Op must be an AtomicSDNode; its operand 1 is the
// pointer and operand 2 the value to store.
//   Flag (out): byte-select mask, 1 for i8 or 3 otherwise, shifted left by
//               the pointer's low two bits.
//   Bits (out): that byte offset multiplied by 8 (as a shift by 3).
//   returns:    the value shifted left by Bits.
// NOTE(review): the first signature line is missing from this listing.
1160 SDValue &Bits) {
1161 SDLoc DL(Op);
1162 AtomicSDNode *N = cast<AtomicSDNode>(Op);
1163 SDValue Ptr = N->getOperand(1);
1164 SDValue Val = N->getOperand(2);
1165 EVT PtrVT = Ptr.getValueType();
1166 bool Byte = N->getMemoryVT() == MVT::i8;
1167 // Remainder = AND Ptr, 3
1168 // Flag = 1 << Remainder ; If Byte is true (1 byte swap flag)
1169 // Flag = 3 << Remainder ; If Byte is false (2 bytes swap flag)
1170 // Bits = Remainder << 3
1171 // NewVal = Val << Bits
// Const3 serves both as the AND mask (low two address bits) and as the shift
// amount that turns a byte offset into a bit offset.
1172 SDValue Const3 = DAG.getConstant(3, DL, PtrVT);
1173 SDValue Remainder = DAG.getNode(ISD::AND, DL, PtrVT, {Ptr, Const3});
1174 SDValue Mask = Byte ? DAG.getConstant(1, DL, MVT::i32)
1175 : DAG.getConstant(3, DL, MVT::i32);
1176 Flag = DAG.getNode(ISD::SHL, DL, MVT::i32, {Mask, Remainder});
1177 Bits = DAG.getNode(ISD::SHL, DL, PtrVT, {Remainder, Const3});
1178 return DAG.getNode(ISD::SHL, DL, Val.getValueType(), {Val, Bits});
1179}
1180
// Extract the sub-word result from the word-sized Data: shift it right by Bits
// and mask it to 8 bits when Op's memory type is i8, or to 16 bits otherwise.
// Op must be an AtomicSDNode.
// NOTE(review): the first signature line is missing from this listing.
1182 SDValue Bits) {
1183 SDLoc DL(Op);
1184 EVT VT = Data.getValueType();
1185 bool Byte = cast<AtomicSDNode>(Op)->getMemoryVT() == MVT::i8;
1186 // NewData = Data >> Bits
1187 // Result = NewData & 0xff ; If Byte is true (1 byte)
1188 // Result = NewData & 0xffff ; If Byte is false (2 bytes)
1189
1190 SDValue NewData = DAG.getNode(ISD::SRL, DL, VT, Data, Bits);
1191 return DAG.getNode(ISD::AND, DL, VT,
1192 {NewData, DAG.getConstant(Byte ? 0xff : 0xffff, DL, VT)});
1193}
1194
// Lower an ATOMIC_SWAP node. For i8 and i16 memory types the swap is rewritten
// as a VEISD::TS1AM atomic on the pointer rounded down to a multiple of 4,
// using prepareTS1AM for the operands and finalizeTS1AM to extract the result;
// the returned value merges {Result, Chain}. Any other memory type is returned
// unchanged.
// NOTE(review): the first signature line is missing from this listing.
1196 SelectionDAG &DAG) const {
1197 SDLoc DL(Op);
1198 AtomicSDNode *N = cast<AtomicSDNode>(Op);
1199
1200 if (N->getMemoryVT() == MVT::i8) {
1201 // For i8, use "ts1am"
1202 // Input:
1203 // ATOMIC_SWAP Ptr, Val, Order
1204 //
1205 // Output:
1206 // Remainder = AND Ptr, 3
1207 // Flag = 1 << Remainder ; 1 byte swap flag for TS1AM inst.
1208 // Bits = Remainder << 3
1209 // NewVal = Val << Bits
1210 //
1211 // Aligned = AND Ptr, -4
1212 // Data = TS1AM Aligned, Flag, NewVal
1213 //
1214 // NewData = Data >> Bits
1215 // Result = NewData & 0xff ; 1 byte result
1216 SDValue Flag;
1217 SDValue Bits;
1218 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1219
1220 SDValue Ptr = N->getOperand(1);
1221 SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1222 {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1223 SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1224 DAG.getVTList(Op.getNode()->getValueType(0),
1225 Op.getNode()->getValueType(1)),
1226 {N->getChain(), Aligned, Flag, NewVal},
1227 N->getMemOperand());
1228
1229 SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1230 SDValue Chain = TS1AM.getValue(1);
1231 return DAG.getMergeValues({Result, Chain}, DL);
1232 }
1233 if (N->getMemoryVT() == MVT::i16) {
1234 // For i16, use "ts1am"
// Same node sequence as the i8 branch above; the helpers pick the 2-byte
// flag and the 0xffff result mask because the memory type is not i8.
1235 SDValue Flag;
1236 SDValue Bits;
1237 SDValue NewVal = prepareTS1AM(Op, DAG, Flag, Bits);
1238
1239 SDValue Ptr = N->getOperand(1);
1240 SDValue Aligned = DAG.getNode(ISD::AND, DL, Ptr.getValueType(),
1241 {Ptr, DAG.getConstant(-4, DL, MVT::i64)});
1242 SDValue TS1AM = DAG.getAtomic(VEISD::TS1AM, DL, N->getMemoryVT(),
1243 DAG.getVTList(Op.getNode()->getValueType(0),
1244 Op.getNode()->getValueType(1)),
1245 {N->getChain(), Aligned, Flag, NewVal},
1246 N->getMemOperand());
1247
1248 SDValue Result = finalizeTS1AM(Op, DAG, TS1AM, Bits);
1249 SDValue Chain = TS1AM.getValue(1);
1250 return DAG.getMergeValues({Result, Chain}, DL);
1251 }
1252 // Otherwise, let llvm legalize it.
1253 return Op;
1254}
1255
// Lower a global address node by delegating to makeAddress.
// NOTE(review): the first signature line is missing from this listing.
1257 SelectionDAG &DAG) const {
1258 return makeAddress(Op, DAG);
1259}
1260
// Lower a block address node by delegating to makeAddress.
// NOTE(review): the first signature line is missing from this listing.
1262 SelectionDAG &DAG) const {
1263 return makeAddress(Op, DAG);
1264}
1265
// Lower a constant pool node by delegating to makeAddress.
// NOTE(review): the first signature line is missing from this listing.
1267 SelectionDAG &DAG) const {
1268 return makeAddress(Op, DAG);
1269}
1270
// Lower a TLS address using the general dynamic model: wrap a
// VEISD::GETTLSADDR node in a call sequence and read the result from SX0.
// Returns the CopyFromReg node. Also marks the function as having calls and,
// for position-independent code, requests the global base register.
// NOTE(review): the line carrying the function name is missing from this
// listing.
1271SDValue
1273 SelectionDAG &DAG) const {
1274 SDLoc DL(Op);
1275
1276 // Generate the following code:
1277 // t1: ch,glue = callseq_start t0, 0, 0
1278 // t2: i64,ch,glue = VEISD::GETTLSADDR t1, label, t1:1
1279 // t3: ch,glue = callseq_end t2, 0, 0, t2:2
1280 // t4: i64,ch,glue = CopyFromReg t3, Register:i64 $sx0, t3:1
// NOTE(review): the sketch above shows 0 for the first callseq size, while
// the calls below pass 64 — confirm which is intended.
1281 SDValue Label = withTargetFlags(Op, 0, DAG);
1282 EVT PtrVT = Op.getValueType();
1283
1284 // Lowering the machine isd will make sure everything is in the right
1285 // location.
1286 SDValue Chain = DAG.getEntryNode();
1287 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
1288 const uint32_t *Mask = Subtarget->getRegisterInfo()->getCallPreservedMask(
1290 Chain = DAG.getCALLSEQ_START(Chain, 64, 0, DL);
1291 SDValue Args[] = {Chain, Label, DAG.getRegisterMask(Mask), Chain.getValue(1)};
1292 Chain = DAG.getNode(VEISD::GETTLSADDR, DL, NodeTys, Args);
1293 Chain = DAG.getCALLSEQ_END(Chain, 64, 0, Chain.getValue(1), DL);
1294 Chain = DAG.getCopyFromReg(Chain, DL, VE::SX0, PtrVT, Chain.getValue(1));
1295
1296 // GETTLSADDR will be codegen'ed as call. Inform MFI that function has calls.
1298 MFI.setHasCalls(true);
1299
1300 // Also generate code to prepare a GOT register if it is PIC.
1301 if (isPositionIndependent()) {
1303 Subtarget->getInstrInfo()->getGlobalBaseReg(&MF);
1304 }
1305
1306 return Chain;
1307}
1308
// Lower a global TLS address. Always uses the general dynamic model,
// regardless of what other TLS model might apply; see the rationale below.
// NOTE(review): the first signature line is missing from this listing.
1310 SelectionDAG &DAG) const {
1311 // The current implementation of nld (2.26) doesn't allow local exec model
1312 // code described in VE-tls_v1.1.pdf (*1) as its input. Instead, we always
1313 // generate the general dynamic model code sequence.
1314 //
1315 // *1: https://www.nec.com/en/global/prod/hpc/aurora/document/VE-tls_v1.1.pdf
1316 return lowerToTLSGeneralDynamicModel(Op, DAG);
1317}
1318
// Delegates to makeAddress.
// NOTE(review): the signature line is missing from this listing; the position
// and the dispatch in LowerOperation suggest this is the jump-table lowering.
1320 return makeAddress(Op, DAG);
1321}
1322
1323// Lower a f128 load into two f64 loads.
// The low half is loaded from the base pointer and the high half from base+8,
// both on the original load's chain with alignment capped at 8. The halves are
// inserted into an IMPLICIT_DEF f128 (high into sub_even, low into sub_odd)
// and returned merged with a TokenFactor of the two load chains.
// NOTE(review): the signature line and the trailing arguments of both getLoad
// calls are missing from this listing.
1325 SDLoc DL(Op);
1326 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1327 assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1328 Align Alignment = LdNode->getAlign();
// Each half is an 8-byte access, so never claim more than 8-byte alignment.
1329 if (Alignment > 8)
1330 Alignment = Align(8);
1331
1332 SDValue Lo64 =
1333 DAG.getLoad(MVT::f64, DL, LdNode->getChain(), LdNode->getBasePtr(),
1334 LdNode->getPointerInfo(), Alignment,
1337 EVT AddrVT = LdNode->getBasePtr().getValueType();
1338 SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, LdNode->getBasePtr(),
1339 DAG.getConstant(8, DL, AddrVT));
1340 SDValue Hi64 =
1341 DAG.getLoad(MVT::f64, DL, LdNode->getChain(), HiPtr,
1342 LdNode->getPointerInfo(), Alignment,
1345
1346 SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1347 SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1348
1349 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1350 SDNode *InFP128 =
1351 DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MVT::f128);
1352 InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1353 SDValue(InFP128, 0), Hi64, SubRegEven);
1354 InFP128 = DAG.getMachineNode(TargetOpcode::INSERT_SUBREG, DL, MVT::f128,
1355 SDValue(InFP128, 0), Lo64, SubRegOdd);
// Join the two load chains so users of the result depend on both loads.
1356 SDValue OutChains[2] = {SDValue(Lo64.getNode(), 1),
1357 SDValue(Hi64.getNode(), 1)};
1358 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1359 SDValue Ops[2] = {SDValue(InFP128, 0), OutChain};
1360 return DAG.getMergeValues(Ops, DL);
1361}
1362
1363// Lower a vXi1 load into following instructions
1364// LDrii %1, (,%addr)
1365// LVMxir %vm, 0, %1
1366// LDrii %2, 8(,%addr)
1367// LVMxir %vm, 1, %2
1368// ...
// Handles memory types v256i1/v4i64 (4 x i64 loads, VE::LVMir_m) and
// v512i1/v8i64 (8 x i64 loads, VE::LVMyir_y). Each i64 is loaded from
// base + 8*i and inserted at index i into a mask that starts as IMPLICIT_DEF.
// Returns the mask merged with a TokenFactor of all load chains, or an empty
// SDValue for any other memory type.
// NOTE(review): the signature line and the trailing arguments of the getLoad
// calls are missing from this listing.
1370 SDLoc DL(Op);
1371 LoadSDNode *LdNode = dyn_cast<LoadSDNode>(Op.getNode());
1372 assert(LdNode && LdNode->getOffset().isUndef() && "Unexpected node type");
1373
1374 SDValue BasePtr = LdNode->getBasePtr();
1375 Align Alignment = LdNode->getAlign();
// Each piece is an 8-byte access, so never claim more than 8-byte alignment.
1376 if (Alignment > 8)
1377 Alignment = Align(8);
1378
1379 EVT AddrVT = BasePtr.getValueType();
1380 EVT MemVT = LdNode->getMemoryVT();
1381 if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1382 SDValue OutChains[4];
1383 SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
1384 for (int i = 0; i < 4; ++i) {
1385 // Generate load dag and prepare chains.
1386 SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1387 DAG.getConstant(8 * i, DL, AddrVT));
1388 SDValue Val =
1389 DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
1390 LdNode->getPointerInfo(), Alignment,
1393 OutChains[i] = SDValue(Val.getNode(), 1);
1394
// Thread the partially built mask through each insertion.
1395 VM = DAG.getMachineNode(VE::LVMir_m, DL, MVT::i64,
1396 DAG.getTargetConstant(i, DL, MVT::i64), Val,
1397 SDValue(VM, 0));
1398 }
1399 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1400 SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1401 return DAG.getMergeValues(Ops, DL);
1402 } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1403 SDValue OutChains[8];
1404 SDNode *VM = DAG.getMachineNode(TargetOpcode::IMPLICIT_DEF, DL, MemVT);
1405 for (int i = 0; i < 8; ++i) {
1406 // Generate load dag and prepare chains.
1407 SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1408 DAG.getConstant(8 * i, DL, AddrVT));
1409 SDValue Val =
1410 DAG.getLoad(MVT::i64, DL, LdNode->getChain(), Addr,
1411 LdNode->getPointerInfo(), Alignment,
1414 OutChains[i] = SDValue(Val.getNode(), 1);
1415
1416 VM = DAG.getMachineNode(VE::LVMyir_y, DL, MVT::i64,
1417 DAG.getTargetConstant(i, DL, MVT::i64), Val,
1418 SDValue(VM, 0));
1419 }
1420 SDValue OutChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1421 SDValue Ops[2] = {SDValue(VM, 0), OutChain};
1422 return DAG.getMergeValues(Ops, DL);
1423 } else {
1424 // Otherwise, ask llvm to expand it.
1425 return SDValue();
1426 }
1427}
1428
// Dispatch a LOAD node by memory type: non-mask vectors go to lowerToVVP,
// f128 to lowerLoadF128 and mask types to lowerLoadI1. Loads whose base
// pointer is a frame index, and all remaining types, are returned unchanged.
// NOTE(review): the signature line is missing from this listing.
1430 LoadSDNode *LdNode = cast<LoadSDNode>(Op.getNode());
1431
1432 EVT MemVT = LdNode->getMemoryVT();
1433
1434 // Dispatch to vector isel.
1435 if (MemVT.isVector() && !isMaskType(MemVT))
1436 return lowerToVVP(Op, DAG);
1437
1438 SDValue BasePtr = LdNode->getBasePtr();
1439 if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1440 // Do not expand load instruction with frame index here because of
1441 // dependency problems. We expand it later in eliminateFrameIndex().
1442 return Op;
1443 }
1444
1445 if (MemVT == MVT::f128)
1446 return lowerLoadF128(Op, DAG);
1447 if (isMaskType(MemVT))
1448 return lowerLoadI1(Op, DAG);
1449
1450 return Op;
1451}
1452
1453// Lower a f128 store into two f64 stores.
// The stored value is split with EXTRACT_SUBREG (sub_even is the high half,
// sub_odd the low half). The low half is stored at the base pointer and the
// high half at base+8, both on the original store's chain with alignment
// capped at 8. Returns a TokenFactor of the two store chains.
// NOTE(review): the signature line and the trailing arguments of both getStore
// calls are missing from this listing.
1455 SDLoc DL(Op);
1456 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1457 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1458
1459 SDValue SubRegEven = DAG.getTargetConstant(VE::sub_even, DL, MVT::i32);
1460 SDValue SubRegOdd = DAG.getTargetConstant(VE::sub_odd, DL, MVT::i32);
1461
1462 SDNode *Hi64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1463 StNode->getValue(), SubRegEven);
1464 SDNode *Lo64 = DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, MVT::i64,
1465 StNode->getValue(), SubRegOdd);
1466
1467 Align Alignment = StNode->getAlign();
// Each half is an 8-byte access, so never claim more than 8-byte alignment.
1468 if (Alignment > 8)
1469 Alignment = Align(8);
1470
1471 // VE stores Hi64 to 8(addr) and Lo64 to 0(addr)
1472 SDValue OutChains[2];
1473 OutChains[0] =
1474 DAG.getStore(StNode->getChain(), DL, SDValue(Lo64, 0),
1475 StNode->getBasePtr(), MachinePointerInfo(), Alignment,
1478 EVT AddrVT = StNode->getBasePtr().getValueType();
1479 SDValue HiPtr = DAG.getNode(ISD::ADD, DL, AddrVT, StNode->getBasePtr(),
1480 DAG.getConstant(8, DL, AddrVT));
1481 OutChains[1] =
1482 DAG.getStore(StNode->getChain(), DL, SDValue(Hi64, 0), HiPtr,
1483 MachinePointerInfo(), Alignment,
1486 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1487}
1488
1489// Lower a vXi1 store into following instructions
1490// SVMi %1, %vm, 0
1491// STrii %1, (,%addr)
1492// SVMi %2, %vm, 1
1493// STrii %2, 8(,%addr)
1494// ...
// Handles memory types v256i1/v4i64 (4 pieces, VE::SVMmi) and v512i1/v8i64
// (8 pieces, VE::SVMyi). Piece i is extracted from the stored mask as an i64
// and stored at base + 8*i on the original store's chain. Returns a
// TokenFactor of all store chains, or an empty SDValue for any other type.
// NOTE(review): the signature line and the trailing arguments of the getStore
// calls are missing from this listing.
1496 SDLoc DL(Op);
1497 StoreSDNode *StNode = dyn_cast<StoreSDNode>(Op.getNode());
1498 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1499
1500 SDValue BasePtr = StNode->getBasePtr();
1501 Align Alignment = StNode->getAlign();
// Each piece is an 8-byte access, so never claim more than 8-byte alignment.
1502 if (Alignment > 8)
1503 Alignment = Align(8);
1504 EVT AddrVT = BasePtr.getValueType();
1505 EVT MemVT = StNode->getMemoryVT();
1506 if (MemVT == MVT::v256i1 || MemVT == MVT::v4i64) {
1507 SDValue OutChains[4];
1508 for (int i = 0; i < 4; ++i) {
1509 SDNode *V =
1510 DAG.getMachineNode(VE::SVMmi, DL, MVT::i64, StNode->getValue(),
1511 DAG.getTargetConstant(i, DL, MVT::i64));
1512 SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1513 DAG.getConstant(8 * i, DL, AddrVT));
1514 OutChains[i] =
1515 DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
1516 MachinePointerInfo(), Alignment,
1519 }
1520 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1521 } else if (MemVT == MVT::v512i1 || MemVT == MVT::v8i64) {
1522 SDValue OutChains[8];
1523 for (int i = 0; i < 8; ++i) {
1524 SDNode *V =
1525 DAG.getMachineNode(VE::SVMyi, DL, MVT::i64, StNode->getValue(),
1526 DAG.getTargetConstant(i, DL, MVT::i64));
1527 SDValue Addr = DAG.getNode(ISD::ADD, DL, AddrVT, BasePtr,
1528 DAG.getConstant(8 * i, DL, AddrVT));
1529 OutChains[i] =
1530 DAG.getStore(StNode->getChain(), DL, SDValue(V, 0), Addr,
1531 MachinePointerInfo(), Alignment,
1534 }
1535 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, OutChains);
1536 } else {
1537 // Otherwise, ask llvm to expand it.
1538 return SDValue();
1539 }
1540}
1541
// Dispatch a STORE node by memory type: non-mask vectors go to lowerToVVP,
// f128 to lowerStoreF128 and mask types to lowerStoreI1. Stores whose base
// pointer is a frame index are returned unchanged; all remaining types return
// an empty SDValue.
// NOTE(review): the signature line is missing from this listing.
1543 StoreSDNode *StNode = cast<StoreSDNode>(Op.getNode());
1544 assert(StNode && StNode->getOffset().isUndef() && "Unexpected node type");
1545
1546 // always expand non-mask vector stores to VVP
1547 EVT MemVT = StNode->getMemoryVT();
1548 if (MemVT.isVector() && !isMaskType(MemVT))
1549 return lowerToVVP(Op, DAG);
1550
1551 SDValue BasePtr = StNode->getBasePtr();
1552 if (isa<FrameIndexSDNode>(BasePtr.getNode())) {
1553 // Do not expand store instruction with frame index here because of
1554 // dependency problems. We expand it later in eliminateFrameIndex().
1555 return Op;
1556 }
1557
1558 if (MemVT == MVT::f128)
1559 return lowerStoreF128(Op, DAG);
1560 if (isMaskType(MemVT))
1561 return lowerStoreI1(Op, DAG);
1562
1563 // Otherwise, ask llvm to expand it.
1564 return SDValue();
1565}
1566
// Lower VASTART: compute SX9 + FuncInfo->getVarArgsFrameOffset() and store it
// to the address in operand 1, chained on operand 0. Operand 2 supplies the
// source value used for the store's pointer info.
// NOTE(review): the signature and the lines that define FuncInfo are missing
// from this listing.
1570 auto PtrVT = getPointerTy(DAG.getDataLayout());
1571
1572 // Need frame address to find the address of VarArgsFrameIndex.
1574
1575 // vastart just stores the address of the VarArgsFrameIndex slot into the
1576 // memory location argument.
1577 SDLoc DL(Op);
1578 SDValue Offset =
1579 DAG.getNode(ISD::ADD, DL, PtrVT, DAG.getRegister(VE::SX9, PtrVT),
1580 DAG.getIntPtrConstant(FuncInfo->getVarArgsFrameOffset(), DL));
1581 const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
1582 return DAG.getStore(Op.getOperand(0), DL, Offset, Op.getOperand(1),
1583 MachinePointerInfo(SV));
1584}
1585
// Lower VAARG: load the current va_list pointer, compute where this argument
// lives and where the next one starts, store the advanced pointer back, then
// load the argument itself.
//   f128:  round the pointer up to a multiple of 16, advance by 16.
//   f32:   advance by 8, read the value at offset 4 within the slot.
//   other: advance by 8, read the value at the slot start.
// NOTE(review): the signature line is missing from this listing.
1587 SDNode *Node = Op.getNode();
1588 EVT VT = Node->getValueType(0);
1589 SDValue InChain = Node->getOperand(0);
1590 SDValue VAListPtr = Node->getOperand(1);
1591 EVT PtrVT = VAListPtr.getValueType();
1592 const Value *SV = cast<SrcValueSDNode>(Node->getOperand(2))->getValue();
1593 SDLoc DL(Node);
1594 SDValue VAList =
1595 DAG.getLoad(PtrVT, DL, InChain, VAListPtr, MachinePointerInfo(SV));
1596 SDValue Chain = VAList.getValue(1);
1597 SDValue NextPtr;
1598
1599 if (VT == MVT::f128) {
1600 // VE f128 values must be stored with 16 bytes alignment. We don't
1601 // know the actual alignment of VAList, so we take alignment of it
1602 // dynamically.
// Note: this local int is named Align and hides the Align type inside this
// branch only; the Align(...) used in the final getLoad is outside its scope.
1603 int Align = 16;
1604 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1605 DAG.getConstant(Align - 1, DL, PtrVT));
1606 VAList = DAG.getNode(ISD::AND, DL, PtrVT, VAList,
1607 DAG.getConstant(-Align, DL, PtrVT));
1608 // Increment the pointer, VAList, by 16 to the next vaarg.
1609 NextPtr =
1610 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(16, DL));
1611 } else if (VT == MVT::f32) {
1612 // float --> need special handling like below.
1613 // 0 4
1614 // +------+------+
1615 // | empty| float|
1616 // +------+------+
1617 // Increment the pointer, VAList, by 8 to the next vaarg.
1618 NextPtr =
1619 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1620 // Then, adjust VAList.
1621 unsigned InternalOffset = 4;
1622 VAList = DAG.getNode(ISD::ADD, DL, PtrVT, VAList,
1623 DAG.getConstant(InternalOffset, DL, PtrVT));
1624 } else {
1625 // Increment the pointer, VAList, by 8 to the next vaarg.
1626 NextPtr =
1627 DAG.getNode(ISD::ADD, DL, PtrVT, VAList, DAG.getIntPtrConstant(8, DL));
1628 }
1629
1630 // Store the incremented VAList to the legalized pointer.
1631 InChain = DAG.getStore(Chain, DL, NextPtr, VAListPtr, MachinePointerInfo(SV));
1632
1633 // Load the actual argument out of the pointer VAList.
1634 // We can't count on greater alignment than the word size.
1635 return DAG.getLoad(
1636 VT, DL, InChain, VAList, MachinePointerInfo(),
1637 Align(std::min(PtrVT.getSizeInBits(), VT.getSizeInBits()) / 8));
1638}
1639
// Lower DYNAMIC_STACKALLOC. Operand 0 is the chain, operand 1 the size and
// operand 2 the requested alignment. Emits a call to "__ve_grow_stack", or to
// "__ve_grow_stack_align" with an extra alignment-mask argument when the
// request exceeds the stack alignment, then reads the new stack top through
// VEISD::GETSTACKTOP. In the over-aligned case the result is rounded up to
// the requested alignment. Returns {Result, Chain} merged.
// NOTE(review): the first signature line and the declarations of Args, Entry,
// RetTy, Callee and CLI are missing from this listing.
1641 SelectionDAG &DAG) const {
1642 // Generate following code.
1643 // (void)__ve_grow_stack(size); // or __ve_grow_stack_align(size, mask)
1644 // ret = GETSTACKTOP; // pseudo instruction
1645 SDLoc DL(Op);
1646
1647 // Get the inputs.
1648 SDNode *Node = Op.getNode();
1649 SDValue Chain = Op.getOperand(0);
1650 SDValue Size = Op.getOperand(1);
1651 MaybeAlign Alignment(Op.getConstantOperandVal(2));
1652 EVT VT = Node->getValueType(0);
1653
1654 // Chain the dynamic stack allocation so that it doesn't modify the stack
1655 // pointer when other instructions are using the stack.
1656 Chain = DAG.getCALLSEQ_START(Chain, 0, 0, DL);
1657
1658 const TargetFrameLowering &TFI = *Subtarget->getFrameLowering();
1659 Align StackAlign = TFI.getStackAlign();
1660 bool NeedsAlign = Alignment.valueOrOne() > StackAlign;
1661
1662 // Prepare arguments
1665 Entry.Node = Size;
1666 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1667 Args.push_back(Entry);
1668 if (NeedsAlign) {
// Second argument: the mask that clears the low alignment bits.
1669 Entry.Node = DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT);
1670 Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext());
1671 Args.push_back(Entry);
1672 }
1674
1675 EVT PtrVT = Op.getValueType();
1677 if (NeedsAlign) {
1678 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack_align", PtrVT, 0);
1679 } else {
1680 Callee = DAG.getTargetExternalSymbol("__ve_grow_stack", PtrVT, 0);
1681 }
1682
1684 CLI.setDebugLoc(DL)
1685 .setChain(Chain)
1686 .setCallee(CallingConv::PreserveAll, RetTy, Callee, std::move(Args))
1687 .setDiscardResult(true);
// Only the chain of the call result is used; the value is discarded.
1688 std::pair<SDValue, SDValue> pair = LowerCallTo(CLI);
1689 Chain = pair.second;
1690 SDValue Result = DAG.getNode(VEISD::GETSTACKTOP, DL, VT, Chain);
1691 if (NeedsAlign) {
// Round the stack top up: (Result + (A - 1)) & ~(A - 1).
1692 Result = DAG.getNode(ISD::ADD, DL, VT, Result,
1693 DAG.getConstant((Alignment->value() - 1ULL), DL, VT));
1694 Result = DAG.getNode(ISD::AND, DL, VT, Result,
1695 DAG.getConstant(~(Alignment->value() - 1ULL), DL, VT));
1696 }
1697 // Chain = Result.getValue(1);
1698 Chain = DAG.getCALLSEQ_END(Chain, 0, 0, SDValue(), DL);
1699
1700 SDValue Ops[2] = {Result, Chain};
1701 return DAG.getMergeValues(Ops, DL);
1702}
1703
// Rewrite the node as VEISD::EH_SJLJ_LONGJMP, forwarding operands 0 and 1 and
// producing only a chain (MVT::Other).
// NOTE(review): the first signature line is missing from this listing.
1705 SelectionDAG &DAG) const {
1706 SDLoc DL(Op);
1707 return DAG.getNode(VEISD::EH_SJLJ_LONGJMP, DL, MVT::Other, Op.getOperand(0),
1708 Op.getOperand(1));
1709}
1710
// Rewrite the node as VEISD::EH_SJLJ_SETJMP, forwarding operands 0 and 1 and
// producing an i32 result plus a chain.
// NOTE(review): the first signature line is missing from this listing.
1712 SelectionDAG &DAG) const {
1713 SDLoc DL(Op);
1714 return DAG.getNode(VEISD::EH_SJLJ_SETJMP, DL,
1715 DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0),
1716 Op.getOperand(1));
1717}
1718
// Builds a replacement node that forwards operand 0 of Op.
// NOTE(review): the first signature line and the start of the return
// statement (node opcode and result type) are missing from this listing.
1720 SelectionDAG &DAG) const {
1721 SDLoc DL(Op);
1723 Op.getOperand(0));
1724}
1725
// Lower FRAMEADDR. Marks the frame address as taken, copies the frame
// register, then follows Depth (operand 0) links by repeatedly loading from
// the current frame address. Returns the resulting address.
// NOTE(review): the first signature line and the definition of MF are missing
// from this listing.
1727 const VETargetLowering &TLI,
1728 const VESubtarget *Subtarget) {
1729 SDLoc DL(Op);
1731 EVT PtrVT = TLI.getPointerTy(MF.getDataLayout());
1732
1733 MachineFrameInfo &MFI = MF.getFrameInfo();
1734 MFI.setFrameAddressIsTaken(true);
1735
1736 unsigned Depth = Op.getConstantOperandVal(0);
1737 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
1738 Register FrameReg = RegInfo->getFrameRegister(MF);
1739 SDValue FrameAddr =
1740 DAG.getCopyFromReg(DAG.getEntryNode(), DL, FrameReg, PtrVT);
// Each load reads the value stored at offset 0 of the current frame address.
1741 while (Depth--)
1742 FrameAddr = DAG.getLoad(Op.getValueType(), DL, DAG.getEntryNode(),
1743 FrameAddr, MachinePointerInfo());
1744 return FrameAddr;
1745}
1746
// Lower RETURNADDR. Marks the return address as taken, obtains the frame
// address through lowerFRAMEADDR and loads the value at frame address + 8.
// An early exit returns an empty SDValue.
// NOTE(review): the first signature line, the definition of MF, the condition
// guarding the early return and the last getLoad argument are missing from
// this listing.
1748 const VETargetLowering &TLI,
1749 const VESubtarget *Subtarget) {
1751 MachineFrameInfo &MFI = MF.getFrameInfo();
1752 MFI.setReturnAddressIsTaken(true);
1753
1755 return SDValue();
1756
1757 SDValue FrameAddr = lowerFRAMEADDR(Op, DAG, TLI, Subtarget);
1758
1759 SDLoc DL(Op);
1760 EVT VT = Op.getValueType();
1761 SDValue Offset = DAG.getConstant(8, DL, VT);
1762 return DAG.getLoad(VT, DL, DAG.getEntryNode(),
1763 DAG.getNode(ISD::ADD, DL, VT, FrameAddr, Offset),
1765}
1766
// Custom-lower chainless intrinsics; operand 0 is the intrinsic ID. Only
// Intrinsic::eh_sjlj_lsda is handled here: it yields the address of the
// "GCC_except_table<function number>" symbol, added to the global base
// register for position-independent code. All other intrinsics return an
// empty SDValue.
// NOTE(review): the first signature line, the definition of MF, two lines in
// the PIC branch and the non-PIC return are missing from this listing.
1768 SelectionDAG &DAG) const {
1769 SDLoc DL(Op);
1770 unsigned IntNo = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
1771 switch (IntNo) {
1772 default: // Don't custom lower most intrinsics.
1773 return SDValue();
1774 case Intrinsic::eh_sjlj_lsda: {
1776 MVT VT = Op.getSimpleValueType();
1777 const VETargetMachine *TM =
1778 static_cast<const VETargetMachine *>(&DAG.getTarget());
1779
1780 // Create GCC_except_tableXX string. The real symbol for that will be
1781 // generated in EHStreamer::emitExceptionTable() later. So, we just
1782 // borrow its name here.
// The string is appended to the target machine's string list, and the symbol
// node below points at that stored copy's c_str().
1783 TM->getStrList()->push_back(std::string(
1784 (Twine("GCC_except_table") + Twine(MF.getFunctionNumber())).str()));
1785 SDValue Addr =
1786 DAG.getTargetExternalSymbol(TM->getStrList()->back().c_str(), VT, 0);
1787 if (isPositionIndependent()) {
1790 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, VT);
1791 return DAG.getNode(ISD::ADD, DL, VT, GlobalBase, Addr);
1792 }
1794 }
1795 }
1796}
1797
1798static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx) {
1799 if (!isa<BuildVectorSDNode>(N))
1800 return false;
1801 const auto *BVN = cast<BuildVectorSDNode>(N);
1802
1803 // Find first non-undef insertion.
1804 unsigned Idx;
1805 for (Idx = 0; Idx < BVN->getNumOperands(); ++Idx) {
1806 auto ElemV = BVN->getOperand(Idx);
1807 if (!ElemV->isUndef())
1808 break;
1809 }
1810 // Catch the (hypothetical) all-undef case.
1811 if (Idx == BVN->getNumOperands())
1812 return false;
1813 // Remember insertion.
1814 UniqueIdx = Idx++;
1815 // Verify that all other insertions are undef.
1816 for (; Idx < BVN->getNumOperands(); ++Idx) {
1817 auto ElemV = BVN->getOperand(Idx);
1818 if (!ElemV->isUndef())
1819 return false;
1820 }
1821 return true;
1822}
1823
// Return the BUILD_VECTOR's getSplatValue() when N is a BuildVectorSDNode;
// otherwise return an empty SDValue.
// NOTE(review): the signature line is missing from this listing.
1825 if (auto *BuildVec = dyn_cast<BuildVectorSDNode>(N)) {
1826 return BuildVec->getSplatValue();
1827 }
1828 return SDValue();
1829}
1830
// Lower BUILD_VECTOR. Tried in order:
//   1. exactly one non-undef operand -> INSERT_VECTOR_ELT into an undef vector;
//   2. a splat value is available    -> broadcast with AVL = element count;
//   3. otherwise                     -> empty SDValue.
// NOTE(review): the first signature line is missing from this listing.
1832 SelectionDAG &DAG) const {
1833 VECustomDAG CDAG(DAG, Op);
1834 MVT ResultVT = Op.getSimpleValueType();
1835
1836 // If there is just one element, expand to INSERT_VECTOR_ELT.
1837 unsigned UniqueIdx;
1838 if (getUniqueInsertion(Op.getNode(), UniqueIdx)) {
1839 SDValue AccuV = CDAG.getUNDEF(Op.getValueType());
1840 auto ElemV = Op->getOperand(UniqueIdx);
1841 SDValue IdxV = CDAG.getConstant(UniqueIdx, MVT::i64);
1842 return CDAG.getNode(ISD::INSERT_VECTOR_ELT, ResultVT, {AccuV, ElemV, IdxV});
1843 }
1844
1845 // Else emit a broadcast.
1846 if (SDValue ScalarV = getSplatValue(Op.getNode())) {
1847 unsigned NumEls = ResultVT.getVectorNumElements();
1848 auto AVL = CDAG.getConstant(NumEls, MVT::i32);
1849 return CDAG.getBroadcast(ResultVT, ScalarV, AVL);
1850 }
1851
1852 // Expand
1853 return SDValue();
1854}
1855
// Report the legalization action for node Op: Legal for packing-support
// opcodes, Custom for VVP/VEC opcodes, Legal for everything else.
// NOTE(review): the signature lines are missing from this listing.
1858 // Custom legalization on VVP_* and VEC_* opcodes is required to pack-legalize
1859 // these operations (transform nodes such that their AVL parameter refers to
1860 // packs of 64bit, instead of number of elements).
1861
1862 // Packing opcodes are created with a pack-legal AVL (LEGALAVL). No need to
1863 // re-visit them.
1864 if (isPackingSupportOpcode(Op.getOpcode()))
1865 return Legal;
1866
1867 // Custom lower to legalize AVL for packed mode.
1868 if (isVVPOrVEC(Op.getOpcode()))
1869 return Custom;
1870 return Legal;
1871}
1872
// Entry point for custom lowering. A first switch routes scalar opcodes to the
// matching lower* helper. Opcodes that fall through are treated as vector
// operations: VP opcodes go to lowerToVVP, internal VVP nodes get their AVL
// legalized unless already done, and the remaining listed ISD opcodes are
// translated to the VVP layer (mask arithmetic on packed types is split
// first). Any other opcode is unreachable.
// NOTE(review): the signature line and several case labels are missing from
// this listing.
1874 LLVM_DEBUG(dbgs() << "::LowerOperation"; Op->print(dbgs()););
1875 unsigned Opcode = Op.getOpcode();
1876
1877 /// Scalar isel.
1878 switch (Opcode) {
1879 case ISD::ATOMIC_FENCE:
1880 return lowerATOMIC_FENCE(Op, DAG);
1881 case ISD::ATOMIC_SWAP:
1882 return lowerATOMIC_SWAP(Op, DAG);
1883 case ISD::BlockAddress:
1884 return lowerBlockAddress(Op, DAG);
1885 case ISD::ConstantPool:
1886 return lowerConstantPool(Op, DAG);
1888 return lowerDYNAMIC_STACKALLOC(Op, DAG);
1890 return lowerEH_SJLJ_LONGJMP(Op, DAG);
1892 return lowerEH_SJLJ_SETJMP(Op, DAG);
1894 return lowerEH_SJLJ_SETUP_DISPATCH(Op, DAG);
1895 case ISD::FRAMEADDR:
1896 return lowerFRAMEADDR(Op, DAG, *this, Subtarget);
1897 case ISD::GlobalAddress:
1898 return lowerGlobalAddress(Op, DAG);
1900 return lowerGlobalTLSAddress(Op, DAG);
1902 return lowerINTRINSIC_WO_CHAIN(Op, DAG);
1903 case ISD::JumpTable:
1904 return lowerJumpTable(Op, DAG);
1905 case ISD::LOAD:
1906 return lowerLOAD(Op, DAG);
1907 case ISD::RETURNADDR:
1908 return lowerRETURNADDR(Op, DAG, *this, Subtarget);
1909 case ISD::BUILD_VECTOR:
1910 return lowerBUILD_VECTOR(Op, DAG);
1911 case ISD::STORE:
1912 return lowerSTORE(Op, DAG);
1913 case ISD::VASTART:
1914 return lowerVASTART(Op, DAG);
1915 case ISD::VAARG:
1916 return lowerVAARG(Op, DAG);
1917
1919 return lowerINSERT_VECTOR_ELT(Op, DAG);
1921 return lowerEXTRACT_VECTOR_ELT(Op, DAG);
1922 }
1923
1924 /// Vector isel.
1925 LLVM_DEBUG(dbgs() << "::LowerOperation_VVP"; Op->print(dbgs()););
1926 if (ISD::isVPOpcode(Opcode))
1927 return lowerToVVP(Op, DAG);
1928
1929 switch (Opcode) {
1930 default:
1931 llvm_unreachable("Should not custom lower this!");
1932
1933 // Legalize the AVL of this internal node.
// The ADD_VVP_OP macro turns each VVPNodes.def entry into a case label.
1935#define ADD_VVP_OP(VVP_NAME, ...) case VEISD::VVP_NAME:
1936#include "VVPNodes.def"
1937 // AVL already legalized.
1938 if (getAnnotatedNodeAVL(Op).second)
1939 return Op;
1940 return legalizeInternalVectorOp(Op, DAG);
1941
1942 // Translate into a VEC_*/VVP_* layer operation.
1943 case ISD::MLOAD:
1944 case ISD::MSTORE:
1945#define ADD_VVP_OP(VVP_NAME, ISD_NAME) case ISD::ISD_NAME:
1946#include "VVPNodes.def"
1947 if (isMaskArithmetic(Op) && isPackedVectorType(Op.getValueType()))
1948 return splitMaskArithmetic(Op, DAG);
1949 return lowerToVVP(Op, DAG);
1950 }
1951}
1952/// } Custom Lower
1953
// Custom type-legalization hook. ATOMIC_SWAP returns without doing anything
// here; any other opcode dumps the node in debug builds and is treated as
// unreachable.
// NOTE(review): the first signature lines are missing from this listing.
1956 SelectionDAG &DAG) const {
1957 switch (N->getOpcode()) {
1958 case ISD::ATOMIC_SWAP:
1959 // Let LLVM expand atomic swap instruction through LowerOperation.
1960 return;
1961 default:
1962 LLVM_DEBUG(N->dumpr(&DAG));
1963 llvm_unreachable("Do not know how to custom type legalize this operation!");
1964 }
1965}
1966
1967/// JumpTable for VE.
1968///
1969/// VE cannot generate relocatable symbol in jump table. VE cannot
1970/// generate expressions using symbols in both text segment and data
1971/// segment like below.
1972/// .4byte .LBB0_2-.LJTI0_0
1973/// So, we generate offset from the top of function like below as
1974/// a custom label.
1975/// .4byte .LBB0_2-<function name>
1976
// Choose the jump table encoding: a custom label for PIC, the default
// heuristics otherwise.
// NOTE(review): the signature, the PIC check and both return statements are
// missing from this listing.
1978 // Use custom label for PIC.
1981
1982 // Otherwise, use the normal jump table encoding heuristics.
1984}
1985
// Build the expression for one custom jump table entry: the symbol of MBB
// minus the symbol named after MBB's parent function.
// NOTE(review): the first signature line and one statement at the top of the
// body are missing from this listing.
1987 const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB,
1988 unsigned Uid, MCContext &Ctx) const {
1990
1991 // Generate custom label for PIC like below.
1992 // .4bytes .LBB0_2-<function name>
1993 const auto *Value = MCSymbolRefExpr::create(MBB->getSymbol(), Ctx);
1994 MCSymbol *Sym = Ctx.getOrCreateSymbol(MBB->getParent()->getName().data());
1995 const auto *Base = MCSymbolRefExpr::create(Sym, Ctx);
1996 return MCBinaryExpr::createSub(Value, Base, Ctx);
1997}
1998
// Return the base address that PIC jump table entries are relative to: the
// global base register plus a Hi/Lo pair built for the current function's
// address.
// NOTE(review): the first signature line, the definition of Function and the
// lines that build HiLo are missing from this listing.
2000 SelectionDAG &DAG) const {
2002 SDLoc DL(Table);
2004 assert(Function != nullptr);
2005 auto PtrTy = getPointerTy(DAG.getDataLayout(), Function->getAddressSpace());
2006
2007 // In the jump table, we have following values in PIC mode.
2008 // .4bytes .LBB0_2-<function name>
2009 // We need to add this value and the address of this function to generate
2010 // .LBB0_2 label correctly under PIC mode. So, we want to generate following
2011 // instructions:
2012 // lea %reg, fun@gotoff_lo
2013 // and %reg, %reg, (32)0
2014 // lea.sl %reg, fun@gotoff_hi(%reg, %got)
2015 // In order to do so, we need to generate correctly marked DAG node using
2016 // makeHiLoPair.
2017 SDValue Op = DAG.getGlobalAddress(Function, DL, PtrTy);
2020 SDValue GlobalBase = DAG.getNode(VEISD::GLOBAL_BASE_REG, DL, PtrTy);
2021 return DAG.getNode(ISD::ADD, DL, PtrTy, GlobalBase, HiLo);
2022}
2023
// Emit, before I in MBB, the instructions that materialize the address of
// TargetBB into a fresh I64 virtual register, and return that register.
// PIC code builds the address relative to SX15; non-PIC code uses the
// absolute LO32/HI32 halves.
// NOTE(review): the first signature lines, the definition of MRI and the
// block-address operands of the PIC sequence are missing from this listing.
2026 MachineBasicBlock *TargetBB,
2027 const DebugLoc &DL) const {
2030 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2031
2032 const TargetRegisterClass *RC = &VE::I64RegClass;
2033 Register Tmp1 = MRI.createVirtualRegister(RC);
2034 Register Tmp2 = MRI.createVirtualRegister(RC);
2035 Register Result = MRI.createVirtualRegister(RC);
2036
2037 if (isPositionIndependent()) {
2038 // Create following instructions for local linkage PIC code.
2039 // lea %Tmp1, TargetBB@gotoff_lo
2040 // and %Tmp2, %Tmp1, (32)0
2041 // lea.sl %Result, TargetBB@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2042 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2043 .addImm(0)
2044 .addImm(0)
2046 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2047 .addReg(Tmp1, getKillRegState(true))
2048 .addImm(M0(32));
2049 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2050 .addReg(VE::SX15)
2051 .addReg(Tmp2, getKillRegState(true))
2053 } else {
2054 // Create following instructions for non-PIC code.
2055 // lea %Tmp1, TargetBB@lo
2056 // and %Tmp2, %Tmp1, (32)0
2057 // lea.sl %Result, TargetBB@hi(%Tmp2)
2058 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2059 .addImm(0)
2060 .addImm(0)
2061 .addMBB(TargetBB, VEMCExpr::VK_VE_LO32);
2062 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2063 .addReg(Tmp1, getKillRegState(true))
2064 .addImm(M0(32));
2065 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2066 .addReg(Tmp2, getKillRegState(true))
2067 .addImm(0)
2068 .addMBB(TargetBB, VEMCExpr::VK_VE_HI32);
2069 }
2070 return Result;
2071}
2072
// Builds, via BuildMI(MBB, I, DL, ...), the instructions that materialize the
// address of Symbol into a newly created I64 virtual register and returns that
// register. Three code shapes are produced: a GETFUNPLT pseudo for non-local
// PIC calls, GOT-relative sequences for other PIC references, and an absolute
// lo/hi pair for non-PIC code.
// NOTE(review): the first lines of this signature and several operand lines
// inside the body are absent from this listing; comments below describe only
// what is visible.
2075 StringRef Symbol, const DebugLoc &DL,
2076 bool IsLocal = false,
2077 bool IsCall = false) const {
2080 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2081
// Every register created here, including the returned one, is an I64 vreg.
2082 const TargetRegisterClass *RC = &VE::I64RegClass;
2083 Register Result = MRI.createVirtualRegister(RC);
2084
2085 if (isPositionIndependent()) {
2086 if (IsCall && !IsLocal) {
2087 // Create following instructions for non-local linkage PIC code function
2088 // calls. These instructions use IC and magic number -24, so we expand
2089 // them in VEAsmPrinter.cpp from GETFUNPLT pseudo instruction.
2090 // lea %Reg, Symbol@plt_lo(-24)
2091 // and %Reg, %Reg, (32)0
2092 // sic %s16
2093 // lea.sl %Result, Symbol@plt_hi(%Reg, %s16) ; %s16 is PLT
// NOTE(review): the external symbol operand below is the literal "abort",
// not Symbol. The only call visible in this file passes "abort", but any
// other symbol would silently resolve to abort here -- confirm intent.
2094 BuildMI(MBB, I, DL, TII->get(VE::GETFUNPLT), Result)
2095 .addExternalSymbol("abort");
2096 } else if (IsLocal) {
2097 Register Tmp1 = MRI.createVirtualRegister(RC);
2098 Register Tmp2 = MRI.createVirtualRegister(RC);
2099 // Create following instructions for local linkage PIC code.
2100 // lea %Tmp1, Symbol@gotoff_lo
2101 // and %Tmp2, %Tmp1, (32)0
2102 // lea.sl %Result, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2103 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2104 .addImm(0)
2105 .addImm(0)
// M0(32) is the "(32)0" mask of the diagram above.
2107 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2108 .addReg(Tmp1, getKillRegState(true))
2109 .addImm(M0(32));
2110 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Result)
2111 .addReg(VE::SX15)
2112 .addReg(Tmp2, getKillRegState(true))
2114 } else {
2115 Register Tmp1 = MRI.createVirtualRegister(RC);
2116 Register Tmp2 = MRI.createVirtualRegister(RC);
2117 // Create following instructions for not local linkage PIC code.
2118 // lea %Tmp1, Symbol@got_lo
2119 // and %Tmp2, %Tmp1, (32)0
2120 // lea.sl %Tmp3, Symbol@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2121 // ld %Result, 0(%Tmp3)
2122 Register Tmp3 = MRI.createVirtualRegister(RC);
2123 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2124 .addImm(0)
2125 .addImm(0)
2127 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2128 .addReg(Tmp1, getKillRegState(true))
2129 .addImm(M0(32));
2130 BuildMI(MBB, I, DL, TII->get(VE::LEASLrri), Tmp3)
2131 .addReg(VE::SX15)
2132 .addReg(Tmp2, getKillRegState(true))
// Unlike the local case, the computed address (Tmp3) is dereferenced once
// more to obtain the final value placed in Result.
2134 BuildMI(MBB, I, DL, TII->get(VE::LDrii), Result)
2135 .addReg(Tmp3, getKillRegState(true))
2136 .addImm(0)
2137 .addImm(0);
2138 }
2139 } else {
2140 Register Tmp1 = MRI.createVirtualRegister(RC);
2141 Register Tmp2 = MRI.createVirtualRegister(RC);
2142 // Create following instructions for non-PIC code.
2143 // lea %Tmp1, Symbol@lo
2144 // and %Tmp2, %Tmp1, (32)0
2145 // lea.sl %Result, Symbol@hi(%Tmp2)
2146 BuildMI(MBB, I, DL, TII->get(VE::LEAzii), Tmp1)
2147 .addImm(0)
2148 .addImm(0)
2149 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_LO32);
2150 BuildMI(MBB, I, DL, TII->get(VE::ANDrm), Tmp2)
2151 .addReg(Tmp1, getKillRegState(true))
2152 .addImm(M0(32));
2153 BuildMI(MBB, I, DL, TII->get(VE::LEASLrii), Result)
2154 .addReg(Tmp2, getKillRegState(true))
2155 .addImm(0)
2156 .addExternalSymbol(Symbol.data(), VEMCExpr::VK_VE_HI32);
2157 }
2158 return Result;
2159}
2160
// Inserts, before MI in MBB, a store of LabelReg into the stack slot addressed
// by frame index FI plus Offset.
// NOTE(review): the start of the signature and the initializer of LabelReg are
// absent from this listing; per the comment below LabelReg holds the address
// of DispatchBB.
2163 MachineBasicBlock *DispatchBB,
2164 int FI, int Offset) const {
2165 DebugLoc DL = MI.getDebugLoc();
2166 const VEInstrInfo *TII = Subtarget->getInstrInfo();
2167
2168 Register LabelReg =
2170
2171 // Store the address of DispatchBB to the given jmpbuf[1], which holds the
2172 // next IC referenced by longjmp (throw) later.
2173 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2174 addFrameReference(MIB, FI, Offset); // jmpbuf[1]
// LabelReg is not needed after the store, so it is marked killed.
2175 MIB.addReg(LabelReg, getKillRegState(true));
2176}
2177
// Expands the setjmp pseudo instruction MI into the ThisMBB / MainMBB /
// SinkMBB / RestoreMBB structure drawn in the diagram below, erases MI, and
// returns SinkMBB, which receives everything that followed MI.
// NOTE(review): the start of the signature and a few declarations (e.g. of I,
// MRI and the initializer of LabelReg) are absent from this listing.
2180 MachineBasicBlock *MBB) const {
2181 DebugLoc DL = MI.getDebugLoc();
2182 MachineFunction *MF = MBB->getParent();
2183 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2184 const TargetRegisterInfo *TRI = Subtarget->getRegisterInfo();
2186
2187 const BasicBlock *BB = MBB->getBasicBlock();
2189
2190 // Memory Reference.
2191 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2192 MI.memoperands_end());
// Operand 1 of MI is the jmpbuf address; operand 0 is the result register.
2193 Register BufReg = MI.getOperand(1).getReg();
2194
2195 Register DstReg;
2196
2197 DstReg = MI.getOperand(0).getReg();
2198 const TargetRegisterClass *RC = MRI.getRegClass(DstReg);
2199 assert(TRI->isTypeLegalForClass(*RC, MVT::i32) && "Invalid destination!");
// TRI is only used by the assert above; silence unused warnings otherwise.
2200 (void)TRI;
// Two candidate results that are merged by the PHI in SinkMBB.
2201 Register MainDestReg = MRI.createVirtualRegister(RC);
2202 Register RestoreDestReg = MRI.createVirtualRegister(RC);
2203
2204 // For `v = call @llvm.eh.sjlj.setjmp(buf)`, we generate following
2205 // instructions. SP/FP must be saved in jmpbuf before `llvm.eh.sjlj.setjmp`.
2206 //
2207 // ThisMBB:
2208 // buf[3] = %s17 iff %s17 is used as BP
2209 // buf[1] = RestoreMBB as IC after longjmp
2210 // # SjLjSetup RestoreMBB
2211 //
2212 // MainMBB:
2213 // v_main = 0
2214 //
2215 // SinkMBB:
2216 // v = phi(v_main, MainMBB, v_restore, RestoreMBB)
2217 // ...
2218 //
2219 // RestoreMBB:
2220 // %s17 = buf[3] = iff %s17 is used as BP
2221 // v_restore = 1
2222 // goto SinkMBB
2223
2224 MachineBasicBlock *ThisMBB = MBB;
2225 MachineBasicBlock *MainMBB = MF->CreateMachineBasicBlock(BB);
2226 MachineBasicBlock *SinkMBB = MF->CreateMachineBasicBlock(BB);
2227 MachineBasicBlock *RestoreMBB = MF->CreateMachineBasicBlock(BB);
// MainMBB and SinkMBB are placed at position I; RestoreMBB goes to the end of
// the function and is flagged as address-taken because its address is stored
// into the jmpbuf.
2228 MF->insert(I, MainMBB);
2229 MF->insert(I, SinkMBB);
2230 MF->push_back(RestoreMBB);
2231 RestoreMBB->setMachineBlockAddressTaken();
2232
2233 // Transfer the remainder of BB and its successor edges to SinkMBB.
2234 SinkMBB->splice(SinkMBB->begin(), MBB,
2235 std::next(MachineBasicBlock::iterator(MI)), MBB->end());
2237
2238 // ThisMBB:
2239 Register LabelReg =
2241
2242 // Store BP in buf[3] iff this function is using BP.
2243 const VEFrameLowering *TFI = Subtarget->getFrameLowering();
2244 if (TFI->hasBP(*MF)) {
2245 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2246 MIB.addReg(BufReg);
2247 MIB.addImm(0);
// Displacement 24 addresses buf[3].
2248 MIB.addImm(24);
2249 MIB.addReg(VE::SX17);
2250 MIB.setMemRefs(MMOs);
2251 }
2252
2253 // Store IP in buf[1].
2254 MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(VE::STrii));
2255 MIB.add(MI.getOperand(1)); // we can preserve the kill flags here.
2256 MIB.addImm(0);
// Displacement 8 addresses buf[1].
2257 MIB.addImm(8);
2258 MIB.addReg(LabelReg, getKillRegState(true));
2259 MIB.setMemRefs(MMOs);
2260
2261 // SP/FP are already stored in jmpbuf before `llvm.eh.sjlj.setjmp`.
2262
2263 // Insert setup.
2264 MIB =
2265 BuildMI(*ThisMBB, MI, DL, TII->get(VE::EH_SjLj_Setup)).addMBB(RestoreMBB);
2266
// The setup pseudo carries a register mask with no preserved registers.
2267 const VERegisterInfo *RegInfo = Subtarget->getRegisterInfo();
2268 MIB.addRegMask(RegInfo->getNoPreservedMask());
2269 ThisMBB->addSuccessor(MainMBB);
2270 ThisMBB->addSuccessor(RestoreMBB);
2271
2272 // MainMBB:
// v_main = 0 (LEAzii with all-zero operands).
2273 BuildMI(MainMBB, DL, TII->get(VE::LEAzii), MainDestReg)
2274 .addImm(0)
2275 .addImm(0)
2276 .addImm(0);
2277 MainMBB->addSuccessor(SinkMBB);
2278
2279 // SinkMBB:
2280 BuildMI(*SinkMBB, SinkMBB->begin(), DL, TII->get(VE::PHI), DstReg)
2281 .addReg(MainDestReg)
2282 .addMBB(MainMBB)
2283 .addReg(RestoreDestReg)
2284 .addMBB(RestoreMBB);
2285
2286 // RestoreMBB:
2287 // Restore BP from buf[3] iff this function is using BP. The address of
2288 // buf is in SX10.
2289 // FIXME: Better to not use SX10 here
2290 if (TFI->hasBP(*MF)) {
2292 BuildMI(RestoreMBB, DL, TII->get(VE::LDrii), VE::SX17);
2293 MIB.addReg(VE::SX10);
2294 MIB.addImm(0);
2295 MIB.addImm(24);
2296 MIB.setMemRefs(MMOs);
2297 }
// v_restore = 1, then branch back to SinkMBB.
2298 BuildMI(RestoreMBB, DL, TII->get(VE::LEAzii), RestoreDestReg)
2299 .addImm(0)
2300 .addImm(0)
2301 .addImm(1);
2302 BuildMI(RestoreMBB, DL, TII->get(VE::BRCFLa_t)).addMBB(SinkMBB);
2303 RestoreMBB->addSuccessor(SinkMBB);
2304
2305 MI.eraseFromParent();
2306 return SinkMBB;
2307}
2308
// Expands the longjmp pseudo instruction MI in place: reloads FP, the jump
// target and SP from the buffer addressed by operand 0, copies the buffer
// address into SX10, emits an indirect branch, erases MI and returns the
// (unchanged) block.
// NOTE(review): the start of the signature and the declarations of MRI and
// MIB are absent from this listing.
2311 MachineBasicBlock *MBB) const {
2312 DebugLoc DL = MI.getDebugLoc();
2313 MachineFunction *MF = MBB->getParent();
2314 const TargetInstrInfo *TII = Subtarget->getInstrInfo();
2316
2317 // Memory Reference.
2318 SmallVector<MachineMemOperand *, 2> MMOs(MI.memoperands_begin(),
2319 MI.memoperands_end());
2320 Register BufReg = MI.getOperand(0).getReg();
2321
// Tmp receives the jump target loaded from buf[1].
2322 Register Tmp = MRI.createVirtualRegister(&VE::I64RegClass);
2323 // Since FP is only updated here but NOT referenced, it's treated as GPR.
2324 Register FP = VE::SX9;
2325 Register SP = VE::SX11;
2326
2328
2329 MachineBasicBlock *ThisMBB = MBB;
2330
2331 // For `call @llvm.eh.sjlj.longjmp(buf)`, we generate following instructions.
2332 //
2333 // ThisMBB:
2334 // %fp = load buf[0]
2335 // %jmp = load buf[1]
2336 // %s10 = buf ; Store an address of buf to SX10 for RestoreMBB
2337 // %sp = load buf[2] ; generated by llvm.eh.sjlj.setjmp.
2338 // jmp %jmp
2339
2340 // Reload FP.
2341 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), FP);
2342 MIB.addReg(BufReg);
2343 MIB.addImm(0);
2344 MIB.addImm(0);
2345 MIB.setMemRefs(MMOs);
2346
2347 // Reload IP.
2348 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), Tmp);
2349 MIB.addReg(BufReg);
2350 MIB.addImm(0);
// Displacement 8 addresses buf[1].
2351 MIB.addImm(8);
2352 MIB.setMemRefs(MMOs);
2353
2354 // Copy BufReg to SX10 for later use in setjmp.
2355 // FIXME: Better to not use SX10 here
2356 BuildMI(*ThisMBB, MI, DL, TII->get(VE::ORri), VE::SX10)
2357 .addReg(BufReg)
2358 .addImm(0);
2359
2360 // Reload SP.
2361 MIB = BuildMI(*ThisMBB, MI, DL, TII->get(VE::LDrii), SP);
2362 MIB.add(MI.getOperand(0)); // we can preserve the kill flags here.
2363 MIB.addImm(0);
// Displacement 16 addresses buf[2].
2364 MIB.addImm(16);
2365 MIB.setMemRefs(MMOs);
2366
2367 // Jump.
2368 BuildMI(*ThisMBB, MI, DL, TII->get(VE::BCFLari_t))
2369 .addReg(Tmp, getKillRegState(true))
2370 .addImm(0);
2371
2372 MI.eraseFromParent();
2373 return ThisMBB;
2374}
2375
// Expands the SjLj dispatch-setup pseudo instruction MI. It collects all
// landing pads per call-site number, creates DispatchBB / TrapBB / DispContBB
// (see the diagram below), emits a jump-table based dispatch on the call-site
// value loaded from the function context, redirects every invoke block from
// its landing pads to DispatchBB, erases MI and returns BB.
// NOTE(review): the start of the signature and a number of declarations and
// operand lines (e.g. MRI, CallSiteNumToLPad, InvokeBBs, TrapBB, JTI, SeenMBBs,
// MBBLPads, Successors, DefRegs) are absent from this listing.
2378 MachineBasicBlock *BB) const {
2379 DebugLoc DL = MI.getDebugLoc();
2380 MachineFunction *MF = BB->getParent();
2381 MachineFrameInfo &MFI = MF->getFrameInfo();
2383 const VEInstrInfo *TII = Subtarget->getInstrInfo();
// Frame index of the function context object.
2384 int FI = MFI.getFunctionContextIndex();
2385
2386 // Get a mapping of the call site numbers to all of the landing pads they're
2387 // associated with.
2389 unsigned MaxCSNum = 0;
2390 for (auto &MBB : *MF) {
2391 if (!MBB.isEHPad())
2392 continue;
2393
// The first non-debug instruction of an EH pad must be its EH_LABEL; its
// symbol identifies the landing pad.
2394 MCSymbol *Sym = nullptr;
2395 for (const auto &MI : MBB) {
2396 if (MI.isDebugInstr())
2397 continue;
2398
2399 assert(MI.isEHLabel() && "expected EH_LABEL");
2400 Sym = MI.getOperand(0).getMCSymbol();
2401 break;
2402 }
2403
2404 if (!MF->hasCallSiteLandingPad(Sym))
2405 continue;
2406
2407 for (unsigned CSI : MF->getCallSiteLandingPad(Sym)) {
2408 CallSiteNumToLPad[CSI].push_back(&MBB);
2409 MaxCSNum = std::max(MaxCSNum, CSI);
2410 }
2411 }
2412
2413 // Get an ordered list of the machine basic blocks for the jump table.
2414 std::vector<MachineBasicBlock *> LPadList;
2416 LPadList.reserve(CallSiteNumToLPad.size());
2417
// Call-site numbers are walked from 1 upwards so that LPadList is ordered by
// call-site number; the predecessors of each pad are the invoke blocks.
2418 for (unsigned CSI = 1; CSI <= MaxCSNum; ++CSI) {
2419 for (auto &LP : CallSiteNumToLPad[CSI]) {
2420 LPadList.push_back(LP);
2421 InvokeBBs.insert(LP->pred_begin(), LP->pred_end());
2422 }
2423 }
2424
2425 assert(!LPadList.empty() &&
2426 "No landing pad destinations for the dispatch jump table!");
2427
2428 // The %fn_context is allocated like below (from --print-after=sjljehprepare):
2429 // %fn_context = alloca { i8*, i64, [4 x i64], i8*, i8*, [5 x i8*] }
2430 //
2431 // This `[5 x i8*]` is jmpbuf, so jmpbuf[1] is FI+72.
2432 // First `i64` is callsite, so callsite is FI+8.
2433 static const int OffsetIC = 72;
2434 static const int OffsetCS = 8;
2435
2436 // Create the MBBs for the dispatch code like following:
2437 //
2438 // ThisMBB:
2439 // Prepare DispatchBB address and store it to buf[1].
2440 // ...
2441 //
2442 // DispatchBB:
2443 // %s15 = GETGOT iff isPositionIndependent
2444 // %callsite = load callsite
2445 // brgt.l.t #size of callsites, %callsite, DispContBB
2446 //
2447 // TrapBB:
2448 // Call abort.
2449 //
2450 // DispContBB:
2451 // %breg = address of jump table
2452 // %pc = load and calculate next pc from %breg and %callsite
2453 // jmp %pc
2454
2455 // Shove the dispatch's address into the return slot in the function context.
2456 MachineBasicBlock *DispatchBB = MF->CreateMachineBasicBlock();
2457 DispatchBB->setIsEHPad(true);
2458
2459 // Trap BB will cause a trap like `assert(0)`.
2461 DispatchBB->addSuccessor(TrapBB);
2462
2463 MachineBasicBlock *DispContBB = MF->CreateMachineBasicBlock();
2464 DispatchBB->addSuccessor(DispContBB);
2465
2466 // Insert MBBs.
2467 MF->push_back(DispatchBB);
2468 MF->push_back(DispContBB);
2469 MF->push_back(TrapBB);
2470
2471 // Insert code to call abort in the TrapBB.
2472 Register Abort = prepareSymbol(*TrapBB, TrapBB->end(), "abort", DL,
2473 /* Local */ false, /* Call */ true);
2474 BuildMI(TrapBB, DL, TII->get(VE::BSICrii), VE::SX10)
2475 .addReg(Abort, getKillRegState(true))
2476 .addImm(0)
2477 .addImm(0);
2478
2479 // Insert code into the entry block that creates and registers the function
2480 // context.
2481 setupEntryBlockForSjLj(MI, BB, DispatchBB, FI, OffsetIC);
2482
2483 // Create the jump table and associated information
2484 unsigned JTE = getJumpTableEncoding();
2486 unsigned MJTI = JTI->createJumpTableIndex(LPadList);
2487
2488 const VERegisterInfo &RI = TII->getRegisterInfo();
2489 // Add a register mask with no preserved registers. This results in all
2490 // registers being marked as clobbered.
2491 BuildMI(DispatchBB, DL, TII->get(VE::NOP))
2493
2494 if (isPositionIndependent()) {
2495 // Force to generate GETGOT, since current implementation doesn't store GOT
2496 // register.
2497 BuildMI(DispatchBB, DL, TII->get(VE::GETGOT), VE::SX15);
2498 }
2499
2500 // IReg is used as an index in a memory operand and therefore can't be SP
2501 const TargetRegisterClass *RC = &VE::I64RegClass;
2502 Register IReg = MRI.createVirtualRegister(RC);
// Load the call-site value from the function context (FI + OffsetCS).
2503 addFrameReference(BuildMI(DispatchBB, DL, TII->get(VE::LDLZXrii), IReg), FI,
2504 OffsetCS);
// Range check of the call-site value against the number of landing pads.
// Note that both branches built here target TrapBB (the out-of-range path),
// whereas the diagram above describes the branch in terms of DispContBB.
// Small counts use the immediate form; presumably 64 is the limit of the
// immediate operand -- TODO confirm against the instruction definition.
2505 if (LPadList.size() < 64) {
2506 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLir_t))
2508 .addImm(LPadList.size())
2509 .addReg(IReg)
2510 .addMBB(TrapBB);
2511 } else {
2512 assert(LPadList.size() <= 0x7FFFFFFF && "Too large Landing Pad!");
2513 Register TmpReg = MRI.createVirtualRegister(RC);
2514 BuildMI(DispatchBB, DL, TII->get(VE::LEAzii), TmpReg)
2515 .addImm(0)
2516 .addImm(0)
2517 .addImm(LPadList.size());
2518 BuildMI(DispatchBB, DL, TII->get(VE::BRCFLrr_t))
2520 .addReg(TmpReg, getKillRegState(true))
2521 .addReg(IReg)
2522 .addMBB(TrapBB);
2523 }
2524
// BReg will hold the address of the jump table.
2525 Register BReg = MRI.createVirtualRegister(RC);
2526 Register Tmp1 = MRI.createVirtualRegister(RC);
2527 Register Tmp2 = MRI.createVirtualRegister(RC);
2528
2529 if (isPositionIndependent()) {
2530 // Create following instructions for local linkage PIC code.
2531 // lea %Tmp1, .LJTI0_0@gotoff_lo
2532 // and %Tmp2, %Tmp1, (32)0
2533 // lea.sl %BReg, .LJTI0_0@gotoff_hi(%Tmp2, %s15) ; %s15 is GOT
2534 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2535 .addImm(0)
2536 .addImm(0)
2538 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2539 .addReg(Tmp1, getKillRegState(true))
2540 .addImm(M0(32));
2541 BuildMI(DispContBB, DL, TII->get(VE::LEASLrri), BReg)
2542 .addReg(VE::SX15)
2543 .addReg(Tmp2, getKillRegState(true))
2545 } else {
2546 // Create following instructions for non-PIC code.
2547 // lea %Tmp1, .LJTI0_0@lo
2548 // and %Tmp2, %Tmp1, (32)0
2549 // lea.sl %BReg, .LJTI0_0@hi(%Tmp2)
2550 BuildMI(DispContBB, DL, TII->get(VE::LEAzii), Tmp1)
2551 .addImm(0)
2552 .addImm(0)
2554 BuildMI(DispContBB, DL, TII->get(VE::ANDrm), Tmp2)
2555 .addReg(Tmp1, getKillRegState(true))
2556 .addImm(M0(32));
2557 BuildMI(DispContBB, DL, TII->get(VE::LEASLrii), BReg)
2558 .addReg(Tmp2, getKillRegState(true))
2559 .addImm(0)
2561 }
2562
// Emit the table lookup and indirect jump; the entry size (and therefore the
// shift amount) depends on the jump table encoding.
// NOTE(review): each case below declares its own Tmp1, shadowing the Tmp1
// declared above.
2563 switch (JTE) {
2565 // Generate simple block address code for no-PIC model.
2566 // sll %Tmp1, %IReg, 3
2567 // lds %TReg, 0(%Tmp1, %BReg)
2568 // bcfla %TReg
2569
2570 Register TReg = MRI.createVirtualRegister(RC);
2571 Register Tmp1 = MRI.createVirtualRegister(RC);
2572
// Index * 8: 64-bit table entries.
2573 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2574 .addReg(IReg, getKillRegState(true))
2575 .addImm(3);
2576 BuildMI(DispContBB, DL, TII->get(VE::LDrri), TReg)
2577 .addReg(BReg, getKillRegState(true))
2578 .addReg(Tmp1, getKillRegState(true))
2579 .addImm(0);
2580 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2581 .addReg(TReg, getKillRegState(true))
2582 .addImm(0);
2583 break;
2584 }
2586 // Generate block address code using differences from the function pointer
2587 // for PIC model.
2588 // sll %Tmp1, %IReg, 2
2589 // ldl.zx %OReg, 0(%Tmp1, %BReg)
2590 // Prepare function address in BReg2.
2591 // adds.l %TReg, %BReg2, %OReg
2592 // bcfla %TReg
2593
2595 Register OReg = MRI.createVirtualRegister(RC);
2596 Register TReg = MRI.createVirtualRegister(RC);
2597 Register Tmp1 = MRI.createVirtualRegister(RC);
2598
// Index * 4: 32-bit table entries holding offsets.
2599 BuildMI(DispContBB, DL, TII->get(VE::SLLri), Tmp1)
2600 .addReg(IReg, getKillRegState(true))
2601 .addImm(2);
2602 BuildMI(DispContBB, DL, TII->get(VE::LDLZXrri), OReg)
2603 .addReg(BReg, getKillRegState(true))
2604 .addReg(Tmp1, getKillRegState(true))
2605 .addImm(0);
// The loaded offset is added to the address of the enclosing function.
2606 Register BReg2 =
2607 prepareSymbol(*DispContBB, DispContBB->end(),
2608 DispContBB->getParent()->getName(), DL, /* Local */ true);
2609 BuildMI(DispContBB, DL, TII->get(VE::ADDSLrr), TReg)
2610 .addReg(OReg, getKillRegState(true))
2611 .addReg(BReg2, getKillRegState(true));
2612 BuildMI(DispContBB, DL, TII->get(VE::BCFLari_t))
2613 .addReg(TReg, getKillRegState(true))
2614 .addImm(0);
2615 break;
2616 }
2617 default:
2618 llvm_unreachable("Unexpected jump table encoding");
2619 }
2620
2621 // Add the jump table entries as successors to the MBB.
// SeenMBBs filters duplicates so each landing pad is added only once.
2623 for (auto &LP : LPadList)
2624 if (SeenMBBs.insert(LP).second)
2625 DispContBB->addSuccessor(LP);
2626
2627 // N.B. the order the invoke BBs are processed in doesn't matter here.
2629 const MCPhysReg *SavedRegs = MF->getRegInfo().getCalleeSavedRegs();
2630 for (MachineBasicBlock *MBB : InvokeBBs) {
2631 // Remove the landing pad successor from the invoke block and replace it
2632 // with the new dispatch block.
2633 // Keep a copy of Successors since it's modified inside the loop.
2635 MBB->succ_rend());
2636 // FIXME: Avoid quadratic complexity.
2637 for (auto *MBBS : Successors) {
2638 if (MBBS->isEHPad()) {
2639 MBB->removeSuccessor(MBBS);
2640 MBBLPads.push_back(MBBS);
2641 }
2642 }
2643
2644 MBB->addSuccessor(DispatchBB);
2645
2646 // Find the invoke call and mark all of the callee-saved registers as
2647 // 'implicit defined' so that they're spilled. This prevents code from
2648 // moving instructions to before the EH block, where they will never be
2649 // executed.
// Only the last call of the block is processed (reverse walk, then break).
2650 for (auto &II : reverse(*MBB)) {
2651 if (!II.isCall())
2652 continue;
2653
// Record the registers that already appear as operands of the call.
2655 for (auto &MOp : II.operands())
2656 if (MOp.isReg())
2657 DefRegs[MOp.getReg()] = true;
2658
// SavedRegs is walked until its zero terminator.
2659 MachineInstrBuilder MIB(*MF, &II);
2660 for (unsigned RI = 0; SavedRegs[RI]; ++RI) {
2661 Register Reg = SavedRegs[RI];
2662 if (!DefRegs[Reg])
2664 }
2665
2666 break;
2667 }
2668 }
2669
2670 // Mark all former landing pads as non-landing pads. The dispatch is the only
2671 // landing pad now.
2672 for (auto &LP : MBBLPads)
2673 LP->setIsEHPad(false);
2674
2675 // The instruction is gone now.
2676 MI.eraseFromParent();
2677 return BB;
2678}
2679
// Dispatches a pseudo instruction that needs custom insertion to the matching
// SjLj expansion routine, keyed on MI's opcode, and returns whatever block
// that routine returns. Any other opcode is a programming error.
// NOTE(review): the start of the signature is absent from this listing.
2682 MachineBasicBlock *BB) const {
2683 switch (MI.getOpcode()) {
2684 default:
2685 llvm_unreachable("Unknown Custom Instruction!");
2686 case VE::EH_SjLj_LongJmp:
2687 return emitEHSjLjLongJmp(MI, BB);
2688 case VE::EH_SjLj_SetJmp:
2689 return emitEHSjLjSetJmp(MI, BB);
2690 case VE::EH_SjLj_Setup_Dispatch:
2691 return emitSjLjDispatchBlock(MI, BB);
2692 }
2693}
2694
2695static bool isSimm7(SDValue V) {
2696 EVT VT = V.getValueType();
2697 if (VT.isVector())
2698 return false;
2699
2700 if (VT.isInteger()) {
2701 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
2702 return isInt<7>(C->getSExtValue());
2703 } else if (VT.isFloatingPoint()) {
2704 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
2705 if (VT == MVT::f32 || VT == MVT::f64) {
2706 const APInt &Imm = C->getValueAPF().bitcastToAPInt();
2707 uint64_t Val = Imm.getSExtValue();
2708 if (Imm.getBitWidth() == 32)
2709 Val <<= 32; // Immediate value of float place at higher bits on VE.
2710 return isInt<7>(Val);
2711 }
2712 }
2713 }
2714 return false;
2715}
2716
2717static bool isMImm(SDValue V) {
2718 EVT VT = V.getValueType();
2719 if (VT.isVector())
2720 return false;
2721
2722 if (VT.isInteger()) {
2723 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(V))
2724 return isMImmVal(getImmVal(C));
2725 } else if (VT.isFloatingPoint()) {
2726 if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(V)) {
2727 if (VT == MVT::f32) {
2728 // Float value places at higher bits, so ignore lower 32 bits.
2729 return isMImm32Val(getFpImmVal(C) >> 32);
2730 } else if (VT == MVT::f64) {
2731 return isMImmVal(getFpImmVal(C));
2732 }
2733 }
2734 }
2735 return false;
2736}
2737
2738static unsigned decideComp(EVT SrcVT, ISD::CondCode CC) {
2739 if (SrcVT.isFloatingPoint()) {
2740 if (SrcVT == MVT::f128)
2741 return VEISD::CMPQ;
2742 return VEISD::CMPF;
2743 }
2744 return isSignedIntSetCC(CC) ? VEISD::CMPI : VEISD::CMPU;
2745}
2746
2747static EVT decideCompType(EVT SrcVT) {
2748 if (SrcVT == MVT::f128)
2749 return MVT::f64;
2750 return SrcVT;
2751}
2752
// Returns true when a value of type SrcVT may be used directly, without an
// explicit comparison against zero, for condition code CC. WithCMov relaxes
// the integer rule to every signed condition.
// NOTE(review): the start of the signature is absent from this listing.
2754 bool WithCMov) {
2755 if (SrcVT.isFloatingPoint()) {
2756 // For the case of floating point setcc, only unordered comparison
2757 // or general comparison with -enable-no-nans-fp-math option reach
2758 // here, so it is safe even if values are NaN. Only f128 isn't
2759 // safe since VE uses f64 result of f128 comparison.
2760 return SrcVT != MVT::f128;
2761 }
2762 if (isIntEqualitySetCC(CC)) {
2763 // For the case of equal or not equal, it is safe without comparison with 0.
2764 return true;
2765 }
2766 if (WithCMov) {
2767 // For the case of integer setcc with cmov, all signed comparison with 0
2768 // are safe.
2769 return isSignedIntSetCC(CC);
2770 }
2771 // For the case of integer setcc, only signed 64 bits comparison is safe.
2772 // For unsigned, "CMPU 0x80000000, 0" has to be greater than 0, but it becomes
2773 // less than 0 without CMPU. For 32 bits, other half of 32 bits are
2774 // unconditional, so it is not safe too without CMPI.
2775 return isSignedIntSetCC(CC) && SrcVT == MVT::i64;
2776}
2777
// Produces the value to be tested for condition CC: either LHS itself when
// the explicit comparison can be skipped, or a new comparison node whose
// opcode and type come from decideComp / decideCompType.
// NOTE(review): the start of the signature and the last clause of the
// condition below are absent from this listing.
2779 ISD::CondCode CC, bool WithCMov,
2780 const SDLoc &DL, SelectionDAG &DAG) {
2781 // Compare values. If RHS is 0 and it is safe to calculate without
2782 // comparison, we don't generate an instruction for comparison.
2783 EVT CompVT = decideCompType(VT);
2784 if (CompVT == VT && safeWithoutCompWithNull(VT, CC, WithCMov) &&
2786 return LHS;
2787 }
2788 return DAG.getNode(decideComp(VT, CC), DL, CompVT, LHS, RHS);
2789}
2790
// DAG combine for scalar ISD::SELECT after DAG legalization: rewrites the
// node into VEISD::CMOV(Cond, True, False, cc). Returns an empty SDValue for
// vector results or when called before legalization.
// NOTE(review): the start of the signature and the declaration of CC are
// absent from this listing.
2792 DAGCombinerInfo &DCI) const {
2793 assert(N->getOpcode() == ISD::SELECT &&
2794 "Should be called with a SELECT node");
2796 SDValue Cond = N->getOperand(0);
2797 SDValue True = N->getOperand(1);
2798 SDValue False = N->getOperand(2);
2799
2800 // We handle only scalar SELECT.
2801 EVT VT = N->getValueType(0);
2802 if (VT.isVector())
2803 return SDValue();
2804
2805 // Perform combineSelect after legalize DAG.
2806 if (!DCI.isAfterLegalizeDAG())
2807 return SDValue();
2808
2809 EVT VT0 = Cond.getValueType();
2810 if (isMImm(True)) {
2811 // VE's condition move can handle MImm in True clause, so nothing to do.
2812 } else if (isMImm(False)) {
2813 // VE's condition move can handle MImm in True clause, so swap True and
2814 // False clauses if False has MImm value. And, update condition code.
2815 std::swap(True, False);
2816 CC = getSetCCInverse(CC, VT0);
2817 }
2818
2819 SDLoc DL(N);
2820 SelectionDAG &DAG = DCI.DAG;
// Translate the ISD condition into the VE encoding; the mapping function
// depends on whether the condition value is floating point or integer.
2821 VECC::CondCode VECCVal;
2822 if (VT0.isFloatingPoint()) {
2823 VECCVal = fpCondCode2Fcc(CC);
2824 } else {
2825 VECCVal = intCondCode2Icc(CC);
2826 }
2827 SDValue Ops[] = {Cond, True, False,
2828 DAG.getConstant(VECCVal, DL, MVT::i32)};
2829 return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2830}
2831
// DAG combine for scalar ISD::SELECT_CC after DAG legalization: rewrites the
// node into VEISD::CMOV(cmp, True, False, cc), where cmp comes from
// generateComparison. Operands are swapped (with the condition adjusted) so
// that immediates land where the instructions accept them. Returns an empty
// SDValue for vector results, before legalization, or for comparison types
// other than i32/i64/f32/f64/f128.
// NOTE(review): the start of the signature is absent from this listing.
2833 DAGCombinerInfo &DCI) const {
2834 assert(N->getOpcode() == ISD::SELECT_CC &&
2835 "Should be called with a SELECT_CC node");
2836 ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
2837 SDValue LHS = N->getOperand(0);
2838 SDValue RHS = N->getOperand(1);
2839 SDValue True = N->getOperand(2);
2840 SDValue False = N->getOperand(3);
2841
2842 // We handle only scalar SELECT_CC.
2843 EVT VT = N->getValueType(0);
2844 if (VT.isVector())
2845 return SDValue();
2846
2847 // Perform combineSelectCC after legalize DAG.
2848 if (!DCI.isAfterLegalizeDAG())
2849 return SDValue();
2850
2851 // We handle only i32/i64/f32/f64/f128 comparisons.
2852 EVT LHSVT = LHS.getValueType();
2853 assert(LHSVT == RHS.getValueType());
2854 switch (LHSVT.getSimpleVT().SimpleTy) {
2855 case MVT::i32:
2856 case MVT::i64:
2857 case MVT::f32:
2858 case MVT::f64:
2859 case MVT::f128:
2860 break;
2861 default:
2862 // Return SDValue to let llvm handle other types.
2863 return SDValue();
2864 }
2865
2866 if (isMImm(RHS)) {
2867 // VE's comparison can handle MImm in RHS, so nothing to do.
2868 } else if (isSimm7(RHS)) {
2869 // VE's comparison can handle Simm7 in LHS, so swap LHS and RHS, and
2870 // update condition code.
2871 std::swap(LHS, RHS);
2872 CC = getSetCCSwappedOperands(CC);
2873 }
2874 if (isMImm(True)) {
2875 // VE's condition move can handle MImm in True clause, so nothing to do.
2876 } else if (isMImm(False)) {
2877 // VE's condition move can handle MImm in True clause, so swap True and
2878 // False clauses if False has MImm value. And, update condition code.
2879 std::swap(True, False);
2880 CC = getSetCCInverse(CC, LHSVT);
2881 }
2882
2883 SDLoc DL(N);
2884 SelectionDAG &DAG = DCI.DAG;
2885
// The result feeds a CMOV, so the relaxed "with cmov" rule applies when
// deciding whether the explicit comparison can be dropped.
2886 bool WithCMov = true;
2887 SDValue CompNode = generateComparison(LHSVT, LHS, RHS, CC, WithCMov, DL, DAG);
2888
2889 VECC::CondCode VECCVal;
2890 if (LHSVT.isFloatingPoint()) {
2891 VECCVal = fpCondCode2Fcc(CC);
2892 } else {
2893 VECCVal = intCondCode2Icc(CC);
2894 }
2895 SDValue Ops[] = {CompNode, True, False,
2896 DAG.getConstant(VECCVal, DL, MVT::i32)};
2897 return DAG.getNode(VEISD::CMOV, DL, VT, Ops);
2898}
2899
// Forward declaration: isI32Insn and isI32InsnAllUses are mutually recursive.
2900 static bool isI32InsnAllUses(const SDNode *User, const SDNode *N);
// Returns true if User, a user of the truncate node N, is acceptable for
// replacing N by a sub-register extract. Opcodes in the first case list are
// accepted outright; selections, bit operations and copies are accepted only
// if all of their own uses are acceptable (checked via isI32InsnAllUses).
// NOTE(review): one case label between BITCAST and ATOMIC_SWAP appears to be
// missing from this listing.
2901 static bool isI32Insn(const SDNode *User, const SDNode *N) {
2902 switch (User->getOpcode()) {
2903 default:
2904 return false;
2905 case ISD::ADD:
2906 case ISD::SUB:
2907 case ISD::MUL:
2908 case ISD::SDIV:
2909 case ISD::UDIV:
2910 case ISD::SETCC:
2911 case ISD::SMIN:
2912 case ISD::SMAX:
2913 case ISD::SHL:
2914 case ISD::SRA:
2915 case ISD::BSWAP:
2916 case ISD::SINT_TO_FP:
2917 case ISD::UINT_TO_FP:
2918 case ISD::BR_CC:
2919 case ISD::BITCAST:
2921 case ISD::ATOMIC_SWAP:
2922 case VEISD::CMPU:
2923 case VEISD::CMPI:
2924 return true;
2925 case ISD::SRL:
// Note that this test inspects the operand of N (the truncate), not of User.
2926 if (N->getOperand(0).getOpcode() != ISD::SRL)
2927 return true;
2928 // (srl (trunc (srl ...))) may be optimized by combining srl, so
2929 // doesn't optimize trunc now.
2930 return false;
2931 case ISD::SELECT_CC:
// N used only as a comparison operand (not as the true/false value) is safe;
// otherwise the result of the select carries N and its uses must be checked.
2932 if (User->getOperand(2).getNode() != N &&
2933 User->getOperand(3).getNode() != N)
2934 return true;
2935 return isI32InsnAllUses(User, N);
2936 case VEISD::CMOV:
2937 // CMOV in (cmov (trunc ...), true, false, int-comparison) is safe.
2938 // However, trunc in true or false clauses is not safe.
2939 if (User->getOperand(1).getNode() != N &&
2940 User->getOperand(2).getNode() != N &&
2941 isa<ConstantSDNode>(User->getOperand(3))) {
2942 VECC::CondCode VECCVal = static_cast<VECC::CondCode>(
2943 cast<ConstantSDNode>(User->getOperand(3))->getZExtValue());
2944 return isIntVECondCode(VECCVal);
2945 }
2946 [[fallthrough]];
2947 case ISD::AND:
2948 case ISD::OR:
2949 case ISD::XOR:
2950 case ISD::SELECT:
2951 case ISD::CopyToReg:
2952 // Check all use of selections, bit operations, and copies. If all of them
2953 // are safe, optimize truncate to extract_subreg.
2954 return isI32InsnAllUses(User, N);
2955 }
2956}
2957
2958static bool isI32InsnAllUses(const SDNode *User, const SDNode *N) {
2959 // Check all use of User node. If all of them are safe, optimize
2960 // truncate to extract_subreg.
2961 for (const SDNode *U : User->uses()) {
2962 switch (U->getOpcode()) {
2963 default:
2964 // If the use is an instruction which treats the source operand as i32,
2965 // it is safe to avoid truncate here.
2966 if (isI32Insn(U, N))
2967 continue;
2968 break;
2969 case ISD::ANY_EXTEND:
2970 case ISD::SIGN_EXTEND:
2971 case ISD::ZERO_EXTEND: {
2972 // Special optimizations to the combination of ext and trunc.
2973 // (ext ... (select ... (trunc ...))) is safe to avoid truncate here
2974 // since this truncate instruction clears higher 32 bits which is filled
2975 // by one of ext instructions later.
2976 assert(N->getValueType(0) == MVT::i32 &&
2977 "find truncate to not i32 integer");
2978 if (User->getOpcode() == ISD::SELECT_CC ||
2979 User->getOpcode() == ISD::SELECT || User->getOpcode() == VEISD::CMOV)
2980 continue;
2981 break;
2982 }
2983 }
2984 return false;
2985 }
2986 return true;
2987}
2988
2989 // Optimize TRUNCATE in DAG combining. Optimizing it in CUSTOM lower is
2990 // sometimes too early. Optimizing it in DAG pattern matching in VEInstrInfo.td
2991 // is sometimes too late. So, doing it here.
// Replaces the TRUNCATE node N by an EXTRACT_SUBREG of sub_i32 when every
// user of N passes isI32Insn; otherwise returns an empty SDValue.
// NOTE(review): the start of the signature is absent from this listing.
2993 DAGCombinerInfo &DCI) const {
2994 assert(N->getOpcode() == ISD::TRUNCATE &&
2995 "Should be called with a TRUNCATE node");
2996
2997 SelectionDAG &DAG = DCI.DAG;
2998 SDLoc DL(N);
2999 EVT VT = N->getValueType(0);
3000
3001 // We prefer to do this when all types are legal.
3002 if (!DCI.isAfterLegalizeDAG())
3003 return SDValue();
3004
3005 // Skip combine TRUNCATE atm if the operand of TRUNCATE might be a constant.
// That is: a SELECT_CC whose two compared operands are both constants.
3006 if (N->getOperand(0)->getOpcode() == ISD::SELECT_CC &&
3007 isa<ConstantSDNode>(N->getOperand(0)->getOperand(0)) &&
3008 isa<ConstantSDNode>(N->getOperand(0)->getOperand(1)))
3009 return SDValue();
3010
3011 // Check all use of this TRUNCATE.
3012 for (const SDNode *User : N->uses()) {
3013 // Make sure that we're not going to replace TRUNCATE for non i32
3014 // instructions.
3015 //
3016 // FIXME: Although we could sometimes handle this, and it does occur in
3017 // practice that one of the condition inputs to the select is also one of
3018 // the outputs, we currently can't deal with this.
3019 if (isI32Insn(User, N))
3020 continue;
3021
3022 return SDValue();
3023 }
3024
3025 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3026 return SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, VT,
3027 N->getOperand(0), SubI32),
3028 0);
3029}
3030
// Target DAG-combine entry: forwards SELECT, SELECT_CC and TRUNCATE nodes to
// their dedicated combine routines. Any other opcode returns an empty SDValue,
// i.e. no change.
// NOTE(review): the start of the signature is absent from this listing.
3032 DAGCombinerInfo &DCI) const {
3033 switch (N->getOpcode()) {
3034 default:
3035 break;
3036 case ISD::SELECT:
3037 return combineSelect(N, DCI);
3038 case ISD::SELECT_CC:
3039 return combineSelectCC(N, DCI);
3040 case ISD::TRUNCATE:
3041 return combineTRUNCATE(N, DCI);
3042 }
3043
3044 return SDValue();
3045}
3046
3047//===----------------------------------------------------------------------===//
3048// VE Inline Assembly Support
3049//===----------------------------------------------------------------------===//
3050
// Classifies an inline-asm constraint string: the single letter 'v' is a
// register-class constraint; everything else is deferred to the generic
// TargetLowering implementation.
// NOTE(review): the signature of this definition is absent from this listing.
3053 if (Constraint.size() == 1) {
3054 switch (Constraint[0]) {
3055 default:
3056 break;
3057 case 'v': // vector registers
3058 return C_RegisterClass;
3059 }
3060 }
3061 return TargetLowering::getConstraintType(Constraint);
3062}
3063
// Maps a single-letter inline-asm constraint to a register class: 'r' selects
// I64RegClass and 'v' selects V64RegClass, both returned with register number
// 0 (no specific register). VT is not consulted for these two letters. Any
// other constraint is deferred to the generic TargetLowering implementation.
// NOTE(review): part of the signature is absent from this listing.
3064 std::pair<unsigned, const TargetRegisterClass *>
3066 StringRef Constraint,
3067 MVT VT) const {
3068 const TargetRegisterClass *RC = nullptr;
3069 if (Constraint.size() == 1) {
3070 switch (Constraint[0]) {
3071 default:
3072 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3073 case 'r':
3074 RC = &VE::I64RegClass;
3075 break;
3076 case 'v':
3077 RC = &VE::V64RegClass;
3078 break;
3079 }
3080 return std::make_pair(0U, RC);
3081 }
3082
3083 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
3084}
3085
3086//===----------------------------------------------------------------------===//
3087// VE Target Optimization Support
3088//===----------------------------------------------------------------------===//
3089
// Returns 8 when jump tables are relative (see the comment below).
// NOTE(review): the signature and the final return statement of this
// definition are absent from this listing, so the value used in the
// non-relative case cannot be stated here.
3091 // Specify 8 for PIC model to relieve the impact of PIC load instructions.
3092 if (isJumpTableRelative())
3093 return 8;
3094
3096}
3097
// Reports whether an and-not is acceptable for operand Y: false for vector
// types and for constant operands, true for everything else.
// NOTE(review): the signature of this definition is absent from this listing.
3099 EVT VT = Y.getValueType();
3100
3101 // VE doesn't have vector and not instruction.
3102 if (VT.isVector())
3103 return false;
3104
3105 // VE allows different immediate values for X and Y where ~X & Y.
3106 // Only simm7 works for X, and only mimm works for Y on VE. However, this
3107 // function is used to check whether an immediate value is OK for and-not
3108 // instruction as both X and Y. Generating additional instruction to
3109 // retrieve an immediate value is no good since the purpose of this
3110 // function is to convert a series of 3 instructions to another series of
3111 // 3 instructions with better parallelism. Therefore, we return false
3112 // for all immediate values now.
3113 // FIXME: Change hasAndNot function to have two operands to make it work
3114 // correctly with Aurora VE.
3115 if (isa<ConstantSDNode>(Y))
3116 return false;
3117
3118 // It's ok for generic registers.
3119 return true;
3120}
3121
// Custom lowering of ISD::EXTRACT_VECTOR_ELT for the packed vector types
// v512i32 and v512f32 (asserted below). Two 32-bit elements share one 64-bit
// lane: the lane is read with VE::LVSvr, shifted so the requested half ends up
// in the low 32 bits, masked, and narrowed to i32 (then bitcast to f32 when
// the result type is f32).
// NOTE(review): listing line 3122 (return type, qualified name and the Op
// parameter) is absent from this extraction — restore it from the upstream
// file.
3123 SelectionDAG &DAG) const {
3124 assert(Op.getOpcode() == ISD::EXTRACT_VECTOR_ELT && "Unknown opcode!");
3125 MVT VT = Op.getOperand(0).getSimpleValueType();
3126
3127 // Special treatment for packed V64 types.
3128 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3129 (void)VT;
3130 // Example of codes:
3131 // %packed_v = extractelt %vr, (%idx >> 1)
     // %shift = ((%idx & 1) ^ 1) << 5
3132 // %v = %packed_v >> %shift
3133 // %res = %v & 0xffffffff
3134
3135 SDValue Vec = Op.getOperand(0);
3136 SDValue Idx = Op.getOperand(1);
3137 SDLoc DL(Op);
     // Result defaults to Op; only the non-constant path below replaces it.
3138 SDValue Result = Op;
3139 if (false /* Idx->isConstant() */) {
3140 // TODO: optimized implementation using constant values
3141 } else {
     // NOTE(review): Idx feeds i64 nodes directly with no widening visible
     // here — confirm it is always i64 when this lowering runs.
3142 SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
     // Index of the 64-bit lane that holds the requested 32-bit element.
3143 SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
3144 SDValue PackedElt =
3145 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
     // Shift is 32 for even indices and 0 for odd ones: ((Idx & 1) ^ 1) << 5.
3146 SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
3147 SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
3148 SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
3149 Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
     // Bring the selected half down to bits 0..31 and drop the other half.
3150 PackedElt = DAG.getNode(ISD::SRL, DL, MVT::i64, {PackedElt, Shift});
3151 SDValue Mask = DAG.getConstant(0xFFFFFFFFL, DL, MVT::i64);
3152 PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
     // Narrow the i64 value to i32 through the sub_i32 subregister.
3153 SDValue SubI32 = DAG.getTargetConstant(VE::sub_i32, DL, MVT::i32);
3154 Result = SDValue(DAG.getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL,
3155 MVT::i32, PackedElt, SubI32),
3156 0);
3157
3158 if (Op.getSimpleValueType() == MVT::f32) {
3159 Result = DAG.getBitcast(MVT::f32, Result);
3160 } else {
3161 assert(Op.getSimpleValueType() == MVT::i32);
3162 }
3163 }
3164 return Result;
3165}
3166
// Custom lowering of ISD::INSERT_VECTOR_ELT for the packed vector types
// v512i32 and v512f32 (asserted below). The 64-bit lane containing the target
// element is read with VE::LVSvr, the addressed 32-bit half is cleared and
// replaced by the new value, and the lane is written back with VE::LSVrr_v.
// NOTE(review): listing lines 3167 (return type, qualified name and the Op
// parameter), 3190 and 3193 are absent from this extraction — restore them
// from the upstream file.
3168 SelectionDAG &DAG) const {
3169 assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
3170 MVT VT = Op.getOperand(0).getSimpleValueType();
3171
3172 // Special treatment for packed V64 types.
3173 assert(VT == MVT::v512i32 || VT == MVT::v512f32);
3174 (void)VT;
3175 // The v512i32 and v512f32 types start from the upper bits (0..31). These
3176 // "upper bits" require `val << 32` from the C implementation's point of view.
3177 //
3178 // Example of codes:
3179 // %packed_elt = extractelt %vr, (%idx >> 1)
3180 // %shift = ((%idx & 1) ^ 1) << 5
3181 // %packed_elt &= 0xffffffff00000000 >> shift
3182 // %packed_elt |= (zext %val) << shift
3183 // %vr = insertelt %vr, %packed_elt, (%idx >> 1)
3184
3185 SDLoc DL(Op);
3186 SDValue Vec = Op.getOperand(0);
3187 SDValue Val = Op.getOperand(1);
3188 SDValue Idx = Op.getOperand(2);
     // NOTE(review): the statement guarded by this `if` (listing line 3190) is
     // missing here; presumably it widens Idx to i64 — confirm upstream. As
     // the text stands, this `if` would guard the next `if` instead.
3189 if (Idx.getSimpleValueType() == MVT::i32)
3191 if (Val.getSimpleValueType() == MVT::f32)
3192 Val = DAG.getBitcast(MVT::i32, Val);
     // NOTE(review): listing line 3193 is missing here — confirm upstream.
3194 Val = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i64, Val);
3195
     // Result defaults to Op; only the non-constant path below replaces it.
3196 SDValue Result = Op;
3197 if (false /* Idx->isConstant()*/) {
3198 // TODO: optimized implementation using constant values
3199 } else {
3200 SDValue Const1 = DAG.getConstant(1, DL, MVT::i64);
     // Index of the 64-bit lane that holds the target 32-bit element.
3201 SDValue HalfIdx = DAG.getNode(ISD::SRL, DL, MVT::i64, {Idx, Const1});
3202 SDValue PackedElt =
3203 SDValue(DAG.getMachineNode(VE::LVSvr, DL, MVT::i64, {Vec, HalfIdx}), 0);
     // Shift is 32 for even indices and 0 for odd ones: ((Idx & 1) ^ 1) << 5.
3204 SDValue AndIdx = DAG.getNode(ISD::AND, DL, MVT::i64, {Idx, Const1});
3205 SDValue Shift = DAG.getNode(ISD::XOR, DL, MVT::i64, {AndIdx, Const1});
3206 SDValue Const5 = DAG.getConstant(5, DL, MVT::i64);
3207 Shift = DAG.getNode(ISD::SHL, DL, MVT::i64, {Shift, Const5});
     // The mask keeps the half that is NOT being written: the low 32 bits
     // when Shift is 32, the high 32 bits when Shift is 0.
3208 SDValue Mask = DAG.getConstant(0xFFFFFFFF00000000L, DL, MVT::i64);
3209 Mask = DAG.getNode(ISD::SRL, DL, MVT::i64, {Mask, Shift});
3210 PackedElt = DAG.getNode(ISD::AND, DL, MVT::i64, {PackedElt, Mask});
     // Move the zero-extended value into the cleared half and merge.
3211 Val = DAG.getNode(ISD::SHL, DL, MVT::i64, {Val, Shift});
3212 PackedElt = DAG.getNode(ISD::OR, DL, MVT::i64, {PackedElt, Val});
     // Write the updated lane back; the result has the same type as Vec.
3213 Result =
3214 SDValue(DAG.getMachineNode(VE::LSVrr_v, DL, Vec.getSimpleValueType(),
3215 {HalfIdx, PackedElt, Vec}),
3216 0);
3217 }
3218 return Result;
3219}
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
amdgpu Simplify well known AMD library false FunctionCallee Callee
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Function Alias Analysis Results
assume Assume Builder
SmallVector< MachineOperand, 4 > Cond
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
return RetTy
Returns the sub type a function will return at a given Idx. Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
uint64_t Addr
uint64_t Size
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define RegName(no)
#define I(x, y, z)
Definition: MD5.cpp:58
unsigned const TargetRegisterInfo * TRI
typename CallsiteContextGraph< DerivedCCG, FuncTy, CallTy >::FuncInfo FuncInfo
Module.h This file contains the declarations for the Module class.
LLVMContext & Context
static GCMetadataPrinterRegistry::Add< OcamlGCMetadataPrinter > Y("ocaml", "ocaml 3.10-compatible collector")
const char LLVMTargetMachineRef TM
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static unsigned decideComp(EVT SrcVT, ISD::CondCode CC)
static bool isSimm7(SDValue V)
CCAssignFn * getParamCC(CallingConv::ID CallConv, bool IsVarArg)
static SDValue lowerLoadF128(SDValue Op, SelectionDAG &DAG)
static bool isMImm(SDValue V)
static SDValue prepareTS1AM(SDValue Op, SelectionDAG &DAG, SDValue &Flag, SDValue &Bits)
CCAssignFn * getReturnCC(CallingConv::ID CallConv)
static bool safeWithoutCompWithNull(EVT SrcVT, ISD::CondCode CC, bool WithCMov)
static bool isI32InsnAllUses(const SDNode *User, const SDNode *N)
static SDValue lowerLoadI1(SDValue Op, SelectionDAG &DAG)
static SDValue generateComparison(EVT VT, SDValue LHS, SDValue RHS, ISD::CondCode CC, bool WithCMov, const SDLoc &DL, SelectionDAG &DAG)
static EVT decideCompType(EVT SrcVT)
static bool isI32Insn(const SDNode *User, const SDNode *N)
static SDValue lowerFRAMEADDR(SDValue Op, SelectionDAG &DAG, const VETargetLowering &TLI, const VESubtarget *Subtarget)
static const MVT AllMaskVTs[]
static bool getUniqueInsertion(SDNode *N, unsigned &UniqueIdx)
static SDValue lowerRETURNADDR(SDValue Op, SelectionDAG &DAG, const VETargetLowering &TLI, const VESubtarget *Subtarget)
static const MVT AllVectorVTs[]
static const MVT AllPackedVTs[]
static SDValue finalizeTS1AM(SDValue Op, SelectionDAG &DAG, SDValue Data, SDValue Bits)
static SDValue lowerStoreF128(SDValue Op, SelectionDAG &DAG)
static SDValue lowerStoreI1(SDValue Op, SelectionDAG &DAG)
#define TARGET_NODE_CASE(NAME)
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition: APInt.h:75
an instruction that atomically reads a memory location, combines it with another value,...
Definition: Instructions.h:718
BinOp getOperation() const
Definition: Instructions.h:812
This is an SDNode representing atomic operations.
LLVM Basic Block Representation.
Definition: BasicBlock.h:56
CCState - This class holds information needed while lowering arguments and return values.
void AnalyzeCallResult(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeCallResult - Analyze the return values of a call, incorporating info about the passed values i...
unsigned AllocateStack(unsigned Size, Align Alignment)
AllocateStack - Allocate a chunk of stack space with the specified size and alignment.
unsigned getNextStackOffset() const
getNextStackOffset - Return the next stack offset such that all stack slots satisfy their alignment r...
bool CheckReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
CheckReturn - Analyze the return values of a function, returning true if the return can be performed ...
void AnalyzeReturn(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeReturn - Analyze the returned values of a return, incorporating info about the result values i...
void AnalyzeCallOperands(const SmallVectorImpl< ISD::OutputArg > &Outs, CCAssignFn Fn)
AnalyzeCallOperands - Analyze the outgoing arguments to a call, incorporating info about the passed v...
void AnalyzeFormalArguments(const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn Fn)
AnalyzeFormalArguments - Analyze an array of argument values, incorporating info about the formals in...
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
unsigned getLocMemOffset() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
A parsed version of the target data layout string, and methods for querying it.
Definition: DataLayout.h:110
A debug info location.
Definition: DebugLoc.h:33
unsigned size() const
Definition: DenseMap.h:99
unsigned getAddressSpace() const
Definition: GlobalValue.h:201
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:652
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:94
bool hasAtomicStore() const LLVM_READONLY
Return true if this atomic instruction stores to memory.
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
This class is used to represent ISD::LOAD nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
static const MCBinaryExpr * createSub(const MCExpr *LHS, const MCExpr *RHS, MCContext &Ctx)
Definition: MCExpr.h:610
Context object for machine code objects.
Definition: MCContext.h:76
MCSymbol * getOrCreateSymbol(const Twine &Name)
Lookup the symbol inside with the specified Name.
Definition: MCContext.cpp:201
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:35
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:386
MCSymbol - Instances of this class represent a symbol name in the MC file, and MCSymbols are created ...
Definition: MCSymbol.h:41
Machine Value Type.
SimpleValueType SimpleTy
uint64_t getScalarSizeInBits() const
unsigned getVectorNumElements() const
static auto integer_valuetypes()
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
static auto vector_valuetypes()
MVT getVectorElementType() const
static MVT getIntegerVT(unsigned BitWidth)
static auto fp_valuetypes()
void transferSuccessorsAndUpdatePHIs(MachineBasicBlock *FromMBB)
Transfers all the successors, as in transferSuccessors, and update PHI operands in the successor bloc...
bool isEHPad() const
Returns true if the block is a landing pad.
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
const BasicBlock * getBasicBlock() const
Return the LLVM basic block that this instance corresponded to originally.
void addSuccessor(MachineBasicBlock *Succ, BranchProbability Prob=BranchProbability::getUnknown())
Add Succ as a successor of this MachineBasicBlock.
void removeSuccessor(MachineBasicBlock *Succ, bool NormalizeSuccProbs=false)
Remove successor from the successors list of this MachineBasicBlock.
succ_reverse_iterator succ_rbegin()
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
void splice(iterator Where, MachineBasicBlock *Other, iterator From)
Take an instruction from MBB 'Other' at the position From, and insert it into this MBB right before '...
succ_reverse_iterator succ_rend()
void setMachineBlockAddressTaken()
Set this block to indicate that its address is used as something other than the target of a terminato...
void setIsEHPad(bool V=true)
Indicates the block is a landing pad.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setFrameAddressIsTaken(bool T)
void setReturnAddressIsTaken(bool s)
int getFunctionContextIndex() const
Return the index for the function context object.
MachineBasicBlock * CreateMachineBasicBlock(const BasicBlock *bb=nullptr)
CreateMachineBasicBlock - Allocate a new MachineBasicBlock.
unsigned getFunctionNumber() const
getFunctionNumber - Return a unique ID for the current function.
MachineJumpTableInfo * getOrCreateJumpTableInfo(unsigned JTEntryKind)
getOrCreateJumpTableInfo - Get the JumpTableInfo for this function, if it does already exist,...
StringRef getName() const
getName - Return the name of the corresponding LLVM function.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
void push_back(MachineBasicBlock *MBB)
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
bool hasCallSiteLandingPad(MCSymbol *Sym)
Return true if the landing pad Eh symbol has an associated call site.
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
void insert(iterator MBBI, MachineBasicBlock *MBB)
SmallVectorImpl< unsigned > & getCallSiteLandingPad(MCSymbol *Sym)
Get the call site indexes for a landing pad EH symbol.
const MachineInstrBuilder & setMemRefs(ArrayRef< MachineMemOperand * > MMOs) const
const MachineInstrBuilder & addExternalSymbol(const char *FnName, unsigned TargetFlags=0) const
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addRegMask(const uint32_t *Mask) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & addJumpTableIndex(unsigned Idx, unsigned TargetFlags=0) const
const MachineInstrBuilder & addMBB(MachineBasicBlock *MBB, unsigned TargetFlags=0) const
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned createJumpTableIndex(const std::vector< MachineBasicBlock * > &DestBBs)
createJumpTableIndex - Create a new jump table.
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
@ EK_BlockAddress
EK_BlockAddress - Each entry is a plain address of block, e.g.: .word LBB123.
Flags
Flags values. These may be or'd together.
@ MOVolatile
The memory access is volatile.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const MCPhysReg * getCalleeSavedRegs() const
Returns list of callee saved registers.
Align getAlign() const
bool isVolatile() const
const MachinePointerInfo & getPointerInfo() const
const SDValue & getChain() const
EVT getMemoryVT() const
Return the type of the in-memory value.
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
Represents one node in the SelectionDAG.
unsigned getOpcode() const
Return the SelectionDAG opcode value for this node.
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
bool isUndef() const
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:225
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:721
SDValue getMergeValues(ArrayRef< SDValue > Ops, const SDLoc &dl)
Create a MERGE_VALUES node from the given operands.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
MachineSDNode * getMachineNode(unsigned Opcode, const SDLoc &dl, EVT VT)
These are used for target selectors to create a new node with specified return type(s),...
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getAtomic(unsigned Opcode, const SDLoc &dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO)
Gets a node for an atomic op, produces result (if relevant) and chain and takes 2 operands.
SDValue getTargetJumpTable(int JTI, EVT VT, unsigned TargetFlags=0)
Definition: SelectionDAG.h:731
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:472
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, bool isTargetGA=false, unsigned TargetFlags=0)
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:473
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:773
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:675
SDValue getTargetBlockAddress(const BlockAddress *BA, EVT VT, int64_t Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:768
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:469
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:799
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
SDValue getRegisterMask(const uint32_t *RegMask)
LLVMContext * getContext() const
Definition: SelectionDAG.h:485
SDValue getTargetExternalSymbol(const char *Sym, EVT VT, unsigned TargetFlags=0)
SDValue getTargetConstantPool(const Constant *C, EVT VT, MaybeAlign Align=std::nullopt, int Offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:737
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:554
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
Definition: SmallPtrSet.h:365
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
Definition: SmallPtrSet.h:450
bool empty() const
Definition: SmallVector.h:94
size_t size() const
Definition: SmallVector.h:91
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
This class is used to represent ISD::STORE nodes.
const SDValue & getBasePtr() const
const SDValue & getOffset() const
const SDValue & getValue() const
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr size_t size() const
size - Get the string size.
Definition: StringRef.h:137
const char * data() const
data - Get a pointer to the start of the string (which may not be null terminated).
Definition: StringRef.h:131
A switch()-like statement whose cases are string literals.
Definition: StringSwitch.h:44
StringSwitch & Case(StringLiteral S, T Value)
Definition: StringSwitch.h:69
R Default(T Value)
Definition: StringSwitch.h:182
Information about stack frame layout on the target.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
TargetInstrInfo - Interface to description of machine instruction set.
void setBooleanVectorContents(BooleanContent Ty)
Specify how the target extends the result of a vector boolean value from a vector of i1 to a wider ty...
void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action)
Indicate that the specified operation does not work with the specified type and indicate what to do a...
LegalizeAction
This enum indicates whether operations are valid for a target, and if not, what action should be used...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
void setMinStackArgumentAlignment(Align Alignment)
Set the minimum stack alignment of an argument.
virtual unsigned getMinimumJumpTableEntries() const
Return lower limit for number of blocks in a jump table.
const TargetMachine & getTargetMachine() const
void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits)
Set the maximum atomic operation size supported by the backend.
void setMinFunctionAlignment(Align Alignment)
Set the target's minimum function alignment.
void setBooleanContents(BooleanContent Ty)
Specify how the target extends the result of integer and floating point boolean values from i1 to a w...
void computeRegisterProperties(const TargetRegisterInfo *TRI)
Once all of the register classes are added, this allows us to compute derived properties we expose.
void addRegisterClass(MVT VT, const TargetRegisterClass *RC)
Add the specified register class as an available regclass for the specified value type.
void setSupportsUnalignedAtomics(bool UnalignedSupported)
Sets whether unaligned atomic operations are supported.
virtual bool isJumpTableRelative() const
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
void setLibcallName(RTLIB::Libcall Call, const char *Name)
Rename the default libcall routine name for the specified libcall.
void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified truncating store does not work with the specified type and indicate what ...
void setMinCmpXchgSizeInBits(unsigned SizeInBits)
Sets the minimum cmpxchg or ll/sc size supported by the backend.
void setStackPointerRegisterToSaveRestore(Register R)
If set to a physical register, this specifies the register that llvm.savestack/llvm....
AtomicExpansionKind
Enum that specifies what an atomic load/AtomicRMWInst is expanded to, if at all.
void setTargetDAGCombine(ArrayRef< ISD::NodeType > NTs)
Targets should invoke this method for each target independent node that they want to provide a custom...
void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, LegalizeAction Action)
Indicate that the specified load with extension does not work with the specified type and indicate wh...
std::vector< ArgListEntry > ArgListTy
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual ConstraintType getConstraintType(StringRef Constraint) const
Given a constraint, return the type of constraint it is for this target.
std::pair< SDValue, SDValue > LowerCallTo(CallLoweringInfo &CLI) const
This function lowers an abstract call to a function into an actual call.
bool isPositionIndependent() const
virtual std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const
Given a physical register constraint (e.g.
bool verifyReturnAddressArgumentIsConstant(SDValue Op, SelectionDAG &DAG) const
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
Primary interface to the complete machine description for the target machine.
Definition: TargetMachine.h:78
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition: Twine.h:81
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
static Type * getVoidTy(LLVMContext &C)
Value * getOperand(unsigned i) const
Definition: User.h:169
SDValue getBroadcast(EVT ResultVT, SDValue Scalar, SDValue AVL) const
SDValue getNode(unsigned OC, SDVTList VTL, ArrayRef< SDValue > OpV, std::optional< SDNodeFlags > Flags=std::nullopt) const
getNode {
Definition: VECustomDAG.h:156
SDValue getUNDEF(EVT VT) const
Definition: VECustomDAG.h:180
SDValue getConstant(uint64_t Val, EVT VT, bool IsTarget=false, bool IsOpaque=false) const
bool hasBP(const MachineFunction &MF) const
Register getGlobalBaseReg(MachineFunction *MF) const
} Optimization
@ VK_VE_GOTOFF_HI32
Definition: VEMCExpr.h:34
@ VK_VE_GOTOFF_LO32
Definition: VEMCExpr.h:35
bool enableVPU() const
Definition: VESubtarget.h:65
unsigned getRsaSize() const
Get the size of RSA, return address, and frame pointer as described in VEFrameLowering....
Definition: VESubtarget.h:79
const VEInstrInfo * getInstrInfo() const override
Definition: VESubtarget.h:51
const VEFrameLowering * getFrameLowering() const override
Definition: VESubtarget.h:52
const VERegisterInfo * getRegisterInfo() const override
Definition: VESubtarget.h:55
SDValue splitMaskArithmetic(SDValue Op, SelectionDAG &DAG) const
SDValue lowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) const
std::pair< unsigned, const TargetRegisterClass * > getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const override
Given a physical register constraint (e.g.
const char * getTargetNodeName(unsigned Opcode) const override
This method returns the name of a target specific DAG node.
bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const override
Return true if folding a constant offset with the given GlobalAddress is legal.
SDValue lowerToVVP(SDValue Op, SelectionDAG &DAG) const
} Custom Inserter
SDValue lowerJumpTable(SDValue Op, SelectionDAG &DAG) const
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
getSetCCResultType - Return the ISD::SETCC ValueType
SDValue lowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const
bool hasAndNot(SDValue Y) const override
Return true if the target has a bitwise and-not operation: X = ~A & B This can be used to simplify se...
SDValue lowerVAARG(SDValue Op, SelectionDAG &DAG) const
SDValue combineSelect(SDNode *N, DAGCombinerInfo &DCI) const
bool isFPImmLegal(const APFloat &Imm, EVT VT, bool ForCodeSize) const override
isFPImmLegal - Returns true if the target can instruction select the specified FP immediate natively.
VETargetLowering(const TargetMachine &TM, const VESubtarget &STI)
Instruction * emitLeadingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Custom Lower {.
SDValue lowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG) const
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &ArgsFlags, LLVMContext &Context) const override
This hook should be implemented to check whether the return values described by the Outs array can fi...
SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const override
This callback is invoked for operations that are unsupported by the target, which are registered to u...
SDValue lowerLOAD(SDValue Op, SelectionDAG &DAG) const
MachineBasicBlock * emitEHSjLjLongJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const override
} VVPLowering
TargetLoweringBase::AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const override
Returns how the IR-level AtomicExpand pass should expand the given AtomicRMW, if at all.
SDValue lowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const
SDValue combineSelectCC(SDNode *N, DAGCombinerInfo &DCI) const
SDValue lowerEH_SJLJ_SETUP_DISPATCH(SDValue Op, SelectionDAG &DAG) const
unsigned getMinimumJumpTableEntries() const override
} Inline Assembly
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const
SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::OutputArg > &Outs, const SmallVectorImpl< SDValue > &OutVals, const SDLoc &dl, SelectionDAG &DAG) const override
This hook must be implemented to lower outgoing return values, described by the Outs array,...
MachineBasicBlock * emitSjLjDispatchBlock(MachineInstr &MI, MachineBasicBlock *BB) const
Instruction * emitTrailingFence(IRBuilderBase &Builder, Instruction *Inst, AtomicOrdering Ord) const override
Register prepareMBB(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, MachineBasicBlock *TargetBB, const DebugLoc &DL) const
void setupEntryBlockForSjLj(MachineInstr &MI, MachineBasicBlock *MBB, MachineBasicBlock *DispatchBB, int FI, int Offset) const
SDValue lowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const
MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const override
Custom Inserter {.
MachineBasicBlock * emitEHSjLjSetJmp(MachineInstr &MI, MachineBasicBlock *MBB) const
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align A, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Returns true if the target allows unaligned memory accesses of the specified type.
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const
SDValue lowerSTORE(SDValue Op, SelectionDAG &DAG) const
TargetLoweringBase::LegalizeAction getCustomOperationAction(SDNode &) const override
Custom Lower {.
SDValue makeAddress(SDValue Op, SelectionDAG &DAG) const
SDValue lowerConstantPool(SDValue Op, SelectionDAG &DAG) const
SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const
SDValue legalizeInternalVectorOp(SDValue Op, SelectionDAG &DAG) const
Register prepareSymbol(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, StringRef Symbol, const DebugLoc &DL, bool IsLocal, bool IsCall) const
SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl< ISD::InputArg > &Ins, const SDLoc &dl, SelectionDAG &DAG, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower the incoming (formal) arguments, described by the Ins array,...
void ReplaceNodeResults(SDNode *N, SmallVectorImpl< SDValue > &Results, SelectionDAG &DAG) const override
} Custom Lower
SDValue lowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) const
SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl< SDValue > &InVals) const override
This hook must be implemented to lower calls into the specified DAG.
SDValue withTargetFlags(SDValue Op, unsigned TF, SelectionDAG &DAG) const
} Custom DAGCombine
SDValue combineTRUNCATE(SDNode *N, DAGCombinerInfo &DCI) const
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned Uid, MCContext &Ctx) const override
SDValue lowerVASTART(SDValue Op, SelectionDAG &DAG) const
unsigned getJumpTableEncoding() const override
JumpTable for VE.
SDValue lowerATOMIC_SWAP(SDValue Op, SelectionDAG &DAG) const
SDValue lowerBlockAddress(SDValue Op, SelectionDAG &DAG) const
Register getRegisterByName(const char *RegName, LLT VT, const MachineFunction &MF) const override
Return the register ID of the name passed in.
SDValue makeHiLoPair(SDValue Op, unsigned HiTF, unsigned LoTF, SelectionDAG &DAG) const
ConstraintType getConstraintType(StringRef Constraint) const override
Inline Assembly {.
SDValue lowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
SDValue lowerToTLSGeneralDynamicModel(SDValue Op, SelectionDAG &DAG) const
LLVM Value Representation.
Definition: Value.h:74
iterator_range< use_iterator > uses()
Definition: Value.h:376
self_iterator getIterator()
Definition: ilist_node.h:82
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
Definition: CallingConv.h:66
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ SETCC
SetCC operator - This evaluates to a true value iff the condition is true.
Definition: ISDOpcodes.h:749
@ STACKRESTORE
STACKRESTORE has two operands: an input chain and a pointer to restore to. It returns an output chain.
Definition: ISDOpcodes.h:1069
@ STACKSAVE
STACKSAVE - STACKSAVE has one operand, an input chain.
Definition: ISDOpcodes.h:1065
@ CTLZ_ZERO_UNDEF
Definition: ISDOpcodes.h:722
@ VECREDUCE_SMIN
Definition: ISDOpcodes.h:1303
@ EH_SJLJ_LONGJMP
OUTCHAIN = EH_SJLJ_LONGJMP(INCHAIN, buffer) This corresponds to the eh.sjlj.longjmp intrinsic.
Definition: ISDOpcodes.h:147
@ SMUL_LOHI
SMUL_LOHI/UMUL_LOHI - Multiply two integers of type iN, producing a signed/unsigned value of type i[2...
Definition: ISDOpcodes.h:250
@ ATOMIC_LOAD_NAND
Definition: ISDOpcodes.h:1206
@ BSWAP
Byte Swap and Counting operators.
Definition: ISDOpcodes.h:713
@ VAEND
VAEND, VASTART - VAEND and VASTART have three operands: an input chain, pointer, and a SRCVALUE.
Definition: ISDOpcodes.h:1098
@ ConstantFP
Definition: ISDOpcodes.h:77
@ ATOMIC_LOAD_MAX
Definition: ISDOpcodes.h:1208
@ ATOMIC_LOAD_UMIN
Definition: ISDOpcodes.h:1209
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:239
@ LOAD
LOAD and STORE have token chains as their first operand, then the same operands as an LLVM load/store...
Definition: ISDOpcodes.h:978
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:779
@ FMA
FMA - Perform a * b + c with no intermediate rounding step.
Definition: ISDOpcodes.h:482
@ RETURNADDR
Definition: ISDOpcodes.h:95
@ EH_SJLJ_SETUP_DISPATCH
OUTCHAIN = EH_SJLJ_SETUP_DISPATCH(INCHAIN) The target initializes the dispatch table here.
Definition: ISDOpcodes.h:151
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ ATOMIC_CMP_SWAP_WITH_SUCCESS
Val, Success, OUTCHAIN = ATOMIC_CMP_SWAP_WITH_SUCCESS(INCHAIN, ptr, cmp, swap) N.b. this is still a strong cmpxchg operation, so Success == "Val == cmp".
Definition: ISDOpcodes.h:1191
@ SINT_TO_FP
[SU]INT_TO_FP - These operators convert integers (whose interpreted sign depends on the first letter)...
Definition: ISDOpcodes.h:786
@ MEMBARRIER
MEMBARRIER - Compiler barrier only; generate a no-op.
Definition: ISDOpcodes.h:1165
@ ATOMIC_FENCE
OUTCHAIN = ATOMIC_FENCE(INCHAIN, ordering, scope) This corresponds to the fence instruction.
Definition: ISDOpcodes.h:1170
@ SDIVREM
SDIVREM/UDIVREM - Divide two integers and produce both a quotient and remainder result.
Definition: ISDOpcodes.h:255
@ VECREDUCE_SMAX
Definition: ISDOpcodes.h:1302
@ FP16_TO_FP
FP16_TO_FP, FP_TO_FP16 - These operators are used to perform promotions and truncation for half-preci...
Definition: ISDOpcodes.h:908
@ ATOMIC_LOAD_OR
Definition: ISDOpcodes.h:1204
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:898
@ ATOMIC_LOAD_XOR
Definition: ISDOpcodes.h:1205
@ GlobalTLSAddress
Definition: ISDOpcodes.h:79
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:773
@ FNEG
Perform various unary floating-point operations inspired by libm.
Definition: ISDOpcodes.h:923
@ BR_CC
BR_CC - Conditional branch.
Definition: ISDOpcodes.h:1020
@ ATOMIC_LOAD_MIN
Definition: ISDOpcodes.h:1207
@ BR_JT
BR_JT - Jumptable branch.
Definition: ISDOpcodes.h:1003
@ SELECT
Select(COND, TRUEVAL, FALSEVAL).
Definition: ISDOpcodes.h:726
@ VECREDUCE_UMAX
Definition: ISDOpcodes.h:1304
@ VACOPY
VACOPY - VACOPY has 5 operands: an input chain, a destination pointer, a source pointer,...
Definition: ISDOpcodes.h:1094
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:208
@ VECREDUCE_ADD
Integer reductions may have a result type larger than the vector element type.
Definition: ISDOpcodes.h:1297
@ MULHU
MULHU/MULHS - Multiply high - Multiply two integers of type iN, producing an unsigned/signed value of...
Definition: ISDOpcodes.h:650
@ SHL
Shift and rotation operations.
Definition: ISDOpcodes.h:704
@ ATOMIC_LOAD_CLR
Definition: ISDOpcodes.h:1203
@ ATOMIC_LOAD_AND
Definition: ISDOpcodes.h:1202
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:534
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...