LLVM 20.0.0git
X86ISelLoweringCall.cpp
Go to the documentation of this file.
1//===- llvm/lib/Target/X86/X86ISelLoweringCall.cpp - Call lowering --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements the lowering of LLVM calls to DAG nodes.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86.h"
15#include "X86CallingConv.h"
16#include "X86FrameLowering.h"
17#include "X86ISelLowering.h"
18#include "X86InstrBuilder.h"
20#include "X86TargetMachine.h"
21#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Module.h"
29
30#define DEBUG_TYPE "x86-isel"
31
32using namespace llvm;
33
34STATISTIC(NumTailCalls, "Number of tail calls");
35
36/// Call this when the user attempts to do something unsupported, like
37/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
38/// report_fatal_error, so calling code should attempt to recover without
39/// crashing.
40static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
41 const char *Msg) {
43 DAG.getContext()->diagnose(
45}
46
47/// Returns true if a CC can dynamically exclude a register from the list of
48/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
49/// the return registers.
51 switch (CC) {
52 default:
53 return false;
57 return true;
58 }
59}
60
61/// Returns true if a CC can dynamically exclude a register from the list of
62/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
63/// the parameters.
66}
67
68static std::pair<MVT, unsigned>
70 const X86Subtarget &Subtarget) {
71 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
72 // convention is one that uses k registers.
73 if (NumElts == 2)
74 return {MVT::v2i64, 1};
75 if (NumElts == 4)
76 return {MVT::v4i32, 1};
77 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
79 return {MVT::v8i16, 1};
80 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
82 return {MVT::v16i8, 1};
83 // v32i1 passes in ymm unless we have BWI and the calling convention is
84 // regcall.
85 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
86 return {MVT::v32i8, 1};
87 // Split v64i1 vectors if we don't have v64i8 available.
88 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
89 if (Subtarget.useAVX512Regs())
90 return {MVT::v64i8, 1};
91 return {MVT::v32i8, 2};
92 }
93
94 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
95 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
96 NumElts > 64)
97 return {MVT::i8, NumElts};
98
100}
101
104 EVT VT) const {
105 if (VT.isVector()) {
106 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
107 unsigned NumElts = VT.getVectorNumElements();
108
109 MVT RegisterVT;
110 unsigned NumRegisters;
111 std::tie(RegisterVT, NumRegisters) =
112 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
113 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
114 return RegisterVT;
115 }
116
117 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
118 return MVT::v8f16;
119 }
120
121 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
122 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
123 !Subtarget.hasX87())
124 return MVT::i32;
125
126 if (isTypeLegal(MVT::f16)) {
127 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
129 Context, CC, VT.changeVectorElementType(MVT::f16));
130
131 if (VT == MVT::bf16)
132 return MVT::f16;
133 }
134
136}
137
140 EVT VT) const {
141 if (VT.isVector()) {
142 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
143 unsigned NumElts = VT.getVectorNumElements();
144
145 MVT RegisterVT;
146 unsigned NumRegisters;
147 std::tie(RegisterVT, NumRegisters) =
148 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
149 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
150 return NumRegisters;
151 }
152
153 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
154 return 1;
155 }
156
157 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
158 // x87 is disabled.
159 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
160 if (VT == MVT::f64)
161 return 2;
162 if (VT == MVT::f80)
163 return 3;
164 }
165
166 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
167 isTypeLegal(MVT::f16))
168 return getNumRegistersForCallingConv(Context, CC,
169 VT.changeVectorElementType(MVT::f16));
170
172}
173
175 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
176 unsigned &NumIntermediates, MVT &RegisterVT) const {
177 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
178 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
179 Subtarget.hasAVX512() &&
181 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
182 VT.getVectorNumElements() > 64)) {
183 RegisterVT = MVT::i8;
184 IntermediateVT = MVT::i1;
185 NumIntermediates = VT.getVectorNumElements();
186 return NumIntermediates;
187 }
188
189 // Split v64i1 vectors if we don't have v64i8 available.
190 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
192 RegisterVT = MVT::v32i8;
193 IntermediateVT = MVT::v32i1;
194 NumIntermediates = 2;
195 return 2;
196 }
197
198 // Split vNbf16 vectors according to vNf16.
199 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
200 isTypeLegal(MVT::f16))
201 VT = VT.changeVectorElementType(MVT::f16);
202
203 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
204 NumIntermediates, RegisterVT);
205}
206
208 LLVMContext& Context,
209 EVT VT) const {
210 if (!VT.isVector())
211 return MVT::i8;
212
213 if (Subtarget.hasAVX512()) {
214 // Figure out what this type will be legalized to.
215 EVT LegalVT = VT;
216 while (getTypeAction(Context, LegalVT) != TypeLegal)
217 LegalVT = getTypeToTransformTo(Context, LegalVT);
218
219 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
220 if (LegalVT.getSimpleVT().is512BitVector())
221 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
222
223 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
224 // If we legalized to less than a 512-bit vector, then we will use a vXi1
225 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
226 // vXi16/vXi8.
227 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
228 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
229 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
230 }
231 }
232
234}
235
236/// Helper for getByValTypeAlignment to determine
237/// the desired ByVal argument alignment.
238static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
239 if (MaxAlign == 16)
240 return;
241 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
242 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
243 MaxAlign = Align(16);
244 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
245 Align EltAlign;
246 getMaxByValAlign(ATy->getElementType(), EltAlign);
247 if (EltAlign > MaxAlign)
248 MaxAlign = EltAlign;
249 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
250 for (auto *EltTy : STy->elements()) {
251 Align EltAlign;
252 getMaxByValAlign(EltTy, EltAlign);
253 if (EltAlign > MaxAlign)
254 MaxAlign = EltAlign;
255 if (MaxAlign == 16)
256 break;
257 }
258 }
259}
260
261/// Return the desired alignment for ByVal aggregate
262/// function arguments in the caller parameter area. For X86, aggregates
263/// that contain SSE vectors are placed at 16-byte boundaries while the rest
264/// are at 4-byte boundaries.
266 const DataLayout &DL) const {
267 if (Subtarget.is64Bit())
268 return std::max(DL.getABITypeAlign(Ty), Align::Constant<8>());
269
270 Align Alignment(4);
271 if (Subtarget.hasSSE1())
272 getMaxByValAlign(Ty, Alignment);
273 return Alignment;
274}
275
276/// It returns EVT::Other if the type should be determined using generic
277/// target-independent logic.
278/// For vector ops we check that the overall size isn't larger than our
279/// preferred vector width.
281 const MemOp &Op, const AttributeList &FuncAttributes) const {
282 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
283 if (Op.size() >= 16 &&
284 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
285 // FIXME: Check if unaligned 64-byte accesses are slow.
286 if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
287 (Subtarget.getPreferVectorWidth() >= 512)) {
288 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
289 }
290 // FIXME: Check if unaligned 32-byte accesses are slow.
291 if (Op.size() >= 32 && Subtarget.hasAVX() &&
292 Subtarget.useLight256BitInstructions()) {
293 // Although this isn't a well-supported type for AVX1, we'll let
294 // legalization and shuffle lowering produce the optimal codegen. If we
295 // choose an optimal type with a vector element larger than a byte,
296 // getMemsetStores() may create an intermediate splat (using an integer
297 // multiply) before we splat as a vector.
298 return MVT::v32i8;
299 }
300 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
301 return MVT::v16i8;
302 // TODO: Can SSE1 handle a byte vector?
303 // If we have SSE1 registers we should be able to use them.
304 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
305 (Subtarget.getPreferVectorWidth() >= 128))
306 return MVT::v4f32;
307 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
308 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
309 // Do not use f64 to lower memcpy if source is string constant. It's
310 // better to use i32 to avoid the loads.
311 // Also, do not use f64 to lower memset unless this is a memset of zeros.
312 // The gymnastics of splatting a byte value into an XMM register and then
313 // only using 8-byte stores (because this is a CPU with slow unaligned
314 // 16-byte accesses) makes that a loser.
315 return MVT::f64;
316 }
317 }
318 // This is a compromise. If we reach here, unaligned accesses may be slow on
319 // this target. However, creating smaller, aligned accesses could be even
320 // slower and would certainly be a lot more code.
321 if (Subtarget.is64Bit() && Op.size() >= 8)
322 return MVT::i64;
323 return MVT::i32;
324}
325
327 if (VT == MVT::f32)
328 return Subtarget.hasSSE1();
329 if (VT == MVT::f64)
330 return Subtarget.hasSSE2();
331 return true;
332}
333
334static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
335 return (8 * Alignment.value()) % SizeInBits == 0;
336}
337
339 if (isBitAligned(Alignment, VT.getSizeInBits()))
340 return true;
341 switch (VT.getSizeInBits()) {
342 default:
343 // 8-byte and under are always assumed to be fast.
344 return true;
345 case 128:
346 return !Subtarget.isUnalignedMem16Slow();
347 case 256:
348 return !Subtarget.isUnalignedMem32Slow();
349 // TODO: What about AVX-512 (512-bit) accesses?
350 }
351}
352
354 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
355 unsigned *Fast) const {
356 if (Fast)
357 *Fast = isMemoryAccessFast(VT, Alignment);
358 // NonTemporal vector memory ops must be aligned.
359 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
360 // NT loads can only be vector aligned, so if it's less aligned than the
361 // minimum vector size (which we can split the vector down to), we might as
362 // well use a regular unaligned vector load.
363 // We don't have any NT loads pre-SSE41.
364 if (!!(Flags & MachineMemOperand::MOLoad))
365 return (Alignment < 16 || !Subtarget.hasSSE41());
366 return false;
367 }
368 // Misaligned accesses of any size are always allowed.
369 return true;
370}
371
373 const DataLayout &DL, EVT VT,
374 unsigned AddrSpace, Align Alignment,
376 unsigned *Fast) const {
377 if (Fast)
378 *Fast = isMemoryAccessFast(VT, Alignment);
379 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
380 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
381 /*Fast=*/nullptr))
382 return true;
383 // NonTemporal vector memory ops are special, and must be aligned.
384 if (!isBitAligned(Alignment, VT.getSizeInBits()))
385 return false;
386 switch (VT.getSizeInBits()) {
387 case 128:
388 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
389 return true;
390 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
391 return true;
392 return false;
393 case 256:
394 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
395 return true;
396 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
397 return true;
398 return false;
399 case 512:
400 if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
401 return true;
402 return false;
403 default:
404 return false; // Don't have NonTemporal vector memory ops of this size.
405 }
406 }
407 return true;
408}
409
410/// Return the entry encoding for a jump table in the
411/// current function. The returned value is a member of the
412/// MachineJumpTableInfo::JTEntryKind enum.
414 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
415 // symbol.
416 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
418 if (isPositionIndependent() &&
420 !Subtarget.isTargetCOFF())
422
423 // Otherwise, use the normal jump table encoding heuristics.
425}
426
428 return Subtarget.useSoftFloat();
429}
430
432 ArgListTy &Args) const {
433
434 // Only relabel X86-32 for C / Stdcall CCs.
435 if (Subtarget.is64Bit())
436 return;
438 return;
439 unsigned ParamRegs = 0;
440 if (auto *M = MF->getFunction().getParent())
441 ParamRegs = M->getNumberRegisterParameters();
442
443 // Mark the first N int arguments as having reg
444 for (auto &Arg : Args) {
445 Type *T = Arg.Ty;
446 if (T->isIntOrPtrTy())
447 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
448 unsigned numRegs = 1;
449 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
450 numRegs = 2;
451 if (ParamRegs < numRegs)
452 return;
453 ParamRegs -= numRegs;
454 Arg.IsInReg = true;
455 }
456 }
457}
458
459const MCExpr *
461 const MachineBasicBlock *MBB,
462 unsigned uid,MCContext &Ctx) const{
464 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
465 // entries.
468}
469
470/// Returns relocation base for the given PIC jumptable.
472 SelectionDAG &DAG) const {
473 if (!Subtarget.is64Bit())
474 // This doesn't have SDLoc associated with it, but is not really the
475 // same as a Register.
476 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
478 return Table;
479}
480
481/// This returns the relocation base for the given PIC jumptable,
482/// the same as getPICJumpTableRelocBase, but as an MCExpr.
484getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
485 MCContext &Ctx) const {
486 // X86-64 uses RIP relative addressing based on the jump table label.
487 if (Subtarget.isPICStyleRIPRel() ||
488 (Subtarget.is64Bit() &&
491
492 // Otherwise, the reference is relative to the PIC base.
493 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
494}
495
496std::pair<const TargetRegisterClass *, uint8_t>
498 MVT VT) const {
499 const TargetRegisterClass *RRC = nullptr;
500 uint8_t Cost = 1;
501 switch (VT.SimpleTy) {
502 default:
504 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
505 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
506 break;
507 case MVT::x86mmx:
508 RRC = &X86::VR64RegClass;
509 break;
510 case MVT::f32: case MVT::f64:
511 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
512 case MVT::v4f32: case MVT::v2f64:
513 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
514 case MVT::v8f32: case MVT::v4f64:
515 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
516 case MVT::v16f32: case MVT::v8f64:
517 RRC = &X86::VR128XRegClass;
518 break;
519 }
520 return std::make_pair(RRC, Cost);
521}
522
523unsigned X86TargetLowering::getAddressSpace() const {
524 if (Subtarget.is64Bit())
526 : X86AS::FS;
527 return X86AS::GS;
528}
529
530static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
531 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
532 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
533}
534
536 int Offset, unsigned AddressSpace) {
538 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
540}
541
543 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
544 // tcbhead_t; use it instead of the usual global variable (see
545 // sysdeps/{i386,x86_64}/nptl/tls.h)
546 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
547 unsigned AddressSpace = getAddressSpace();
548
549 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
550 if (Subtarget.isTargetFuchsia())
551 return SegmentOffset(IRB, 0x10, AddressSpace);
552
553 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
554 // Specially, some users may customize the base reg and offset.
555 int Offset = M->getStackProtectorGuardOffset();
556 // If we don't set -stack-protector-guard-offset value:
557 // %fs:0x28, unless we're using a Kernel code model, in which case
558 // it's %gs:0x28. gs:0x14 on i386.
559 if (Offset == INT_MAX)
560 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
561
562 StringRef GuardReg = M->getStackProtectorGuardReg();
563 if (GuardReg == "fs")
565 else if (GuardReg == "gs")
567
568 // Use the guard symbol if the user specified one.
569 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
570 if (!GuardSymb.empty()) {
571 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
572 if (!GV) {
573 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
574 : Type::getInt32Ty(M->getContext());
575 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
576 nullptr, GuardSymb, nullptr,
578 if (!Subtarget.isTargetDarwin())
579 GV->setDSOLocal(M->getDirectAccessExternalData());
580 }
581 return GV;
582 }
583
584 return SegmentOffset(IRB, Offset, AddressSpace);
585 }
587}
588
590 // MSVC CRT provides functionalities for stack protection.
591 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
593 // MSVC CRT has a global variable holding security cookie.
594 M.getOrInsertGlobal("__security_cookie",
595 PointerType::getUnqual(M.getContext()));
596
597 // MSVC CRT has a function to validate security cookie.
598 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
599 "__security_check_cookie", Type::getVoidTy(M.getContext()),
600 PointerType::getUnqual(M.getContext()));
601 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
602 F->setCallingConv(CallingConv::X86_FastCall);
603 F->addParamAttr(0, Attribute::AttrKind::InReg);
604 }
605 return;
606 }
607
608 StringRef GuardMode = M.getStackProtectorGuard();
609
610 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
611 if ((GuardMode == "tls" || GuardMode.empty()) &&
613 return;
615}
616
618 // MSVC CRT has a global variable holding security cookie.
619 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
621 return M.getGlobalVariable("__security_cookie");
622 }
624}
625
627 // MSVC CRT has a function to validate security cookie.
628 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
630 return M.getFunction("__security_check_cookie");
631 }
633}
634
635Value *
637 // Android provides a fixed TLS slot for the SafeStack pointer. See the
638 // definition of TLS_SLOT_SAFESTACK in
639 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
640 if (Subtarget.isTargetAndroid()) {
641 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
642 // %gs:0x24 on i386
643 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
644 return SegmentOffset(IRB, Offset, getAddressSpace());
645 }
646
647 // Fuchsia is similar.
648 if (Subtarget.isTargetFuchsia()) {
649 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
650 return SegmentOffset(IRB, 0x18, getAddressSpace());
651 }
652
654}
655
656//===----------------------------------------------------------------------===//
657// Return Value Calling Convention Implementation
658//===----------------------------------------------------------------------===//
659
660bool X86TargetLowering::CanLowerReturn(
661 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
662 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
664 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
665 return CCInfo.CheckReturn(Outs, RetCC_X86);
666}
667
668const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
669 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
670 return ScratchRegs;
671}
672
673ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
674 static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
675 return RCRegs;
676}
677
678/// Lowers masks values (v*i1) to the local register values
679/// \returns DAG node after lowering to register type
680static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
681 const SDLoc &DL, SelectionDAG &DAG) {
682 EVT ValVT = ValArg.getValueType();
683
684 if (ValVT == MVT::v1i1)
685 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
686 DAG.getIntPtrConstant(0, DL));
687
688 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
689 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
690 // Two stage lowering might be required
691 // bitcast: v8i1 -> i8 / v16i1 -> i16
692 // anyextend: i8 -> i32 / i16 -> i32
693 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
694 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
695 if (ValLoc == MVT::i32)
696 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
697 return ValToCopy;
698 }
699
700 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
701 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
702 // One stage lowering is required
703 // bitcast: v32i1 -> i32 / v64i1 -> i64
704 return DAG.getBitcast(ValLoc, ValArg);
705 }
706
707 return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
708}
709
710/// Breaks v64i1 value into two registers and adds the new node to the DAG
712 const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
713 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
714 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
715 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
716 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
717 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
718 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
719 "The value should reside in two registers");
720
721 // Before splitting the value we cast it to i64
722 Arg = DAG.getBitcast(MVT::i64, Arg);
723
724 // Splitting the value into two i32 types
725 SDValue Lo, Hi;
726 std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
727
728 // Attach the two i32 types into corresponding registers
729 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
730 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
731}
732
734X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
735 bool isVarArg,
737 const SmallVectorImpl<SDValue> &OutVals,
738 const SDLoc &dl, SelectionDAG &DAG) const {
741
742 // In some cases we need to disable registers from the default CSR list.
743 // For example, when they are used as return registers (preserve_* and X86's
744 // regcall) or for argument passing (X86's regcall).
745 bool ShouldDisableCalleeSavedRegister =
746 shouldDisableRetRegFromCSR(CallConv) ||
747 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
748
749 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
750 report_fatal_error("X86 interrupts may not return any value");
751
753 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
754 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
755
757 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
758 ++I, ++OutsIndex) {
759 CCValAssign &VA = RVLocs[I];
760 assert(VA.isRegLoc() && "Can only return in registers!");
761
762 // Add the register to the CalleeSaveDisableRegs list.
763 if (ShouldDisableCalleeSavedRegister)
765
766 SDValue ValToCopy = OutVals[OutsIndex];
767 EVT ValVT = ValToCopy.getValueType();
768
769 // Promote values to the appropriate types.
770 if (VA.getLocInfo() == CCValAssign::SExt)
771 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
772 else if (VA.getLocInfo() == CCValAssign::ZExt)
773 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
774 else if (VA.getLocInfo() == CCValAssign::AExt) {
775 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
776 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
777 else
778 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
779 }
780 else if (VA.getLocInfo() == CCValAssign::BCvt)
781 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
782
784 "Unexpected FP-extend for return value.");
785
786 // Report an error if we have attempted to return a value via an XMM
787 // register and SSE was disabled.
788 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
789 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
790 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
791 } else if (!Subtarget.hasSSE2() &&
792 X86::FR64XRegClass.contains(VA.getLocReg()) &&
793 ValVT == MVT::f64) {
794 // When returning a double via an XMM register, report an error if SSE2 is
795 // not enabled.
796 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
797 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
798 }
799
800 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
801 // the RET instruction and handled by the FP Stackifier.
802 if (VA.getLocReg() == X86::FP0 ||
803 VA.getLocReg() == X86::FP1) {
804 // If this is a copy from an xmm register to ST(0), use an FPExtend to
805 // change the value to the FP stack register class.
807 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
808 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
809 // Don't emit a copytoreg.
810 continue;
811 }
812
813 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
814 // which is returned in RAX / RDX.
815 if (Subtarget.is64Bit()) {
816 if (ValVT == MVT::x86mmx) {
817 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
818 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
819 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
820 ValToCopy);
821 // If we don't have SSE2 available, convert to v4f32 so the generated
822 // register is legal.
823 if (!Subtarget.hasSSE2())
824 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
825 }
826 }
827 }
828
829 if (VA.needsCustom()) {
830 assert(VA.getValVT() == MVT::v64i1 &&
831 "Currently the only custom case is when we split v64i1 to 2 regs");
832
833 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
834 Subtarget);
835
836 // Add the second register to the CalleeSaveDisableRegs list.
837 if (ShouldDisableCalleeSavedRegister)
838 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
839 } else {
840 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
841 }
842 }
843
844 SDValue Glue;
846 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
847 // Operand #1 = Bytes To Pop
848 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
849 MVT::i32));
850
851 // Copy the result values into the output registers.
852 for (auto &RetVal : RetVals) {
853 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
854 RetOps.push_back(RetVal.second);
855 continue; // Don't emit a copytoreg.
856 }
857
858 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
859 Glue = Chain.getValue(1);
860 RetOps.push_back(
861 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
862 }
863
864 // Swift calling convention does not require we copy the sret argument
865 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
866
867 // All x86 ABIs require that for returning structs by value we copy
868 // the sret argument into %rax/%eax (depending on ABI) for the return.
869 // We saved the argument into a virtual register in the entry block,
870 // so now we copy the value out and into %rax/%eax.
871 //
872 // Checking Function.hasStructRetAttr() here is insufficient because the IR
873 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
874 // false, then an sret argument may be implicitly inserted in the SelDAG. In
875 // either case FuncInfo->setSRetReturnReg() will have been called.
876 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
877 // When we have both sret and another return value, we should use the
878 // original Chain stored in RetOps[0], instead of the current Chain updated
879 // in the above loop. If we only have sret, RetOps[0] equals Chain.
880
881 // For the case of sret and another return value, we have
882 // Chain_0 at the function entry
883 // Chain_1 = getCopyToReg(Chain_0) in the above loop
884 // If we use Chain_1 in getCopyFromReg, we will have
885 // Val = getCopyFromReg(Chain_1)
886 // Chain_2 = getCopyToReg(Chain_1, Val) from below
887
888 // getCopyToReg(Chain_0) will be glued together with
889 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
890 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
891 // Data dependency from Unit B to Unit A due to usage of Val in
892 // getCopyToReg(Chain_1, Val)
893 // Chain dependency from Unit A to Unit B
894
895 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
896 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
898
899 Register RetValReg
900 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
901 X86::RAX : X86::EAX;
902 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
903 Glue = Chain.getValue(1);
904
905 // RAX/EAX now acts like a return value.
906 RetOps.push_back(
907 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
908
909 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
910 // this however for preserve_most/preserve_all to minimize the number of
911 // callee-saved registers for these CCs.
912 if (ShouldDisableCalleeSavedRegister &&
913 CallConv != CallingConv::PreserveAll &&
914 CallConv != CallingConv::PreserveMost)
916 }
917
918 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
919 const MCPhysReg *I =
920 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
921 if (I) {
922 for (; *I; ++I) {
923 if (X86::GR64RegClass.contains(*I))
924 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
925 else
926 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
927 }
928 }
929
930 RetOps[0] = Chain; // Update chain.
931
932 // Add the glue if we have it.
933 if (Glue.getNode())
934 RetOps.push_back(Glue);
935
937 if (CallConv == CallingConv::X86_INTR)
938 opcode = X86ISD::IRET;
939 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
940}
941
942bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
943 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
944 return false;
945
946 SDValue TCChain = Chain;
947 SDNode *Copy = *N->user_begin();
948 if (Copy->getOpcode() == ISD::CopyToReg) {
949 // If the copy has a glue operand, we conservatively assume it isn't safe to
950 // perform a tail call.
951 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
952 return false;
953 TCChain = Copy->getOperand(0);
954 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
955 return false;
956
957 bool HasRet = false;
958 for (const SDNode *U : Copy->users()) {
959 if (U->getOpcode() != X86ISD::RET_GLUE)
960 return false;
961 // If we are returning more than one value, we can definitely
962 // not make a tail call see PR19530
963 if (U->getNumOperands() > 4)
964 return false;
965 if (U->getNumOperands() == 4 &&
966 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
967 return false;
968 HasRet = true;
969 }
970
971 if (!HasRet)
972 return false;
973
974 Chain = TCChain;
975 return true;
976}
977
978EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
979 ISD::NodeType ExtendKind) const {
980 MVT ReturnMVT = MVT::i32;
981
982 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
983 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
984 // The ABI does not require i1, i8 or i16 to be extended.
985 //
986 // On Darwin, there is code in the wild relying on Clang's old behaviour of
987 // always extending i8/i16 return values, so keep doing that for now.
988 // (PR26665).
989 ReturnMVT = MVT::i8;
990 }
991
992 EVT MinVT = getRegisterType(Context, ReturnMVT);
993 return VT.bitsLT(MinVT) ? MinVT : VT;
994}
995
996/// Reads two 32 bit registers and creates a 64 bit mask value.
997/// \param VA The current 32 bit value that needs to be assigned.
998/// \param NextVA The next 32 bit value that needs to be assigned.
999/// \param Root The parent DAG node.
1000/// \param [in,out] InGlue Represents the SDValue in the parent DAG node for
1001/// glue purposes. In the case the DAG is already using
1002/// physical register instead of virtual, we should glue
1003/// our new SDValue to the InGlue SDValue.
1004/// \return a new SDValue of size 64 bit.
1006 SDValue &Root, SelectionDAG &DAG,
1007 const SDLoc &DL, const X86Subtarget &Subtarget,
1008 SDValue *InGlue = nullptr) {
1009 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1010 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1011 assert(VA.getValVT() == MVT::v64i1 &&
1012 "Expecting first location of 64 bit width type");
1013 assert(NextVA.getValVT() == VA.getValVT() &&
1014 "The locations should have the same type");
1015 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1016 "The values should reside in two registers");
1017
1018 SDValue Lo, Hi;
1019 SDValue ArgValueLo, ArgValueHi;
1020
1022 const TargetRegisterClass *RC = &X86::GR32RegClass;
1023
1024 // Read a 32 bit value from the registers.
1025 if (nullptr == InGlue) {
1026 // When no physical register is present,
1027 // create an intermediate virtual register.
1028 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1029 ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1030 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1031 ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1032 } else {
1033 // When a physical register is available read the value from it and glue
1034 // the reads together.
1035 ArgValueLo =
1036 DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1037 *InGlue = ArgValueLo.getValue(2);
1038 ArgValueHi =
1039 DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1040 *InGlue = ArgValueHi.getValue(2);
1041 }
1042
1043 // Convert the i32 type into v32i1 type.
1044 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1045
1046 // Convert the i32 type into v32i1 type.
1047 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1048
1049 // Concatenate the two values together.
1050 return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1051}
1052
1053/// The function will lower a register of various sizes (8/16/32/64)
1054/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
1055/// \returns a DAG node contains the operand after lowering to mask type.
1056static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1057 const EVT &ValLoc, const SDLoc &DL,
1058 SelectionDAG &DAG) {
1059 SDValue ValReturned = ValArg;
1060
1061 if (ValVT == MVT::v1i1)
1062 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1063
1064 if (ValVT == MVT::v64i1) {
1065 // In 32 bit machine, this case is handled by getv64i1Argument
1066 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1067 // In 64 bit machine, There is no need to truncate the value only bitcast
1068 } else {
1069 MVT MaskLenVT;
1070 switch (ValVT.getSimpleVT().SimpleTy) {
1071 case MVT::v8i1:
1072 MaskLenVT = MVT::i8;
1073 break;
1074 case MVT::v16i1:
1075 MaskLenVT = MVT::i16;
1076 break;
1077 case MVT::v32i1:
1078 MaskLenVT = MVT::i32;
1079 break;
1080 default:
1081 llvm_unreachable("Expecting a vector of i1 types");
1082 }
1083
1084 ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1085 }
1086 return DAG.getBitcast(ValVT, ValReturned);
1087}
1088
1089/// Lower the result values of a call into the
1090/// appropriate copies out of appropriate physical registers.
1091///
1092SDValue X86TargetLowering::LowerCallResult(
1093 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1094 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1096 uint32_t *RegMask) const {
1097
1098 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1099 // Assign locations to each value returned by this call.
1101 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1102 *DAG.getContext());
1103 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1104
1105 // Copy all of the result registers out of their specified physreg.
1106 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1107 ++I, ++InsIndex) {
1108 CCValAssign &VA = RVLocs[I];
1109 EVT CopyVT = VA.getLocVT();
1110
1111 // In some calling conventions we need to remove the used registers
1112 // from the register mask.
1113 if (RegMask) {
1114 for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1115 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1116 }
1117
1118 // Report an error if there was an attempt to return FP values via XMM
1119 // registers.
1120 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1121 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
1122 if (VA.getLocReg() == X86::XMM1)
1123 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1124 else
1125 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1126 } else if (!Subtarget.hasSSE2() &&
1127 X86::FR64XRegClass.contains(VA.getLocReg()) &&
1128 CopyVT == MVT::f64) {
1129 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
1130 if (VA.getLocReg() == X86::XMM1)
1131 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1132 else
1133 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1134 }
1135
1136 // If we prefer to use the value in xmm registers, copy it out as f80 and
1137 // use a truncate to move it from fp stack reg to xmm reg.
1138 bool RoundAfterCopy = false;
1139 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
1141 if (!Subtarget.hasX87())
1142 report_fatal_error("X87 register return with X87 disabled");
1143 CopyVT = MVT::f80;
1144 RoundAfterCopy = (CopyVT != VA.getLocVT());
1145 }
1146
1147 SDValue Val;
1148 if (VA.needsCustom()) {
1149 assert(VA.getValVT() == MVT::v64i1 &&
1150 "Currently the only custom case is when we split v64i1 to 2 regs");
1151 Val =
1152 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
1153 } else {
1154 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1155 .getValue(1);
1156 Val = Chain.getValue(0);
1157 InGlue = Chain.getValue(2);
1158 }
1159
1160 if (RoundAfterCopy)
1161 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
1162 // This truncation won't change the value.
1163 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1164
1165 if (VA.isExtInLoc()) {
1166 if (VA.getValVT().isVector() &&
1167 VA.getValVT().getScalarType() == MVT::i1 &&
1168 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1169 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1170 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1171 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1172 } else
1173 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1174 }
1175
1176 if (VA.getLocInfo() == CCValAssign::BCvt)
1177 Val = DAG.getBitcast(VA.getValVT(), Val);
1178
1179 InVals.push_back(Val);
1180 }
1181
1182 return Chain;
1183}
1184
1185//===----------------------------------------------------------------------===//
1186// C & StdCall & Fast Calling Convention implementation
1187//===----------------------------------------------------------------------===//
1188// StdCall calling convention seems to be standard for many Windows' API
1189// routines and around. It differs from C calling convention just a little:
1190// callee should clean up the stack, not caller. Symbols should be also
1191// decorated in some fancy way :) It doesn't support any vector arguments.
1192// For info on fast calling convention see Fast Calling Convention (tail call)
1193// implementation LowerX86_32FastCCCallTo.
1194
1195/// Determines whether Args, either a set of outgoing arguments to a call, or a
1196/// set of incoming args of a call, contains an sret pointer that the callee
1197/// pops
1198template <typename T>
1199static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1200 const X86Subtarget &Subtarget) {
1201 // Not C++20 (yet), so no concepts available.
1202 static_assert(std::is_same_v<T, ISD::OutputArg> ||
1203 std::is_same_v<T, ISD::InputArg>,
1204 "requires ISD::OutputArg or ISD::InputArg");
1205
1206 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
1207 // for most compilations.
1208 if (!Subtarget.is32Bit())
1209 return false;
1210
1211 if (Args.empty())
1212 return false;
1213
1214 // Most calls do not have an sret argument, check the arg next.
1215 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1216 if (!Flags.isSRet() || Flags.isInReg())
1217 return false;
1218
1219 // The MSVCabi does not pop the sret.
1220 if (Subtarget.getTargetTriple().isOSMSVCRT())
1221 return false;
1222
1223 // MCUs don't pop the sret
1224 if (Subtarget.isTargetMCU())
1225 return false;
1226
1227 // Callee pops argument
1228 return true;
1229}
1230
1231/// Make a copy of an aggregate at address specified by "Src" to address
1232/// "Dst" with size and alignment information specified by the specific
1233/// parameter attribute. The copy will be passed as a byval function parameter.
1235 SDValue Chain, ISD::ArgFlagsTy Flags,
1236 SelectionDAG &DAG, const SDLoc &dl) {
1237 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1238
1239 return DAG.getMemcpy(
1240 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
1241 /*isVolatile*/ false, /*AlwaysInline=*/true,
1242 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
1243}
1244
1245/// Return true if the calling convention is one that we can guarantee TCO for.
1247 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1250}
1251
1252/// Return true if we might ever do TCO for calls with this calling convention.
1254 switch (CC) {
1255 // C calling conventions:
1256 case CallingConv::C:
1257 case CallingConv::Win64:
1260 // Callee pop conventions:
1265 // Swift:
1266 case CallingConv::Swift:
1267 return true;
1268 default:
1269 return canGuaranteeTCO(CC);
1270 }
1271}
1272
1273/// Return true if the function is being made into a tailcall target by
1274/// changing its ABI.
1275static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1276 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1278}
1279
1280bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1281 if (!CI->isTailCall())
1282 return false;
1283
1284 CallingConv::ID CalleeCC = CI->getCallingConv();
1285 if (!mayTailCallThisCC(CalleeCC))
1286 return false;
1287
1288 return true;
1289}
1290
1291SDValue
1292X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1294 const SDLoc &dl, SelectionDAG &DAG,
1295 const CCValAssign &VA,
1296 MachineFrameInfo &MFI, unsigned i) const {
1297 // Create the nodes corresponding to a load from this parameter slot.
1298 ISD::ArgFlagsTy Flags = Ins[i].Flags;
1299 bool AlwaysUseMutable = shouldGuaranteeTCO(
1300 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1301 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1302 EVT ValVT;
1303 MVT PtrVT = getPointerTy(DAG.getDataLayout());
1304
1305 // If value is passed by pointer we have address passed instead of the value
1306 // itself. No need to extend if the mask value and location share the same
1307 // absolute size.
1308 bool ExtendedInMem =
1309 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1311
1312 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1313 ValVT = VA.getLocVT();
1314 else
1315 ValVT = VA.getValVT();
1316
1317 // FIXME: For now, all byval parameter objects are marked mutable. This can be
1318 // changed with more analysis.
1319 // In case of tail call optimization mark all arguments mutable. Since they
1320 // could be overwritten by lowering of arguments in case of a tail call.
1321 if (Flags.isByVal()) {
1322 unsigned Bytes = Flags.getByValSize();
1323 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1324
1325 // FIXME: For now, all byval parameter objects are marked as aliasing. This
1326 // can be improved with deeper analysis.
1327 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
1328 /*isAliased=*/true);
1329 return DAG.getFrameIndex(FI, PtrVT);
1330 }
1331
1332 EVT ArgVT = Ins[i].ArgVT;
1333
1334 // If this is a vector that has been split into multiple parts, don't elide
1335 // the copy. The layout on the stack may not match the packed in-memory
1336 // layout.
1337 bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1338
1339 // This is an argument in memory. We might be able to perform copy elision.
1340 // If the argument is passed directly in memory without any extension, then we
1341 // can perform copy elision. Large vector types, for example, may be passed
1342 // indirectly by pointer.
1343 if (Flags.isCopyElisionCandidate() &&
1344 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1345 !ScalarizedVector) {
1346 SDValue PartAddr;
1347 if (Ins[i].PartOffset == 0) {
1348 // If this is a one-part value or the first part of a multi-part value,
1349 // create a stack object for the entire argument value type and return a
1350 // load from our portion of it. This assumes that if the first part of an
1351 // argument is in memory, the rest will also be in memory.
1352 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
1353 /*IsImmutable=*/false);
1354 PartAddr = DAG.getFrameIndex(FI, PtrVT);
1355 return DAG.getLoad(
1356 ValVT, dl, Chain, PartAddr,
1358 }
1359
1360 // This is not the first piece of an argument in memory. See if there is
1361 // already a fixed stack object including this offset. If so, assume it
1362 // was created by the PartOffset == 0 branch above and create a load from
1363 // the appropriate offset into it.
1364 int64_t PartBegin = VA.getLocMemOffset();
1365 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1366 int FI = MFI.getObjectIndexBegin();
1367 for (; MFI.isFixedObjectIndex(FI); ++FI) {
1368 int64_t ObjBegin = MFI.getObjectOffset(FI);
1369 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
1370 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1371 break;
1372 }
1373 if (MFI.isFixedObjectIndex(FI)) {
1374 SDValue Addr =
1375 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
1376 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
1377 return DAG.getLoad(ValVT, dl, Chain, Addr,
1379 DAG.getMachineFunction(), FI, Ins[i].PartOffset));
1380 }
1381 }
1382
1383 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1384 VA.getLocMemOffset(), isImmutable);
1385
1386 // Set SExt or ZExt flag.
1387 if (VA.getLocInfo() == CCValAssign::ZExt) {
1388 MFI.setObjectZExt(FI, true);
1389 } else if (VA.getLocInfo() == CCValAssign::SExt) {
1390 MFI.setObjectSExt(FI, true);
1391 }
1392
1393 MaybeAlign Alignment;
1394 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1395 ValVT != MVT::f80)
1396 Alignment = MaybeAlign(4);
1397 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1398 SDValue Val = DAG.getLoad(
1399 ValVT, dl, Chain, FIN,
1401 Alignment);
1402 return ExtendedInMem
1403 ? (VA.getValVT().isVector()
1404 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
1405 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
1406 : Val;
1407}
1408
1409// FIXME: Get this from tablegen.
1411 const X86Subtarget &Subtarget) {
1412 assert(Subtarget.is64Bit());
1413
1414 if (Subtarget.isCallingConvWin64(CallConv)) {
1415 static const MCPhysReg GPR64ArgRegsWin64[] = {
1416 X86::RCX, X86::RDX, X86::R8, X86::R9
1417 };
1418 return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
1419 }
1420
1421 static const MCPhysReg GPR64ArgRegs64Bit[] = {
1422 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1423 };
1424 return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
1425}
1426
1427// FIXME: Get this from tablegen.
1429 CallingConv::ID CallConv,
1430 const X86Subtarget &Subtarget) {
1431 assert(Subtarget.is64Bit());
1432 if (Subtarget.isCallingConvWin64(CallConv)) {
1433 // The XMM registers which might contain var arg parameters are shadowed
1434 // in their paired GPR. So we only need to save the GPR to their home
1435 // slots.
1436 // TODO: __vectorcall will change this.
1437 return {};
1438 }
1439
1440 bool isSoftFloat = Subtarget.useSoftFloat();
1441 if (isSoftFloat || !Subtarget.hasSSE1())
1442 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1443 // registers.
1444 return {};
1445
1446 static const MCPhysReg XMMArgRegs64Bit[] = {
1447 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1448 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1449 };
1450 return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
1451}
1452
1453#ifndef NDEBUG
1455 return llvm::is_sorted(
1456 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
1457 return A.getValNo() < B.getValNo();
1458 });
1459}
1460#endif
1461
1462namespace {
1463/// This is a helper class for lowering variable arguments parameters.
1464class VarArgsLoweringHelper {
1465public:
1466 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1467 SelectionDAG &DAG, const X86Subtarget &Subtarget,
1468 CallingConv::ID CallConv, CCState &CCInfo)
1469 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1470 TheMachineFunction(DAG.getMachineFunction()),
1471 TheFunction(TheMachineFunction.getFunction()),
1472 FrameInfo(TheMachineFunction.getFrameInfo()),
1473 FrameLowering(*Subtarget.getFrameLowering()),
1474 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1475 CCInfo(CCInfo) {}
1476
1477 // Lower variable arguments parameters.
1478 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1479
1480private:
1481 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1482
1483 void forwardMustTailParameters(SDValue &Chain);
1484
1485 bool is64Bit() const { return Subtarget.is64Bit(); }
1486 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
1487
1488 X86MachineFunctionInfo *FuncInfo;
1489 const SDLoc &DL;
1490 SelectionDAG &DAG;
1491 const X86Subtarget &Subtarget;
1492 MachineFunction &TheMachineFunction;
1493 const Function &TheFunction;
1494 MachineFrameInfo &FrameInfo;
1495 const TargetFrameLowering &FrameLowering;
1496 const TargetLowering &TargLowering;
1497 CallingConv::ID CallConv;
1498 CCState &CCInfo;
1499};
1500} // namespace
1501
1502void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1503 SDValue &Chain, unsigned StackSize) {
1504 // If the function takes variable number of arguments, make a frame index for
1505 // the start of the first vararg value... for expansion of llvm.va_start. We
1506 // can skip this if there are no va_start calls.
1507 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1508 CallConv != CallingConv::X86_ThisCall)) {
1509 FuncInfo->setVarArgsFrameIndex(
1510 FrameInfo.CreateFixedObject(1, StackSize, true));
1511 }
1512
1513 // 64-bit calling conventions support varargs and register parameters, so we
1514 // have to do extra work to spill them in the prologue.
1515 if (is64Bit()) {
1516 // Find the first unallocated argument registers.
1517 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1518 ArrayRef<MCPhysReg> ArgXMMs =
1519 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
1520 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
1521 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
1522
1523 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1524 "SSE register cannot be used when SSE is disabled!");
1525
1526 if (isWin64()) {
1527 // Get to the caller-allocated home save location. Add 8 to account
1528 // for the return address.
1529 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1530 FuncInfo->setRegSaveFrameIndex(
1531 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
1532 // Fixup to set vararg frame on shadow area (4 x i64).
1533 if (NumIntRegs < 4)
1534 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1535 } else {
1536 // For X86-64, if there are vararg parameters that are passed via
1537 // registers, then we must store them to their spots on the stack so
1538 // they may be loaded by dereferencing the result of va_next.
1539 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1540 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1541 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1542 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
1543 }
1544
1546 LiveGPRs; // list of SDValue for GPR registers keeping live input value
1547 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
1548 // keeping live input value
1549 SDValue ALVal; // if applicable keeps SDValue for %al register
1550
1551 // Gather all the live in physical registers.
1552 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
1553 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1554 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1555 }
1556 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
1557 if (!AvailableXmms.empty()) {
1558 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1559 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1560 for (MCPhysReg Reg : AvailableXmms) {
1561 // FastRegisterAllocator spills virtual registers at basic
1562 // block boundary. That leads to usages of xmm registers
1563 // outside of check for %al. Pass physical registers to
1564 // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
1565 TheMachineFunction.getRegInfo().addLiveIn(Reg);
1566 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1567 }
1568 }
1569
1570 // Store the integer parameter registers.
1572 SDValue RSFIN =
1573 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
1574 TargLowering.getPointerTy(DAG.getDataLayout()));
1575 unsigned Offset = FuncInfo->getVarArgsGPOffset();
1576 for (SDValue Val : LiveGPRs) {
1577 SDValue FIN = DAG.getNode(ISD::ADD, DL,
1578 TargLowering.getPointerTy(DAG.getDataLayout()),
1579 RSFIN, DAG.getIntPtrConstant(Offset, DL));
1580 SDValue Store =
1581 DAG.getStore(Val.getValue(1), DL, Val, FIN,
1583 DAG.getMachineFunction(),
1584 FuncInfo->getRegSaveFrameIndex(), Offset));
1585 MemOps.push_back(Store);
1586 Offset += 8;
1587 }
1588
1589 // Now store the XMM (fp + vector) parameter registers.
1590 if (!LiveXMMRegs.empty()) {
1591 SmallVector<SDValue, 12> SaveXMMOps;
1592 SaveXMMOps.push_back(Chain);
1593 SaveXMMOps.push_back(ALVal);
1594 SaveXMMOps.push_back(RSFIN);
1595 SaveXMMOps.push_back(
1596 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1597 llvm::append_range(SaveXMMOps, LiveXMMRegs);
1598 MachineMemOperand *StoreMMO =
1601 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
1602 Offset),
1605 DL, DAG.getVTList(MVT::Other),
1606 SaveXMMOps, MVT::i8, StoreMMO));
1607 }
1608
1609 if (!MemOps.empty())
1610 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1611 }
1612}
1613
1614void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1615 // Find the largest legal vector type.
1616 MVT VecVT = MVT::Other;
1617 // FIXME: Only some x86_32 calling conventions support AVX512.
1618 if (Subtarget.useAVX512Regs() &&
1619 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1620 CallConv == CallingConv::Intel_OCL_BI)))
1621 VecVT = MVT::v16f32;
1622 else if (Subtarget.hasAVX())
1623 VecVT = MVT::v8f32;
1624 else if (Subtarget.hasSSE2())
1625 VecVT = MVT::v4f32;
1626
1627 // We forward some GPRs and some vector types.
1628 SmallVector<MVT, 2> RegParmTypes;
1629 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1630 RegParmTypes.push_back(IntVT);
1631 if (VecVT != MVT::Other)
1632 RegParmTypes.push_back(VecVT);
1633
1634 // Compute the set of forwarded registers. The rest are scratch.
1636 FuncInfo->getForwardedMustTailRegParms();
1637 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
1638
1639 // Forward AL for SysV x86_64 targets, since it is used for varargs.
1640 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1641 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1642 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1643 }
1644
1645 // Copy all forwards from physical to virtual registers.
1646 for (ForwardedRegister &FR : Forwards) {
1647 // FIXME: Can we use a less constrained schedule?
1648 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
1649 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1650 TargLowering.getRegClassFor(FR.VT));
1651 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
1652 }
1653}
1654
1655void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1656 unsigned StackSize) {
1657 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
1658 // If necessary, it would be set into the correct value later.
1659 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1660 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1661
1662 if (FrameInfo.hasVAStart())
1663 createVarArgAreaAndStoreRegisters(Chain, StackSize);
1664
1665 if (FrameInfo.hasMustTailInVarArgFunc())
1666 forwardMustTailParameters(Chain);
1667}
1668
1669SDValue X86TargetLowering::LowerFormalArguments(
1670 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1671 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1672 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1675
1676 const Function &F = MF.getFunction();
1677 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1678 F.getName() == "main")
1679 FuncInfo->setForceFramePointer(true);
1680
1681 MachineFrameInfo &MFI = MF.getFrameInfo();
1682 bool Is64Bit = Subtarget.is64Bit();
1683 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
1684
1685 assert(
1686 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
1687 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
1688
1689 // Assign locations to all of the incoming arguments.
1691 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1692
1693 // Allocate shadow area for Win64.
1694 if (IsWin64)
1695 CCInfo.AllocateStack(32, Align(8));
1696
1697 CCInfo.AnalyzeArguments(Ins, CC_X86);
1698
1699 // In vectorcall calling convention a second pass is required for the HVA
1700 // types.
1701 if (CallingConv::X86_VectorCall == CallConv) {
1702 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
1703 }
1704
1705 // The next loop assumes that the locations are in the same order of the
1706 // input arguments.
1707 assert(isSortedByValueNo(ArgLocs) &&
1708 "Argument Location list must be sorted before lowering");
1709
1710 SDValue ArgValue;
1711 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1712 ++I, ++InsIndex) {
1713 assert(InsIndex < Ins.size() && "Invalid Ins index");
1714 CCValAssign &VA = ArgLocs[I];
1715
1716 if (VA.isRegLoc()) {
1717 EVT RegVT = VA.getLocVT();
1718 if (VA.needsCustom()) {
1719 assert(
1720 VA.getValVT() == MVT::v64i1 &&
1721 "Currently the only custom case is when we split v64i1 to 2 regs");
1722
1723 // v64i1 values, in regcall calling convention, that are
1724 // compiled to 32 bit arch, are split up into two registers.
1725 ArgValue =
1726 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
1727 } else {
1728 const TargetRegisterClass *RC;
1729 if (RegVT == MVT::i8)
1730 RC = &X86::GR8RegClass;
1731 else if (RegVT == MVT::i16)
1732 RC = &X86::GR16RegClass;
1733 else if (RegVT == MVT::i32)
1734 RC = &X86::GR32RegClass;
1735 else if (Is64Bit && RegVT == MVT::i64)
1736 RC = &X86::GR64RegClass;
1737 else if (RegVT == MVT::f16)
1738 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1739 else if (RegVT == MVT::f32)
1740 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1741 else if (RegVT == MVT::f64)
1742 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1743 else if (RegVT == MVT::f80)
1744 RC = &X86::RFP80RegClass;
1745 else if (RegVT == MVT::f128)
1746 RC = &X86::VR128RegClass;
1747 else if (RegVT.is512BitVector())
1748 RC = &X86::VR512RegClass;
1749 else if (RegVT.is256BitVector())
1750 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1751 else if (RegVT.is128BitVector())
1752 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1753 else if (RegVT == MVT::x86mmx)
1754 RC = &X86::VR64RegClass;
1755 else if (RegVT == MVT::v1i1)
1756 RC = &X86::VK1RegClass;
1757 else if (RegVT == MVT::v8i1)
1758 RC = &X86::VK8RegClass;
1759 else if (RegVT == MVT::v16i1)
1760 RC = &X86::VK16RegClass;
1761 else if (RegVT == MVT::v32i1)
1762 RC = &X86::VK32RegClass;
1763 else if (RegVT == MVT::v64i1)
1764 RC = &X86::VK64RegClass;
1765 else
1766 llvm_unreachable("Unknown argument type!");
1767
1768 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1769 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1770 }
1771
1772 // If this is an 8 or 16-bit value, it is really passed promoted to 32
1773 // bits. Insert an assert[sz]ext to capture this, then truncate to the
1774 // right size.
1775 if (VA.getLocInfo() == CCValAssign::SExt)
1776 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1777 DAG.getValueType(VA.getValVT()));
1778 else if (VA.getLocInfo() == CCValAssign::ZExt)
1779 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1780 DAG.getValueType(VA.getValVT()));
1781 else if (VA.getLocInfo() == CCValAssign::BCvt)
1782 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
1783
1784 if (VA.isExtInLoc()) {
1785 // Handle MMX values passed in XMM regs.
1786 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1787 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
1788 else if (VA.getValVT().isVector() &&
1789 VA.getValVT().getScalarType() == MVT::i1 &&
1790 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1791 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1792 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1793 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
1794 } else
1795 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1796 }
1797 } else {
1798 assert(VA.isMemLoc());
1799 ArgValue =
1800 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
1801 }
1802
1803 // If value is passed via pointer - do a load.
1804 if (VA.getLocInfo() == CCValAssign::Indirect &&
1805 !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1806 ArgValue =
1807 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
1808 }
1809
1810 InVals.push_back(ArgValue);
1811 }
1812
1813 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1814 if (Ins[I].Flags.isSwiftAsync()) {
1815 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1816 if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1817 X86FI->setHasSwiftAsyncContext(true);
1818 else {
1819 int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1820 int FI =
1821 MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
1822 X86FI->setSwiftAsyncContextFrameIdx(FI);
1823 SDValue St = DAG.getStore(
1824 DAG.getEntryNode(), dl, InVals[I],
1825 DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1827 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1828 }
1829 }
1830
1831 // Swift calling convention does not require we copy the sret argument
1832 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1833 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1834 continue;
1835
1836 // All x86 ABIs require that for returning structs by value we copy the
1837 // sret argument into %rax/%eax (depending on ABI) for the return. Save
1838 // the argument into a virtual register so that we can access it from the
1839 // return points.
1840 if (Ins[I].Flags.isSRet()) {
1841 assert(!FuncInfo->getSRetReturnReg() &&
1842 "SRet return has already been set");
1843 MVT PtrTy = getPointerTy(DAG.getDataLayout());
1844 Register Reg =
1846 FuncInfo->setSRetReturnReg(Reg);
1847 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
1848 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1849 break;
1850 }
1851 }
1852
1853 unsigned StackSize = CCInfo.getStackSize();
1854 // Align stack specially for tail calls.
1855 if (shouldGuaranteeTCO(CallConv,
1857 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1858
1859 if (IsVarArg)
1860 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1861 .lowerVarArgsParameters(Chain, StackSize);
1862
1863 // Some CCs need callee pop.
1864 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
1866 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1867 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1868 // X86 interrupts must pop the error code (and the alignment padding) if
1869 // present.
1870 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1871 } else {
1872 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1873 // If this is an sret function, the return should pop the hidden pointer.
1874 if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
1875 FuncInfo->setBytesToPopOnReturn(4);
1876 }
1877
1878 if (!Is64Bit) {
1879 // RegSaveFrameIndex is X86-64 only.
1880 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1881 }
1882
1883 FuncInfo->setArgumentStackSize(StackSize);
1884
1885 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1886 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
1887 if (Personality == EHPersonality::CoreCLR) {
1888 assert(Is64Bit);
1889 // TODO: Add a mechanism to frame lowering that will allow us to indicate
1890 // that we'd prefer this slot be allocated towards the bottom of the frame
1891 // (i.e. near the stack pointer after allocating the frame). Every
1892 // funclet needs a copy of this slot in its (mostly empty) frame, and the
1893 // offset from the bottom of this and each funclet's frame must be the
1894 // same, so the size of funclets' (mostly empty) frames is dictated by
1895 // how far this slot is from the bottom (since they allocate just enough
1896 // space to accommodate holding this slot at the correct offset).
1897 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
1898 EHInfo->PSPSymFrameIdx = PSPSymFI;
1899 }
1900 }
1901
1902 if (shouldDisableArgRegFromCSR(CallConv) ||
1903 F.hasFnAttribute("no_caller_saved_registers")) {
1905 for (std::pair<MCRegister, Register> Pair : MRI.liveins())
1906 MRI.disableCalleeSavedRegister(Pair.first);
1907 }
1908
1909 if (CallingConv::PreserveNone == CallConv)
1910 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1911 if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() ||
1912 Ins[I].Flags.isSwiftError()) {
1913 errorUnsupported(DAG, dl,
1914 "Swift attributes can't be used with preserve_none");
1915 break;
1916 }
1917 }
1918
1919 return Chain;
1920}
1921
/// Emit the DAG node that places one outgoing call argument in its stack slot.
///
/// The destination address is \p StackPtr plus the memory offset recorded in
/// \p VA. When \p isByVal is set the argument is copied there through
/// CreateCopyOfByValArgument; otherwise \p Arg is stored there directly.
/// The result of that copy or store is returned to the caller.
///
/// NOTE(review): this listing jumps from line 1940 to line 1942, so one
/// argument line of the final DAG.getStore call is not visible here. Check the
/// upstream source before editing that call.
1922SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1923 SDValue Arg, const SDLoc &dl,
1924 SelectionDAG &DAG,
1925 const CCValAssign &VA,
1926 ISD::ArgFlagsTy Flags,
1927 bool isByVal) const {
1928 unsigned LocMemOffset = VA.getLocMemOffset();
// Destination address = StackPtr + LocMemOffset, computed in the pointer type.
1929 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1930 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1931 StackPtr, PtrOff);
1932 if (isByVal)
1933 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1934
// Alignment stays unset unless this is a 32-bit Windows MSVC target and the
// value is not f80; in that case the store is given an explicit alignment of 4.
1935 MaybeAlign Alignment;
1936 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1937 Arg.getSimpleValueType() != MVT::f80)
1938 Alignment = MaybeAlign(4);
1939 return DAG.getStore(
1940 Chain, dl, Arg, PtrOff,
1942 Alignment);
1943}
1944
1945/// Emit a load of return address if tail call
1946/// optimization is performed and it is required.
1947SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1948 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1949 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1950 // Adjust the Return address stack slot.
1951 EVT VT = getPointerTy(DAG.getDataLayout());
1952 OutRetAddr = getReturnAddressFrameIndex(DAG);
1953
1954 // Load the "old" Return address.
1955 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
1956 return SDValue(OutRetAddr.getNode(), 1);
1957}
1958
1959/// Emit a store of the return address if tail call
1960/// optimization is performed and it is required (FPDiff!=0).
///
/// When FPDiff is zero the incoming Chain is returned unchanged. Otherwise a
/// fixed stack object of SlotSize bytes is created at offset
/// FPDiff - SlotSize, RetAddrFrIdx is stored into it, and the result of that
/// store is returned as the new chain.
///
/// NOTE(review): lines 1961 (the start of the signature, including the
/// function name) and 1973 (an argument line of the DAG.getStore call) are
/// absent from this listing. Consult the upstream source before editing.
1962 SDValue Chain, SDValue RetAddrFrIdx,
1963 EVT PtrVT, unsigned SlotSize,
1964 int FPDiff, const SDLoc &dl) {
1965 // Store the return address to the appropriate stack slot.
1966 if (!FPDiff) return Chain;
1967 // Calculate the new stack slot for the return address.
// FPDiff is widened to int64_t before SlotSize is subtracted.
1968 int NewReturnAddrFI =
1969 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
1970 false);
1971 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
1972 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
1974 DAG.getMachineFunction(), NewReturnAddrFI));
1975 return Chain;
1976}
1977
1978/// Returns a vector_shuffle node (not just a mask) for a movs{s|d}, movd
1979/// operation of the specified width.
///
/// The shuffle mask built here is <NumElems, 1, 2, ..., NumElems-1>, where
/// NumElems is the element count of \p VT, and it is applied to \p V1 and
/// \p V2 through DAG.getVectorShuffle.
/// NOTE(review): presumably index NumElems selects element 0 of V2 and the
/// remaining indices select from V1, per the usual shuffle-mask convention;
/// confirm against the SelectionDAG documentation.
/// NOTE(review): line 1983, where Mask is presumably declared, is absent from
/// this listing.
1980SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
1981 SDValue V1, SDValue V2) const {
1982 unsigned NumElems = VT.getVectorNumElements();
// First mask entry is NumElems; entries 1..NumElems-1 are their own index.
1984 Mask.push_back(NumElems);
1985 for (unsigned i = 1; i != NumElems; ++i)
1986 Mask.push_back(i);
1987 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
1988}
1989
/// Lower the outgoing call described by \p CLI into SelectionDAG nodes.
///
/// Broad steps, in the order they appear below:
///  - reject direct calls that use the X86_INTR calling convention;
///  - assign a location to every outgoing operand with CC_X86 (with a second
///    pass for X86_VectorCall);
///  - decide whether the call can really be emitted as a tail call or
///    sibcall, failing fatally if a musttail call site cannot be tail called;
///  - compute the number of bytes to push, emit CALLSEQ_START (skipped for
///    sibcalls and musttail calls), and copy or store each argument to its
///    register or stack location;
///  - build the operand list (chain, callee, argument registers, register
///    mask, glue) and emit TC_RETURN for tail calls, otherwise NT_CALL,
///    CALL_RVMARKER or CALL;
///  - emit CALLSEQ_END (skipped for sibcalls) and let LowerCallResult copy the
///    results into \p InVals.
///
/// CLI.IsTailCall is bound by reference and may be cleared here when the call
/// turns out not to be eligible for tail call optimization.
///
/// NOTE(review): this listing omits a number of original lines (the embedded
/// line numbers skip in many places), so several names used below, such as
/// Outs, Ins, MF, X86Info, ArgLocs, RegsToPass, StackPtr, Ops, CSInfo, MFI and
/// ARCFn, have no visible declaration. Consult the upstream source before
/// changing any code in this function.
1990SDValue
1991X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1992 SmallVectorImpl<SDValue> &InVals) const {
1993 SelectionDAG &DAG = CLI.DAG;
1994 SDLoc &dl = CLI.DL;
1996 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1998 SDValue Chain = CLI.Chain;
1999 SDValue Callee = CLI.Callee;
2000 CallingConv::ID CallConv = CLI.CallConv;
2001 bool &isTailCall = CLI.IsTailCall;
2002 bool isVarArg = CLI.IsVarArg;
2003 const auto *CB = CLI.CB;
2004
2006 bool Is64Bit = Subtarget.is64Bit();
2007 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2008 bool IsSibcall = false;
2009 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
2010 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
2011 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
2013 bool HasNCSR = (CB && isa<CallInst>(CB) &&
2014 CB->hasFnAttr("no_caller_saved_registers"));
2015 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
2016 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
2017 bool IsCFICall = IsIndirectCall && CLI.CFIType;
2018 const Module *M = MF.getFunction().getParent();
2019 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
2020
2022 if (CallConv == CallingConv::X86_INTR)
2023 report_fatal_error("X86 interrupts may not be called directly");
2024
2025 // Analyze operands of the call, assigning locations to each operand.
2027 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2028
2029 // Allocate shadow area for Win64.
2030 if (IsWin64)
2031 CCInfo.AllocateStack(32, Align(8));
2032
2033 CCInfo.AnalyzeArguments(Outs, CC_X86);
2034
2035 // In vectorcall calling convention a second pass is required for the HVA
2036 // types.
2037 if (CallingConv::X86_VectorCall == CallConv) {
2038 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2039 }
2040
2041 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2042 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
2043 // If we are using a GOT, disable tail calls to external symbols with
2044 // default visibility. Tail calling such a symbol requires using a GOT
2045 // relocation, which forces early binding of the symbol. This breaks code
2046 // that requires lazy function symbol resolution. Using musttail or
2047 // GuaranteedTailCallOpt will override this.
2048 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2049 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2050 G->getGlobal()->hasDefaultVisibility()))
2051 isTailCall = false;
2052 }
2053
2054 if (isTailCall && !IsMustTail) {
2055 // Check if it's really possible to do a tail call.
2056 isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
2057 IsCalleePopSRet);
2058
2059 // Sibcalls are automatically detected tailcalls which do not require
2060 // ABI changes.
2061 if (!IsGuaranteeTCO && isTailCall)
2062 IsSibcall = true;
2063
2064 if (isTailCall)
2065 ++NumTailCalls;
2066 }
2067
2068 if (IsMustTail && !isTailCall)
2069 report_fatal_error("failed to perform tail call elimination on a call "
2070 "site marked musttail");
2071
2072 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2073 "Var args not supported with calling convention fastcc, ghc or hipe");
2074
2075 // Get a count of how many bytes are to be pushed on the stack.
2076 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2077 if (IsSibcall)
2078 // This is a sibcall. The memory operands are available in caller's
2079 // own caller's stack.
2080 NumBytes = 0;
2081 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
2082 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2083
2084 int FPDiff = 0;
2085 if (isTailCall &&
2086 shouldGuaranteeTCO(CallConv,
2088 // Lower arguments at fp - stackoffset + fpdiff.
2089 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2090
2091 FPDiff = NumBytesCallerPushed - NumBytes;
2092
2093 // Set the delta of movement of the returnaddr stackslot.
2094 // But only set if delta is greater than previous delta.
2095 if (FPDiff < X86Info->getTCReturnAddrDelta())
2096 X86Info->setTCReturnAddrDelta(FPDiff);
2097 }
2098
2099 unsigned NumBytesToPush = NumBytes;
2100 unsigned NumBytesToPop = NumBytes;
2101
2102 // If we have an inalloca argument, all stack space has already been allocated
2103 // for us and will be right at the top of the stack. We don't support multiple
2104 // arguments passed in memory when using inalloca.
2105 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2106 NumBytesToPush = 0;
2107 if (!ArgLocs.back().isMemLoc())
2108 report_fatal_error("cannot use inalloca attribute on a register "
2109 "parameter");
2110 if (ArgLocs.back().getLocMemOffset() != 0)
2111 report_fatal_error("any parameter with the inalloca attribute must be "
2112 "the only memory argument");
2113 } else if (CLI.IsPreallocated) {
2114 assert(ArgLocs.back().isMemLoc() &&
2115 "cannot use preallocated attribute on a register "
2116 "parameter");
2117 SmallVector<size_t, 4> PreallocatedOffsets;
2118 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2119 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2120 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
2121 }
2122 }
2124 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
2125 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
2126 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
2127 NumBytesToPush = 0;
2128 }
2129
2130 if (!IsSibcall && !IsMustTail)
2131 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
2132 NumBytes - NumBytesToPush, dl);
2133
2134 SDValue RetAddrFrIdx;
2135 // Load return address for tail calls.
2136 if (isTailCall && FPDiff)
2137 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2138 Is64Bit, FPDiff, dl);
2139
2141 SmallVector<SDValue, 8> MemOpChains;
2143
2144 // The next loop assumes that the locations are in the same order of the
2145 // input arguments.
2146 assert(isSortedByValueNo(ArgLocs) &&
2147 "Argument Location list must be sorted before lowering");
2148
2149 // Walk the register/memloc assignments, inserting copies/loads. In the case
2150 // of tail call optimization arguments are handled later.
2151 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2152 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2153 ++I, ++OutIndex) {
2154 assert(OutIndex < Outs.size() && "Invalid Out index");
2155 // Skip inalloca/preallocated arguments, they have already been written.
2156 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2157 if (Flags.isInAlloca() || Flags.isPreallocated())
2158 continue;
2159
2160 CCValAssign &VA = ArgLocs[I];
2161 EVT RegVT = VA.getLocVT();
2162 SDValue Arg = OutVals[OutIndex];
2163 bool isByVal = Flags.isByVal();
2164
2165 // Promote the value if needed.
2166 switch (VA.getLocInfo()) {
2167 default: llvm_unreachable("Unknown loc info!");
2168 case CCValAssign::Full: break;
2169 case CCValAssign::SExt:
2170 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2171 break;
2172 case CCValAssign::ZExt:
2173 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2174 break;
2175 case CCValAssign::AExt:
2176 if (Arg.getValueType().isVector() &&
2177 Arg.getValueType().getVectorElementType() == MVT::i1)
2178 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
2179 else if (RegVT.is128BitVector()) {
2180 // Special case: passing MMX values in XMM registers.
2181 Arg = DAG.getBitcast(MVT::i64, Arg);
2182 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2183 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2184 } else
2185 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2186 break;
2187 case CCValAssign::BCvt:
2188 Arg = DAG.getBitcast(RegVT, Arg);
2189 break;
2190 case CCValAssign::Indirect: {
2191 if (isByVal) {
2192 // Memcpy the argument to a temporary stack slot to prevent
2193 // the caller from seeing any modifications the callee may make
2194 // as guaranteed by the `byval` attribute.
2195 int FrameIdx = MF.getFrameInfo().CreateStackObject(
2196 Flags.getByValSize(),
2197 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
2198 SDValue StackSlot =
2199 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
2200 Chain =
2201 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
2202 // From now on treat this as a regular pointer
2203 Arg = StackSlot;
2204 isByVal = false;
2205 } else {
2206 // Store the argument.
2207 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2208 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2209 Chain = DAG.getStore(
2210 Chain, dl, Arg, SpillSlot,
2212 Arg = SpillSlot;
2213 }
2214 break;
2215 }
2216 }
2217
2218 if (VA.needsCustom()) {
2219 assert(VA.getValVT() == MVT::v64i1 &&
2220 "Currently the only custom case is when we split v64i1 to 2 regs");
2221 // Split v64i1 value into two registers
2222 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
2223 } else if (VA.isRegLoc()) {
2224 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2225 const TargetOptions &Options = DAG.getTarget().Options;
2226 if (Options.EmitCallSiteInfo)
2227 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
2228 if (isVarArg && IsWin64) {
2229 // Win64 ABI requires argument XMM reg to be copied to the corresponding
2230 // shadow reg if callee is a varargs function.
2231 Register ShadowReg;
2232 switch (VA.getLocReg()) {
2233 case X86::XMM0: ShadowReg = X86::RCX; break;
2234 case X86::XMM1: ShadowReg = X86::RDX; break;
2235 case X86::XMM2: ShadowReg = X86::R8; break;
2236 case X86::XMM3: ShadowReg = X86::R9; break;
2237 }
2238 if (ShadowReg)
2239 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2240 }
2241 } else if (!IsSibcall && (!isTailCall || isByVal)) {
2242 assert(VA.isMemLoc());
2243 if (!StackPtr.getNode())
2244 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2246 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2247 dl, DAG, VA, Flags, isByVal));
2248 }
2249 }
2250
2251 if (!MemOpChains.empty())
2252 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2253
2254 if (Subtarget.isPICStyleGOT()) {
2255 // ELF / PIC requires GOT in the EBX register before function calls via PLT
2256 // GOT pointer (except regcall).
2257 if (!isTailCall) {
2258 // Indirect call with RegCall calling convention may use up all the
2259 // general registers, so it is not suitable to bind EBX register for
2260 // GOT address, just let register allocator handle it.
2261 if (CallConv != CallingConv::X86_RegCall)
2262 RegsToPass.push_back(std::make_pair(
2263 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2264 getPointerTy(DAG.getDataLayout()))));
2265 } else {
2266 // If we are tail calling and generating PIC/GOT style code load the
2267 // address of the callee into ECX. The value in ecx is used as target of
2268 // the tail jump. This is done to circumvent the ebx/callee-saved problem
2269 // for tail calls on PIC/GOT architectures. Normally we would just put the
2270 // address of GOT into ebx and then call target@PLT. But for tail calls
2271 // ebx would be restored (since ebx is callee saved) before jumping to the
2272 // target@PLT.
2273
2274 // Note: The actual moving to ECX is done further down.
2275 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2276 if (G && !G->getGlobal()->hasLocalLinkage() &&
2277 G->getGlobal()->hasDefaultVisibility())
2278 Callee = LowerGlobalAddress(Callee, DAG);
2279 else if (isa<ExternalSymbolSDNode>(Callee))
2280 Callee = LowerExternalSymbol(Callee, DAG);
2281 }
2282 }
2283
2284 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2285 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
2286 // From AMD64 ABI document:
2287 // For calls that may call functions that use varargs or stdargs
2288 // (prototype-less calls or calls to functions containing ellipsis (...) in
2289 // the declaration) %al is used as hidden argument to specify the number
2290 // of SSE registers used. The contents of %al do not need to match exactly
2291 // the number of registers, but must be an upper bound on the number of SSE
2292 // registers used and is in the range 0 - 8 inclusive.
2293
2294 // Count the number of XMM registers allocated.
2295 static const MCPhysReg XMMArgRegs[] = {
2296 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2297 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2298 };
2299 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2300 assert((Subtarget.hasSSE1() || !NumXMMRegs)
2301 && "SSE registers cannot be used when SSE is disabled");
2302 RegsToPass.push_back(std::make_pair(Register(X86::AL),
2303 DAG.getConstant(NumXMMRegs, dl,
2304 MVT::i8)));
2305 }
2306
2307 if (isVarArg && IsMustTail) {
2308 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2309 for (const auto &F : Forwards) {
2310 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2311 RegsToPass.push_back(std::make_pair(F.PReg, Val));
2312 }
2313 }
2314
2315 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2316 // don't need this because the eligibility check rejects calls that require
2317 // shuffling arguments passed in memory.
2318 if (!IsSibcall && isTailCall) {
2319 // Force all the incoming stack arguments to be loaded from the stack
2320 // before any new outgoing arguments are stored to the stack, because the
2321 // outgoing stack slots may alias the incoming argument stack slots, and
2322 // the alias isn't otherwise explicit. This is slightly more conservative
2323 // than necessary, because it means that each store effectively depends
2324 // on every argument instead of just those arguments it would clobber.
2325 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
2326
2327 SmallVector<SDValue, 8> MemOpChains2;
2328 SDValue FIN;
2329 int FI = 0;
2330 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2331 ++I, ++OutsIndex) {
2332 CCValAssign &VA = ArgLocs[I];
2333
2334 if (VA.isRegLoc()) {
2335 if (VA.needsCustom()) {
2336 assert((CallConv == CallingConv::X86_RegCall) &&
2337 "Expecting custom case only in regcall calling convention");
2338 // This means that we are in special case where one argument was
2339 // passed through two register locations - Skip the next location
2340 ++I;
2341 }
2342
2343 continue;
2344 }
2345
2346 assert(VA.isMemLoc());
2347 SDValue Arg = OutVals[OutsIndex];
2348 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2349 // Skip inalloca/preallocated arguments. They don't require any work.
2350 if (Flags.isInAlloca() || Flags.isPreallocated())
2351 continue;
2352 // Create frame index.
2353 int32_t Offset = VA.getLocMemOffset()+FPDiff;
2354 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2355 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
2356 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2357
2358 if (Flags.isByVal()) {
2359 // Copy relative to framepointer.
2361 if (!StackPtr.getNode())
2362 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2365 StackPtr, Source);
2366
2367 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
2368 ArgChain,
2369 Flags, DAG, dl));
2370 } else {
2371 // Store relative to framepointer.
2372 MemOpChains2.push_back(DAG.getStore(
2373 ArgChain, dl, Arg, FIN,
2375 }
2376 }
2377
2378 if (!MemOpChains2.empty())
2379 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2380
2381 // Store the return address to the appropriate stack slot.
2382 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2384 RegInfo->getSlotSize(), FPDiff, dl);
2385 }
2386
2387 // Build a sequence of copy-to-reg nodes chained together with token chain
2388 // and glue operands which copy the outgoing args into registers.
2389 SDValue InGlue;
2390 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2391 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2392 RegsToPass[i].second, InGlue);
2393 InGlue = Chain.getValue(1);
2394 }
2395
2396 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2397 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2398 // In the 64-bit large code model, we have to make all calls
2399 // through a register, since the call instruction's 32-bit
2400 // pc-relative offset may not be large enough to hold the whole
2401 // address.
2402 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2403 Callee->getOpcode() == ISD::ExternalSymbol) {
2404 // Lower direct calls to global addresses and external symbols. Setting
2405 // ForCall to true here has the effect of removing WrapperRIP when possible
2406 // to allow direct calls to be selected without first materializing the
2407 // address into a register.
2408 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
2409 } else if (Subtarget.isTarget64BitILP32() &&
2410 Callee.getValueType() == MVT::i32) {
2411 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
2412 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2413 }
2414
2416
2417 if (!IsSibcall && isTailCall && !IsMustTail) {
2418 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
2419 InGlue = Chain.getValue(1);
2420 }
2421
2422 Ops.push_back(Chain);
2423 Ops.push_back(Callee);
2424
2425 if (isTailCall)
2426 Ops.push_back(DAG.getSignedTargetConstant(FPDiff, dl, MVT::i32));
2427
2428 // Add argument registers to the end of the list so that they are known live
2429 // into the call.
2430 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2431 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2432 RegsToPass[i].second.getValueType()));
2433
2434 // Add a register mask operand representing the call-preserved registers.
2435 const uint32_t *Mask = [&]() {
2436 auto AdaptedCC = CallConv;
2437 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2438 // use X86_INTR calling convention because it has the same CSR mask
2439 // (same preserved registers).
2440 if (HasNCSR)
2442 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
2443 // to use the CSR_NoRegs_RegMask.
2444 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
2445 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2446 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2447 }();
2448 assert(Mask && "Missing call preserved mask for calling convention");
2449
2450 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getFramePtr())) {
2451 X86Info->setFPClobberedByCall(true);
2452 if (CLI.CB && isa<InvokeInst>(CLI.CB))
2453 X86Info->setFPClobberedByInvoke(true);
2454 }
2455 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getBaseRegister())) {
2456 X86Info->setBPClobberedByCall(true);
2457 if (CLI.CB && isa<InvokeInst>(CLI.CB))
2458 X86Info->setBPClobberedByInvoke(true);
2459 }
2460
2461 // If this is an invoke in a 32-bit function using a funclet-based
2462 // personality, assume the function clobbers all registers. If an exception
2463 // is thrown, the runtime will not restore CSRs.
2464 // FIXME: Model this more precisely so that we can register allocate across
2465 // the normal edge and spill and fill across the exceptional edge.
2466 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
2467 const Function &CallerFn = MF.getFunction();
2468 EHPersonality Pers =
2469 CallerFn.hasPersonalityFn()
2472 if (isFuncletEHPersonality(Pers))
2473 Mask = RegInfo->getNoPreservedMask();
2474 }
2475
2476 // Define a new register mask from the existing mask.
2477 uint32_t *RegMask = nullptr;
2478
2479 // In some calling conventions we need to remove the used physical registers
2480 // from the reg mask. Create a new RegMask for such calling conventions.
2481 // RegMask for calling conventions that disable only return registers (e.g.
2482 // preserve_most) will be modified later in LowerCallResult.
2483 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
2484 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
2485 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2486
2487 // Allocate a new Reg Mask and copy Mask.
2488 RegMask = MF.allocateRegMask();
2489 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
2490 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
2491
2492 // Make sure all sub registers of the argument registers are reset
2493 // in the RegMask.
2494 if (ShouldDisableArgRegs) {
2495 for (auto const &RegPair : RegsToPass)
2496 for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2497 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2498 }
2499
2500 // Create the RegMask Operand according to our updated mask.
2501 Ops.push_back(DAG.getRegisterMask(RegMask));
2502 } else {
2503 // Create the RegMask Operand according to the static mask.
2504 Ops.push_back(DAG.getRegisterMask(Mask));
2505 }
2506
2507 if (InGlue.getNode())
2508 Ops.push_back(InGlue);
2509
2510 if (isTailCall) {
2511 // We used to do:
2512 //// If this is the first return lowered for this function, add the regs
2513 //// to the liveout set for the function.
2514 // This isn't right, although it's probably harmless on x86; liveouts
2515 // should be computed from returns not tail calls. Consider a void
2516 // function making a tail call to a function returning int.
2518 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, Ops);
2519
2520 if (IsCFICall)
2521 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2522
2523 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2524 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2525 return Ret;
2526 }
2527
2528 // Returns a chain & a glue for retval copy to use.
2529 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2530 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
2531 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
2532 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
2533 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2534 // expanded to the call, directly followed by a special marker sequence and
2535 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
2536 assert(!isTailCall &&
2537 "tail calls cannot be marked with clang.arc.attachedcall");
2538 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2539
2540 // Add a target global address for the retainRV/claimRV runtime function
2541 // just before the call target.
2543 auto PtrVT = getPointerTy(DAG.getDataLayout());
2544 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
2545 Ops.insert(Ops.begin() + 1, GA);
2546 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
2547 } else {
2548 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
2549 }
2550
2551 if (IsCFICall)
2552 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2553
2554 InGlue = Chain.getValue(1);
2555 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2556 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2557
2558 // Save heapallocsite metadata.
2559 if (CLI.CB)
2560 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
2561 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
2562
2563 // Create the CALLSEQ_END node.
2564 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2565 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2567 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2568 else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
2569 // If this call passes a struct-return pointer, the callee
2570 // pops that struct pointer.
2571 NumBytesForCalleeToPop = 4;
2572
2573 // Returns a glue for retval copy to use.
2574 if (!IsSibcall) {
2575 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
2576 InGlue, dl);
2577 InGlue = Chain.getValue(1);
2578 }
2579
2580 if (CallingConv::PreserveNone == CallConv)
2581 for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
2582 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() ||
2583 Outs[I].Flags.isSwiftError()) {
2584 errorUnsupported(DAG, dl,
2585 "Swift attributes can't be used with preserve_none");
2586 break;
2587 }
2588 }
2589
2590 // Handle result values, copying them out of physregs into vregs that we
2591 // return.
2592 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2593 InVals, RegMask);
2594}
2595
2596//===----------------------------------------------------------------------===//
2597// Fast Calling Convention (tail call) implementation
2598//===----------------------------------------------------------------------===//
2599
2600// Like the stdcall convention (callee cleans up arguments), except that ECX is
2601// reserved for storing the tail-called function address. Only 2 registers are
2602// free for argument passing (inreg). Tail call optimization is performed
2603// provided:
2604// * tailcallopt is enabled
2605// * caller/callee are fastcc
2606// On X86_64 architecture with GOT-style position independent code only local
2607// (within module) calls are supported at the moment.
2608// To keep the stack aligned according to platform abi the function
2609// GetAlignedArgumentStackSize ensures that argument delta is always multiples
2610// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
2611// If a tail called function callee has more arguments than the caller the
2612// caller needs to make sure that there is room to move the RETADDR to. This is
2613// achieved by reserving an area the size of the argument delta right after the
2614// original RETADDR, but before the saved framepointer or the spilled registers
2615// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
2616// stack layout:
2617// arg1
2618// arg2
2619// RETADDR
2620// [ new RETADDR
2621// move area ]
2622// (possible EBP)
2623// ESI
2624// EDI
2625// local1 ..
2626
2627/// Make the stack size align e.g 16n + 12 aligned for a 16-byte align
2628/// requirement.
2629unsigned
2630X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2631 SelectionDAG &DAG) const {
2632 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2633 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2634 assert(StackSize % SlotSize == 0 &&
2635 "StackSize must be a multiple of SlotSize");
2636 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
2637}
2638
2639/// Return true if the given stack call argument is already available in the
2640/// same position (relatively) of the caller's incoming argument stack.
2641static
2644 const X86InstrInfo *TII, const CCValAssign &VA) {
2645 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2646
2647 for (;;) {
2648 // Look through nodes that don't alter the bits of the incoming value.
2649 unsigned Op = Arg.getOpcode();
2650 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2651 Op == ISD::AssertZext) {
2652 Arg = Arg.getOperand(0);
2653 continue;
2654 }
2655 if (Op == ISD::TRUNCATE) {
2656 const SDValue &TruncInput = Arg.getOperand(0);
2657 if (TruncInput.getOpcode() == ISD::AssertZext &&
2658 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
2659 Arg.getValueType()) {
2660 Arg = TruncInput.getOperand(0);
2661 continue;
2662 }
2663 }
2664 break;
2665 }
2666
2667 int FI = INT_MAX;
2668 if (Arg.getOpcode() == ISD::CopyFromReg) {
2669 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2670 if (!VR.isVirtual())
2671 return false;
2672 MachineInstr *Def = MRI->getVRegDef(VR);
2673 if (!Def)
2674 return false;
2675 if (!Flags.isByVal()) {
2676 if (!TII->isLoadFromStackSlot(*Def, FI))
2677 return false;
2678 } else {
2679 unsigned Opcode = Def->getOpcode();
2680 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2681 Opcode == X86::LEA64_32r) &&
2682 Def->getOperand(1).isFI()) {
2683 FI = Def->getOperand(1).getIndex();
2684 Bytes = Flags.getByValSize();
2685 } else
2686 return false;
2687 }
2688 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2689 if (Flags.isByVal())
2690 // ByVal argument is passed in as a pointer but it's now being
2691 // dereferenced. e.g.
2692 // define @foo(%struct.X* %A) {
2693 // tail call @bar(%struct.X* byval %A)
2694 // }
2695 return false;
2696 SDValue Ptr = Ld->getBasePtr();
2697 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2698 if (!FINode)
2699 return false;
2700 FI = FINode->getIndex();
2701 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
2702 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
2703 FI = FINode->getIndex();
2704 Bytes = Flags.getByValSize();
2705 } else
2706 return false;
2707
2708 assert(FI != INT_MAX);
2709 if (!MFI.isFixedObjectIndex(FI))
2710 return false;
2711
2712 if (Offset != MFI.getObjectOffset(FI))
2713 return false;
2714
2715 // If this is not byval, check that the argument stack object is immutable.
2716 // inalloca and argument copy elision can create mutable argument stack
2717 // objects. Byval objects can be mutated, but a byval call intends to pass the
2718 // mutated memory.
2719 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
2720 return false;
2721
2722 if (VA.getLocVT().getFixedSizeInBits() >
2724 // If the argument location is wider than the argument type, check that any
2725 // extension flags match.
2726 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
2727 Flags.isSExt() != MFI.isObjectSExt(FI)) {
2728 return false;
2729 }
2730 }
2731
2732 return Bytes == MFI.getObjectSize(FI);
2733}
2734
2735/// Check whether the call is eligible for tail call optimization. Targets
2736/// that want to do tail call optimization should implement this function.
2737/// Note that the x86 backend does not check musttail calls for eligibility! The
2738/// rest of x86 tail call lowering must be prepared to forward arguments of any
2739/// type.
2740bool X86TargetLowering::IsEligibleForTailCallOptimization(
2742 SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const {
2743 SelectionDAG &DAG = CLI.DAG;
2744 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2745 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2747 SDValue Callee = CLI.Callee;
2748 CallingConv::ID CalleeCC = CLI.CallConv;
2749 bool isVarArg = CLI.IsVarArg;
2750
2751 if (!mayTailCallThisCC(CalleeCC))
2752 return false;
2753
2754 // If -tailcallopt is specified, make fastcc functions tail-callable.
2756 const Function &CallerF = MF.getFunction();
2757
2758 // If the function return type is x86_fp80 and the callee return type is not,
2759 // then the FP_EXTEND of the call result is not a nop. It's not safe to
2760 // perform a tailcall optimization here.
2761 if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
2762 return false;
2763
2764 CallingConv::ID CallerCC = CallerF.getCallingConv();
2765 bool CCMatch = CallerCC == CalleeCC;
2766 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
2767 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
2768 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
2769 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
2770
2771 // Win64 functions have extra shadow space for argument homing. Don't do the
2772 // sibcall if the caller and callee have mismatched expectations for this
2773 // space.
2774 if (IsCalleeWin64 != IsCallerWin64)
2775 return false;
2776
2777 if (IsGuaranteeTCO) {
2778 if (canGuaranteeTCO(CalleeCC) && CCMatch)
2779 return true;
2780 return false;
2781 }
2782
2783 // Look for obvious safe cases to perform tail call optimization that do not
2784 // require ABI changes. This is what gcc calls sibcall.
2785
2786 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
2787 // emit a special epilogue.
2788 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2789 if (RegInfo->hasStackRealignment(MF))
2790 return false;
2791
2792 // Also avoid sibcall optimization if we're an sret return fn and the callee
2793 // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
2794 // insufficient.
2796 // For a compatible tail call the callee must return our sret pointer. So it
2797 // needs to be (a) an sret function itself and (b) we pass our sret as its
2798 // sret. Condition #b is harder to determine.
2799 return false;
2800 } else if (IsCalleePopSRet)
2801 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
2802 // expect that.
2803 return false;
2804
2805 // Do not sibcall optimize vararg calls unless all arguments are passed via
2806 // registers.
2807 LLVMContext &C = *DAG.getContext();
2808 if (isVarArg && !Outs.empty()) {
2809 // Optimizing for varargs on Win64 is unlikely to be safe without
2810 // additional testing.
2811 if (IsCalleeWin64 || IsCallerWin64)
2812 return false;
2813
2814 for (const auto &VA : ArgLocs)
2815 if (!VA.isRegLoc())
2816 return false;
2817 }
2818
2819 // If the call result is in ST0 / ST1, it needs to be popped off the x87
2820 // stack. Therefore, if it's not used by the call it is not safe to optimize
2821 // this into a sibcall.
2822 bool Unused = false;
2823 for (const auto &In : Ins) {
2824 if (!In.Used) {
2825 Unused = true;
2826 break;
2827 }
2828 }
2829 if (Unused) {
2831 CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
2832 RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2833 for (const auto &VA : RVLocs) {
2834 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
2835 return false;
2836 }
2837 }
2838
2839 // Check that the call results are passed in the same way.
2840 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2842 return false;
2843 // The callee has to preserve all registers the caller needs to preserve.
2844 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2845 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2846 if (!CCMatch) {
2847 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2848 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2849 return false;
2850 }
2851
2852 // The stack frame of the caller cannot be replaced by the tail-callee one's
2853 // if the function is required to preserve all the registers. Conservatively
2854 // prevent tail optimization even if hypothetically all the registers are used
2855 // for passing formal parameters or returning values.
2856 if (CallerF.hasFnAttribute("no_caller_saved_registers"))
2857 return false;
2858
2859 unsigned StackArgsSize = CCInfo.getStackSize();
2860
2861 // If the callee takes no arguments then go on to check the results of the
2862 // call.
2863 if (!Outs.empty()) {
2864 if (StackArgsSize > 0) {
2865 // Check if the arguments are already laid out in the right way as
2866 // the caller's fixed stack objects.
2867 MachineFrameInfo &MFI = MF.getFrameInfo();
2868 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2869 const X86InstrInfo *TII = Subtarget.getInstrInfo();
2870 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2871 const CCValAssign &VA = ArgLocs[I];
2872 SDValue Arg = OutVals[I];
2873 ISD::ArgFlagsTy Flags = Outs[I].Flags;
2875 return false;
2876 if (!VA.isRegLoc()) {
2877 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
2878 TII, VA))
2879 return false;
2880 }
2881 }
2882 }
2883
2884 bool PositionIndependent = isPositionIndependent();
2885 // If the tailcall address may be in a register, then make sure it's
2886 // possible to register allocate for it. In 32-bit, the call address can
2887 // only target EAX, EDX, or ECX since the tail call must be scheduled after
2888 // callee-saved registers are restored. These happen to be the same
2889 // registers used to pass 'inreg' arguments so watch out for those.
2890 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
2891 !isa<ExternalSymbolSDNode>(Callee)) ||
2892 PositionIndependent)) {
2893 unsigned NumInRegs = 0;
2894 // In PIC we need an extra register to formulate the address computation
2895 // for the callee.
2896 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
2897
2898 for (const auto &VA : ArgLocs) {
2899 if (!VA.isRegLoc())
2900 continue;
2901 Register Reg = VA.getLocReg();
2902 switch (Reg) {
2903 default: break;
2904 case X86::EAX: case X86::EDX: case X86::ECX:
2905 if (++NumInRegs == MaxInRegs)
2906 return false;
2907 break;
2908 }
2909 }
2910 }
2911
2912 const MachineRegisterInfo &MRI = MF.getRegInfo();
2913 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2914 return false;
2915 }
2916
2917 bool CalleeWillPop =
2918 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
2920
2921 if (unsigned BytesToPop =
2923 // If we have bytes to pop, the callee must pop them.
2924 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
2925 if (!CalleePopMatches)
2926 return false;
2927 } else if (CalleeWillPop && StackArgsSize > 0) {
2928 // If we don't have bytes to pop, make sure the callee doesn't pop any.
2929 return false;
2930 }
2931
2932 return true;
2933}
2934
2935/// Determines whether the callee is required to pop its own arguments.
2936/// Callee pop is necessary to support tail calls.
2938 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
2939 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
2940 // can guarantee TCO.
2941 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
2942 return true;
2943
2944 switch (CallingConv) {
2945 default:
2946 return false;
2951 return !is64Bit;
2952 }
2953}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
uint64_t Addr
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:235
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
Module.h This file contains the declarations for the Module class.
static LVOptions Options
Definition: LVOptions.cpp:25
const MCPhysReg ArgGPRs[]
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt)
Return true if the function is being made into a tailcall target by changing its ABI.
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const M68kInstrInfo *TII, const CCValAssign &VA)
Return true if the given stack call argument is already available in the same position (relatively) o...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
This file defines ARC utility functions which are used by various parts of the compiler.
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static bool is64Bit(const char *name)
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
Lowers masks values (v*i1) to the local register values.
static void Passv64i1ArgInRegs(const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg, SmallVectorImpl< std::pair< Register, SDValue > > &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, const X86Subtarget &Subtarget)
Breaks v64i1 value into two registers and adds the new node to the DAG.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget, SDValue *InGlue=nullptr)
Reads two 32 bit registers and creates a 64 bit mask value.
static ArrayRef< MCPhysReg > get64BitArgumentXMMs(MachineFunction &MF, CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static bool isSortedByValueNo(ArrayRef< CCValAssign > ArgLocs)
static ArrayRef< MCPhysReg > get64BitArgumentGPRs(CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static std::pair< MVT, unsigned > handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, const X86Subtarget &Subtarget)
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, const char *Msg)
Call this when the user attempts to do something unsupported, like returning a double without SSE2 en...
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, unsigned SlotSize, int FPDiff, const SDLoc &dl)
Emit a store of the return address if tail call optimization is performed and it is required (FPDiff!...
static bool hasCalleePopSRet(const SmallVectorImpl< T > &Args, const X86Subtarget &Subtarget)
Determines whether Args, either a set of outgoing arguments to a call, or a set of incoming args of a...
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static bool hasStackGuardSlotTLS(const Triple &TargetTriple)
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
The function will lower a register of various sizes (8/16/32/64) to a mask value of the expected size...
static Constant * SegmentOffset(IRBuilderBase &IRB, int Offset, unsigned AddressSpace)
static bool isBitAligned(Align Alignment, uint64_t SizeInBits)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
CCState - This class holds information needed while lowering arguments and return values.
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
void convertToReg(MCRegister Reg)
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1407
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2307
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:457
Diagnostic information for unsupported feature in backend.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:277
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:905
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1048
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void setDSOLocal(bool Local)
Definition: GlobalValue.h:303
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
LLVMContext & getContext() const
Definition: IRBuilder.h:173
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:566
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:390
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Metadata node.
Definition: Metadata.h:1069
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setObjectZExt(int ObjectIdx, bool IsZExt)
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setObjectSExt(int ObjectIdx, bool IsSExt)
bool isImmutableObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to an immutable object.
void setHasTailCall(bool V=true)
bool isObjectZExt(int ObjectIdx) const
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isObjectSExt(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
uint32_t * allocateRegMask()
Allocate and initialize a register mask with NumRegister bits.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
Definition: MachineInstr.h:69
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
@ EK_LabelDifference64
EK_LabelDifference64 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOStore
The memory access writes data.
static unsigned getRegMaskSize(unsigned NumRegs)
Returns number of elements needed for a regmask array.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void disableCalleeSavedRegister(MCRegister Reg)
Disables the register from the list of CSRs.
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:686
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
void setCFIType(uint32_t Type)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:748
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:799
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:825
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:495
void addHeapAllocSite(const SDNode *Node, MDNode *MD)
Set HeapAllocSite to be associated with Node.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:710
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:496
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:698
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:490
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
SDValue getRegisterMask(const uint32_t *RegMask)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVMContext * getContext() const
Definition: SelectionDAG.h:508
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:578
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:147
Class to represent struct types.
Definition: DerivedTypes.h:218
Information about stack frame layout on the target.
Align getStackAlign() const
getStackAlign - This method returns the number of bytes to which the stack pointer must be aligne...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const
Returns the target-specific address of the unsafe stack pointer.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that was previously inserted by insertSSPDeclarations, if any, otherwise return nul...
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
bool isPositionIndependent() const
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isAndroidVersionLT(unsigned Major) const
Definition: Triple.h:782
bool isAndroid() const
Tests whether the target is Android.
Definition: Triple.h:780
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:678
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:568
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition: Triple.h:714
bool isOSFuchsia() const
Definition: Triple.h:598
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:645
bool isWindowsItaniumEnvironment() const
Definition: Triple.h:660
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition: Type.h:159
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
void setBytesToPopOnReturn(unsigned bytes)
void setVarArgsGPOffset(unsigned Offset)
void setArgumentStackSize(unsigned size)
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
void setVarArgsFPOffset(unsigned Offset)
unsigned getSlotSize() const
bool hasSSE1() const
Definition: X86Subtarget.h:193
bool useLight256BitInstructions() const
Definition: X86Subtarget.h:258
bool isPICStyleGOT() const
Definition: X86Subtarget.h:328
bool isTargetMCU() const
Definition: X86Subtarget.h:297
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:300
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:173
bool isTargetDarwin() const
Definition: X86Subtarget.h:280
const Triple & getTargetTriple() const
Definition: X86Subtarget.h:278
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:122
bool useAVX512Regs() const
Definition: X86Subtarget.h:253
bool isTargetCOFF() const
Definition: X86Subtarget.h:287
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:337
bool hasAVX512() const
Definition: X86Subtarget.h:201
bool hasSSE41() const
Definition: X86Subtarget.h:197
bool hasSSE2() const
Definition: X86Subtarget.h:194
bool isTargetFuchsia() const
Definition: X86Subtarget.h:298
bool isPICStyleRIPRel() const
Definition: X86Subtarget.h:329
bool isTargetCygMing() const
Definition: X86Subtarget.h:320
const X86RegisterInfo * getRegisterInfo() const override
Definition: X86Subtarget.h:132
bool hasAVX() const
Definition: X86Subtarget.h:199
unsigned getPreferVectorWidth() const
Definition: X86Subtarget.h:225
bool isTargetAndroid() const
Definition: X86Subtarget.h:293
const X86FrameLowering * getFrameLowering() const override
Definition: X86Subtarget.h:124
bool hasAVX2() const
Definition: X86Subtarget.h:200
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMemoryAccessFast(EVT VT, Align Alignment) const
bool useSoftFloat() const override
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool isSafeMemOpType(MVT VT) const override
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
Return the desired alignment for ByVal aggregate function arguments in the caller parameter area.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Returns true if the target allows unaligned memory accesses of the specified type.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that was previously inserted by insertSSPDeclarations, if any, otherwise return nul...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void markLibCallAttributes(MachineFunction *MF, unsigned CC, ArgListTy &Args) const override
Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const override
Return true if the target stores SafeStack pointer at a fixed offset in some non-standard address spa...
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
This function returns true if the memory access is aligned or if the target allows this specific unal...
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the value type to use for ISD::SETCC.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override
For types supported by the target, this is an identity function.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ X86_64_SysV
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
Definition: CallingConv.h:151
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition: CallingConv.h:63
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ X86_ThisCall
Similar to X86_StdCall.
Definition: CallingConv.h:122
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
Definition: CallingConv.h:66
@ X86_StdCall
stdcall is mostly used by the Win32 API.
Definition: CallingConv.h:99
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ X86_VectorCall
MSVC calling convention that passes vectors and vector aggregates in SSE registers.
Definition: CallingConv.h:163
@ Intel_OCL_BI
Used for Intel OpenCL built-ins.
Definition: CallingConv.h:147
@ PreserveNone
Used for runtime calls that preserve no general registers.
Definition: CallingConv.h:90
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:159
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
Definition: CallingConv.h:203
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ExternalSymbol
Definition: ISDOpcodes.h:83
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ FS
Definition: X86.h:211
@ GS
Definition: X86.h:210
Reg
All possible values of the reg field in the ModR/M byte.
@ RET_GLUE
Return with a glue operand.
@ IRET
Return from interrupt. Operand 0 is the number of bytes to pop.
@ CALL
These operations represent an abstract X86 call instruction, which includes a bunch of information.
@ GlobalBaseReg
On Darwin, this node represents the result of the popl at function entry, used for PIC code.
@ TC_RETURN
Tail call return.
@ NT_CALL
Same as call except it adds the NoTrack prefix.
@ MOVDQ2Q
Copies a 64-bit value from the low word of an XMM vector to an MMX vector.
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget, const MachineFunction &MF)
True if the target supports the extended frame for async Swift functions.
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
std::optional< Function * > getAttachedARCFunction(const CallBase *CB)
This function returns operand bundle clang_arc_attachedcall's argument, which is the address of the A...
Definition: ObjCARCUtil.h:43
bool hasAttachedCallOpBundle(const CallBase *CB)
Definition: ObjCARCUtil.h:29
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1926
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition: ValueTypes.h:295
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
bool is512BitVector() const
Return true if this is a 512-bit vector type.
Definition: ValueTypes.h:217
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:212
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
Describes a register that needs to be forwarded from the prologue to a musttail call.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals