X86ISelLoweringCall.cpp
1//===- llvm/lib/Target/X86/X86ISelLoweringCall.cpp - Call lowering --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements the lowering of LLVM calls to DAG nodes.
11//
12//===----------------------------------------------------------------------===//
13
15#include "X86.h"
16#include "X86CallingConv.h"
17#include "X86FrameLowering.h"
18#include "X86ISelLowering.h"
19#include "X86InstrBuilder.h"
21#include "X86TargetMachine.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
29#include "llvm/IR/Module.h"
30
31#define DEBUG_TYPE "x86-isel"
32
33using namespace llvm;
34
35STATISTIC(NumTailCalls, "Number of tail calls");
36
37/// Call this when the user attempts to do something unsupported, like
38/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
39/// report_fatal_error, so calling code should attempt to recover without
40/// crashing.
41static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
42 const char *Msg) {
43 MachineFunction &MF = DAG.getMachineFunction();
44 DAG.getContext()->diagnose(
45 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
46}
47
48/// Returns true if a CC can dynamically exclude a register from the list of
49/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
50/// the return registers.
51static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
52 switch (CC) {
53 default:
54 return false;
55 case CallingConv::X86_RegCall:
56 case CallingConv::PreserveMost:
57 case CallingConv::PreserveAll:
58 return true;
59 }
60}
61
62/// Returns true if a CC can dynamically exclude a register from the list of
63/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
64/// the parameters.
65static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
66 return CC == CallingConv::X86_RegCall;
67}
68
69static std::pair<MVT, unsigned>
70handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
71 const X86Subtarget &Subtarget) {
72 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
73 // convention is one that uses k registers.
74 if (NumElts == 2)
75 return {MVT::v2i64, 1};
76 if (NumElts == 4)
77 return {MVT::v4i32, 1};
78 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
79 CC != CallingConv::Intel_OCL_BI)
80 return {MVT::v8i16, 1};
81 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
82 CC != CallingConv::Intel_OCL_BI)
83 return {MVT::v16i8, 1};
84 // v32i1 passes in ymm unless we have BWI and the calling convention is
85 // regcall.
86 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
87 return {MVT::v32i8, 1};
88 // Split v64i1 vectors if we don't have v64i8 available.
89 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
90 if (Subtarget.useAVX512Regs())
91 return {MVT::v64i8, 1};
92 return {MVT::v32i8, 2};
93 }
94
95 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
96 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
97 NumElts > 64)
98 return {MVT::i8, NumElts};
99
100 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
101}
102
103MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
104 CallingConv::ID CC,
105 EVT VT) const {
106 if (VT.isVector()) {
107 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
108 unsigned NumElts = VT.getVectorNumElements();
109
110 MVT RegisterVT;
111 unsigned NumRegisters;
112 std::tie(RegisterVT, NumRegisters) =
113 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
114 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
115 return RegisterVT;
116 }
117
118 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
119 return MVT::v8f16;
120 }
121
122 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
123 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
124 !Subtarget.hasX87())
125 return MVT::i32;
126
127 if (isTypeLegal(MVT::f16)) {
128 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
129 return getRegisterTypeForCallingConv(
130 Context, CC, VT.changeVectorElementType(MVT::f16));
131
132 if (VT == MVT::bf16)
133 return MVT::f16;
134 }
135
136 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
137}
138
139unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
140 CallingConv::ID CC,
141 EVT VT) const {
142 if (VT.isVector()) {
143 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
144 unsigned NumElts = VT.getVectorNumElements();
145
146 MVT RegisterVT;
147 unsigned NumRegisters;
148 std::tie(RegisterVT, NumRegisters) =
149 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
150 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
151 return NumRegisters;
152 }
153
154 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
155 return 1;
156 }
157
158 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
159 // x87 is disabled.
160 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
161 if (VT == MVT::f64)
162 return 2;
163 if (VT == MVT::f80)
164 return 3;
165 }
166
167 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
168 isTypeLegal(MVT::f16))
169 return getNumRegistersForCallingConv(Context, CC,
170 VT.changeVectorElementType(MVT::f16));
171
172 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
173}
174
175unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
176 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
177 unsigned &NumIntermediates, MVT &RegisterVT) const {
178 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
179 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
180 Subtarget.hasAVX512() &&
181 (!isPowerOf2_32(VT.getVectorNumElements()) ||
182 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
183 VT.getVectorNumElements() > 64)) {
184 RegisterVT = MVT::i8;
185 IntermediateVT = MVT::i1;
186 NumIntermediates = VT.getVectorNumElements();
187 return NumIntermediates;
188 }
189
190 // Split v64i1 vectors if we don't have v64i8 available.
191 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
192 CC != CallingConv::X86_RegCall) {
193 RegisterVT = MVT::v32i8;
194 IntermediateVT = MVT::v32i1;
195 NumIntermediates = 2;
196 return 2;
197 }
198
199 // Split vNbf16 vectors according to vNf16.
200 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
201 isTypeLegal(MVT::f16))
202 VT = VT.changeVectorElementType(MVT::f16);
203
204 return TargetLowering::getVectorTypeBreakdownForCallingConv(
205 Context, CC, VT, IntermediateVT, NumIntermediates, RegisterVT);
206}
207
208EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
209 LLVMContext &Context,
210 EVT VT) const {
211 if (!VT.isVector())
212 return MVT::i8;
213
214 if (Subtarget.hasAVX512()) {
215 // Figure out what this type will be legalized to.
216 EVT LegalVT = VT;
217 while (getTypeAction(Context, LegalVT) != TypeLegal)
218 LegalVT = getTypeToTransformTo(Context, LegalVT);
219
220 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
221 if (LegalVT.getSimpleVT().is512BitVector())
222 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
223
224 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
225 // If we legalized to less than a 512-bit vector, then we will use a vXi1
226 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
227 // vXi16/vXi8.
228 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
229 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
230 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
231 }
232 }
233
233
234 return VT.changeVectorElementTypeToInteger();
235}
236
237bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
238 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
239 const DataLayout &DL) const {
240 // On x86-64 i128 is split into two i64s and needs to be allocated to two
241 // consecutive registers, or spilled to the stack as a whole. On x86-32 i128
242 // is split to four i32s and never actually passed in registers, but we use
243 // the consecutive register mark to match it in TableGen.
244 if (Ty->isIntegerTy(128))
245 return true;
246
247 // On x86-32, fp128 acts the same as i128.
248 if (Subtarget.is32Bit() && Ty->isFP128Ty())
249 return true;
250
251 return false;
252}
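
// Illustrative sketch of the constraint above (assuming the SysV x86-64 ABI;
// `takesWide` is a hypothetical example, not an LLVM API): an __int128
// argument either occupies two consecutive GPRs or is spilled to the stack as
// a whole, never split between a register and a stack slot.
extern "C" void takesWide(__int128 V); // V -> RDI (low half) : RSI (high half)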
253
254/// Helper for getByValTypeAlignment to determine
255/// the desired ByVal argument alignment.
256static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
257 if (MaxAlign == 16)
258 return;
259 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
260 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
261 MaxAlign = Align(16);
262 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
263 Align EltAlign;
264 getMaxByValAlign(ATy->getElementType(), EltAlign);
265 if (EltAlign > MaxAlign)
266 MaxAlign = EltAlign;
267 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
268 for (auto *EltTy : STy->elements()) {
269 Align EltAlign;
270 getMaxByValAlign(EltTy, EltAlign);
271 if (EltAlign > MaxAlign)
272 MaxAlign = EltAlign;
273 if (MaxAlign == 16)
274 break;
275 }
276 }
277}
278
279/// Return the desired alignment for ByVal aggregate
280/// function arguments in the caller parameter area. For X86, aggregates
281/// that contain SSE vectors are placed at 16-byte boundaries while the rest
282/// are at 4-byte boundaries.
283Align X86TargetLowering::getByValTypeAlignment(Type *Ty,
284 const DataLayout &DL) const {
285 if (Subtarget.is64Bit())
286 return std::max(DL.getABITypeAlign(Ty), Align::Constant<8>());
287
288 Align Alignment(4);
289 if (Subtarget.hasSSE1())
290 getMaxByValAlign(Ty, Alignment);
291 return Alignment;
292}
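
// Illustrative sketch (assuming a 32-bit SSE-enabled target and a Clang/GCC
// style vector extension; the struct names are hypothetical): getMaxByValAlign
// looks for 128-bit vectors inside the aggregate, so a byval argument that
// contains one lands on a 16-byte boundary while a purely scalar aggregate
// keeps the default 4-byte boundary.
typedef float V4SF __attribute__((vector_size(16)));
struct ScalarsOnly { int A, B, C; };      // byval alignment stays at 4
struct HoldsVector { V4SF V; int Tag; };  // byval alignment is raised to 16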
293
294/// It returns EVT::Other if the type should be determined using generic
295/// target-independent logic.
296/// For vector ops we check that the overall size isn't larger than our
297/// preferred vector width.
298EVT X86TargetLowering::getOptimalMemOpType(
299 LLVMContext &Context, const MemOp &Op,
300 const AttributeList &FuncAttributes) const {
301 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
302 if (Op.size() >= 16 &&
303 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
304 // FIXME: Check if unaligned 64-byte accesses are slow.
305 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
306 (Subtarget.getPreferVectorWidth() >= 512)) {
307 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
308 }
309 // FIXME: Check if unaligned 32-byte accesses are slow.
310 if (Op.size() >= 32 && Subtarget.hasAVX() &&
311 Subtarget.useLight256BitInstructions()) {
312 // Although this isn't a well-supported type for AVX1, we'll let
313 // legalization and shuffle lowering produce the optimal codegen. If we
314 // choose an optimal type with a vector element larger than a byte,
315 // getMemsetStores() may create an intermediate splat (using an integer
316 // multiply) before we splat as a vector.
317 return MVT::v32i8;
318 }
319 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
320 return MVT::v16i8;
321 // TODO: Can SSE1 handle a byte vector?
322 // If we have SSE1 registers we should be able to use them.
323 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
324 (Subtarget.getPreferVectorWidth() >= 128))
325 return MVT::v4f32;
326 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
327 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
328 // Do not use f64 to lower memcpy if source is string constant. It's
329 // better to use i32 to avoid the loads.
330 // Also, do not use f64 to lower memset unless this is a memset of zeros.
331 // The gymnastics of splatting a byte value into an XMM register and then
332 // only using 8-byte stores (because this is a CPU with slow unaligned
333 // 16-byte accesses) makes that a loser.
334 return MVT::f64;
335 }
336 }
337 // This is a compromise. If we reach here, unaligned accesses may be slow on
338 // this target. However, creating smaller, aligned accesses could be even
339 // slower and would certainly be a lot more code.
340 if (Subtarget.is64Bit() && Op.size() >= 8)
341 return MVT::i64;
342 return MVT::i32;
343}
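
// Minimal sketch of the width preference above (simplified: it assumes
// unaligned 16-byte access is fast and ignores the NoImplicitFloat,
// 256-bit-lightness and string-source special cases; the helper name is
// illustrative only).
static unsigned preferredMemOpWidthInBytes(unsigned Size, bool HasAVX512,
                                           bool HasAVX, bool HasSSE2,
                                           bool Is64Bit) {
  if (Size >= 64 && HasAVX512)
    return 64; // v64i8 / v16i32 stores
  if (Size >= 32 && HasAVX)
    return 32; // v32i8 stores
  if (Size >= 16 && HasSSE2)
    return 16; // v16i8 stores
  if (Size >= 8 && Is64Bit)
    return 8;  // i64 stores
  return 4;    // i32 stores
}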
344
345bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
346 if (VT == MVT::f32)
347 return Subtarget.hasSSE1();
348 if (VT == MVT::f64)
349 return Subtarget.hasSSE2();
350 return true;
351}
352
353static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
354 return (8 * Alignment.value()) % SizeInBits == 0;
355}
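
// Two concrete data points for the check above: a 16-byte alignment covers a
// 128-bit access, while an 8-byte alignment does not.
static_assert((8 * 16) % 128 == 0, "Align(16) covers a 128-bit access");
static_assert((8 * 8) % 128 != 0, "Align(8) does not cover a 128-bit access");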
356
357bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
358 if (isBitAligned(Alignment, VT.getSizeInBits()))
359 return true;
360 switch (VT.getSizeInBits()) {
361 default:
362 // 8-byte and under are always assumed to be fast.
363 return true;
364 case 128:
365 return !Subtarget.isUnalignedMem16Slow();
366 case 256:
367 return !Subtarget.isUnalignedMem32Slow();
368 // TODO: What about AVX-512 (512-bit) accesses?
369 }
370}
371
372bool X86TargetLowering::allowsMisalignedMemoryAccesses(
373 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
374 unsigned *Fast) const {
375 if (Fast)
376 *Fast = isMemoryAccessFast(VT, Alignment);
377 // NonTemporal vector memory ops must be aligned.
378 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
379 // NT loads can only be vector aligned, so if it's less aligned than the
380 // minimum vector size (which we can split the vector down to), we might as
381 // well use a regular unaligned vector load.
382 // We don't have any NT loads pre-SSE41.
383 if (!!(Flags & MachineMemOperand::MOLoad))
384 return (Alignment < 16 || !Subtarget.hasSSE41());
385 return false;
386 }
387 // Misaligned accesses of any size are always allowed.
388 return true;
389}
390
391bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
392 const DataLayout &DL, EVT VT,
393 unsigned AddrSpace, Align Alignment,
394 MachineMemOperand::Flags Flags,
395 unsigned *Fast) const {
396 if (Fast)
397 *Fast = isMemoryAccessFast(VT, Alignment);
398 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
399 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
400 /*Fast=*/nullptr))
401 return true;
402 // NonTemporal vector memory ops are special, and must be aligned.
403 if (!isBitAligned(Alignment, VT.getSizeInBits()))
404 return false;
405 switch (VT.getSizeInBits()) {
406 case 128:
407 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
408 return true;
409 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
410 return true;
411 return false;
412 case 256:
413 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
414 return true;
415 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
416 return true;
417 return false;
418 case 512:
419 if (Subtarget.hasAVX512())
420 return true;
421 return false;
422 default:
423 return false; // Don't have NonTemporal vector memory ops of this size.
424 }
425 }
426 return true;
427}
428
429/// Return the entry encoding for a jump table in the
430/// current function. The returned value is a member of the
431/// MachineJumpTableInfo::JTEntryKind enum.
432unsigned X86TargetLowering::getJumpTableEncoding() const {
433 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
434 // symbol.
435 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
436 return MachineJumpTableInfo::EK_Custom32;
437 if (isPositionIndependent() &&
438 getTargetMachine().getCodeModel() == CodeModel::Large &&
439 !Subtarget.isTargetCOFF())
440 return MachineJumpTableInfo::EK_LabelDifference64;
441
442 // Otherwise, use the normal jump table encoding heuristics.
443 return TargetLowering::getJumpTableEncoding();
444}
445
446bool X86TargetLowering::useSoftFloat() const {
447 return Subtarget.useSoftFloat();
448}
449
450void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
451 ArgListTy &Args) const {
452
453 // Only relabel X86-32 for C / Stdcall CCs.
454 if (Subtarget.is64Bit())
455 return;
456 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
457 return;
458 unsigned ParamRegs = 0;
459 if (auto *M = MF->getFunction().getParent())
460 ParamRegs = M->getNumberRegisterParameters();
461
462 // Mark the first N integer arguments as being passed in registers.
463 for (auto &Arg : Args) {
464 Type *T = Arg.Ty;
465 if (T->isIntOrPtrTy())
466 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
467 unsigned numRegs = 1;
468 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
469 numRegs = 2;
470 if (ParamRegs < numRegs)
471 return;
472 ParamRegs -= numRegs;
473 Arg.IsInReg = true;
474 }
475 }
476}
477
478const MCExpr *
479X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
480 const MachineBasicBlock *MBB,
481 unsigned uid, MCContext &Ctx) const {
482 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
483 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
484 // entries.
485 return MCSymbolRefExpr::create(MBB->getSymbol(), X86::S_GOTOFF, Ctx);
486}
487
488/// Returns relocation base for the given PIC jumptable.
489SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
490 SelectionDAG &DAG) const {
491 if (!Subtarget.is64Bit())
492 // This doesn't have SDLoc associated with it, but is not really the
493 // same as a Register.
494 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
495 getPointerTy(DAG.getDataLayout()));
496 return Table;
497}
498
499/// This returns the relocation base for the given PIC jumptable,
500/// the same as getPICJumpTableRelocBase, but as an MCExpr.
501const MCExpr *X86TargetLowering::
502getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
503 MCContext &Ctx) const {
504 // X86-64 uses RIP relative addressing based on the jump table label.
505 if (Subtarget.isPICStyleRIPRel() ||
506 (Subtarget.is64Bit() &&
507 getTargetMachine().getCodeModel() == CodeModel::Large))
508 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx, true), Ctx);
509
510 // Otherwise, the reference is relative to the PIC base.
511 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
512}
513
514std::pair<const TargetRegisterClass *, uint8_t>
515X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
516 MVT VT) const {
517 const TargetRegisterClass *RRC = nullptr;
518 uint8_t Cost = 1;
519 switch (VT.SimpleTy) {
520 default:
521 return TargetLoweringBase::findRepresentativeClass(TRI, VT);
522 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
523 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
524 break;
525 case MVT::x86mmx:
526 RRC = &X86::VR64RegClass;
527 break;
528 case MVT::f32: case MVT::f64:
529 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
530 case MVT::v4f32: case MVT::v2f64:
531 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
532 case MVT::v8f32: case MVT::v4f64:
533 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
534 case MVT::v16f32: case MVT::v8f64:
535 RRC = &X86::VR128XRegClass;
536 break;
537 }
538 return std::make_pair(RRC, Cost);
539}
540
541unsigned X86TargetLowering::getAddressSpace() const {
542 if (Subtarget.is64Bit())
543 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? X86AS::GS
544 : X86AS::FS;
545 return X86AS::GS;
546}
547
548static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
549 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
550 TargetTriple.isAndroid();
551}
552
553static Constant *SegmentOffset(IRBuilderBase &IRB,
554 int Offset, unsigned AddressSpace) {
555 return ConstantExpr::getIntToPtr(
556 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
557 IRB.getPtrTy(AddressSpace));
558}
559
560Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
561 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
562 // tcbhead_t; use it instead of the usual global variable (see
563 // sysdeps/{i386,x86_64}/nptl/tls.h)
564 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
565 unsigned AddressSpace = getAddressSpace();
566
567 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
568 if (Subtarget.isTargetFuchsia())
569 return SegmentOffset(IRB, 0x10, AddressSpace);
570
571 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
572 // Specially, some users may customize the base reg and offset.
573 int Offset = M->getStackProtectorGuardOffset();
574 // If we don't set -stack-protector-guard-offset value:
575 // %fs:0x28, unless we're using a Kernel code model, in which case
576 // it's %gs:0x28. gs:0x14 on i386.
577 if (Offset == INT_MAX)
578 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
579
580 StringRef GuardReg = M->getStackProtectorGuardReg();
581 if (GuardReg == "fs")
582 AddressSpace = X86AS::FS;
583 else if (GuardReg == "gs")
584 AddressSpace = X86AS::GS;
585
586 // Use the symbol guard if the user specified one.
587 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
588 if (!GuardSymb.empty()) {
589 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
590 if (!GV) {
591 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
592 : Type::getInt32Ty(M->getContext());
593 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
594 nullptr, GuardSymb, nullptr,
595 GlobalValue::NotThreadLocal, AddressSpace);
596 if (!Subtarget.isTargetDarwin())
597 GV->setDSOLocal(M->getDirectAccessExternalData());
598 }
599 return GV;
600 }
601
602 return SegmentOffset(IRB, Offset, AddressSpace);
603 }
604 return TargetLowering::getIRStackGuard(IRB);
605}
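
// Quick reference for the fixed TLS stack-guard slots handled above (the
// offsets are the ones named in the comments of this function; the constant
// names are illustrative, not LLVM API).
constexpr int StackGuardTLSOffset64 = 0x28;      // %fs:0x28 (glibc/bionic x86-64)
constexpr int StackGuardTLSOffset32 = 0x14;      // %gs:0x14 (i386)
constexpr int StackGuardTLSOffsetFuchsia = 0x10; // ZX_TLS_STACK_GUARD_OFFSET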
606
607void X86TargetLowering::insertSSPDeclarations(Module &M) const {
608 // MSVC CRT provides functionalities for stack protection.
609 RTLIB::LibcallImpl SecurityCheckCookieLibcall =
610 getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
611
612 RTLIB::LibcallImpl SecurityCookieVar =
613 getLibcallImpl(RTLIB::STACK_CHECK_GUARD);
614 if (SecurityCheckCookieLibcall != RTLIB::Unsupported &&
615 SecurityCookieVar != RTLIB::Unsupported) {
616 // MSVC CRT provides functionalities for stack protection.
617 // MSVC CRT has a global variable holding security cookie.
618 M.getOrInsertGlobal(getLibcallImplName(SecurityCookieVar),
619 PointerType::getUnqual(M.getContext()));
620
621 // MSVC CRT has a function to validate security cookie.
622 FunctionCallee SecurityCheckCookie =
623 M.getOrInsertFunction(getLibcallImplName(SecurityCheckCookieLibcall),
624 Type::getVoidTy(M.getContext()),
625 PointerType::getUnqual(M.getContext()));
626
627 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
628 F->setCallingConv(CallingConv::X86_FastCall);
629 F->addParamAttr(0, Attribute::AttrKind::InReg);
630 }
631 return;
632 }
633
634 StringRef GuardMode = M.getStackProtectorGuard();
635
636 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
637 if ((GuardMode == "tls" || GuardMode.empty()) &&
638 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
639 return;
640 TargetLowering::insertSSPDeclarations(M);
641}
642
643Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
644 // MSVC CRT has a function to validate security cookie.
645 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
646 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
647 return M.getFunction("__security_check_cookie");
648 }
649 return TargetLowering::getSSPStackGuardCheck(M);
650}
651
652Value *
653X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
654 // Android provides a fixed TLS slot for the SafeStack pointer. See the
655 // definition of TLS_SLOT_SAFESTACK in
656 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
657 if (Subtarget.isTargetAndroid()) {
658 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
659 // %gs:0x24 on i386
660 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
661 return SegmentOffset(IRB, Offset, getAddressSpace());
662 }
663
664 // Fuchsia is similar.
665 if (Subtarget.isTargetFuchsia()) {
666 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
667 return SegmentOffset(IRB, 0x18, getAddressSpace());
668 }
669
670 return TargetLowering::getSafeStackPointerLocation(IRB);
671}
672
673//===----------------------------------------------------------------------===//
674// Return Value Calling Convention Implementation
675//===----------------------------------------------------------------------===//
676
677bool X86TargetLowering::CanLowerReturn(
678 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
679 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
680 const Type *RetTy) const {
681 SmallVector<CCValAssign, 16> RVLocs;
682 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
683 return CCInfo.CheckReturn(Outs, RetCC_X86);
684}
685
686const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
687 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
688 return ScratchRegs;
689}
690
691ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
692 static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
693 return RCRegs;
694}
695
696/// Lowers mask values (v*i1) to the local register values.
697/// \returns DAG node after lowering to register type
698static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
699 const SDLoc &DL, SelectionDAG &DAG) {
700 EVT ValVT = ValArg.getValueType();
701
702 if (ValVT == MVT::v1i1)
703 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
704 DAG.getIntPtrConstant(0, DL));
705
706 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
707 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
708 // Two stage lowering might be required
709 // bitcast: v8i1 -> i8 / v16i1 -> i16
710 // anyextend: i8 -> i32 / i16 -> i32
711 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
712 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
713 if (ValLoc == MVT::i32)
714 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
715 return ValToCopy;
716 }
717
718 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
719 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
720 // One stage lowering is required
721 // bitcast: v32i1 -> i32 / v64i1 -> i64
722 return DAG.getBitcast(ValLoc, ValArg);
723 }
724
725 return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
726}
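
// The two-stage v16i1 path above, shown on plain integers (a sketch with a
// hypothetical helper name, not LLVM API). The 16 mask bits survive the
// bitcast to i16 unchanged; the sketch zero-extends to 32 bits where the DAG
// node above only promises an any-extend (upper bits unspecified).
static unsigned lowerV16MaskToI32(unsigned short MaskBits) {
  return MaskBits; // bitcast v16i1 -> i16, then extend i16 -> i32
}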
727
728/// Breaks v64i1 value into two registers and adds the new node to the DAG
729static void Passv64i1ArgInRegs(
730 const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
731 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
732 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
733 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
734 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
735 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
736 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
737 "The value should reside in two registers");
738
739 // Before splitting the value we cast it to i64
740 Arg = DAG.getBitcast(MVT::i64, Arg);
741
742 // Splitting the value into two i32 types
743 SDValue Lo, Hi;
744 std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
745
746 // Attach the two i32 types into corresponding registers
747 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
748 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
749}
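
// The same split on plain integers (a sketch with hypothetical names, assuming
// the 64 mask bits are packed into a 64-bit integer): the low 32 bits go to
// the first register location and the high 32 bits to the next one.
struct MaskHalves { unsigned Lo, Hi; };
static MaskHalves splitMask64(unsigned long long Mask) {
  return {static_cast<unsigned>(Mask), static_cast<unsigned>(Mask >> 32)};
}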
750
751SDValue
752X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
753 bool isVarArg,
754 const SmallVectorImpl<ISD::OutputArg> &Outs,
755 const SmallVectorImpl<SDValue> &OutVals,
756 const SDLoc &dl, SelectionDAG &DAG) const {
757 MachineFunction &MF = DAG.getMachineFunction();
758 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
759
760 // In some cases we need to disable registers from the default CSR list.
761 // For example, when they are used as return registers (preserve_* and X86's
762 // regcall) or for argument passing (X86's regcall).
763 bool ShouldDisableCalleeSavedRegister =
764 shouldDisableRetRegFromCSR(CallConv) ||
765 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
766
767 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
768 report_fatal_error("X86 interrupts may not return any value");
769
770 SmallVector<CCValAssign, 16> RVLocs;
771 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
772 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
773
774 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
775 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
776 ++I, ++OutsIndex) {
777 CCValAssign &VA = RVLocs[I];
778 assert(VA.isRegLoc() && "Can only return in registers!");
779
780 // Add the register to the CalleeSaveDisableRegs list.
781 if (ShouldDisableCalleeSavedRegister)
783
784 SDValue ValToCopy = OutVals[OutsIndex];
785 EVT ValVT = ValToCopy.getValueType();
786
787 // Promote values to the appropriate types.
788 if (VA.getLocInfo() == CCValAssign::SExt)
789 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
790 else if (VA.getLocInfo() == CCValAssign::ZExt)
791 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
792 else if (VA.getLocInfo() == CCValAssign::AExt) {
793 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
794 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
795 else
796 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
797 }
798 else if (VA.getLocInfo() == CCValAssign::BCvt)
799 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
800
801 assert(VA.getLocInfo() != CCValAssign::FPExt &&
802 "Unexpected FP-extend for return value.");
803
804 // Report an error if we have attempted to return a value via an XMM
805 // register and SSE was disabled.
806 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
807 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
808 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
809 } else if (!Subtarget.hasSSE2() &&
810 X86::FR64XRegClass.contains(VA.getLocReg()) &&
811 ValVT == MVT::f64) {
812 // When returning a double via an XMM register, report an error if SSE2 is
813 // not enabled.
814 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
815 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
816 }
817
818 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
819 // the RET instruction and handled by the FP Stackifier.
820 if (VA.getLocReg() == X86::FP0 ||
821 VA.getLocReg() == X86::FP1) {
822 // If this is a copy from an xmm register to ST(0), use an FPExtend to
823 // change the value to the FP stack register class.
824 if (isScalarFPTypeInSSEReg(VA.getValVT()))
825 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
826 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
827 // Don't emit a copytoreg.
828 continue;
829 }
830
831 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
832 // which is returned in RAX / RDX.
833 if (Subtarget.is64Bit()) {
834 if (ValVT == MVT::x86mmx) {
835 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
836 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
837 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
838 ValToCopy);
839 // If we don't have SSE2 available, convert to v4f32 so the generated
840 // register is legal.
841 if (!Subtarget.hasSSE2())
842 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
843 }
844 }
845 }
846
847 if (VA.needsCustom()) {
848 assert(VA.getValVT() == MVT::v64i1 &&
849 "Currently the only custom case is when we split v64i1 to 2 regs");
850
851 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
852 Subtarget);
853
854 // Add the second register to the CalleeSaveDisableRegs list.
855 if (ShouldDisableCalleeSavedRegister)
856 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
857 } else {
858 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
859 }
860 }
861
862 SDValue Glue;
863 SmallVector<SDValue, 6> RetOps;
864 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
865 // Operand #1 = Bytes To Pop
866 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
867 MVT::i32));
868
869 // Copy the result values into the output registers.
870 for (auto &RetVal : RetVals) {
871 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
872 RetOps.push_back(RetVal.second);
873 continue; // Don't emit a copytoreg.
874 }
875
876 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
877 Glue = Chain.getValue(1);
878 RetOps.push_back(
879 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
880 }
881
882 // Swift calling convention does not require we copy the sret argument
883 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
884
885 // All x86 ABIs require that for returning structs by value we copy
886 // the sret argument into %rax/%eax (depending on ABI) for the return.
887 // We saved the argument into a virtual register in the entry block,
888 // so now we copy the value out and into %rax/%eax.
889 //
890 // Checking Function.hasStructRetAttr() here is insufficient because the IR
891 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
892 // false, then an sret argument may be implicitly inserted in the SelDAG. In
893 // either case FuncInfo->setSRetReturnReg() will have been called.
894 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
895 // When we have both sret and another return value, we should use the
896 // original Chain stored in RetOps[0], instead of the current Chain updated
897 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
898
899 // For the case of sret and another return value, we have
900 // Chain_0 at the function entry
901 // Chain_1 = getCopyToReg(Chain_0) in the above loop
902 // If we use Chain_1 in getCopyFromReg, we will have
903 // Val = getCopyFromReg(Chain_1)
904 // Chain_2 = getCopyToReg(Chain_1, Val) from below
905
906 // getCopyToReg(Chain_0) will be glued together with
907 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
908 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
909 // Data dependency from Unit B to Unit A due to usage of Val in
910 // getCopyToReg(Chain_1, Val)
911 // Chain dependency from Unit A to Unit B
912
913 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
914 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
915 getPointerTy(MF.getDataLayout()));
916
917 Register RetValReg
918 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
919 X86::RAX : X86::EAX;
920 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
921 Glue = Chain.getValue(1);
922
923 // RAX/EAX now acts like a return value.
924 RetOps.push_back(
925 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
926
927 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
928 // this however for preserve_most/preserve_all to minimize the number of
929 // callee-saved registers for these CCs.
930 if (ShouldDisableCalleeSavedRegister &&
931 CallConv != CallingConv::PreserveAll &&
932 CallConv != CallingConv::PreserveMost)
933 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
934 }
935
936 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
937 const MCPhysReg *I =
938 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
939 if (I) {
940 for (; *I; ++I) {
941 if (X86::GR64RegClass.contains(*I))
942 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
943 else
944 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
945 }
946 }
947
948 RetOps[0] = Chain; // Update chain.
949
950 // Add the glue if we have it.
951 if (Glue.getNode())
952 RetOps.push_back(Glue);
953
954 X86ISD::NodeType opcode = X86ISD::RET_GLUE;
955 if (CallConv == CallingConv::X86_INTR)
956 opcode = X86ISD::IRET;
957 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
958}
959
960bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
961 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
962 return false;
963
964 SDValue TCChain = Chain;
965 SDNode *Copy = *N->user_begin();
966 if (Copy->getOpcode() == ISD::CopyToReg) {
967 // If the copy has a glue operand, we conservatively assume it isn't safe to
968 // perform a tail call.
969 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
970 return false;
971 TCChain = Copy->getOperand(0);
972 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
973 return false;
974
975 bool HasRet = false;
976 for (const SDNode *U : Copy->users()) {
977 if (U->getOpcode() != X86ISD::RET_GLUE)
978 return false;
979 // If we are returning more than one value, we can definitely
980 // not make a tail call see PR19530
981 if (U->getNumOperands() > 4)
982 return false;
983 if (U->getNumOperands() == 4 &&
984 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
985 return false;
986 HasRet = true;
987 }
988
989 if (!HasRet)
990 return false;
991
992 Chain = TCChain;
993 return true;
994}
995
996EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
997 ISD::NodeType ExtendKind) const {
998 MVT ReturnMVT = MVT::i32;
999
1000 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
1001 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
1002 // The ABI does not require i1, i8 or i16 to be extended.
1003 //
1004 // On Darwin, there is code in the wild relying on Clang's old behaviour of
1005 // always extending i8/i16 return values, so keep doing that for now.
1006 // (PR26665).
1007 ReturnMVT = MVT::i8;
1008 }
1009
1010 EVT MinVT = getRegisterType(Context, ReturnMVT);
1011 return VT.bitsLT(MinVT) ? MinVT : VT;
1012}
1013
1014/// Reads two 32 bit registers and creates a 64 bit mask value.
1015/// \param VA The current 32 bit value that needs to be assigned.
1016/// \param NextVA The next 32 bit value that needs to be assigned.
1017/// \param Root The parent DAG node.
1018/// \param [in,out] InGlue Represents the SDValue in the parent DAG node for
1019/// glue purposes. In case the DAG is already using a
1020/// physical register instead of a virtual one, we should glue
1021/// our new SDValue to the InGlue SDValue.
1022/// \return a new SDValue of size 64 bits.
1023static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
1024 SDValue &Root, SelectionDAG &DAG,
1025 const SDLoc &DL, const X86Subtarget &Subtarget,
1026 SDValue *InGlue = nullptr) {
1027 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1028 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1029 assert(VA.getValVT() == MVT::v64i1 &&
1030 "Expecting first location of 64 bit width type");
1031 assert(NextVA.getValVT() == VA.getValVT() &&
1032 "The locations should have the same type");
1033 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1034 "The values should reside in two registers");
1035
1036 SDValue Lo, Hi;
1037 SDValue ArgValueLo, ArgValueHi;
1038
1039 MachineFunction &MF = DAG.getMachineFunction();
1040 const TargetRegisterClass *RC = &X86::GR32RegClass;
1041
1042 // Read a 32 bit value from the registers.
1043 if (nullptr == InGlue) {
1044 // When no physical register is present,
1045 // create an intermediate virtual register.
1046 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1047 ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1048 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1049 ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1050 } else {
1051 // When a physical register is available read the value from it and glue
1052 // the reads together.
1053 ArgValueLo =
1054 DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1055 *InGlue = ArgValueLo.getValue(2);
1056 ArgValueHi =
1057 DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1058 *InGlue = ArgValueHi.getValue(2);
1059 }
1060
1061 // Convert the i32 type into v32i1 type.
1062 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1063
1064 // Convert the i32 type into v32i1 type.
1065 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1066
1067 // Concatenate the two values together.
1068 return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1069}
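
// The inverse of the split used by Passv64i1ArgInRegs, again on plain integers
// (hypothetical helper name): the two 32-bit halves read from the registers
// are concatenated back into a single 64-bit mask.
static unsigned long long joinMask64(unsigned Lo, unsigned Hi) {
  return (static_cast<unsigned long long>(Hi) << 32) | Lo;
}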
1070
1071/// The function will lower a register of various sizes (8/16/32/64)
1072/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
1073/// \returns a DAG node contains the operand after lowering to mask type.
1074static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1075 const EVT &ValLoc, const SDLoc &DL,
1076 SelectionDAG &DAG) {
1077 SDValue ValReturned = ValArg;
1078
1079 if (ValVT == MVT::v1i1)
1080 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1081
1082 if (ValVT == MVT::v64i1) {
1083 // On a 32-bit machine this case is handled by getv64i1Argument.
1084 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1085 // On a 64-bit machine there is no need to truncate; the value is just bitcast.
1086 } else {
1087 MVT MaskLenVT;
1088 switch (ValVT.getSimpleVT().SimpleTy) {
1089 case MVT::v8i1:
1090 MaskLenVT = MVT::i8;
1091 break;
1092 case MVT::v16i1:
1093 MaskLenVT = MVT::i16;
1094 break;
1095 case MVT::v32i1:
1096 MaskLenVT = MVT::i32;
1097 break;
1098 default:
1099 llvm_unreachable("Expecting a vector of i1 types");
1100 }
1101
1102 ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1103 }
1104 return DAG.getBitcast(ValVT, ValReturned);
1105}
1106
1107static SDValue getPopFromX87Reg(SelectionDAG &DAG, SDValue Chain,
1108 const SDLoc &dl, Register Reg, EVT VT,
1109 SDValue Glue) {
1110 SDVTList VTs = DAG.getVTList(VT, MVT::Other, MVT::Glue);
1111 SDValue Ops[] = {Chain, DAG.getRegister(Reg, VT), Glue};
1112 return DAG.getNode(X86ISD::POP_FROM_X87_REG, dl, VTs,
1113 ArrayRef(Ops, Glue.getNode() ? 3 : 2));
1114}
1115
1116/// Lower the result values of a call into the
1117/// appropriate copies out of appropriate physical registers.
1118///
1119SDValue X86TargetLowering::LowerCallResult(
1120 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1121 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1122 SmallVectorImpl<SDValue> &InVals,
1123 uint32_t *RegMask) const {
1124
1125 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1126 // Assign locations to each value returned by this call.
1127 SmallVector<CCValAssign, 16> RVLocs;
1128 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1129 *DAG.getContext());
1130 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1131
1132 // Copy all of the result registers out of their specified physreg.
1133 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1134 ++I, ++InsIndex) {
1135 CCValAssign &VA = RVLocs[I];
1136 EVT CopyVT = VA.getLocVT();
1137
1138 // In some calling conventions we need to remove the used registers
1139 // from the register mask.
1140 if (RegMask) {
1141 for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1142 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1143 }
1144
1145 // Report an error if there was an attempt to return FP values via XMM
1146 // registers.
1147 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1148 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
1149 if (VA.getLocReg() == X86::XMM1)
1150 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1151 else
1152 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1153 } else if (!Subtarget.hasSSE2() &&
1154 X86::FR64XRegClass.contains(VA.getLocReg()) &&
1155 CopyVT == MVT::f64) {
1156 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
1157 if (VA.getLocReg() == X86::XMM1)
1158 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1159 else
1160 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1161 }
1162
1163 // If we prefer to use the value in xmm registers, copy it out as f80 and
1164 // use a truncate to move it from fp stack reg to xmm reg.
1165 bool RoundAfterCopy = false;
1166 bool X87Result = VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1;
1167 if (X87Result && isScalarFPTypeInSSEReg(VA.getValVT())) {
1168 if (!Subtarget.hasX87())
1169 report_fatal_error("X87 register return with X87 disabled");
1170 CopyVT = MVT::f80;
1171 RoundAfterCopy = (CopyVT != VA.getLocVT());
1172 }
1173
1174 SDValue Val;
1175 if (VA.needsCustom()) {
1176 assert(VA.getValVT() == MVT::v64i1 &&
1177 "Currently the only custom case is when we split v64i1 to 2 regs");
1178 Val =
1179 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
1180 } else {
1181 Chain =
1182 X87Result
1183 ? getPopFromX87Reg(DAG, Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1184 .getValue(1)
1185 : DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1186 .getValue(1);
1187 Val = Chain.getValue(0);
1188 InGlue = Chain.getValue(2);
1189 }
1190
1191 if (RoundAfterCopy)
1192 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
1193 // This truncation won't change the value.
1194 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1195
1196 if (VA.isExtInLoc()) {
1197 if (VA.getValVT().isVector() &&
1198 VA.getValVT().getScalarType() == MVT::i1 &&
1199 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1200 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1201 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1202 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1203 } else
1204 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1205 }
1206
1207 if (VA.getLocInfo() == CCValAssign::BCvt)
1208 Val = DAG.getBitcast(VA.getValVT(), Val);
1209
1210 InVals.push_back(Val);
1211 }
1212
1213 return Chain;
1214}
1215
1216//===----------------------------------------------------------------------===//
1217// C & StdCall & Fast Calling Convention implementation
1218//===----------------------------------------------------------------------===//
1219// The StdCall calling convention is the standard for many Windows API
1220// routines. It differs from the C calling convention just a little: the
1221// callee should clean up the stack, not the caller. Symbols are also
1222// decorated in some fancy way :) It doesn't support any vector arguments.
1223// For info on fast calling convention see Fast Calling Convention (tail call)
1224// implementation LowerX86_32FastCCCallTo.
1225
1226/// Determines whether Args, either a set of outgoing arguments to a call, or a
1227/// set of incoming args of a call, contains an sret pointer that the callee
1228/// pops
1229template <typename T>
1230static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1231 const X86Subtarget &Subtarget) {
1232 // Not C++20 (yet), so no concepts available.
1233 static_assert(std::is_same_v<T, ISD::OutputArg> ||
1234 std::is_same_v<T, ISD::InputArg>,
1235 "requires ISD::OutputArg or ISD::InputArg");
1236
1237 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
1238 // for most compilations.
1239 if (!Subtarget.is32Bit())
1240 return false;
1241
1242 if (Args.empty())
1243 return false;
1244
1245 // Most calls do not have an sret argument, check the arg next.
1246 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1247 if (!Flags.isSRet() || Flags.isInReg())
1248 return false;
1249
1250 // The MSVC ABI does not pop the sret.
1251 if (Subtarget.getTargetTriple().isOSMSVCRT())
1252 return false;
1253
1254 // MCUs don't pop the sret
1255 if (Subtarget.isTargetMCU())
1256 return false;
1257
1258 // Callee pops argument
1259 return true;
1260}
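
// Source-level illustration (assuming i386 Linux with the plain C calling
// convention; `Big` and `makeBig` are hypothetical): the caller passes a
// hidden sret pointer for the return value, and because the callee pops it the
// function returns with `ret $4` instead of a plain `ret`.
struct Big { int Data[8]; };
Big makeBig(); // on such a target the callee pops the 4-byte sret slot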
1261
1262/// Make a copy of an aggregate at address specified by "Src" to address
1263/// "Dst" with size and alignment information specified by the specific
1264/// parameter attribute. The copy will be passed as a byval function parameter.
1265static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst,
1266 SDValue Chain, ISD::ArgFlagsTy Flags,
1267 SelectionDAG &DAG, const SDLoc &dl) {
1268 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1269
1270 return DAG.getMemcpy(
1271 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
1272 /*isVolatile*/ false, /*AlwaysInline=*/true,
1273 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
1274}
1275
1276/// Return true if the calling convention is one that we can guarantee TCO for.
1277static bool canGuaranteeTCO(CallingConv::ID CC) {
1278 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1279 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
1280 CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
1281}
1282
1283/// Return true if we might ever do TCO for calls with this calling convention.
1284static bool mayTailCallThisCC(CallingConv::ID CC) {
1285 switch (CC) {
1286 // C calling conventions:
1287 case CallingConv::C:
1288 case CallingConv::Win64:
1289 case CallingConv::X86_64_SysV:
1290 case CallingConv::PreserveNone:
1291 // Callee pop conventions:
1292 case CallingConv::X86_ThisCall:
1293 case CallingConv::X86_StdCall:
1294 case CallingConv::X86_VectorCall:
1295 case CallingConv::X86_FastCall:
1296 // Swift:
1297 case CallingConv::Swift:
1298 return true;
1299 default:
1300 return canGuaranteeTCO(CC);
1301 }
1302}
1303
1304/// Return true if the function is being made into a tailcall target by
1305/// changing its ABI.
1306static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1307 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1308 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
1309}
1310
1311bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1312 if (!CI->isTailCall())
1313 return false;
1314
1315 CallingConv::ID CalleeCC = CI->getCallingConv();
1316 if (!mayTailCallThisCC(CalleeCC))
1317 return false;
1318
1319 return true;
1320}
1321
1322SDValue
1323X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1324 const SmallVectorImpl<ISD::InputArg> &Ins,
1325 const SDLoc &dl, SelectionDAG &DAG,
1326 const CCValAssign &VA,
1327 MachineFrameInfo &MFI, unsigned i) const {
1328 // Create the nodes corresponding to a load from this parameter slot.
1329 ISD::ArgFlagsTy Flags = Ins[i].Flags;
1330 bool AlwaysUseMutable = shouldGuaranteeTCO(
1331 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1332 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1333 EVT ValVT;
1334 MVT PtrVT = getPointerTy(DAG.getDataLayout());
1335
1336 // If value is passed by pointer we have address passed instead of the value
1337 // itself. No need to extend if the mask value and location share the same
1338 // absolute size.
1339 bool ExtendedInMem =
1340 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1341 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
1342
1343 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1344 ValVT = VA.getLocVT();
1345 else
1346 ValVT = VA.getValVT();
1347
1348 // FIXME: For now, all byval parameter objects are marked mutable. This can be
1349 // changed with more analysis.
1350 // In case of tail call optimization, mark all arguments mutable, since they
1351 // could be overwritten by the lowering of arguments in case of a tail call.
1352 if (Flags.isByVal()) {
1353 unsigned Bytes = Flags.getByValSize();
1354 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1355
1356 // FIXME: For now, all byval parameter objects are marked as aliasing. This
1357 // can be improved with deeper analysis.
1358 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
1359 /*isAliased=*/true);
1360 return DAG.getFrameIndex(FI, PtrVT);
1361 }
1362
1363 EVT ArgVT = Ins[i].ArgVT;
1364
1365 // If this is a vector that has been split into multiple parts, don't elide
1366 // the copy. The layout on the stack may not match the packed in-memory
1367 // layout.
1368 bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1369
1370 // This is an argument in memory. We might be able to perform copy elision.
1371 // If the argument is passed directly in memory without any extension, then we
1372 // can perform copy elision. Large vector types, for example, may be passed
1373 // indirectly by pointer.
1374 if (Flags.isCopyElisionCandidate() &&
1375 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1376 !ScalarizedVector) {
1377 SDValue PartAddr;
1378 if (Ins[i].PartOffset == 0) {
1379 // If this is a one-part value or the first part of a multi-part value,
1380 // create a stack object for the entire argument value type and return a
1381 // load from our portion of it. This assumes that if the first part of an
1382 // argument is in memory, the rest will also be in memory.
1383 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
1384 /*IsImmutable=*/false);
1385 PartAddr = DAG.getFrameIndex(FI, PtrVT);
1386 return DAG.getLoad(
1387 ValVT, dl, Chain, PartAddr,
1388 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
1389 }
1390
1391 // This is not the first piece of an argument in memory. See if there is
1392 // already a fixed stack object including this offset. If so, assume it
1393 // was created by the PartOffset == 0 branch above and create a load from
1394 // the appropriate offset into it.
1395 int64_t PartBegin = VA.getLocMemOffset();
1396 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1397 int FI = MFI.getObjectIndexBegin();
1398 for (; MFI.isFixedObjectIndex(FI); ++FI) {
1399 int64_t ObjBegin = MFI.getObjectOffset(FI);
1400 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
1401 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1402 break;
1403 }
1404 if (MFI.isFixedObjectIndex(FI)) {
1405 SDValue Addr =
1406 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
1407 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
1408 return DAG.getLoad(ValVT, dl, Chain, Addr,
1409 MachinePointerInfo::getFixedStack(
1410 DAG.getMachineFunction(), FI, Ins[i].PartOffset));
1411 }
1412 }
1413
1414 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1415 VA.getLocMemOffset(), isImmutable);
1416
1417 // Set SExt or ZExt flag.
1418 if (VA.getLocInfo() == CCValAssign::ZExt) {
1419 MFI.setObjectZExt(FI, true);
1420 } else if (VA.getLocInfo() == CCValAssign::SExt) {
1421 MFI.setObjectSExt(FI, true);
1422 }
1423
1424 MaybeAlign Alignment;
1425 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1426 ValVT != MVT::f80)
1427 Alignment = MaybeAlign(4);
1428 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1429 SDValue Val = DAG.getLoad(
1430 ValVT, dl, Chain, FIN,
1431 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
1432 Alignment);
1433 return ExtendedInMem
1434 ? (VA.getValVT().isVector()
1435 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
1436 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
1437 : Val;
1438}
1439
1440// FIXME: Get this from tablegen.
1441static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
1442 const X86Subtarget &Subtarget) {
1443 assert(Subtarget.is64Bit());
1444
1445 if (Subtarget.isCallingConvWin64(CallConv)) {
1446 static const MCPhysReg GPR64ArgRegsWin64[] = {
1447 X86::RCX, X86::RDX, X86::R8, X86::R9
1448 };
1449 return GPR64ArgRegsWin64;
1450 }
1451
1452 static const MCPhysReg GPR64ArgRegs64Bit[] = {
1453 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1454 };
1455 return GPR64ArgRegs64Bit;
1456}
1457
1458// FIXME: Get this from tablegen.
1460 CallingConv::ID CallConv,
1461 const X86Subtarget &Subtarget) {
1462 assert(Subtarget.is64Bit());
1463 if (Subtarget.isCallingConvWin64(CallConv)) {
1464 // The XMM registers which might contain var arg parameters are shadowed
1465 // in their paired GPR. So we only need to save the GPR to their home
1466 // slots.
1467 // TODO: __vectorcall will change this.
1468 return {};
1469 }
1470
1471 bool isSoftFloat = Subtarget.useSoftFloat();
1472 if (isSoftFloat || !Subtarget.hasSSE1())
1473 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1474 // registers.
1475 return {};
1476
1477 static const MCPhysReg XMMArgRegs64Bit[] = {
1478 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1479 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1480 };
1481 return XMMArgRegs64Bit;
1482}
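
// Context for the SysV case above (a sketch; `callsVarargs` is hypothetical):
// the caller reports in %al how many vector registers carry variadic
// arguments, and the register-save code below only needs to spill XMM0-XMM7
// when that count can be non-zero.
extern "C" int printf(const char *, ...);
static void callsVarargs() {
  printf("%f\n", 1.0); // caller sets %al = 1: one XMM register holds a vararg
}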
1483
1484#ifndef NDEBUG
1485static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
1486 return llvm::is_sorted(
1487 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
1488 return A.getValNo() < B.getValNo();
1489 });
1490}
1491#endif
1492
1493namespace {
1494/// This is a helper class for lowering variable arguments parameters.
1495class VarArgsLoweringHelper {
1496public:
1497 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1498 SelectionDAG &DAG, const X86Subtarget &Subtarget,
1499 CallingConv::ID CallConv, CCState &CCInfo)
1500 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1501 TheMachineFunction(DAG.getMachineFunction()),
1502 TheFunction(TheMachineFunction.getFunction()),
1503 FrameInfo(TheMachineFunction.getFrameInfo()),
1504 FrameLowering(*Subtarget.getFrameLowering()),
1505 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1506 CCInfo(CCInfo) {}
1507
1508 // Lower variable arguments parameters.
1509 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1510
1511private:
1512 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1513
1514 void forwardMustTailParameters(SDValue &Chain);
1515
1516 bool is64Bit() const { return Subtarget.is64Bit(); }
1517 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
1518
1519 X86MachineFunctionInfo *FuncInfo;
1520 const SDLoc &DL;
1521 SelectionDAG &DAG;
1522 const X86Subtarget &Subtarget;
1523 MachineFunction &TheMachineFunction;
1524 const Function &TheFunction;
1525 MachineFrameInfo &FrameInfo;
1526 const TargetFrameLowering &FrameLowering;
1527 const TargetLowering &TargLowering;
1528 CallingConv::ID CallConv;
1529 CCState &CCInfo;
1530};
1531} // namespace
1532
1533void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1534 SDValue &Chain, unsigned StackSize) {
1535 // If the function takes variable number of arguments, make a frame index for
1536 // the start of the first vararg value... for expansion of llvm.va_start. We
1537 // can skip this if there are no va_start calls.
1538 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1539 CallConv != CallingConv::X86_ThisCall)) {
1540 FuncInfo->setVarArgsFrameIndex(
1541 FrameInfo.CreateFixedObject(1, StackSize, true));
1542 }
1543
1544 // 64-bit calling conventions support varargs and register parameters, so we
1545 // have to do extra work to spill them in the prologue.
1546 if (is64Bit()) {
1547 // Find the first unallocated argument registers.
1548 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1549 ArrayRef<MCPhysReg> ArgXMMs =
1550 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
1551 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
1552 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
1553
1554 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1555 "SSE register cannot be used when SSE is disabled!");
1556
1557 if (isWin64()) {
1558 // Get to the caller-allocated home save location. Add 8 to account
1559 // for the return address.
1560 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1561 FuncInfo->setRegSaveFrameIndex(
1562 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
1563 // Fixup to set vararg frame on shadow area (4 x i64).
1564 if (NumIntRegs < 4)
1565 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1566 } else {
1567 // For X86-64, if there are vararg parameters that are passed via
1568 // registers, then we must store them to their spots on the stack so
1569 // they may be loaded by dereferencing the result of va_next.
1570 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1571 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1572 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1573 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
1574 }
1575
1576 SmallVector<SDValue, 6>
1577 LiveGPRs; // list of SDValues for GPR registers keeping live input values
1578 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
1579 // keeping live input value
1580 SDValue ALVal; // if applicable keeps SDValue for %al register
1581
1582 // Gather all the live in physical registers.
1583 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
1584 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1585 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1586 }
1587 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
1588 if (!AvailableXmms.empty()) {
1589 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1590 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1591 for (MCPhysReg Reg : AvailableXmms) {
1592 // FastRegisterAllocator spills virtual registers at basic
1593 // block boundaries. That leads to uses of xmm registers
1594 // outside of the check for %al. Pass physical registers to
1595 // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
1596 TheMachineFunction.getRegInfo().addLiveIn(Reg);
1597 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1598 }
1599 }
1600
1601 // Store the integer parameter registers.
1602 SmallVector<SDValue, 8> MemOps;
1603 SDValue RSFIN =
1604 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
1605 TargLowering.getPointerTy(DAG.getDataLayout()));
1606 unsigned Offset = FuncInfo->getVarArgsGPOffset();
1607 for (SDValue Val : LiveGPRs) {
1608 SDValue FIN = DAG.getNode(ISD::ADD, DL,
1609 TargLowering.getPointerTy(DAG.getDataLayout()),
1610 RSFIN, DAG.getIntPtrConstant(Offset, DL));
1611 SDValue Store =
1612 DAG.getStore(Val.getValue(1), DL, Val, FIN,
1613 MachinePointerInfo::getFixedStack(
1614 DAG.getMachineFunction(),
1615 FuncInfo->getRegSaveFrameIndex(), Offset));
1616 MemOps.push_back(Store);
1617 Offset += 8;
1618 }
1619
1620 // Now store the XMM (fp + vector) parameter registers.
1621 if (!LiveXMMRegs.empty()) {
1622 SmallVector<SDValue, 12> SaveXMMOps;
1623 SaveXMMOps.push_back(Chain);
1624 SaveXMMOps.push_back(ALVal);
1625 SaveXMMOps.push_back(RSFIN);
1626 SaveXMMOps.push_back(
1627 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1628 llvm::append_range(SaveXMMOps, LiveXMMRegs);
1629 MachineMemOperand *StoreMMO =
1630 DAG.getMachineFunction().getMachineMemOperand(
1631 MachinePointerInfo::getFixedStack(
1632 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
1633 Offset),
1634 MachineMemOperand::MOStore, 128, Align(16));
1635 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
1636 DL, DAG.getVTList(MVT::Other),
1637 SaveXMMOps, MVT::i8, StoreMMO));
1638 }
1639
1640 if (!MemOps.empty())
1641 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1642 }
1643}
1644
1645void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1646 // Find the largest legal vector type.
1647 MVT VecVT = MVT::Other;
1648 // FIXME: Only some x86_32 calling conventions support AVX512.
1649 if (Subtarget.useAVX512Regs() &&
1650 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1651 CallConv == CallingConv::Intel_OCL_BI)))
1652 VecVT = MVT::v16f32;
1653 else if (Subtarget.hasAVX())
1654 VecVT = MVT::v8f32;
1655 else if (Subtarget.hasSSE2())
1656 VecVT = MVT::v4f32;
1657
1658 // We forward some GPRs and some vector types.
1659 SmallVector<MVT, 2> RegParmTypes;
1660 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1661 RegParmTypes.push_back(IntVT);
1662 if (VecVT != MVT::Other)
1663 RegParmTypes.push_back(VecVT);
1664
1665 // Compute the set of forwarded registers. The rest are scratch.
1666 SmallVectorImpl<ForwardedRegister> &Forwards =
1667 FuncInfo->getForwardedMustTailRegParms();
1668 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
1669
1670 // Forward AL for SysV x86_64 targets, since it is used for varargs.
1671 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1672 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1673 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1674 }
1675
1676 // Copy all forwards from physical to virtual registers.
1677 for (ForwardedRegister &FR : Forwards) {
1678 // FIXME: Can we use a less constrained schedule?
1679 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
1680 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1681 TargLowering.getRegClassFor(FR.VT));
1682 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
1683 }
1684}
1685
1686void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1687 unsigned StackSize) {
1688 // Set FrameIndex to the 0xAAAAAAA value to mark the unset state.
1689 // If necessary, it will be set to the correct value later.
1690 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1691 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1692
1693 if (FrameInfo.hasVAStart())
1694 createVarArgAreaAndStoreRegisters(Chain, StackSize);
1695
1696 if (FrameInfo.hasMustTailInVarArgFunc())
1697 forwardMustTailParameters(Chain);
1698}
1699
1700SDValue X86TargetLowering::LowerFormalArguments(
1701 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1702 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1703 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1704 MachineFunction &MF = DAG.getMachineFunction();
1705 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1706
1707 const Function &F = MF.getFunction();
1708 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1709 F.getName() == "main")
1710 FuncInfo->setForceFramePointer(true);
1711
1712 MachineFrameInfo &MFI = MF.getFrameInfo();
1713 bool Is64Bit = Subtarget.is64Bit();
1714 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
1715
1716 assert(
1717 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
1718 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
1719
1720 // Assign locations to all of the incoming arguments.
1721 SmallVector<CCValAssign, 16> ArgLocs;
1722 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1723
1724 // Allocate shadow area for Win64.
1725 if (IsWin64)
1726 CCInfo.AllocateStack(32, Align(8));
1727
1728 CCInfo.AnalyzeArguments(Ins, CC_X86);
1729
1730 // In vectorcall calling convention a second pass is required for the HVA
1731 // types.
1732 if (CallingConv::X86_VectorCall == CallConv) {
1733 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
1734 }
1735
1736 // The next loop assumes that the locations are in the same order of the
1737 // input arguments.
1738 assert(isSortedByValueNo(ArgLocs) &&
1739 "Argument Location list must be sorted before lowering");
1740
1741 SDValue ArgValue;
1742 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1743 ++I, ++InsIndex) {
1744 assert(InsIndex < Ins.size() && "Invalid Ins index");
1745 CCValAssign &VA = ArgLocs[I];
1746
1747 if (VA.isRegLoc()) {
1748 EVT RegVT = VA.getLocVT();
1749 if (VA.needsCustom()) {
1750 assert(
1751 VA.getValVT() == MVT::v64i1 &&
1752 "Currently the only custom case is when we split v64i1 to 2 regs");
1753
1754 // v64i1 values, in regcall calling convention, that are
1755 // compiled to 32 bit arch, are split up into two registers.
1756 ArgValue =
1757 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
1758 } else {
1759 const TargetRegisterClass *RC;
1760 if (RegVT == MVT::i8)
1761 RC = &X86::GR8RegClass;
1762 else if (RegVT == MVT::i16)
1763 RC = &X86::GR16RegClass;
1764 else if (RegVT == MVT::i32)
1765 RC = &X86::GR32RegClass;
1766 else if (Is64Bit && RegVT == MVT::i64)
1767 RC = &X86::GR64RegClass;
1768 else if (RegVT == MVT::f16)
1769 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1770 else if (RegVT == MVT::f32)
1771 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1772 else if (RegVT == MVT::f64)
1773 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1774 else if (RegVT == MVT::f80)
1775 RC = &X86::RFP80RegClass;
1776 else if (RegVT == MVT::f128)
1777 RC = &X86::VR128RegClass;
1778 else if (RegVT.is512BitVector())
1779 RC = &X86::VR512RegClass;
1780 else if (RegVT.is256BitVector())
1781 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1782 else if (RegVT.is128BitVector())
1783 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1784 else if (RegVT == MVT::x86mmx)
1785 RC = &X86::VR64RegClass;
1786 else if (RegVT == MVT::v1i1)
1787 RC = &X86::VK1RegClass;
1788 else if (RegVT == MVT::v8i1)
1789 RC = &X86::VK8RegClass;
1790 else if (RegVT == MVT::v16i1)
1791 RC = &X86::VK16RegClass;
1792 else if (RegVT == MVT::v32i1)
1793 RC = &X86::VK32RegClass;
1794 else if (RegVT == MVT::v64i1)
1795 RC = &X86::VK64RegClass;
1796 else
1797 llvm_unreachable("Unknown argument type!");
1798
1799 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1800 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1801 }
1802
1803 // If this is an 8 or 16-bit value, it is really passed promoted to 32
1804 // bits. Insert an assert[sz]ext to capture this, then truncate to the
1805 // right size.
1806 if (VA.getLocInfo() == CCValAssign::SExt)
1807 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1808 DAG.getValueType(VA.getValVT()));
1809 else if (VA.getLocInfo() == CCValAssign::ZExt)
1810 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1811 DAG.getValueType(VA.getValVT()));
1812 else if (VA.getLocInfo() == CCValAssign::BCvt)
1813 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
1814
1815 if (VA.isExtInLoc()) {
1816 // Handle MMX values passed in XMM regs.
1817 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1818 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
1819 else if (VA.getValVT().isVector() &&
1820 VA.getValVT().getScalarType() == MVT::i1 &&
1821 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1822 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1823 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1824 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
1825 } else
1826 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1827 }
1828 } else {
1829 assert(VA.isMemLoc());
1830 ArgValue =
1831 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
1832 }
1833
1834 // If value is passed via pointer - do a load.
1835 if (VA.getLocInfo() == CCValAssign::Indirect &&
1836 !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1837 ArgValue =
1838 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
1839 }
1840
1841 InVals.push_back(ArgValue);
1842 }
1843
1844 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1845 if (Ins[I].Flags.isSwiftAsync()) {
1846 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1847 if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1848 X86FI->setHasSwiftAsyncContext(true);
1849 else {
1850 int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1851 int FI =
1852 MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
1853 X86FI->setSwiftAsyncContextFrameIdx(FI);
1854 SDValue St = DAG.getStore(
1855 DAG.getEntryNode(), dl, InVals[I],
1856 DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1857 MachinePointerInfo::getFixedStack(MF, FI));
1858 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1859 }
1860 }
1861
1862 // Swift calling convention does not require we copy the sret argument
1863 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1864 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1865 continue;
1866
1867 // All x86 ABIs require that for returning structs by value we copy the
1868 // sret argument into %rax/%eax (depending on ABI) for the return. Save
1869 // the argument into a virtual register so that we can access it from the
1870 // return points.
1871 if (Ins[I].Flags.isSRet()) {
1872 assert(!FuncInfo->getSRetReturnReg() &&
1873 "SRet return has already been set");
1874 MVT PtrTy = getPointerTy(DAG.getDataLayout());
1875 Register Reg =
1876 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
1877 FuncInfo->setSRetReturnReg(Reg);
1878 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
1879 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1880 break;
1881 }
1882 }
1883
1884 unsigned StackSize = CCInfo.getStackSize();
1885 // Align stack specially for tail calls.
1886 if (shouldGuaranteeTCO(CallConv,
1887 MF.getTarget().Options.GuaranteedTailCallOpt))
1888 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1889
1890 if (IsVarArg)
1891 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1892 .lowerVarArgsParameters(Chain, StackSize);
1893
1894 // Some CCs need callee pop.
1895 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
1896 MF.getTarget().Options.GuaranteedTailCallOpt)) {
1897 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1898 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1899 // X86 interrupts must pop the error code (and the alignment padding) if
1900 // present.
1901 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1902 } else {
1903 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1904 // If this is an sret function, the return should pop the hidden pointer.
1905 if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
1906 FuncInfo->setBytesToPopOnReturn(4);
1907 }
1908
1909 if (!Is64Bit) {
1910 // RegSaveFrameIndex is X86-64 only.
1911 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1912 }
1913
1914 FuncInfo->setArgumentStackSize(StackSize);
1915
1916 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1917 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
1918 if (Personality == EHPersonality::CoreCLR) {
1919 assert(Is64Bit);
1920 // TODO: Add a mechanism to frame lowering that will allow us to indicate
1921 // that we'd prefer this slot be allocated towards the bottom of the frame
1922 // (i.e. near the stack pointer after allocating the frame). Every
1923 // funclet needs a copy of this slot in its (mostly empty) frame, and the
1924 // offset from the bottom of this and each funclet's frame must be the
1925 // same, so the size of funclets' (mostly empty) frames is dictated by
1926 // how far this slot is from the bottom (since they allocate just enough
1927 // space to accommodate holding this slot at the correct offset).
1928 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
1929 EHInfo->PSPSymFrameIdx = PSPSymFI;
1930 }
1931 }
1932
1933 if (shouldDisableArgRegFromCSR(CallConv) ||
1934 F.hasFnAttribute("no_caller_saved_registers")) {
1935 MachineRegisterInfo &MRI = MF.getRegInfo();
1936 for (std::pair<MCRegister, Register> Pair : MRI.liveins())
1937 MRI.disableCalleeSavedRegister(Pair.first);
1938 }
1939
1940 if (CallingConv::PreserveNone == CallConv)
1941 for (const ISD::InputArg &In : Ins) {
1942 if (In.Flags.isSwiftSelf() || In.Flags.isSwiftAsync() ||
1943 In.Flags.isSwiftError()) {
1944 errorUnsupported(DAG, dl,
1945 "Swift attributes can't be used with preserve_none");
1946 break;
1947 }
1948 }
1949
1950 return Chain;
1951}
1952
1953SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1954 SDValue Arg, const SDLoc &dl,
1955 SelectionDAG &DAG,
1956 const CCValAssign &VA,
1957 ISD::ArgFlagsTy Flags,
1958 bool isByVal) const {
1959 unsigned LocMemOffset = VA.getLocMemOffset();
1960 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1961 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1962 StackPtr, PtrOff);
1963 if (isByVal)
1964 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1965
1966 MaybeAlign Alignment;
1967 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1968 Arg.getSimpleValueType() != MVT::f80)
1969 Alignment = MaybeAlign(4);
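// The 32-bit MSVC ABI only guarantees 4-byte alignment for arguments passed
// on the stack, so don't claim a larger alignment for this store (x86_fp80 is
// excluded above).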
1970 return DAG.getStore(
1971 Chain, dl, Arg, PtrOff,
1972 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
1973 Alignment);
1974}
1975
1976/// Emit a load of return address if tail call
1977/// optimization is performed and it is required.
1978SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1979 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1980 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1981 // Adjust the Return address stack slot.
1982 EVT VT = getPointerTy(DAG.getDataLayout());
1983 OutRetAddr = getReturnAddressFrameIndex(DAG);
1984
1985 // Load the "old" Return address.
1986 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
1987 return SDValue(OutRetAddr.getNode(), 1);
1988}
1989
1990/// Emit a store of the return address if tail call
1991/// optimization is performed and it is required (FPDiff!=0).
1992 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
1993 SDValue Chain, SDValue RetAddrFrIdx,
1994 EVT PtrVT, unsigned SlotSize,
1995 int FPDiff, const SDLoc &dl) {
1996 // Store the return address to the appropriate stack slot.
1997 if (!FPDiff) return Chain;
1998 // Calculate the new stack slot for the return address.
1999 int NewReturnAddrFI =
2000 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
2001 false);
2002 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
2003 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
2004 MachinePointerInfo::getFixedStack(
2005 DAG.getMachineFunction(), NewReturnAddrFI));
2006 return Chain;
2007}
2008
2009 /// Returns a vector_shuffle mask for a movs{s|d} or movd
2010 /// operation of the specified width.
2011SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
2012 SDValue V1, SDValue V2) const {
2013 unsigned NumElems = VT.getVectorNumElements();
2014 SmallVector<int, 8> Mask;
2015 Mask.push_back(NumElems);
2016 for (unsigned i = 1; i != NumElems; ++i)
2017 Mask.push_back(i);
2018 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
2019}
2020
2021SDValue
2022X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2023 SmallVectorImpl<SDValue> &InVals) const {
2024 SelectionDAG &DAG = CLI.DAG;
2025 SDLoc &dl = CLI.DL;
2026 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2027 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2028 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2029 SDValue Chain = CLI.Chain;
2030 SDValue Callee = CLI.Callee;
2031 CallingConv::ID CallConv = CLI.CallConv;
2032 bool &isTailCall = CLI.IsTailCall;
2033 bool isVarArg = CLI.IsVarArg;
2034 const auto *CB = CLI.CB;
2035
2036 MachineFunction &MF = DAG.getMachineFunction();
2037 bool Is64Bit = Subtarget.is64Bit();
2038 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2039 bool IsSibcall = false;
2040 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
2041 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
2042 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
2043 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2044 bool HasNCSR = (CB && isa<CallInst>(CB) &&
2045 CB->hasFnAttr("no_caller_saved_registers"));
2046 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
2047 bool IsCFICall = IsIndirectCall && CLI.CFIType;
2048 const Module *M = MF.getFunction().getParent();
2049
2050 // If the indirect call target has the nocf_check attribute, the call needs
2051 // the NOTRACK prefix. For simplicity just disable tail calls as there are
2052 // so many variants.
2053 bool IsNoTrackIndirectCall = IsIndirectCall && CB->doesNoCfCheck() &&
2054 M->getModuleFlag("cf-protection-branch");
2055 if (IsNoTrackIndirectCall)
2056 isTailCall = false;
2057
2058 MachineFunction::CallSiteInfo CSInfo;
2059 if (CallConv == CallingConv::X86_INTR)
2060 report_fatal_error("X86 interrupts may not be called directly");
2061
2062 // Set type id for call site info.
2063 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
2064 CSInfo = MachineFunction::CallSiteInfo(*CB);
2065
2066 if (IsIndirectCall && !IsWin64 &&
2067 M->getModuleFlag("import-call-optimization"))
2068 errorUnsupported(DAG, dl,
2069 "Indirect calls must have a normal calling convention if "
2070 "Import Call Optimization is enabled");
2071
2072 // Analyze operands of the call, assigning locations to each operand.
2073 SmallVector<CCValAssign, 16> ArgLocs;
2074 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2075
2076 // Allocate shadow area for Win64.
2077 if (IsWin64)
2078 CCInfo.AllocateStack(32, Align(8));
2079
2080 CCInfo.AnalyzeArguments(Outs, CC_X86);
2081
2082 // In vectorcall calling convention a second pass is required for the HVA
2083 // types.
2084 if (CallingConv::X86_VectorCall == CallConv) {
2085 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2086 }
2087
2088 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2089 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
2090 // If we are using a GOT, disable tail calls to external symbols with
2091 // default visibility. Tail calling such a symbol requires using a GOT
2092 // relocation, which forces early binding of the symbol. This breaks code
2093 // that require lazy function symbol resolution. Using musttail or
2094 // GuaranteedTailCallOpt will override this.
2095 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2096 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2097 G->getGlobal()->hasDefaultVisibility()))
2098 isTailCall = false;
2099 }
2100
2101 if (isTailCall && !IsMustTail) {
2102 // Check if it's really possible to do a tail call.
2103 isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
2104 IsCalleePopSRet);
2105
2106 // Sibcalls are automatically detected tailcalls which do not require
2107 // ABI changes.
2108 if (!IsGuaranteeTCO && isTailCall)
2109 IsSibcall = true;
2110
2111 if (isTailCall)
2112 ++NumTailCalls;
2113 }
2114
2115 if (IsMustTail && !isTailCall)
2116 report_fatal_error("failed to perform tail call elimination on a call "
2117 "site marked musttail");
2118
2119 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2120 "Var args not supported with calling convention fastcc, ghc or hipe");
2121
2122 // Get a count of how many bytes are to be pushed on the stack.
2123 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2124 if (IsSibcall)
2125 // This is a sibcall. The memory operands are already available in the
2126 // caller's incoming argument area (its own caller's stack).
2127 NumBytes = 0;
2128 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
2129 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2130
2131 int FPDiff = 0;
2132 if (isTailCall &&
2133 shouldGuaranteeTCO(CallConv,
2134 MF.getTarget().Options.GuaranteedTailCallOpt)) {
2135 // Lower arguments at fp - stackoffset + fpdiff.
2136 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2137
2138 FPDiff = NumBytesCallerPushed - NumBytes;
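// For example, a caller that pops 16 bytes tail-calling a callee that needs 32
// bytes of argument stack gives FPDiff = -16, and the return address slot is
// moved accordingly by EmitTailCallStoreRetAddr.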
2139
2140 // Set the delta of movement of the returnaddr stackslot.
2141 // But only set if delta is greater than previous delta.
2142 if (FPDiff < X86Info->getTCReturnAddrDelta())
2143 X86Info->setTCReturnAddrDelta(FPDiff);
2144 }
2145
2146 unsigned NumBytesToPush = NumBytes;
2147 unsigned NumBytesToPop = NumBytes;
2148
2149 // If we have an inalloca argument, all stack space has already been allocated
2150 // for us and is right at the top of the stack. We don't support multiple
2151 // arguments passed in memory when using inalloca.
2152 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2153 NumBytesToPush = 0;
2154 if (!ArgLocs.back().isMemLoc())
2155 report_fatal_error("cannot use inalloca attribute on a register "
2156 "parameter");
2157 if (ArgLocs.back().getLocMemOffset() != 0)
2158 report_fatal_error("any parameter with the inalloca attribute must be "
2159 "the only memory argument");
2160 } else if (CLI.IsPreallocated) {
2161 assert(ArgLocs.back().isMemLoc() &&
2162 "cannot use preallocated attribute on a register "
2163 "parameter");
2164 SmallVector<size_t, 4> PreallocatedOffsets;
2165 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2166 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2167 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
2168 }
2169 }
2170 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
2171 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
2172 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
2173 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
2174 NumBytesToPush = 0;
2175 }
2176
2177 if (!IsSibcall && !IsMustTail)
2178 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
2179 NumBytes - NumBytesToPush, dl);
2180
2181 SDValue RetAddrFrIdx;
2182 // Load return address for tail calls.
2183 if (isTailCall && FPDiff)
2184 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2185 Is64Bit, FPDiff, dl);
2186
2187 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2188 SmallVector<SDValue, 8> MemOpChains;
2189 SDValue StackPtr;
2190
2191 // The next loop assumes that the locations are in the same order of the
2192 // input arguments.
2193 assert(isSortedByValueNo(ArgLocs) &&
2194 "Argument Location list must be sorted before lowering");
2195
2196 // Walk the register/memloc assignments, inserting copies/loads. In the case
2197 // of tail call optimization arguments are handled later.
2198 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2199 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2200 ++I, ++OutIndex) {
2201 assert(OutIndex < Outs.size() && "Invalid Out index");
2202 // Skip inalloca/preallocated arguments, they have already been written.
2203 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2204 if (Flags.isInAlloca() || Flags.isPreallocated())
2205 continue;
2206
2207 CCValAssign &VA = ArgLocs[I];
2208 EVT RegVT = VA.getLocVT();
2209 SDValue Arg = OutVals[OutIndex];
2210 bool isByVal = Flags.isByVal();
2211
2212 // Promote the value if needed.
2213 switch (VA.getLocInfo()) {
2214 default: llvm_unreachable("Unknown loc info!");
2215 case CCValAssign::Full: break;
2216 case CCValAssign::SExt:
2217 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2218 break;
2219 case CCValAssign::ZExt:
2220 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2221 break;
2222 case CCValAssign::AExt:
2223 if (Arg.getValueType().isVector() &&
2224 Arg.getValueType().getVectorElementType() == MVT::i1)
2225 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
2226 else if (RegVT.is128BitVector()) {
2227 // Special case: passing MMX values in XMM registers.
2228 Arg = DAG.getBitcast(MVT::i64, Arg);
2229 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2230 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2231 } else
2232 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2233 break;
2234 case CCValAssign::BCvt:
2235 Arg = DAG.getBitcast(RegVT, Arg);
2236 break;
2237 case CCValAssign::Indirect: {
2238 if (isByVal) {
2239 // Memcpy the argument to a temporary stack slot to prevent
2240 // the caller from seeing any modifications the callee may make
2241 // as guaranteed by the `byval` attribute.
2242 int FrameIdx = MF.getFrameInfo().CreateStackObject(
2243 Flags.getByValSize(),
2244 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
2245 SDValue StackSlot =
2246 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
2247 Chain =
2248 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
2249 // From now on treat this as a regular pointer
2250 Arg = StackSlot;
2251 isByVal = false;
2252 } else {
2253 // Store the argument.
2254 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2255 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2256 Chain = DAG.getStore(
2257 Chain, dl, Arg, SpillSlot,
2258 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2259 Arg = SpillSlot;
2260 }
2261 break;
2262 }
2263 }
2264
2265 if (VA.needsCustom()) {
2266 assert(VA.getValVT() == MVT::v64i1 &&
2267 "Currently the only custom case is when we split v64i1 to 2 regs");
2268 // Split v64i1 value into two registers
2269 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
2270 } else if (VA.isRegLoc()) {
2271 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2272 const TargetOptions &Options = DAG.getTarget().Options;
2273 if (Options.EmitCallSiteInfo)
2274 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
2275 if (isVarArg && IsWin64) {
2276 // Win64 ABI requires argument XMM reg to be copied to the corresponding
2277 // shadow reg if callee is a varargs function.
2278 Register ShadowReg;
2279 switch (VA.getLocReg()) {
2280 case X86::XMM0: ShadowReg = X86::RCX; break;
2281 case X86::XMM1: ShadowReg = X86::RDX; break;
2282 case X86::XMM2: ShadowReg = X86::R8; break;
2283 case X86::XMM3: ShadowReg = X86::R9; break;
2284 }
2285 if (ShadowReg)
2286 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2287 }
2288 } else if (!IsSibcall && (!isTailCall || isByVal)) {
2289 assert(VA.isMemLoc());
2290 if (!StackPtr.getNode())
2291 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2292 getPointerTy(DAG.getDataLayout()));
2293 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2294 dl, DAG, VA, Flags, isByVal));
2295 }
2296 }
2297
2298 if (!MemOpChains.empty())
2299 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2300
2301 if (Subtarget.isPICStyleGOT()) {
2302 // ELF / PIC requires the GOT address to be in the EBX register before
2303 // function calls made via the PLT (except for regcall).
2304 if (!isTailCall) {
2305 // Indirect call with RegCall calling convention may use up all the
2306 // general registers, so it is not suitable to bind EBX register for the
2307 // GOT address, just let the register allocator handle it.
2308 if (CallConv != CallingConv::X86_RegCall)
2309 RegsToPass.push_back(std::make_pair(
2310 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2311 getPointerTy(DAG.getDataLayout()))));
2312 } else {
2313 // If we are tail calling and generating PIC/GOT style code load the
2314 // address of the callee into ECX. The value in ecx is used as target of
2315 // the tail jump. This is done to circumvent the ebx/callee-saved problem
2316 // for tail calls on PIC/GOT architectures. Normally we would just put the
2317 // address of GOT into ebx and then call target@PLT. But for tail calls
2318 // ebx would be restored (since ebx is callee saved) before jumping to the
2319 // target@PLT.
2320
2321 // Note: The actual moving to ECX is done further down.
2322 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2323 if (G && !G->getGlobal()->hasLocalLinkage() &&
2324 G->getGlobal()->hasDefaultVisibility())
2325 Callee = LowerGlobalAddress(Callee, DAG);
2326 else if (isa<ExternalSymbolSDNode>(Callee))
2327 Callee = LowerExternalSymbol(Callee, DAG);
2328 }
2329 }
2330
2331 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2332 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
2333 // From AMD64 ABI document:
2334 // For calls that may call functions that use varargs or stdargs
2335 // (prototype-less calls or calls to functions containing ellipsis (...) in
2336 // the declaration) %al is used as a hidden argument to specify the number
2337 // of SSE registers used. The contents of %al do not need to match exactly
2338 // the number of registers, but must be an upper bound on the number of SSE
2339 // registers used and must be in the range 0 - 8 inclusive.
2340
2341 // Count the number of XMM registers allocated.
2342 static const MCPhysReg XMMArgRegs[] = {
2343 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2344 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2345 };
2346 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2347 assert((Subtarget.hasSSE1() || !NumXMMRegs)
2348 && "SSE registers cannot be used when SSE is disabled");
2349 RegsToPass.push_back(std::make_pair(Register(X86::AL),
2350 DAG.getConstant(NumXMMRegs, dl,
2351 MVT::i8)));
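// For example, a variadic call passing two doubles in XMM0 and XMM1 reaches
// this point with NumXMMRegs == 2, so AL is set to 2.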
2352 }
2353
2354 if (isVarArg && IsMustTail) {
2355 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2356 for (const auto &F : Forwards) {
2357 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2358 RegsToPass.push_back(std::make_pair(F.PReg, Val));
2359 }
2360 }
2361
2362 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2363 // don't need this because the eligibility check rejects calls that require
2364 // shuffling arguments passed in memory.
2365 if (!IsSibcall && isTailCall) {
2366 // Force all the incoming stack arguments to be loaded from the stack
2367 // before any new outgoing arguments or the return address are stored to the
2368 // stack, because the outgoing stack slots may alias the incoming argument
2369 // stack slots, and the alias isn't otherwise explicit. This is slightly
2370 // more conservative than necessary, because it means that each store
2371 // effectively depends on every argument instead of just those arguments it
2372 // would clobber.
2373 Chain = DAG.getStackArgumentTokenFactor(Chain);
2374
2375 SmallVector<SDValue, 8> MemOpChains2;
2376 SDValue FIN;
2377 int FI = 0;
2378 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2379 ++I, ++OutsIndex) {
2380 CCValAssign &VA = ArgLocs[I];
2381
2382 if (VA.isRegLoc()) {
2383 if (VA.needsCustom()) {
2384 assert((CallConv == CallingConv::X86_RegCall) &&
2385 "Expecting custom case only in regcall calling convention");
2386 // This means that we are in special case where one argument was
2387 // passed through two register locations - Skip the next location
2388 ++I;
2389 }
2390
2391 continue;
2392 }
2393
2394 assert(VA.isMemLoc());
2395 SDValue Arg = OutVals[OutsIndex];
2396 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2397 // Skip inalloca/preallocated arguments. They don't require any work.
2398 if (Flags.isInAlloca() || Flags.isPreallocated())
2399 continue;
2400 // Create frame index.
2401 int32_t Offset = VA.getLocMemOffset()+FPDiff;
2402 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2403 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
2404 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2405
2406 if (Flags.isByVal()) {
2407 // Copy relative to framepointer.
2408 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
2409 if (!StackPtr.getNode())
2410 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2411 getPointerTy(DAG.getDataLayout()));
2412 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2413 StackPtr, Source);
2414
2415 MemOpChains2.push_back(
2416 CreateCopyOfByValArgument(Source, FIN, Chain, Flags, DAG, dl));
2417 } else {
2418 // Store relative to framepointer.
2419 MemOpChains2.push_back(DAG.getStore(
2420 Chain, dl, Arg, FIN,
2421 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
2422 }
2423 }
2424
2425 if (!MemOpChains2.empty())
2426 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2427
2428 // Store the return address to the appropriate stack slot.
2429 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2430 getPointerTy(DAG.getDataLayout()),
2431 RegInfo->getSlotSize(), FPDiff, dl);
2432 }
2433
2434 // Build a sequence of copy-to-reg nodes chained together with token chain
2435 // and glue operands which copy the outgoing args into registers.
2436 SDValue InGlue;
2437 for (const auto &[Reg, N] : RegsToPass) {
2438 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
2439 InGlue = Chain.getValue(1);
2440 }
2441
2442 bool IsImpCall = false;
2443 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2444 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2445 // In the 64-bit large code model, we have to make all calls
2446 // through a register, since the call instruction's 32-bit
2447 // pc-relative offset may not be large enough to hold the whole
2448 // address.
2449 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2450 Callee->getOpcode() == ISD::ExternalSymbol) {
2451 // Lower direct calls to global addresses and external symbols. Setting
2452 // ForCall to true here has the effect of removing WrapperRIP when possible
2453 // to allow direct calls to be selected without first materializing the
2454 // address into a register.
2455 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true, &IsImpCall);
2456 } else if (Subtarget.isTarget64BitILP32() &&
2457 Callee.getValueType() == MVT::i32) {
2458 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
2459 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2460 }
2461
2462 SmallVector<SDValue, 8> Ops;
2463
2464 if (!IsSibcall && isTailCall && !IsMustTail) {
2465 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
2466 InGlue = Chain.getValue(1);
2467 }
2468
2469 Ops.push_back(Chain);
2470 Ops.push_back(Callee);
2471
2472 if (isTailCall)
2473 Ops.push_back(DAG.getSignedTargetConstant(FPDiff, dl, MVT::i32));
2474
2475 // Add argument registers to the end of the list so that they are known live
2476 // into the call.
2477 for (const auto &[Reg, N] : RegsToPass)
2478 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2479
2480 // Add a register mask operand representing the call-preserved registers.
2481 const uint32_t *Mask = [&]() {
2482 auto AdaptedCC = CallConv;
2483 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2484 // use X86_INTR calling convention because it has the same CSR mask
2485 // (same preserved registers).
2486 if (HasNCSR)
2487 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
2488 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
2489 // to use the CSR_NoRegs_RegMask.
2490 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
2491 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2492 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2493 }();
2494 assert(Mask && "Missing call preserved mask for calling convention");
2495
2496 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getFramePtr())) {
2497 X86Info->setFPClobberedByCall(true);
2498 if (CLI.CB && isa<InvokeInst>(CLI.CB))
2499 X86Info->setFPClobberedByInvoke(true);
2500 }
2501 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getBaseRegister())) {
2502 X86Info->setBPClobberedByCall(true);
2503 if (CLI.CB && isa<InvokeInst>(CLI.CB))
2504 X86Info->setBPClobberedByInvoke(true);
2505 }
2506
2507 // If this is an invoke in a 32-bit function using a funclet-based
2508 // personality, assume the function clobbers all registers. If an exception
2509 // is thrown, the runtime will not restore CSRs.
2510 // FIXME: Model this more precisely so that we can register allocate across
2511 // the normal edge and spill and fill across the exceptional edge.
2512 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
2513 const Function &CallerFn = MF.getFunction();
2514 EHPersonality Pers =
2515 CallerFn.hasPersonalityFn()
2516 ? classifyEHPersonality(CallerFn.getPersonalityFn())
2517 : EHPersonality::Unknown;
2518 if (isFuncletEHPersonality(Pers))
2519 Mask = RegInfo->getNoPreservedMask();
2520 }
2521
2522 // Define a new register mask from the existing mask.
2523 uint32_t *RegMask = nullptr;
2524
2525 // In some calling conventions we need to remove the used physical registers
2526 // from the reg mask. Create a new RegMask for such calling conventions.
2527 // RegMask for calling conventions that disable only return registers (e.g.
2528 // preserve_most) will be modified later in LowerCallResult.
2529 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
2530 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
2531 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2532
2533 // Allocate a new Reg Mask and copy Mask.
2534 RegMask = MF.allocateRegMask();
2535 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
2536 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
2537
2538 // Make sure all sub registers of the argument registers are reset
2539 // in the RegMask.
2540 if (ShouldDisableArgRegs) {
2541 for (auto const &RegPair : RegsToPass)
2542 for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2543 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2544 }
2545
2546 // Create the RegMask Operand according to our updated mask.
2547 Ops.push_back(DAG.getRegisterMask(RegMask));
2548 } else {
2549 // Create the RegMask Operand according to the static mask.
2550 Ops.push_back(DAG.getRegisterMask(Mask));
2551 }
2552
2553 if (InGlue.getNode())
2554 Ops.push_back(InGlue);
2555
2556 if (isTailCall) {
2557 // We used to do:
2558 //// If this is the first return lowered for this function, add the regs
2559 //// to the liveout set for the function.
2560 // This isn't right, although it's probably harmless on x86; liveouts
2561 // should be computed from returns not tail calls. Consider a void
2562 // function making a tail call to a function returning int.
2563 MF.getFrameInfo().setHasTailCall();
2564 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, Ops);
2565
2566 if (IsCFICall)
2567 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2568
2569 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2570 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2571 return Ret;
2572 }
2573
2574 // Returns a chain & a glue for retval copy to use.
2575 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2576 if (IsImpCall) {
2577 Chain = DAG.getNode(X86ISD::IMP_CALL, dl, NodeTys, Ops);
2578 } else if (IsNoTrackIndirectCall) {
2579 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
2580 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
2581 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2582 // expanded to the call, directly followed by a special marker sequence and
2583 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
2584 assert(!isTailCall &&
2585 "tail calls cannot be marked with clang.arc.attachedcall");
2586 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2587
2588 // Add a target global address for the retainRV/claimRV runtime function
2589 // just before the call target.
2590 Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
2591 auto PtrVT = getPointerTy(DAG.getDataLayout());
2592 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
2593 Ops.insert(Ops.begin() + 1, GA);
2594 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
2595 } else {
2596 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
2597 }
2598
2599 if (IsCFICall)
2600 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2601
2602 InGlue = Chain.getValue(1);
2603 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2604 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2605
2606 // Save heapallocsite metadata.
2607 if (CLI.CB)
2608 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
2609 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
2610
2611 // Create the CALLSEQ_END node.
2612 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2613 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2614 DAG.getTarget().Options.GuaranteedTailCallOpt))
2615 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2616 else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
2617 // If this call passes a struct-return pointer, the callee
2618 // pops that struct pointer.
2619 NumBytesForCalleeToPop = 4;
2620
2621 // Returns a glue for retval copy to use.
2622 if (!IsSibcall) {
2623 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
2624 InGlue, dl);
2625 InGlue = Chain.getValue(1);
2626 }
2627
2628 if (CallingConv::PreserveNone == CallConv)
2629 for (const ISD::OutputArg &Out : Outs) {
2630 if (Out.Flags.isSwiftSelf() || Out.Flags.isSwiftAsync() ||
2631 Out.Flags.isSwiftError()) {
2632 errorUnsupported(DAG, dl,
2633 "Swift attributes can't be used with preserve_none");
2634 break;
2635 }
2636 }
2637
2638 // Handle result values, copying them out of physregs into vregs that we
2639 // return.
2640 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2641 InVals, RegMask);
2642}
2643
2644//===----------------------------------------------------------------------===//
2645// Fast Calling Convention (tail call) implementation
2646//===----------------------------------------------------------------------===//
2647
2648 // Like stdcall, the callee cleans up the arguments, except that ECX is
2649 // reserved for storing the tail-called function address. Only 2 registers are
2650// free for argument passing (inreg). Tail call optimization is performed
2651// provided:
2652// * tailcallopt is enabled
2653// * caller/callee are fastcc
2654// On X86_64 architecture with GOT-style position independent code only local
2655// (within module) calls are supported at the moment.
2656 // To keep the stack aligned according to the platform ABI, the function
2657 // GetAlignedArgumentStackSize ensures that the argument delta is always a
2658 // multiple of the stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
2659// If a tail called function callee has more arguments than the caller the
2660// caller needs to make sure that there is room to move the RETADDR to. This is
2661// achieved by reserving an area the size of the argument delta right after the
2662// original RETADDR, but before the saved framepointer or the spilled registers
2663// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
2664// stack layout:
2665// arg1
2666// arg2
2667// RETADDR
2668// [ new RETADDR
2669// move area ]
2670// (possible EBP)
2671// ESI
2672// EDI
2673// local1 ..
2674
2675 /// Align the stack size so that, together with the return-address slot, it
2676 /// satisfies the alignment requirement, e.g. 16n + 12 for 16-byte alignment.
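/// For example, on a 32-bit target (SlotSize = 4) with 16-byte stack alignment,
/// a StackSize of 32 becomes alignTo(32 + 4, 16) - 4 = 44, i.e. of the form
/// 16n + 12, so the stack is 16-byte aligned again once the 4-byte return
/// address is pushed.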
2677unsigned
2678X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2679 SelectionDAG &DAG) const {
2680 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2681 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2682 assert(StackSize % SlotSize == 0 &&
2683 "StackSize must be a multiple of SlotSize");
2684 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
2685}
2686
2687 /// Return true if the given stack call argument is already available at the
2688 /// same relative position in the caller's incoming argument stack.
2689static
2690 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2691 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2692 const X86InstrInfo *TII, const CCValAssign &VA) {
2693 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2694
2695 for (;;) {
2696 // Look through nodes that don't alter the bits of the incoming value.
2697 unsigned Op = Arg.getOpcode();
2698 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2699 Op == ISD::AssertZext) {
2700 Arg = Arg.getOperand(0);
2701 continue;
2702 }
2703 if (Op == ISD::TRUNCATE) {
2704 const SDValue &TruncInput = Arg.getOperand(0);
2705 if (TruncInput.getOpcode() == ISD::AssertZext &&
2706 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
2707 Arg.getValueType()) {
2708 Arg = TruncInput.getOperand(0);
2709 continue;
2710 }
2711 }
2712 break;
2713 }
2714
2715 int FI = INT_MAX;
2716 if (Arg.getOpcode() == ISD::CopyFromReg) {
2717 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2718 if (!VR.isVirtual())
2719 return false;
2720 MachineInstr *Def = MRI->getVRegDef(VR);
2721 if (!Def)
2722 return false;
2723 if (!Flags.isByVal()) {
2724 if (!TII->isLoadFromStackSlot(*Def, FI))
2725 return false;
2726 } else {
2727 unsigned Opcode = Def->getOpcode();
2728 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2729 Opcode == X86::LEA64_32r) &&
2730 Def->getOperand(1).isFI()) {
2731 FI = Def->getOperand(1).getIndex();
2732 Bytes = Flags.getByValSize();
2733 } else
2734 return false;
2735 }
2736 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2737 if (Flags.isByVal())
2738 // ByVal argument is passed in as a pointer but it's now being
2739 // dereferenced. e.g.
2740 // define @foo(%struct.X* %A) {
2741 // tail call @bar(%struct.X* byval %A)
2742 // }
2743 return false;
2744 SDValue Ptr = Ld->getBasePtr();
2745 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2746 if (!FINode)
2747 return false;
2748 FI = FINode->getIndex();
2749 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
2750 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
2751 FI = FINode->getIndex();
2752 Bytes = Flags.getByValSize();
2753 } else
2754 return false;
2755
2756 assert(FI != INT_MAX);
2757 if (!MFI.isFixedObjectIndex(FI))
2758 return false;
2759
2760 if (Offset != MFI.getObjectOffset(FI))
2761 return false;
2762
2763 // If this is not byval, check that the argument stack object is immutable.
2764 // inalloca and argument copy elision can create mutable argument stack
2765 // objects. Byval objects can be mutated, but a byval call intends to pass the
2766 // mutated memory.
2767 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
2768 return false;
2769
2770 if (VA.getLocVT().getFixedSizeInBits() >
2771 Arg.getValueSizeInBits().getFixedValue()) {
2772 // If the argument location is wider than the argument type, check that any
2773 // extension flags match.
2774 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
2775 Flags.isSExt() != MFI.isObjectSExt(FI)) {
2776 return false;
2777 }
2778 }
2779
2780 return Bytes == MFI.getObjectSize(FI);
2781}
2782
2783static bool
2784 mayBeSRetTailCallCompatible(const TargetLowering::CallLoweringInfo &CLI,
2785 Register CallerSRetReg) {
2786 const auto &Outs = CLI.Outs;
2787 const auto &OutVals = CLI.OutVals;
2788
2789 // We know the caller has a sret pointer argument (CallerSRetReg). Locate the
2790 // operand index within the callee that may have a sret pointer too.
2791 unsigned Pos = 0;
2792 for (unsigned E = Outs.size(); Pos != E; ++Pos)
2793 if (Outs[Pos].Flags.isSRet())
2794 break;
2795 // Bail out if the callee does not have any sret argument.
2796 if (Pos == Outs.size())
2797 return false;
2798
2799 // At this point, either the caller is forwarding its sret argument to the
2800 // callee, or the callee is being passed a different sret pointer. We now look
2801 // for a CopyToReg, where the callee sret argument is written into a new vreg
2802 // (which should later be %rax/%eax, if this is returned).
2803 SDValue SRetArgVal = OutVals[Pos];
2804 for (SDNode *User : SRetArgVal->users()) {
2805 if (User->getOpcode() != ISD::CopyToReg)
2806 continue;
2807 Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
2808 if (Reg == CallerSRetReg && User->getOperand(2) == SRetArgVal)
2809 return true;
2810 }
2811
2812 return false;
2813}
2814
2815/// Check whether the call is eligible for tail call optimization. Targets
2816/// that want to do tail call optimization should implement this function.
2817/// Note that the x86 backend does not check musttail calls for eligibility! The
2818/// rest of x86 tail call lowering must be prepared to forward arguments of any
2819/// type.
2820bool X86TargetLowering::IsEligibleForTailCallOptimization(
2821 TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
2822 SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const {
2823 SelectionDAG &DAG = CLI.DAG;
2824 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2825 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2826 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2827 SDValue Callee = CLI.Callee;
2828 CallingConv::ID CalleeCC = CLI.CallConv;
2829 bool isVarArg = CLI.IsVarArg;
2830
2831 if (!mayTailCallThisCC(CalleeCC))
2832 return false;
2833
2834 // If -tailcallopt is specified, make fastcc functions tail-callable.
2835 MachineFunction &MF = DAG.getMachineFunction();
2836 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2837 const Function &CallerF = MF.getFunction();
2838
2839 // If the function return type is x86_fp80 and the callee return type is not,
2840 // then the FP_EXTEND of the call result is not a nop. It's not safe to
2841 // perform a tailcall optimization here.
2842 if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
2843 return false;
2844
2845 CallingConv::ID CallerCC = CallerF.getCallingConv();
2846 bool CCMatch = CallerCC == CalleeCC;
2847 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
2848 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
2849 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
2850 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
2851
2852 // Win64 functions have extra shadow space for argument homing. Don't do the
2853 // sibcall if the caller and callee have mismatched expectations for this
2854 // space.
2855 if (IsCalleeWin64 != IsCallerWin64)
2856 return false;
2857
2858 if (IsGuaranteeTCO) {
2859 if (canGuaranteeTCO(CalleeCC) && CCMatch)
2860 return true;
2861 return false;
2862 }
2863
2864 // Look for obvious safe cases to perform tail call optimization that do not
2865 // require ABI changes. This is what gcc calls sibcall.
2866
2867 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
2868 // emit a special epilogue.
2869 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2870 if (RegInfo->hasStackRealignment(MF))
2871 return false;
2872
2873 // Avoid the sibcall optimization if we are an sret-returning function and the
2874 // callee is incompatible, unless we can prove otherwise below. See the comment
2875 // in LowerReturn about why hasStructRetAttr is insufficient.
2876 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
2877 // For a compatible tail call the callee must return our sret pointer. So it
2878 // needs to be (a) an sret function itself and (b) we pass our sret as its
2879 // sret. Condition #b is harder to determine.
2880 if (!mayBeSRetTailCallCompatible(CLI, SRetReg))
2881 return false;
2882 } else if (IsCalleePopSRet)
2883 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
2884 // expect that.
2885 return false;
2886
2887 // Do not sibcall optimize vararg calls unless all arguments are passed via
2888 // registers.
2889 LLVMContext &C = *DAG.getContext();
2890 if (isVarArg && !Outs.empty()) {
2891 // Optimizing for varargs on Win64 is unlikely to be safe without
2892 // additional testing.
2893 if (IsCalleeWin64 || IsCallerWin64)
2894 return false;
2895
2896 for (const auto &VA : ArgLocs)
2897 if (!VA.isRegLoc())
2898 return false;
2899 }
2900
2901 // If the call result is in ST0 / ST1, it needs to be popped off the x87
2902 // stack. Therefore, if it's not used by the call it is not safe to optimize
2903 // this into a sibcall.
2904 bool Unused = false;
2905 for (const auto &In : Ins) {
2906 if (!In.Used) {
2907 Unused = true;
2908 break;
2909 }
2910 }
2911 if (Unused) {
2912 SmallVector<CCValAssign, 16> RVLocs;
2913 CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
2914 RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2915 for (const auto &VA : RVLocs) {
2916 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
2917 return false;
2918 }
2919 }
2920
2921 // Check that the call results are passed in the same way.
2922 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2923 RetCC_X86, RetCC_X86))
2924 return false;
2925 // The callee has to preserve all registers the caller needs to preserve.
2926 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2927 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2928 if (!CCMatch) {
2929 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2930 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2931 return false;
2932 }
2933
2934 // The stack frame of the caller cannot be replaced by the tail callee's one
2935 // if the function is required to preserve all the registers. Conservatively
2936 // prevent tail call optimization even if hypothetically all the registers are
2937 // used for passing formal parameters or returning values.
2938 if (CallerF.hasFnAttribute("no_caller_saved_registers"))
2939 return false;
2940
2941 unsigned StackArgsSize = CCInfo.getStackSize();
2942
2943 // If the callee takes no arguments then go on to check the results of the
2944 // call.
2945 if (!Outs.empty()) {
2946 if (StackArgsSize > 0) {
2947 // Check if the arguments are already laid out in the right way as
2948 // the caller's fixed stack objects.
2949 MachineFrameInfo &MFI = MF.getFrameInfo();
2950 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2951 const X86InstrInfo *TII = Subtarget.getInstrInfo();
2952 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2953 const CCValAssign &VA = ArgLocs[I];
2954 SDValue Arg = OutVals[I];
2955 ISD::ArgFlagsTy Flags = Outs[I].Flags;
2956 if (VA.getLocInfo() == CCValAssign::Indirect)
2957 return false;
2958 if (!VA.isRegLoc()) {
2959 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
2960 TII, VA))
2961 return false;
2962 }
2963 }
2964 }
2965
2966 bool PositionIndependent = isPositionIndependent();
2967 // If the tailcall address may be in a register, then make sure it's
2968 // possible to register allocate for it. In 32-bit, the call address can
2969 // only target EAX, EDX, or ECX since the tail call must be scheduled after
2970 // callee-saved registers are restored. These happen to be the same
2971 // registers used to pass 'inreg' arguments so watch out for those.
2972 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
2973 !isa<ExternalSymbolSDNode>(Callee)) ||
2974 PositionIndependent)) {
2975 unsigned NumInRegs = 0;
2976 // In PIC we need an extra register to formulate the address computation
2977 // for the callee.
2978 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
2979
2980 for (const auto &VA : ArgLocs) {
2981 if (!VA.isRegLoc())
2982 continue;
2983 Register Reg = VA.getLocReg();
2984 switch (Reg) {
2985 default: break;
2986 case X86::EAX: case X86::EDX: case X86::ECX:
2987 if (++NumInRegs == MaxInRegs)
2988 return false;
2989 break;
2990 }
2991 }
2992 }
2993
2994 const MachineRegisterInfo &MRI = MF.getRegInfo();
2995 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2996 return false;
2997 }
2998
2999 bool CalleeWillPop =
3000 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
3001 MF.getTarget().Options.GuaranteedTailCallOpt);
3002
3003 if (unsigned BytesToPop = FuncInfo->getBytesToPopOnReturn()) {
3004 // If we have bytes to pop, the callee must pop them.
3005 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
3006 if (!CalleePopMatches)
3007 return false;
3008 } else if (CalleeWillPop && StackArgsSize > 0) {
3009 // If we don't have bytes to pop, make sure the callee doesn't pop any.
3010 return false;
3011 }
3012
3013 return true;
3014}
3015
3016/// Determines whether the callee is required to pop its own arguments.
3017/// Callee pop is necessary to support tail calls.
3018 bool X86::isCalleePop(CallingConv::ID CallingConv,
3019 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
3020 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
3021 // can guarantee TCO.
3022 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
3023 return true;
3024
3025 switch (CallingConv) {
3026 default:
3027 return false;
3028 case CallingConv::X86_StdCall:
3029 case CallingConv::X86_FastCall:
3030 case CallingConv::X86_ThisCall:
3031 case CallingConv::X86_VectorCall:
3032 return !is64Bit;
3033 }
3034}
Call this when the user attempts to do something unsupported, like returning a double without SSE2 en...
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, unsigned SlotSize, int FPDiff, const SDLoc &dl)
Emit a store of the return address if tail call optimization is performed and it is required (FPDiff!...
static bool hasCalleePopSRet(const SmallVectorImpl< T > &Args, const X86Subtarget &Subtarget)
Determines whether Args, either a set of outgoing arguments to a call, or a set of incoming args of a...
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static bool hasStackGuardSlotTLS(const Triple &TargetTriple)
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
The function will lower a register of various sizes (8/16/32/64) to a mask value of the expected size...
static Constant * SegmentOffset(IRBuilderBase &IRB, int Offset, unsigned AddressSpace)
static bool isBitAligned(Align Alignment, uint64_t SizeInBits)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition ArrayRef.h:147
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:191
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
CCState - This class holds information needed while lowering arguments and return values.
static LLVM_ABI bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
void convertToReg(MCRegister Reg)
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:163
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:63
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Diagnostic information for unsupported feature in backend.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition Function.h:903
Constant * getPersonalityFn() const
Get the personality function associated with this function.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:727
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
LLVMContext & getContext() const
Definition IRBuilder.h:203
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:605
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setObjectZExt(int ObjectIdx, bool IsZExt)
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setObjectSExt(int ObjectIdx, bool IsSExt)
bool isImmutableObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to an immutable object.
void setHasTailCall(bool V=true)
bool isObjectZExt(int ObjectIdx) const
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isObjectSExt(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
uint32_t * allocateRegMask()
Allocate and initialize a register mask with NumRegister bits.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
@ EK_LabelDifference64
EK_LabelDifference64 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOStore
The memory access writes data.
static unsigned getRegMaskSize(unsigned NumRegs)
Returns number of elements needed for a regmask array.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
LLVM_ABI void disableCalleeSavedRegister(MCRegister Reg)
Disables the register from the list of CSRs.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:74
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
void setCFIType(uint32_t Type)
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
void addHeapAllocSite(const SDNode *Node, MDNode *MD)
Set HeapAllocSite to be associated with Node.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
const TargetMachine & getTarget() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVMContext * getContext() const
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:143
Class to represent struct types.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const
Returns the target-specific address of the unsafe stack pointer.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
RTLIB::LibcallImpl getLibcallImpl(RTLIB::Libcall Call) const
Get the libcall impl routine name for the specified libcall.
static StringRef getLibcallImplName(RTLIB::LibcallImpl Call)
Get the libcall routine name for the specified libcall implementation.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
bool isPositionIndependent() const
virtual ArrayRef< MCPhysReg > getRoundingControlRegisters() const
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
unsigned EmitCallGraphSection
Emit section containing call graph metadata.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
bool isAndroid() const
Tests whether the target is Android.
Definition Triple.h:826
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition Triple.h:723
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition Triple.h:754
bool isOSFuchsia() const
Definition Triple.h:643
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:298
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition Type.h:159
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:297
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:281
Value * getOperand(unsigned i) const
Definition User.h:232
LLVM Value Representation.
Definition Value.h:75
void setBytesToPopOnReturn(unsigned bytes)
void setVarArgsGPOffset(unsigned Offset)
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
void setVarArgsFPOffset(unsigned Offset)
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns physical register used as frame pointer.
Register getBaseRegister() const
const uint32_t * getNoPreservedMask() const override
bool hasSSE1() const
bool isTargetMCU() const
const Triple & getTargetTriple() const
bool useAVX512Regs() const
bool isCallingConvWin64(CallingConv::ID CC) const
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMemoryAccessFast(EVT VT, Align Alignment) const
bool useSoftFloat() const override
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool isSafeMemOpType(MVT VT) const override
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
For some targets, an LLVM struct type must be broken down into multiple simple types,...
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
Return the desired alignment for ByVal aggregate function arguments in the caller parameter area.
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Returns true if the target allows unaligned memory accesses of the specified type.
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void markLibCallAttributes(MachineFunction *MF, unsigned CC, ArgListTy &Args) const override
Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const override
Return true if the target stores SafeStack pointer at a fixed offset in some non-standard address spa...
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
This function returns true if the memory access is aligned or if the target allows this specific unal...
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the value type to use for ISD::SETCC.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override
For types supported by the target, this is an identity function.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:201
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
CallingConv Namespace - This namespace contains an enum with a value for the well-known calling conve...
Definition CallingConv.h:21
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ X86_64_SysV
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition CallingConv.h:53
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ X86_INTR
x86 hardware interrupt context.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ X86_ThisCall
Similar to X86_StdCall.
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition CallingConv.h:66
@ X86_StdCall
stdcall is mostly used by the Win32 API.
Definition CallingConv.h:99
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ X86_VectorCall
MSVC calling convention that passes vectors and vector aggregates in SSE registers.
@ Intel_OCL_BI
Used for Intel OpenCL built-ins.
@ PreserveNone
Used for runtime calls that preserves none general registers.
Definition CallingConv.h:90
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition CallingConv.h:87
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ X86_FastCall
'fast' analog of X86_StdCall.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:259
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:841
@ GlobalAddress
Definition ISDOpcodes.h:88
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:577
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:832
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:662
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:225
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:569
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:219
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:838
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ExternalSymbol
Definition ISDOpcodes.h:93
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:947
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:844
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
@ RET_GLUE
Return with a glue operand.
@ IRET
Return from interrupt. Operand 0 is the number of bytes to pop.
@ CALL
These operations represent an abstract X86 call instruction, which includes a bunch of information.
@ GlobalBaseReg
On Darwin, this node represents the result of the popl at function entry, used for PIC code.
@ TC_RETURN
Tail call return.
@ NT_CALL
Same as call except it adds the NoTrack prefix.
@ MOVDQ2Q
Copies a 64-bit value from the low word of an XMM vector to an MMX vector.
@ POP_FROM_X87_REG
The same as ISD::CopyFromReg except that this node makes it explicit that it may lower to an x87 FPU ...
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget, const MachineFunction &MF)
True if the target supports the extended frame for async Swift functions.
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
std::optional< Function * > getAttachedARCFunction(const CallBase *CB)
This function returns operand bundle clang_arc_attachedcall's argument, which is the address of the A...
Definition ObjCARCUtil.h:43
bool hasAttachedCallOpBundle(const CallBase *CB)
Definition ObjCARCUtil.h:29
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:477
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2136
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:167
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition STLExtras.h:1920
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static constexpr Align Constant()
Allow constructions of constexpr Align.
Definition Alignment.h:88
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:395
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:74
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:300
ElementCount getVectorElementCount() const
Definition ValueTypes.h:350
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:373
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:316
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:207
bool is512BitVector() const
Return true if this is a 512-bit vector type.
Definition ValueTypes.h:217
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:168
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:212
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:328
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:336
Describes a register that needs to be forwarded from the prologue to a musttail call.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
Type * RetTy
Same as OrigRetTy, or partially legalized for soft float libcalls.