X86ISelLoweringCall.cpp
1//===- llvm/lib/Target/X86/X86ISelLoweringCall.cpp - Call lowering --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements the lowering of LLVM calls to DAG nodes.
11//
12//===----------------------------------------------------------------------===//
13
15#include "X86.h"
16#include "X86CallingConv.h"
17#include "X86FrameLowering.h"
18#include "X86ISelLowering.h"
19#include "X86InstrBuilder.h"
21#include "X86TargetMachine.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
29#include "llvm/IR/Module.h"
31
32#define DEBUG_TYPE "x86-isel"
33
34using namespace llvm;
35
36STATISTIC(NumTailCalls, "Number of tail calls");
37
38/// Call this when the user attempts to do something unsupported, like
39/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
40/// report_fatal_error, so calling code should attempt to recover without
41/// crashing.
42static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
43 const char *Msg) {
44 MachineFunction &MF = DAG.getMachineFunction();
45 DAG.getContext()->diagnose(
46 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
47}
48
49/// Returns true if a CC can dynamically exclude a register from the list of
50/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
51/// the return registers.
52static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
53 switch (CC) {
54 default:
55 return false;
56 case CallingConv::X86_RegCall:
57 case CallingConv::PreserveMost:
58 case CallingConv::PreserveAll:
59 return true;
60 }
61}
62
63/// Returns true if a CC can dynamically exclude a register from the list of
64/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
65/// the parameters.
66static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
67 return CC == CallingConv::X86_RegCall;
68}
69
70 static std::pair<MVT, unsigned>
71 handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
72 const X86Subtarget &Subtarget) {
73 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
74 // convention is one that uses k registers.
75 if (NumElts == 2)
76 return {MVT::v2i64, 1};
77 if (NumElts == 4)
78 return {MVT::v4i32, 1};
79 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
80 CC != CallingConv::Intel_OCL_BI)
81 return {MVT::v8i16, 1};
82 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
83 CC != CallingConv::Intel_OCL_BI)
84 return {MVT::v16i8, 1};
85 // v32i1 passes in ymm unless we have BWI and the calling convention is
86 // regcall.
87 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
88 return {MVT::v32i8, 1};
89 // Split v64i1 vectors if we don't have v64i8 available.
90 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
91 if (Subtarget.useAVX512Regs())
92 return {MVT::v64i8, 1};
93 return {MVT::v32i8, 2};
94 }
95
96 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
97 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
98 NumElts > 64)
99 return {MVT::i8, NumElts};
100
101 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
102}
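// Editor's note -- an illustrative sketch, not part of the upstream file. The
// cases above map a vXi1 mask width to the register type actually used for
// argument passing; for example, on an AVX512 target without BWI:
//   handleMaskRegisterForCallingConv(32, CallingConv::C, ST) -> {MVT::v32i8, 1}
//   handleMaskRegisterForCallingConv(64, CallingConv::C, ST) -> {MVT::i8, 64}
// while a 32-element mask on a BWI target under regcall falls through to
// {MVT::INVALID_SIMPLE_VALUE_TYPE, 0}, i.e. the default k-register handling.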
103
104MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
105 CallingConv::ID CC,
106 EVT VT) const {
107 if (VT.isVector()) {
108 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
109 unsigned NumElts = VT.getVectorNumElements();
110
111 MVT RegisterVT;
112 unsigned NumRegisters;
113 std::tie(RegisterVT, NumRegisters) =
114 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
115 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
116 return RegisterVT;
117 }
118
119 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
120 return MVT::v8f16;
121 }
122
123 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
124 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
125 !Subtarget.hasX87())
126 return MVT::i32;
127
128 if (isTypeLegal(MVT::f16)) {
129 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
130 return getRegisterTypeForCallingConv(
131 Context, CC, VT.changeVectorElementType(Context, MVT::f16));
132
133 if (VT == MVT::bf16)
134 return MVT::f16;
135 }
136
137 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
138}
139
140unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
141 CallingConv::ID CC,
142 EVT VT) const {
143 if (VT.isVector()) {
144 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
145 unsigned NumElts = VT.getVectorNumElements();
146
147 MVT RegisterVT;
148 unsigned NumRegisters;
149 std::tie(RegisterVT, NumRegisters) =
150 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
151 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
152 return NumRegisters;
153 }
154
155 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
156 return 1;
157 }
158
159 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
160 // x87 is disabled.
161 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
162 if (VT == MVT::f64)
163 return 2;
164 if (VT == MVT::f80)
165 return 3;
166 }
167
168 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
169 isTypeLegal(MVT::f16))
170 return getNumRegistersForCallingConv(
171 Context, CC, VT.changeVectorElementType(Context, MVT::f16));
172
173 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
174}
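// Editor's illustrative sketch (not in the upstream file): these two hooks are
// normally queried together by the generic calling-convention lowering. For a
// v32i1 argument on an AVX512-without-BWI target they report one v32i8
// register; assuming a TargetLowering reference "TLI", a context "Ctx" and a
// calling convention "CC" are in scope:
//   MVT RegVT   = TLI.getRegisterTypeForCallingConv(Ctx, CC, MVT::v32i1);  // v32i8
//   unsigned NR = TLI.getNumRegistersForCallingConv(Ctx, CC, MVT::v32i1);  // 1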
175
176unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
177 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
178 unsigned &NumIntermediates, MVT &RegisterVT) const {
179 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
180 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
181 Subtarget.hasAVX512() &&
182 (!isPowerOf2_32(VT.getVectorNumElements()) ||
183 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
184 VT.getVectorNumElements() > 64)) {
185 RegisterVT = MVT::i8;
186 IntermediateVT = MVT::i1;
187 NumIntermediates = VT.getVectorNumElements();
188 return NumIntermediates;
189 }
190
191 // Split v64i1 vectors if we don't have v64i8 available.
192 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
193 CC != CallingConv::X86_RegCall) {
194 RegisterVT = MVT::v32i8;
195 IntermediateVT = MVT::v32i1;
196 NumIntermediates = 2;
197 return 2;
198 }
199
200 // Split vNbf16 vectors according to vNf16.
201 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
202 isTypeLegal(MVT::f16))
203 VT = VT.changeVectorElementType(Context, MVT::f16);
204
205 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
206 NumIntermediates, RegisterVT);
207}
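// Editor's illustrative sketch (not in the upstream file): for v64i1 with BWI
// but 256-bit-preferring tuning (useAVX512Regs() == false), the breakdown
// above yields IntermediateVT = v32i1, RegisterVT = v32i8 and
// NumIntermediates = 2, i.e. the mask is split across two YMM-sized values.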
208
209EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
210 LLVMContext& Context,
211 EVT VT) const {
212 if (!VT.isVector())
213 return MVT::i8;
214
215 if (Subtarget.hasAVX512()) {
216 // Figure out what this type will be legalized to.
217 EVT LegalVT = VT;
218 while (getTypeAction(Context, LegalVT) != TypeLegal)
219 LegalVT = getTypeToTransformTo(Context, LegalVT);
220
221 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
222 if (LegalVT.getSimpleVT().is512BitVector())
223 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
224
225 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
226 // If we legalized to less than a 512-bit vector, then we will use a vXi1
227 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
228 // vXi16/vXi8.
229 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
230 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
231 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
232 }
233 }
234
235 return VT.changeVectorElementTypeToInteger();
236}
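// Editor's illustrative sketch (not in the upstream file): the practical
// effect of getSetCCResultType is that a compare of v8i32 yields v8i1 when
// AVX512VL is available (vector compares write k registers), but v8i32 on a
// plain AVX2 target, where the compare result is a full-width vector mask.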
237
238bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
239 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
240 const DataLayout &DL) const {
241 // On x86-64 i128 is split into two i64s and needs to be allocated to two
242 // consecutive registers, or spilled to the stack as a whole. On x86-32 i128
243 // is split to four i32s and never actually passed in registers, but we use
244 // the consecutive register mark to match it in TableGen.
245 if (Ty->isIntegerTy(128))
246 return true;
247
248 // On x86-32, fp128 acts the same as i128.
249 if (Subtarget.is32Bit() && Ty->isFP128Ty())
250 return true;
251
252 return false;
253}
254
255/// Helper for getByValTypeAlignment to determine
256/// the desired ByVal argument alignment.
257static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
258 if (MaxAlign == 16)
259 return;
260 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
261 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
262 MaxAlign = Align(16);
263 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
264 Align EltAlign;
265 getMaxByValAlign(ATy->getElementType(), EltAlign);
266 if (EltAlign > MaxAlign)
267 MaxAlign = EltAlign;
268 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
269 for (auto *EltTy : STy->elements()) {
270 Align EltAlign;
271 getMaxByValAlign(EltTy, EltAlign);
272 if (EltAlign > MaxAlign)
273 MaxAlign = EltAlign;
274 if (MaxAlign == 16)
275 break;
276 }
277 }
278}
279
280/// Return the desired alignment for ByVal aggregate
281/// function arguments in the caller parameter area. For X86, aggregates
282/// that contain SSE vectors are placed at 16-byte boundaries while the rest
283/// are at 4-byte boundaries.
284Align X86TargetLowering::getByValTypeAlignment(Type *Ty,
285 const DataLayout &DL) const {
286 if (Subtarget.is64Bit())
287 return std::max(DL.getABITypeAlign(Ty), Align::Constant<8>());
288
289 Align Alignment(4);
290 if (Subtarget.hasSSE1())
291 getMaxByValAlign(Ty, Alignment);
292 return Alignment;
293}
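// Editor's illustrative sketch (not in the upstream file): on 32-bit x86 with
// SSE enabled, a byval struct such as
//   struct S { int i; __m128 v; };
// is placed on a 16-byte boundary because getMaxByValAlign finds the 128-bit
// vector member, while a struct of plain ints stays at 4 bytes. On x86-64 the
// result is simply max(ABI alignment, 8).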
294
295/// It returns EVT::Other if the type should be determined using generic
296/// target-independent logic.
297/// For vector ops we check that the overall size isn't larger than our
298/// preferred vector width.
299EVT X86TargetLowering::getOptimalMemOpType(
300 LLVMContext &Context, const MemOp &Op,
301 const AttributeList &FuncAttributes) const {
302 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
303 if (Op.size() >= 16 &&
304 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
305 // FIXME: Check if unaligned 64-byte accesses are slow.
306 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
307 (Subtarget.getPreferVectorWidth() >= 512)) {
308 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
309 }
310 // FIXME: Check if unaligned 32-byte accesses are slow.
311 if (Op.size() >= 32 && Subtarget.hasAVX() &&
312 Subtarget.useLight256BitInstructions()) {
313 // Although this isn't a well-supported type for AVX1, we'll let
314 // legalization and shuffle lowering produce the optimal codegen. If we
315 // choose an optimal type with a vector element larger than a byte,
316 // getMemsetStores() may create an intermediate splat (using an integer
317 // multiply) before we splat as a vector.
318 return MVT::v32i8;
319 }
320 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
321 return MVT::v16i8;
322 // TODO: Can SSE1 handle a byte vector?
323 // If we have SSE1 registers we should be able to use them.
324 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
325 (Subtarget.getPreferVectorWidth() >= 128))
326 return MVT::v4f32;
327 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
328 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
329 // Do not use f64 to lower memcpy if source is string constant. It's
330 // better to use i32 to avoid the loads.
331 // Also, do not use f64 to lower memset unless this is a memset of zeros.
332 // The gymnastics of splatting a byte value into an XMM register and then
333 // only using 8-byte stores (because this is a CPU with slow unaligned
334 // 16-byte accesses) makes that a loser.
335 return MVT::f64;
336 }
337 }
338 // This is a compromise. If we reach here, unaligned accesses may be slow on
339 // this target. However, creating smaller, aligned accesses could be even
340 // slower and would certainly be a lot more code.
341 if (Subtarget.is64Bit() && Op.size() >= 8)
342 return MVT::i64;
343 return MVT::i32;
344}
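// Editor's illustrative sketch (not in the upstream file): for a 64-byte
// memcpy on an AVX512BW target that prefers 512-bit vectors, the hook above
// returns MVT::v64i8 (one ZMM load/store pair); with only AVX it returns
// MVT::v32i8, with SSE2 MVT::v16i8, and for an 8-byte zero-memset on a 32-bit
// SSE2 target with slow unaligned 16-byte accesses it returns MVT::f64.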
345
346bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
347 if (VT == MVT::f32)
348 return Subtarget.hasSSE1();
349 if (VT == MVT::f64)
350 return Subtarget.hasSSE2();
351 return true;
352}
353
354static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
355 return (8 * Alignment.value()) % SizeInBits == 0;
356}
357
358bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
359 if (isBitAligned(Alignment, VT.getSizeInBits()))
360 return true;
361 switch (VT.getSizeInBits()) {
362 default:
363 // 8-byte and under are always assumed to be fast.
364 return true;
365 case 128:
366 return !Subtarget.isUnalignedMem16Slow();
367 case 256:
368 return !Subtarget.isUnalignedMem32Slow();
369 // TODO: What about AVX-512 (512-bit) accesses?
370 }
371}
372
373bool X86TargetLowering::allowsMisalignedMemoryAccesses(
374 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
375 unsigned *Fast) const {
376 if (Fast)
377 *Fast = isMemoryAccessFast(VT, Alignment);
378 // NonTemporal vector memory ops must be aligned.
379 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
380 // NT loads can only be vector aligned, so if its less aligned than the
381 // minimum vector size (which we can split the vector down to), we might as
382 // well use a regular unaligned vector load.
383 // We don't have any NT loads pre-SSE41.
384 if (!!(Flags & MachineMemOperand::MOLoad))
385 return (Alignment < 16 || !Subtarget.hasSSE41());
386 return false;
387 }
388 // Misaligned accesses of any size are always allowed.
389 return true;
390}
391
392bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
393 const DataLayout &DL, EVT VT,
394 unsigned AddrSpace, Align Alignment,
395 MachineMemOperand::Flags Flags,
396 unsigned *Fast) const {
397 if (Fast)
398 *Fast = isMemoryAccessFast(VT, Alignment);
399 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
400 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
401 /*Fast=*/nullptr))
402 return true;
403 // NonTemporal vector memory ops are special, and must be aligned.
404 if (!isBitAligned(Alignment, VT.getSizeInBits()))
405 return false;
406 switch (VT.getSizeInBits()) {
407 case 128:
408 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
409 return true;
410 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
411 return true;
412 return false;
413 case 256:
414 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
415 return true;
416 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
417 return true;
418 return false;
419 case 512:
420 if (Subtarget.hasAVX512())
421 return true;
422 return false;
423 default:
424 return false; // Don't have NonTemporal vector memory ops of this size.
425 }
426 }
427 return true;
428}
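// Editor's illustrative sketch (not in the upstream file): the non-temporal
// special cases above mirror the ISA requirements -- MOVNTDQA (NT load) exists
// only from SSE4.1 on and, like the NT stores (MOVNTPS/MOVNTDQ/VMOVNTPS),
// requires a naturally aligned vector address, so an under-aligned NT vector
// access is rejected here and lowering falls back to an ordinary access.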
429
430/// Return the entry encoding for a jump table in the
431/// current function. The returned value is a member of the
432/// MachineJumpTableInfo::JTEntryKind enum.
433unsigned X86TargetLowering::getJumpTableEncoding() const {
434 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
435 // symbol.
436 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
437 return MachineJumpTableInfo::EK_Custom32;
438 if (isPositionIndependent() &&
439 getTargetMachine().getCodeModel() == CodeModel::Large &&
440 !Subtarget.isTargetCOFF())
441 return MachineJumpTableInfo::EK_LabelDifference64;
442
443 // Otherwise, use the normal jump table encoding heuristics.
444 return TargetLowering::getJumpTableEncoding();
445}
446
447bool X86TargetLowering::useSoftFloat() const {
448 return Subtarget.useSoftFloat();
449}
450
451void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
452 ArgListTy &Args) const {
453
454 // Only relabel X86-32 for C / Stdcall CCs.
455 if (Subtarget.is64Bit())
456 return;
457 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
458 return;
459 unsigned ParamRegs = 0;
460 if (auto *M = MF->getFunction().getParent())
461 ParamRegs = M->getNumberRegisterParameters();
462
463 // Mark the first N int arguments as having reg
464 for (auto &Arg : Args) {
465 Type *T = Arg.Ty;
466 if (T->isIntOrPtrTy())
467 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
468 unsigned numRegs = 1;
469 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
470 numRegs = 2;
471 if (ParamRegs < numRegs)
472 return;
473 ParamRegs -= numRegs;
474 Arg.IsInReg = true;
475 }
476 }
477}
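// Editor's illustrative sketch (not in the upstream file): the register count
// consumed here comes from the module's "NumRegisterParameters" flag, which
// clang sets for -m32 -mregparm=N builds; with N = 3 the first three 32-bit
// integer/pointer libcall arguments are marked inreg and are passed in
// EAX/EDX/ECX instead of on the stack.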
478
479const MCExpr *
480X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
481 const MachineBasicBlock *MBB,
482 unsigned uid,MCContext &Ctx) const{
483 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
484 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
485 // entries.
486 return MCSymbolRefExpr::create(MBB->getSymbol(), X86::S_GOTOFF, Ctx);
487}
488
489/// Returns relocation base for the given PIC jumptable.
490SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
491 SelectionDAG &DAG) const {
492 if (!Subtarget.is64Bit())
493 // This doesn't have SDLoc associated with it, but is not really the
494 // same as a Register.
495 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
496 getPointerTy(DAG.getDataLayout()));
497 return Table;
498}
499
500/// This returns the relocation base for the given PIC jumptable,
501/// the same as getPICJumpTableRelocBase, but as an MCExpr.
502const MCExpr *X86TargetLowering::
503getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
504 MCContext &Ctx) const {
505 // X86-64 uses RIP relative addressing based on the jump table label.
506 if (Subtarget.isPICStyleRIPRel() ||
507 (Subtarget.is64Bit() &&
508 getTargetMachine().getCodeModel() == CodeModel::Large))
509 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
510
511 // Otherwise, the reference is relative to the PIC base.
512 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
513}
514
515std::pair<const TargetRegisterClass *, uint8_t>
516X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
517 MVT VT) const {
518 const TargetRegisterClass *RRC = nullptr;
519 uint8_t Cost = 1;
520 switch (VT.SimpleTy) {
521 default:
522 return TargetLowering::findRepresentativeClass(TRI, VT);
523 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
524 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
525 break;
526 case MVT::x86mmx:
527 RRC = &X86::VR64RegClass;
528 break;
529 case MVT::f32: case MVT::f64:
530 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
531 case MVT::v4f32: case MVT::v2f64:
532 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
533 case MVT::v8f32: case MVT::v4f64:
534 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
535 case MVT::v16f32: case MVT::v8f64:
536 RRC = &X86::VR128XRegClass;
537 break;
538 }
539 return std::make_pair(RRC, Cost);
540}
541
542unsigned X86TargetLowering::getAddressSpace() const {
543 if (Subtarget.is64Bit())
544 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? X86AS::GS
545 : X86AS::FS;
546 return X86AS::GS;
547}
548
549static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
550 return TargetTriple.isOSGlibc() || TargetTriple.isMusl() ||
551 TargetTriple.isOSFuchsia() || TargetTriple.isAndroid();
552}
553
560
561Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
562 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
563 // tcbhead_t; use it instead of the usual global variable (see
564 // sysdeps/{i386,x86_64}/nptl/tls.h)
565 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
566 unsigned AddressSpace = getAddressSpace();
567
568 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
569 if (Subtarget.isTargetFuchsia())
570 return SegmentOffset(IRB, 0x10, AddressSpace);
571
572 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
573 // Specially, some users may customize the base reg and offset.
574 int Offset = M->getStackProtectorGuardOffset();
575 // If we don't set -stack-protector-guard-offset value:
576 // %fs:0x28, unless we're using a Kernel code model, in which case
577 // it's %gs:0x28. gs:0x14 on i386.
578 if (Offset == INT_MAX)
579 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
580
581 StringRef GuardReg = M->getStackProtectorGuardReg();
582 if (GuardReg == "fs")
583 AddressSpace = X86AS::FS;
584 else if (GuardReg == "gs")
585 AddressSpace = X86AS::GS;
586
587 // Use symbol guard if user specify.
588 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
589 if (!GuardSymb.empty()) {
590 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
591 if (!GV) {
592 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
593 : Type::getInt32Ty(M->getContext());
594 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
595 nullptr, GuardSymb, nullptr,
596 GlobalValue::NotThreadLocal, AddressSpace);
597 if (!Subtarget.isTargetDarwin())
598 GV->setDSOLocal(M->getDirectAccessExternalData());
599 }
600 return GV;
601 }
602
603 return SegmentOffset(IRB, Offset, AddressSpace);
604 }
605 return TargetLowering::getIRStackGuard(IRB);
606}
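// Editor's illustrative sketch (not in the upstream file): with the defaults
// above, -fstack-protector code on a glibc x86-64 target loads the canary
// from %fs:0x28 and on i386 from %gs:0x14, matching the tcbhead_t layout,
// while Fuchsia uses the fixed %fs:0x10 slot from <zircon/tls.h>. The
// stack-protector-guard reg/offset/symbol module options override this.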
607
608void X86TargetLowering::insertSSPDeclarations(Module &M) const {
609 // MSVC CRT provides functionalities for stack protection.
610 RTLIB::LibcallImpl SecurityCheckCookieLibcall =
611 getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
612
613 RTLIB::LibcallImpl SecurityCookieVar =
614 getLibcallImpl(RTLIB::STACK_CHECK_GUARD);
615 if (SecurityCheckCookieLibcall != RTLIB::Unsupported &&
616 SecurityCookieVar != RTLIB::Unsupported) {
617 // MSVC CRT provides functionalities for stack protection.
618 // MSVC CRT has a global variable holding security cookie.
619 M.getOrInsertGlobal(getLibcallImplName(SecurityCookieVar),
620 PointerType::getUnqual(M.getContext()));
621
622 // MSVC CRT has a function to validate security cookie.
623 FunctionCallee SecurityCheckCookie =
624 M.getOrInsertFunction(getLibcallImplName(SecurityCheckCookieLibcall),
625 Type::getVoidTy(M.getContext()),
626 PointerType::getUnqual(M.getContext()));
627
628 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
629 F->setCallingConv(CallingConv::X86_FastCall);
630 F->addParamAttr(0, Attribute::AttrKind::InReg);
631 }
632 return;
633 }
634
635 StringRef GuardMode = M.getStackProtectorGuard();
636
637 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
638 if ((GuardMode == "tls" || GuardMode.empty()) &&
639 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
640 return;
641 TargetLowering::insertSSPDeclarations(M);
642}
643
644Value *
645X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
646 // Android provides a fixed TLS slot for the SafeStack pointer. See the
647 // definition of TLS_SLOT_SAFESTACK in
648 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
649 if (Subtarget.isTargetAndroid()) {
650 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
651 // %gs:0x24 on i386
652 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
653 return SegmentOffset(IRB, Offset, getAddressSpace());
654 }
655
656 // Fuchsia is similar.
657 if (Subtarget.isTargetFuchsia()) {
658 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
659 return SegmentOffset(IRB, 0x18, getAddressSpace());
660 }
661
662 return TargetLowering::getSafeStackPointerLocation(IRB);
663}
664
665//===----------------------------------------------------------------------===//
666// Return Value Calling Convention Implementation
667//===----------------------------------------------------------------------===//
668
669bool X86TargetLowering::CanLowerReturn(
670 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
671 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
672 const Type *RetTy) const {
673 SmallVector<CCValAssign, 16> RVLocs;
674 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
675 return CCInfo.CheckReturn(Outs, RetCC_X86);
676}
677
678const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
679 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
680 return ScratchRegs;
681}
682
683ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
684 static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
685 return RCRegs;
686}
687
688/// Lowers masks values (v*i1) to the local register values
689/// \returns DAG node after lowering to register type
690static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
691 const SDLoc &DL, SelectionDAG &DAG) {
692 EVT ValVT = ValArg.getValueType();
693
694 if (ValVT == MVT::v1i1)
695 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
696 DAG.getIntPtrConstant(0, DL));
697
698 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
699 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
700 // Two stage lowering might be required
701 // bitcast: v8i1 -> i8 / v16i1 -> i16
702 // anyextend: i8 -> i32 / i16 -> i32
703 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
704 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
705 if (ValLoc == MVT::i32)
706 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
707 return ValToCopy;
708 }
709
710 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
711 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
712 // One stage lowering is required
713 // bitcast: v32i1 -> i32 / v64i1 -> i64
714 return DAG.getBitcast(ValLoc, ValArg);
715 }
716
717 return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
718}
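// Editor's illustrative sketch (not in the upstream file): returning a v16i1
// mask in a 32-bit location goes through both stages above -- the value is
// first bitcast to i16 and then any-extended to i32 -- whereas a v32i1 value
// returned in an i32 location needs only the single bitcast.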
719
720/// Breaks v64i1 value into two registers and adds the new node to the DAG
721static void Passv64i1ArgInRegs(
722 const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
723 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
724 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
725 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
726 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
727 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
728 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
729 "The value should reside in two registers");
730
731 // Before splitting the value we cast it to i64
732 Arg = DAG.getBitcast(MVT::i64, Arg);
733
734 // Splitting the value into two i32 types
735 SDValue Lo, Hi;
736 std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
737
738 // Attach the two i32 types into corresponding registers
739 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
740 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
741}
742
743SDValue
744X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
745 bool isVarArg,
746 const SmallVectorImpl<ISD::OutputArg> &Outs,
747 const SmallVectorImpl<SDValue> &OutVals,
748 const SDLoc &dl, SelectionDAG &DAG) const {
749 MachineFunction &MF = DAG.getMachineFunction();
750 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
751
752 // In some cases we need to disable registers from the default CSR list.
753 // For example, when they are used as return registers (preserve_* and X86's
754 // regcall) or for argument passing (X86's regcall).
755 bool ShouldDisableCalleeSavedRegister =
756 shouldDisableRetRegFromCSR(CallConv) ||
757 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
758
759 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
760 report_fatal_error("X86 interrupts may not return any value");
761
762 SmallVector<CCValAssign, 16> RVLocs;
763 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
764 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
765
766 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
767 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
768 ++I, ++OutsIndex) {
769 CCValAssign &VA = RVLocs[I];
770 assert(VA.isRegLoc() && "Can only return in registers!");
771
772 // Add the register to the CalleeSaveDisableRegs list.
773 if (ShouldDisableCalleeSavedRegister)
774 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
775
776 SDValue ValToCopy = OutVals[OutsIndex];
777 EVT ValVT = ValToCopy.getValueType();
778
779 // Promote values to the appropriate types.
780 if (VA.getLocInfo() == CCValAssign::SExt)
781 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
782 else if (VA.getLocInfo() == CCValAssign::ZExt)
783 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
784 else if (VA.getLocInfo() == CCValAssign::AExt) {
785 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
786 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
787 else
788 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
789 }
790 else if (VA.getLocInfo() == CCValAssign::BCvt)
791 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
792
794 "Unexpected FP-extend for return value.");
795
796 // Report an error if we have attempted to return a value via an XMM
797 // register and SSE was disabled.
798 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
799 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
800 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
801 } else if (!Subtarget.hasSSE2() &&
802 X86::FR64XRegClass.contains(VA.getLocReg()) &&
803 ValVT == MVT::f64) {
804 // When returning a double via an XMM register, report an error if SSE2 is
805 // not enabled.
806 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
807 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
808 }
809
810 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
811 // the RET instruction and handled by the FP Stackifier.
812 if (VA.getLocReg() == X86::FP0 ||
813 VA.getLocReg() == X86::FP1) {
814 // If this is a copy from an xmm register to ST(0), use an FPExtend to
815 // change the value to the FP stack register class.
816 if (isScalarFPTypeInSSEReg(VA.getValVT()))
817 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
818 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
819 // Don't emit a copytoreg.
820 continue;
821 }
822
823 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
824 // which is returned in RAX / RDX.
825 if (Subtarget.is64Bit()) {
826 if (ValVT == MVT::x86mmx) {
827 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
828 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
829 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
830 ValToCopy);
831 // If we don't have SSE2 available, convert to v4f32 so the generated
832 // register is legal.
833 if (!Subtarget.hasSSE2())
834 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
835 }
836 }
837 }
838
839 if (VA.needsCustom()) {
840 assert(VA.getValVT() == MVT::v64i1 &&
841 "Currently the only custom case is when we split v64i1 to 2 regs");
842
843 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
844 Subtarget);
845
846 // Add the second register to the CalleeSaveDisableRegs list.
847 if (ShouldDisableCalleeSavedRegister)
848 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
849 } else {
850 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
851 }
852 }
853
854 SDValue Glue;
855 SmallVector<SDValue, 6> RetOps;
856 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
857 // Operand #1 = Bytes To Pop
858 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
859 MVT::i32));
860
861 // Copy the result values into the output registers.
862 for (auto &RetVal : RetVals) {
863 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
864 RetOps.push_back(RetVal.second);
865 continue; // Don't emit a copytoreg.
866 }
867
868 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
869 Glue = Chain.getValue(1);
870 RetOps.push_back(
871 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
872 }
873
874 // Swift calling convention does not require we copy the sret argument
875 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
876
877 // All x86 ABIs require that for returning structs by value we copy
878 // the sret argument into %rax/%eax (depending on ABI) for the return.
879 // We saved the argument into a virtual register in the entry block,
880 // so now we copy the value out and into %rax/%eax.
881 //
882 // Checking Function.hasStructRetAttr() here is insufficient because the IR
883 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
884 // false, then an sret argument may be implicitly inserted in the SelDAG. In
885 // either case FuncInfo->setSRetReturnReg() will have been called.
886 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
887 // When we have both sret and another return value, we should use the
888 // original Chain stored in RetOps[0], instead of the current Chain updated
889 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
890
891 // For the case of sret and another return value, we have
892 // Chain_0 at the function entry
893 // Chain_1 = getCopyToReg(Chain_0) in the above loop
894 // If we use Chain_1 in getCopyFromReg, we will have
895 // Val = getCopyFromReg(Chain_1)
896 // Chain_2 = getCopyToReg(Chain_1, Val) from below
897
898 // getCopyToReg(Chain_0) will be glued together with
899 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
900 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
901 // Data dependency from Unit B to Unit A due to usage of Val in
902 // getCopyToReg(Chain_1, Val)
903 // Chain dependency from Unit A to Unit B
904
905 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
906 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
907 getPointerTy(MF.getDataLayout()));
908
909 Register RetValReg
910 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
911 X86::RAX : X86::EAX;
912 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
913 Glue = Chain.getValue(1);
914
915 // RAX/EAX now acts like a return value.
916 RetOps.push_back(
917 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
918
919 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
920 // this however for preserve_most/preserve_all to minimize the number of
921 // callee-saved registers for these CCs.
922 if (ShouldDisableCalleeSavedRegister &&
923 CallConv != CallingConv::PreserveAll &&
924 CallConv != CallingConv::PreserveMost)
925 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
926 }
927
928 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
929 const MCPhysReg *I =
930 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
931 if (I) {
932 for (; *I; ++I) {
933 if (X86::GR64RegClass.contains(*I))
934 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
935 else
936 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
937 }
938 }
939
940 RetOps[0] = Chain; // Update chain.
941
942 // Add the glue if we have it.
943 if (Glue.getNode())
944 RetOps.push_back(Glue);
945
946 X86ISD::NodeType opcode = X86ISD::RET_GLUE;
947 if (CallConv == CallingConv::X86_INTR)
948 opcode = X86ISD::IRET;
949 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
950}
951
952bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
953 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
954 return false;
955
956 SDValue TCChain = Chain;
957 SDNode *Copy = *N->user_begin();
958 if (Copy->getOpcode() == ISD::CopyToReg) {
959 // If the copy has a glue operand, we conservatively assume it isn't safe to
960 // perform a tail call.
961 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
962 return false;
963 TCChain = Copy->getOperand(0);
964 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
965 return false;
966
967 bool HasRet = false;
968 for (const SDNode *U : Copy->users()) {
969 if (U->getOpcode() != X86ISD::RET_GLUE)
970 return false;
971 // If we are returning more than one value, we can definitely
972 // not make a tail call see PR19530
973 if (U->getNumOperands() > 4)
974 return false;
975 if (U->getNumOperands() == 4 &&
976 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
977 return false;
978 HasRet = true;
979 }
980
981 if (!HasRet)
982 return false;
983
984 Chain = TCChain;
985 return true;
986}
987
988EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
989 ISD::NodeType ExtendKind) const {
990 MVT ReturnMVT = MVT::i32;
991
992 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
993 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
994 // The ABI does not require i1, i8 or i16 to be extended.
995 //
996 // On Darwin, there is code in the wild relying on Clang's old behaviour of
997 // always extending i8/i16 return values, so keep doing that for now.
998 // (PR26665).
999 ReturnMVT = MVT::i8;
1000 }
1001
1002 EVT MinVT = getRegisterType(Context, ReturnMVT);
1003 return VT.bitsLT(MinVT) ? MinVT : VT;
1004}
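// Editor's illustrative sketch (not in the upstream file): an i8 returned
// from a non-Darwin function may therefore stay unextended in %al (the upper
// bits of %eax are left unspecified), while on Darwin the historical Clang
// behaviour of widening i8/i16 return values to i32 is preserved.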
1005
1006/// Reads two 32 bit registers and creates a 64 bit mask value.
1007/// \param VA The current 32 bit value that need to be assigned.
1008/// \param NextVA The next 32 bit value that need to be assigned.
1009/// \param Root The parent DAG node.
1010/// \param [in,out] InGlue Represents SDValue in the parent DAG node for
1011/// glue purposes. In the case the DAG is already using
1012/// physical register instead of virtual, we should glue
1013/// our new SDValue to InGlue SDValue.
1014/// \return a new SDValue of size 64bit.
1015static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
1016 SDValue &Root, SelectionDAG &DAG,
1017 const SDLoc &DL, const X86Subtarget &Subtarget,
1018 SDValue *InGlue = nullptr) {
1019 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1020 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1021 assert(VA.getValVT() == MVT::v64i1 &&
1022 "Expecting first location of 64 bit width type");
1023 assert(NextVA.getValVT() == VA.getValVT() &&
1024 "The locations should have the same type");
1025 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1026 "The values should reside in two registers");
1027
1028 SDValue Lo, Hi;
1029 SDValue ArgValueLo, ArgValueHi;
1030
1031 MachineFunction &MF = DAG.getMachineFunction();
1032 const TargetRegisterClass *RC = &X86::GR32RegClass;
1033
1034 // Read a 32 bit value from the registers.
1035 if (nullptr == InGlue) {
1036 // When no physical register is present,
1037 // create an intermediate virtual register.
1038 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1039 ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1040 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1041 ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1042 } else {
1043 // When a physical register is available read the value from it and glue
1044 // the reads together.
1045 ArgValueLo =
1046 DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1047 *InGlue = ArgValueLo.getValue(2);
1048 ArgValueHi =
1049 DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1050 *InGlue = ArgValueHi.getValue(2);
1051 }
1052
1053 // Convert the i32 type into v32i1 type.
1054 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1055
1056 // Convert the i32 type into v32i1 type.
1057 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1058
1059 // Concatenate the two values together.
1060 return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1061}
1062
1063/// The function will lower a register of various sizes (8/16/32/64)
1064/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
1065/// \returns a DAG node contains the operand after lowering to mask type.
1066static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1067 const EVT &ValLoc, const SDLoc &DL,
1068 SelectionDAG &DAG) {
1069 SDValue ValReturned = ValArg;
1070
1071 if (ValVT == MVT::v1i1)
1072 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1073
1074 if (ValVT == MVT::v64i1) {
1075 // In 32 bit machine, this case is handled by getv64i1Argument
1076 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1077 // In 64 bit machine, There is no need to truncate the value only bitcast
1078 } else {
1079 MVT MaskLenVT;
1080 switch (ValVT.getSimpleVT().SimpleTy) {
1081 case MVT::v8i1:
1082 MaskLenVT = MVT::i8;
1083 break;
1084 case MVT::v16i1:
1085 MaskLenVT = MVT::i16;
1086 break;
1087 case MVT::v32i1:
1088 MaskLenVT = MVT::i32;
1089 break;
1090 default:
1091 llvm_unreachable("Expecting a vector of i1 types");
1092 }
1093
1094 ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1095 }
1096 return DAG.getBitcast(ValVT, ValReturned);
1097}
1098
1099static SDValue getPopFromX87Reg(SelectionDAG &DAG, SDValue Chain,
1100 const SDLoc &dl, Register Reg, EVT VT,
1101 SDValue Glue) {
1102 SDVTList VTs = DAG.getVTList(VT, MVT::Other, MVT::Glue);
1103 SDValue Ops[] = {Chain, DAG.getRegister(Reg, VT), Glue};
1104 return DAG.getNode(X86ISD::POP_FROM_X87_REG, dl, VTs,
1105 ArrayRef(Ops, Glue.getNode() ? 3 : 2));
1106}
1107
1108/// Lower the result values of a call into the
1109/// appropriate copies out of appropriate physical registers.
1110///
1111SDValue X86TargetLowering::LowerCallResult(
1112 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1113 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1114 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
1115 uint32_t *RegMask) const {
1116
1117 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1118 // Assign locations to each value returned by this call.
1119 SmallVector<CCValAssign, 16> RVLocs;
1120 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1121 *DAG.getContext());
1122 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1123
1124 // Copy all of the result registers out of their specified physreg.
1125 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1126 ++I, ++InsIndex) {
1127 CCValAssign &VA = RVLocs[I];
1128 EVT CopyVT = VA.getLocVT();
1129
1130 // In some calling conventions we need to remove the used registers
1131 // from the register mask.
1132 if (RegMask) {
1133 for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1134 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1135 }
1136
1137 // Report an error if there was an attempt to return FP values via XMM
1138 // registers.
1139 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1140 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
1141 if (VA.getLocReg() == X86::XMM1)
1142 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1143 else
1144 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1145 } else if (!Subtarget.hasSSE2() &&
1146 X86::FR64XRegClass.contains(VA.getLocReg()) &&
1147 CopyVT == MVT::f64) {
1148 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
1149 if (VA.getLocReg() == X86::XMM1)
1150 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1151 else
1152 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1153 }
1154
1155 // If we prefer to use the value in xmm registers, copy it out as f80 and
1156 // use a truncate to move it from fp stack reg to xmm reg.
1157 bool RoundAfterCopy = false;
1158 bool X87Result = VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1;
1159 if (X87Result && isScalarFPTypeInSSEReg(VA.getValVT())) {
1160 if (!Subtarget.hasX87())
1161 report_fatal_error("X87 register return with X87 disabled");
1162 CopyVT = MVT::f80;
1163 RoundAfterCopy = (CopyVT != VA.getLocVT());
1164 }
1165
1166 SDValue Val;
1167 if (VA.needsCustom()) {
1168 assert(VA.getValVT() == MVT::v64i1 &&
1169 "Currently the only custom case is when we split v64i1 to 2 regs");
1170 Val =
1171 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
1172 } else {
1173 Chain =
1174 X87Result
1175 ? getPopFromX87Reg(DAG, Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1176 .getValue(1)
1177 : DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1178 .getValue(1);
1179 Val = Chain.getValue(0);
1180 InGlue = Chain.getValue(2);
1181 }
1182
1183 if (RoundAfterCopy)
1184 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
1185 // This truncation won't change the value.
1186 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1187
1188 if (VA.isExtInLoc()) {
1189 if (VA.getValVT().isVector() &&
1190 VA.getValVT().getScalarType() == MVT::i1 &&
1191 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1192 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1193 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1194 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1195 } else
1196 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1197 }
1198
1199 if (VA.getLocInfo() == CCValAssign::BCvt)
1200 Val = DAG.getBitcast(VA.getValVT(), Val);
1201
1202 InVals.push_back(Val);
1203 }
1204
1205 return Chain;
1206}
1207
1208//===----------------------------------------------------------------------===//
1209// C & StdCall & Fast Calling Convention implementation
1210//===----------------------------------------------------------------------===//
1211// StdCall calling convention seems to be standard for many Windows' API
1212// routines and around. It differs from C calling convention just a little:
1213// callee should clean up the stack, not caller. Symbols should be also
1214// decorated in some fancy way :) It doesn't support any vector arguments.
1215// For info on fast calling convention see Fast Calling Convention (tail call)
1216// implementation LowerX86_32FastCCCallTo.
1217
1218/// Determines whether Args, either a set of outgoing arguments to a call, or a
1219/// set of incoming args of a call, contains an sret pointer that the callee
1220/// pops
1221template <typename T>
1222static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1223 const X86Subtarget &Subtarget) {
1224 // Not C++20 (yet), so no concepts available.
1225 static_assert(std::is_same_v<T, ISD::OutputArg> ||
1226 std::is_same_v<T, ISD::InputArg>,
1227 "requires ISD::OutputArg or ISD::InputArg");
1228
1229 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
1230 // for most compilations.
1231 if (!Subtarget.is32Bit())
1232 return false;
1233
1234 if (Args.empty())
1235 return false;
1236
1237 // Most calls do not have an sret argument, check the arg next.
1238 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1239 if (!Flags.isSRet() || Flags.isInReg())
1240 return false;
1241
1242 // The MSVCabi does not pop the sret.
1243 if (Subtarget.getTargetTriple().isOSMSVCRT())
1244 return false;
1245
1246 // MCUs don't pop the sret
1247 if (Subtarget.isTargetMCU())
1248 return false;
1249
1250 // Callee pops argument
1251 return true;
1252}
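// Editor's illustrative sketch (not in the upstream file): on 32-bit Linux a
// function returning a struct through a hidden sret pointer pops that pointer
// itself, so its epilogue ends in "ret $4"; the MSVC and MCU ABIs leave the
// pointer for the caller to clean up, which is what the checks above encode.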
1253
1254/// Make a copy of an aggregate at address specified by "Src" to address
1255/// "Dst" with size and alignment information specified by the specific
1256/// parameter attribute. The copy will be passed as a byval function parameter.
1258 SDValue Chain, ISD::ArgFlagsTy Flags,
1259 SelectionDAG &DAG, const SDLoc &dl) {
1260 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1261
1262 return DAG.getMemcpy(
1263 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
1264 /*isVolatile*/ false, /*AlwaysInline=*/true,
1265 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
1266}
1267
1268/// Return true if the calling convention is one that we can guarantee TCO for.
1269static bool canGuaranteeTCO(CallingConv::ID CC) {
1270 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1271 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
1272 CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
1273}
1274
1275/// Return true if we might ever do TCO for calls with this calling convention.
1276static bool mayTailCallThisCC(CallingConv::ID CC) {
1277 switch (CC) {
1278 // C calling conventions:
1279 case CallingConv::C:
1280 case CallingConv::Win64:
1281 case CallingConv::X86_64_SysV:
1282 case CallingConv::PreserveNone:
1283 // Callee pop conventions:
1284 case CallingConv::X86_ThisCall:
1285 case CallingConv::X86_StdCall:
1286 case CallingConv::X86_VectorCall:
1287 case CallingConv::X86_FastCall:
1288 // Swift:
1289 case CallingConv::Swift:
1290 return true;
1291 default:
1292 return canGuaranteeTCO(CC);
1293 }
1294}
1295
1296/// Return true if the function is being made into a tailcall target by
1297/// changing its ABI.
1298static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1299 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1300 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
1301}
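// Editor's illustrative sketch (not in the upstream file): a call using
// tailcc or swifttailcc, or any call under -tailcallopt with a convention for
// which canGuaranteeTCO() holds (fastcc, GHC, ...), is guaranteed to be
// emitted as a jump; shouldGuaranteeTCO() gates the ABI changes (such as a
// callee-cleaned argument area) that make that guarantee possible.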
1302
1303bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1304 if (!CI->isTailCall())
1305 return false;
1306
1307 CallingConv::ID CalleeCC = CI->getCallingConv();
1308 if (!mayTailCallThisCC(CalleeCC))
1309 return false;
1310
1311 return true;
1312}
1313
1314SDValue
1315X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1317 const SDLoc &dl, SelectionDAG &DAG,
1318 const CCValAssign &VA,
1319 MachineFrameInfo &MFI, unsigned i) const {
1320 // Create the nodes corresponding to a load from this parameter slot.
1321 ISD::ArgFlagsTy Flags = Ins[i].Flags;
1322 bool AlwaysUseMutable = shouldGuaranteeTCO(
1323 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1324 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1325 EVT ValVT;
1326 MVT PtrVT = getPointerTy(DAG.getDataLayout());
1327
1328 // If value is passed by pointer we have address passed instead of the value
1329 // itself. No need to extend if the mask value and location share the same
1330 // absolute size.
1331 bool ExtendedInMem =
1332 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1333 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
1334
1335 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1336 ValVT = VA.getLocVT();
1337 else
1338 ValVT = VA.getValVT();
1339
1340 // FIXME: For now, all byval parameter objects are marked mutable. This can be
1341 // changed with more analysis.
1342 // In case of tail call optimization mark all arguments mutable. Since they
1343 // could be overwritten by lowering of arguments in case of a tail call.
1344 if (Flags.isByVal()) {
1345 unsigned Bytes = Flags.getByValSize();
1346 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1347
1348 // FIXME: For now, all byval parameter objects are marked as aliasing. This
1349 // can be improved with deeper analysis.
1350 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
1351 /*isAliased=*/true);
1352 return DAG.getFrameIndex(FI, PtrVT);
1353 }
1354
1355 EVT ArgVT = Ins[i].ArgVT;
1356
1357 // If this is a vector that has been split into multiple parts, don't elide
1358 // the copy. The layout on the stack may not match the packed in-memory
1359 // layout.
1360 bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1361
1362 // This is an argument in memory. We might be able to perform copy elision.
1363 // If the argument is passed directly in memory without any extension, then we
1364 // can perform copy elision. Large vector types, for example, may be passed
1365 // indirectly by pointer.
1366 if (Flags.isCopyElisionCandidate() &&
1367 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1368 !ScalarizedVector) {
1369 SDValue PartAddr;
1370 if (Ins[i].PartOffset == 0) {
1371 // If this is a one-part value or the first part of a multi-part value,
1372 // create a stack object for the entire argument value type and return a
1373 // load from our portion of it. This assumes that if the first part of an
1374 // argument is in memory, the rest will also be in memory.
1375 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
1376 /*IsImmutable=*/false);
1377 PartAddr = DAG.getFrameIndex(FI, PtrVT);
1378 return DAG.getLoad(
1379 ValVT, dl, Chain, PartAddr,
1381 }
1382
1383 // This is not the first piece of an argument in memory. See if there is
1384 // already a fixed stack object including this offset. If so, assume it
1385 // was created by the PartOffset == 0 branch above and create a load from
1386 // the appropriate offset into it.
1387 int64_t PartBegin = VA.getLocMemOffset();
1388 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1389 int FI = MFI.getObjectIndexBegin();
1390 for (; MFI.isFixedObjectIndex(FI); ++FI) {
1391 int64_t ObjBegin = MFI.getObjectOffset(FI);
1392 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
1393 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1394 break;
1395 }
1396 if (MFI.isFixedObjectIndex(FI)) {
1397 SDValue Addr =
1398 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
1399 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
1400 return DAG.getLoad(ValVT, dl, Chain, Addr,
1402 DAG.getMachineFunction(), FI, Ins[i].PartOffset));
1403 }
1404 }
1405
1406 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1407 VA.getLocMemOffset(), isImmutable);
1408
1409 // Set SExt or ZExt flag.
1410 if (VA.getLocInfo() == CCValAssign::ZExt) {
1411 MFI.setObjectZExt(FI, true);
1412 } else if (VA.getLocInfo() == CCValAssign::SExt) {
1413 MFI.setObjectSExt(FI, true);
1414 }
1415
1416 MaybeAlign Alignment;
1417 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1418 ValVT != MVT::f80)
1419 Alignment = MaybeAlign(4);
1420 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1421 SDValue Val = DAG.getLoad(
1422 ValVT, dl, Chain, FIN,
1424 Alignment);
1425 return ExtendedInMem
1426 ? (VA.getValVT().isVector()
1427 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
1428 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
1429 : Val;
1430}
1431
1432// FIXME: Get this from tablegen.
1433static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
1434 const X86Subtarget &Subtarget) {
1435 assert(Subtarget.is64Bit());
1436
1437 if (Subtarget.isCallingConvWin64(CallConv)) {
1438 static const MCPhysReg GPR64ArgRegsWin64[] = {
1439 X86::RCX, X86::RDX, X86::R8, X86::R9
1440 };
1441 return GPR64ArgRegsWin64;
1442 }
1443
1444 static const MCPhysReg GPR64ArgRegs64Bit[] = {
1445 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1446 };
1447 return GPR64ArgRegs64Bit;
1448}
1449
1450// FIXME: Get this from tablegen.
1451static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
1452 CallingConv::ID CallConv,
1453 const X86Subtarget &Subtarget) {
1454 assert(Subtarget.is64Bit());
1455 if (Subtarget.isCallingConvWin64(CallConv)) {
1456 // The XMM registers which might contain var arg parameters are shadowed
1457 // in their paired GPR. So we only need to save the GPR to their home
1458 // slots.
1459 // TODO: __vectorcall will change this.
1460 return {};
1461 }
1462
1463 bool isSoftFloat = Subtarget.useSoftFloat();
1464 if (isSoftFloat || !Subtarget.hasSSE1())
1465 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1466 // registers.
1467 return {};
1468
1469 static const MCPhysReg XMMArgRegs64Bit[] = {
1470 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1471 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1472 };
1473 return XMMArgRegs64Bit;
1474}
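// Editor's illustrative sketch (not in the upstream file): these tables are
// what the SysV varargs prologue spills into the register save area -- up to
// six GPRs (48 bytes) plus eight XMM registers (128 bytes) -- with %al
// carrying the number of vector registers actually used at the call site.
// Win64 varargs need only the four GPR home slots, as noted above.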
1475
1476#ifndef NDEBUG
1477static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
1478 return llvm::is_sorted(
1479 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
1480 return A.getValNo() < B.getValNo();
1481 });
1482}
1483#endif
1484
1485namespace {
1486/// This is a helper class for lowering variable arguments parameters.
1487class VarArgsLoweringHelper {
1488public:
1489 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1490 SelectionDAG &DAG, const X86Subtarget &Subtarget,
1491 CallingConv::ID CallConv, CCState &CCInfo)
1492 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1493 TheMachineFunction(DAG.getMachineFunction()),
1494 TheFunction(TheMachineFunction.getFunction()),
1495 FrameInfo(TheMachineFunction.getFrameInfo()),
1496 FrameLowering(*Subtarget.getFrameLowering()),
1497 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1498 CCInfo(CCInfo) {}
1499
1500 // Lower variable arguments parameters.
1501 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1502
1503private:
1504 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1505
1506 void forwardMustTailParameters(SDValue &Chain);
1507
1508 bool is64Bit() const { return Subtarget.is64Bit(); }
1509 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
1510
1511 X86MachineFunctionInfo *FuncInfo;
1512 const SDLoc &DL;
1513 SelectionDAG &DAG;
1514 const X86Subtarget &Subtarget;
1515 MachineFunction &TheMachineFunction;
1516 const Function &TheFunction;
1517 MachineFrameInfo &FrameInfo;
1518 const TargetFrameLowering &FrameLowering;
1519 const TargetLowering &TargLowering;
1520 CallingConv::ID CallConv;
1521 CCState &CCInfo;
1522};
1523} // namespace
1524
1525void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1526 SDValue &Chain, unsigned StackSize) {
1527 // If the function takes variable number of arguments, make a frame index for
1528 // the start of the first vararg value... for expansion of llvm.va_start. We
1529 // can skip this if there are no va_start calls.
1530 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1531 CallConv != CallingConv::X86_ThisCall)) {
1532 FuncInfo->setVarArgsFrameIndex(
1533 FrameInfo.CreateFixedObject(1, StackSize, true));
1534 }
1535
1536 // 64-bit calling conventions support varargs and register parameters, so we
1537 // have to do extra work to spill them in the prologue.
1538 if (is64Bit()) {
1539 // Find the first unallocated argument registers.
1540 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1541 ArrayRef<MCPhysReg> ArgXMMs =
1542 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
1543 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
1544 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
1545
1546 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1547 "SSE register cannot be used when SSE is disabled!");
1548
1549 if (isWin64()) {
1550 // Get to the caller-allocated home save location. Add 8 to account
1551 // for the return address.
1552 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1553 FuncInfo->setRegSaveFrameIndex(
1554 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
1555 // Fixup to set vararg frame on shadow area (4 x i64).
1556 if (NumIntRegs < 4)
1557 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1558 } else {
1559 // For X86-64, if there are vararg parameters that are passed via
1560 // registers, then we must store them to their spots on the stack so
1561 // they may be loaded by dereferencing the result of va_next.
1562 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1563 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1564 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1565 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
1566 }
1567
1568 SmallVector<SDValue, 6>
1569 LiveGPRs; // list of SDValue for GPR registers keeping live input value
1570 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
1571 // keeping live input value
1572 SDValue ALVal; // if applicable keeps SDValue for %al register
1573
1574 // Gather all the live in physical registers.
1575 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
1576 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1577 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1578 }
1579 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
1580 if (!AvailableXmms.empty()) {
1581 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1582 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1583 for (MCPhysReg Reg : AvailableXmms) {
1584 // FastRegisterAllocator spills virtual registers at basic
1585 // block boundary. That leads to usages of xmm registers
1586 // outside of check for %al. Pass physical registers to
1587 // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
1588 TheMachineFunction.getRegInfo().addLiveIn(Reg);
1589 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1590 }
1591 }
1592
1593 // Store the integer parameter registers.
1594 SmallVector<SDValue, 8> MemOps;
1595 SDValue RSFIN =
1596 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
1597 TargLowering.getPointerTy(DAG.getDataLayout()));
1598 unsigned Offset = FuncInfo->getVarArgsGPOffset();
1599 for (SDValue Val : LiveGPRs) {
1600 SDValue FIN = DAG.getNode(ISD::ADD, DL,
1601 TargLowering.getPointerTy(DAG.getDataLayout()),
1602 RSFIN, DAG.getIntPtrConstant(Offset, DL));
1603 SDValue Store =
1604 DAG.getStore(Val.getValue(1), DL, Val, FIN,
1605 MachinePointerInfo::getFixedStack(
1606 DAG.getMachineFunction(),
1607 FuncInfo->getRegSaveFrameIndex(), Offset));
1608 MemOps.push_back(Store);
1609 Offset += 8;
1610 }
1611
1612 // Now store the XMM (fp + vector) parameter registers.
1613 if (!LiveXMMRegs.empty()) {
1614 SmallVector<SDValue, 12> SaveXMMOps;
1615 SaveXMMOps.push_back(Chain);
1616 SaveXMMOps.push_back(ALVal);
1617 SaveXMMOps.push_back(RSFIN);
1618 SaveXMMOps.push_back(
1619 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1620 llvm::append_range(SaveXMMOps, LiveXMMRegs);
1621 MachineMemOperand *StoreMMO =
1622 DAG.getMachineFunction().getMachineMemOperand(
1623 MachinePointerInfo::getFixedStack(
1624 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
1625 Offset),
1626 MachineMemOperand::MOStore, 128, Align(16));
1627 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
1628 DL, DAG.getVTList(MVT::Other),
1629 SaveXMMOps, MVT::i8, StoreMMO));
1630 }
1631
1632 if (!MemOps.empty())
1633 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1634 }
1635}
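// Illustrative sketch (editorial, not upstream code): on the SysV x86-64 ABI
// the register save area built above is what va_arg walks. A hypothetical C
// rendering of the ABI's va_list, with the fields this helper seeds:
//
//   typedef struct {
//     unsigned gp_offset;        // VarArgsGPOffset: 8 * <#GPRs already used>
//     unsigned fp_offset;        // VarArgsFPOffset: 48 + 16 * <#XMMs used>
//     void *overflow_arg_area;   // stack args, rooted at VarArgsFrameIndex
//     void *reg_save_area;       // RegSaveFrameIndex, 6*8 + 8*16 = 176 bytes
//   } va_list[1];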
1636
1637void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1638 // Find the largest legal vector type.
1639 MVT VecVT = MVT::Other;
1640 // FIXME: Only some x86_32 calling conventions support AVX512.
1641 if (Subtarget.useAVX512Regs() &&
1642 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1643 CallConv == CallingConv::Intel_OCL_BI)))
1644 VecVT = MVT::v16f32;
1645 else if (Subtarget.hasAVX())
1646 VecVT = MVT::v8f32;
1647 else if (Subtarget.hasSSE2())
1648 VecVT = MVT::v4f32;
1649
1650 // We forward some GPRs and some vector types.
1651 SmallVector<MVT, 2> RegParmTypes;
1652 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1653 RegParmTypes.push_back(IntVT);
1654 if (VecVT != MVT::Other)
1655 RegParmTypes.push_back(VecVT);
1656
1657 // Compute the set of forwarded registers. The rest are scratch.
1658 SmallVectorImpl<ForwardedRegister> &Forwards =
1659 FuncInfo->getForwardedMustTailRegParms();
1660 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
1661
1662 // Forward AL for SysV x86_64 targets, since it is used for varargs.
1663 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1664 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1665 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1666 }
1667
1668 // Copy all forwards from physical to virtual registers.
1669 for (ForwardedRegister &FR : Forwards) {
1670 // FIXME: Can we use a less constrained schedule?
1671 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
1672 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1673 TargLowering.getRegClassFor(FR.VT));
1674 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
1675 }
1676}
1677
1678void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1679 unsigned StackSize) {
1680 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
1681 // If necessary, it will be set to the correct value later.
1682 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1683 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1684
1685 if (FrameInfo.hasVAStart())
1686 createVarArgAreaAndStoreRegisters(Chain, StackSize);
1687
1688 if (FrameInfo.hasMustTailInVarArgFunc())
1689 forwardMustTailParameters(Chain);
1690}
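// For reference (editorial sketch): IR along the lines of
//   define void @f(i32 %n, ...) { %ap = alloca ... ; call void @llvm.va_start(...) ... }
// takes the hasVAStart() path above, while a varargs forwarding thunk such as
//   define void @thunk(...) { musttail call void (...) @impl(...) ; ret void }
// takes the hasMustTailInVarArgFunc() path.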
1691
1692SDValue X86TargetLowering::LowerFormalArguments(
1693 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1694 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1695 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1696 MachineFunction &MF = DAG.getMachineFunction();
1697 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1698
1699 const Function &F = MF.getFunction();
1700 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1701 F.getName() == "main")
1702 FuncInfo->setForceFramePointer(true);
1703
1704 MachineFrameInfo &MFI = MF.getFrameInfo();
1705 bool Is64Bit = Subtarget.is64Bit();
1706 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
1707
1708 assert(
1709 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
1710 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
1711
1712 // Assign locations to all of the incoming arguments.
1713 SmallVector<CCValAssign, 16> ArgLocs;
1714 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1715
1716 // Allocate shadow area for Win64.
1717 if (IsWin64)
1718 CCInfo.AllocateStack(32, Align(8));
1719
1720 CCInfo.AnalyzeArguments(Ins, CC_X86);
1721
1722 // In vectorcall calling convention a second pass is required for the HVA
1723 // types.
1724 if (CallingConv::X86_VectorCall == CallConv) {
1725 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
1726 }
1727
1728 // The next loop assumes that the locations are in the same order as the
1729 // input arguments.
1730 assert(isSortedByValueNo(ArgLocs) &&
1731 "Argument Location list must be sorted before lowering");
1732
1733 SDValue ArgValue;
1734 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1735 ++I, ++InsIndex) {
1736 assert(InsIndex < Ins.size() && "Invalid Ins index");
1737 CCValAssign &VA = ArgLocs[I];
1738
1739 if (VA.isRegLoc()) {
1740 EVT RegVT = VA.getLocVT();
1741 if (VA.needsCustom()) {
1742 assert(
1743 VA.getValVT() == MVT::v64i1 &&
1744 "Currently the only custom case is when we split v64i1 to 2 regs");
1745
1746 // v64i1 values, in regcall calling convention, that are
1747 // compiled to 32 bit arch, are split up into two registers.
1748 ArgValue =
1749 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
1750 } else {
1751 const TargetRegisterClass *RC;
1752 if (RegVT == MVT::i8)
1753 RC = &X86::GR8RegClass;
1754 else if (RegVT == MVT::i16)
1755 RC = &X86::GR16RegClass;
1756 else if (RegVT == MVT::i32)
1757 RC = &X86::GR32RegClass;
1758 else if (Is64Bit && RegVT == MVT::i64)
1759 RC = &X86::GR64RegClass;
1760 else if (RegVT == MVT::f16)
1761 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1762 else if (RegVT == MVT::f32)
1763 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1764 else if (RegVT == MVT::f64)
1765 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1766 else if (RegVT == MVT::f80)
1767 RC = &X86::RFP80RegClass;
1768 else if (RegVT == MVT::f128)
1769 RC = &X86::VR128RegClass;
1770 else if (RegVT.is512BitVector())
1771 RC = &X86::VR512RegClass;
1772 else if (RegVT.is256BitVector())
1773 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1774 else if (RegVT.is128BitVector())
1775 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1776 else if (RegVT == MVT::x86mmx)
1777 RC = &X86::VR64RegClass;
1778 else if (RegVT == MVT::v1i1)
1779 RC = &X86::VK1RegClass;
1780 else if (RegVT == MVT::v8i1)
1781 RC = &X86::VK8RegClass;
1782 else if (RegVT == MVT::v16i1)
1783 RC = &X86::VK16RegClass;
1784 else if (RegVT == MVT::v32i1)
1785 RC = &X86::VK32RegClass;
1786 else if (RegVT == MVT::v64i1)
1787 RC = &X86::VK64RegClass;
1788 else
1789 llvm_unreachable("Unknown argument type!");
1790
1791 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1792 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1793 }
1794
1795 // If this is an 8 or 16-bit value, it is really passed promoted to 32
1796 // bits. Insert an assert[sz]ext to capture this, then truncate to the
1797 // right size.
1798 if (VA.getLocInfo() == CCValAssign::SExt)
1799 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1800 DAG.getValueType(VA.getValVT()));
1801 else if (VA.getLocInfo() == CCValAssign::ZExt)
1802 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1803 DAG.getValueType(VA.getValVT()));
1804 else if (VA.getLocInfo() == CCValAssign::BCvt)
1805 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
1806
1807 if (VA.isExtInLoc()) {
1808 // Handle MMX values passed in XMM regs.
1809 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1810 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
1811 else if (VA.getValVT().isVector() &&
1812 VA.getValVT().getScalarType() == MVT::i1 &&
1813 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1814 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1815 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1816 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
1817 } else
1818 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1819 }
1820 } else {
1821 assert(VA.isMemLoc());
1822 ArgValue =
1823 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
1824 }
1825
1826 // If value is passed via pointer - do a load.
1827 if (VA.getLocInfo() == CCValAssign::Indirect &&
1828 !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1829 ArgValue =
1830 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
1831 }
1832
1833 InVals.push_back(ArgValue);
1834 }
1835
1836 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1837 if (Ins[I].Flags.isSwiftAsync()) {
1838 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1839 if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1840 X86FI->setHasSwiftAsyncContext(true);
1841 else {
1842 int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1843 int FI =
1844 MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
1845 X86FI->setSwiftAsyncContextFrameIdx(FI);
1846 SDValue St = DAG.getStore(
1847 DAG.getEntryNode(), dl, InVals[I],
1848 DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1849 MachinePointerInfo::getFixedStack(MF, FI));
1850 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1851 }
1852 }
1853
1854 // Swift calling convention does not require we copy the sret argument
1855 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1856 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1857 continue;
1858
1859 // All x86 ABIs require that for returning structs by value we copy the
1860 // sret argument into %rax/%eax (depending on ABI) for the return. Save
1861 // the argument into a virtual register so that we can access it from the
1862 // return points.
1863 if (Ins[I].Flags.isSRet()) {
1864 assert(!FuncInfo->getSRetReturnReg() &&
1865 "SRet return has already been set");
1866 MVT PtrTy = getPointerTy(DAG.getDataLayout());
1867 Register Reg =
1868 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
1869 FuncInfo->setSRetReturnReg(Reg);
1870 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
1871 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1872 break;
1873 }
1874 }
1875
1876 unsigned StackSize = CCInfo.getStackSize();
1877 // Align stack specially for tail calls.
1878 if (shouldGuaranteeTCO(CallConv,
1879 MF.getTarget().Options.GuaranteedTailCallOpt))
1880 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1881
1882 if (IsVarArg)
1883 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1884 .lowerVarArgsParameters(Chain, StackSize);
1885
1886 // Some CCs need callee pop.
1887 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
1888 MF.getTarget().Options.GuaranteedTailCallOpt)) {
1889 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1890 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1891 // X86 interrupts must pop the error code (and the alignment padding) if
1892 // present.
1893 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1894 } else {
1895 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1896 // If this is an sret function, the return should pop the hidden pointer.
1897 if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
1898 FuncInfo->setBytesToPopOnReturn(4);
1899 }
1900
1901 if (!Is64Bit) {
1902 // RegSaveFrameIndex is X86-64 only.
1903 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1904 }
1905
1906 FuncInfo->setArgumentStackSize(StackSize);
1907
1908 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1909 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
1910 if (Personality == EHPersonality::CoreCLR) {
1911 assert(Is64Bit);
1912 // TODO: Add a mechanism to frame lowering that will allow us to indicate
1913 // that we'd prefer this slot be allocated towards the bottom of the frame
1914 // (i.e. near the stack pointer after allocating the frame). Every
1915 // funclet needs a copy of this slot in its (mostly empty) frame, and the
1916 // offset from the bottom of this and each funclet's frame must be the
1917 // same, so the size of funclets' (mostly empty) frames is dictated by
1918 // how far this slot is from the bottom (since they allocate just enough
1919 // space to accommodate holding this slot at the correct offset).
1920 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
1921 EHInfo->PSPSymFrameIdx = PSPSymFI;
1922 }
1923 }
1924
1925 if (shouldDisableArgRegFromCSR(CallConv) ||
1926 F.hasFnAttribute("no_caller_saved_registers")) {
1927 MachineRegisterInfo &MRI = MF.getRegInfo();
1928 for (std::pair<MCRegister, Register> Pair : MRI.liveins())
1929 MRI.disableCalleeSavedRegister(Pair.first);
1930 }
1931
1932 if (CallingConv::PreserveNone == CallConv)
1933 for (const ISD::InputArg &In : Ins) {
1934 if (In.Flags.isSwiftSelf() || In.Flags.isSwiftAsync() ||
1935 In.Flags.isSwiftError()) {
1936 errorUnsupported(DAG, dl,
1937 "Swift attributes can't be used with preserve_none");
1938 break;
1939 }
1940 }
1941
1942 return Chain;
1943}
1944
1945SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1946 SDValue Arg, const SDLoc &dl,
1947 SelectionDAG &DAG,
1948 const CCValAssign &VA,
1949 ISD::ArgFlagsTy Flags,
1950 bool isByVal) const {
1951 unsigned LocMemOffset = VA.getLocMemOffset();
1952 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1953 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1954 StackPtr, PtrOff);
1955 if (isByVal)
1956 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1957
1958 MaybeAlign Alignment;
1959 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1960 Arg.getSimpleValueType() != MVT::f80)
1961 Alignment = MaybeAlign(4);
1962 return DAG.getStore(
1963 Chain, dl, Arg, PtrOff,
1964 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
1965 Alignment);
1966}
1967
1968/// Emit a load of return address if tail call
1969/// optimization is performed and it is required.
1970SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1971 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1972 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1973 // Adjust the Return address stack slot.
1974 EVT VT = getPointerTy(DAG.getDataLayout());
1975 OutRetAddr = getReturnAddressFrameIndex(DAG);
1976
1977 // Load the "old" Return address.
1978 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
1979 return SDValue(OutRetAddr.getNode(), 1);
1980}
1981
1982/// Emit a store of the return address if tail call
1983/// optimization is performed and it is required (FPDiff!=0).
1984 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
1985 SDValue Chain, SDValue RetAddrFrIdx,
1986 EVT PtrVT, unsigned SlotSize,
1987 int FPDiff, const SDLoc &dl) {
1988 // Store the return address to the appropriate stack slot.
1989 if (!FPDiff) return Chain;
1990 // Calculate the new stack slot for the return address.
1991 int NewReturnAddrFI =
1992 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
1993 false);
1994 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
1995 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
1996 MachinePointerInfo::getFixedStack(
1997 DAG.getMachineFunction(), NewReturnAddrFI));
1998 return Chain;
1999}
2000
2001 /// Returns a vector_shuffle mask for a movs{s|d}, movd
2002/// operation of specified width.
2003SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
2004 SDValue V1, SDValue V2) const {
2005 unsigned NumElems = VT.getVectorNumElements();
2006 SmallVector<int, 8> Mask;
2007 Mask.push_back(NumElems);
2008 for (unsigned i = 1; i != NumElems; ++i)
2009 Mask.push_back(i);
2010 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
2011}
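// For example (editorial note), with VT = v4f32 the mask produced above is
// <4, 1, 2, 3>: element 0 is taken from V2 and elements 1-3 from V1, which is
// the MOVSS merge pattern; likewise <2, 1> models MOVSD for v2f64.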
2012
2013// Returns the type of copying which is required to set up a byval argument to
2014// a tail-called function. This isn't needed for non-tail calls, because they
2015// always need the equivalent of CopyOnce, but tail-calls sometimes need two to
2016// avoid clobbering another argument (CopyViaTemp), and sometimes can be
2017// optimised to zero copies when forwarding an argument from the caller's
2018// caller (NoCopy).
2019X86TargetLowering::ByValCopyKind X86TargetLowering::ByValNeedsCopyForTailCall(
2020 SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
2021 MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();
2022
2023 // Globals are always safe to copy from.
2024 if (isa<GlobalAddressSDNode>(Src) || isa<ExternalSymbolSDNode>(Src))
2025 return CopyOnce;
2026
2027 // Can only analyse frame index nodes, conservatively assume we need a
2028 // temporary.
2029 auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
2030 auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
2031 if (!SrcFrameIdxNode || !DstFrameIdxNode)
2032 return CopyViaTemp;
2033
2034 int SrcFI = SrcFrameIdxNode->getIndex();
2035 int DstFI = DstFrameIdxNode->getIndex();
2036 assert(MFI.isFixedObjectIndex(DstFI) &&
2037 "byval passed in non-fixed stack slot");
2038
2039 int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
2040 int64_t DstOffset = MFI.getObjectOffset(DstFI);
2041
2042 // If the source is in the local frame, then the copy to the argument
2043 // memory is always valid.
2044 bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
2045 if (!FixedSrc || (FixedSrc && SrcOffset < 0))
2046 return CopyOnce;
2047
2048 // If the value is already in the correct location, then no copying is
2049 // needed. If not, then we need to copy via a temporary.
2050 if (SrcOffset == DstOffset)
2051 return NoCopy;
2052 else
2053 return CopyViaTemp;
2054}
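// Illustrative example (editorial sketch): for IR roughly like
//   define void @f(ptr byval(%S) %p) { ... musttail call void @g(ptr byval(%S) %p) }
// a byval that is simply forwarded sits at the same fixed offset, so NoCopy;
// a byval built in f's own (non-fixed) frame can be copied directly to the
// argument area (CopyOnce); and a byval sourced from a different incoming
// argument slot must be staged through a temporary (CopyViaTemp) so the
// outgoing argument stores cannot clobber it.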
2055
2056SDValue
2057X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2058 SmallVectorImpl<SDValue> &InVals) const {
2059 SelectionDAG &DAG = CLI.DAG;
2060 SDLoc &dl = CLI.DL;
2061 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2062 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2063 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2064 SDValue Chain = CLI.Chain;
2065 SDValue Callee = CLI.Callee;
2066 CallingConv::ID CallConv = CLI.CallConv;
2067 bool &isTailCall = CLI.IsTailCall;
2068 bool isVarArg = CLI.IsVarArg;
2069 const auto *CB = CLI.CB;
2070
2071 MachineFunction &MF = DAG.getMachineFunction();
2072 bool Is64Bit = Subtarget.is64Bit();
2073 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2074 bool ShouldGuaranteeTCO = shouldGuaranteeTCO(
2075 CallConv, MF.getTarget().Options.GuaranteedTailCallOpt);
2076 bool IsCalleePopSRet =
2077 !ShouldGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
2078 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2079 bool HasNCSR = (CB && isa<CallInst>(CB) &&
2080 CB->hasFnAttr("no_caller_saved_registers"));
2081 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
2082 bool IsCFICall = IsIndirectCall && CLI.CFIType;
2083 const Module *M = MF.getFunction().getParent();
2084
2085 // If the indirect call target has the nocf_check attribute, the call needs
2086 // the NOTRACK prefix. For simplicity just disable tail calls as there are
2087 // so many variants.
2088 // FIXME: This will cause backend errors if the user forces the issue.
2089 bool IsNoTrackIndirectCall = IsIndirectCall && CB->doesNoCfCheck() &&
2090 M->getModuleFlag("cf-protection-branch");
2091 if (IsNoTrackIndirectCall)
2092 isTailCall = false;
2093
2094 MachineFunction::CallSiteInfo CSInfo;
2095 if (CallConv == CallingConv::X86_INTR)
2096 report_fatal_error("X86 interrupts may not be called directly");
2097
2098 // Set type id for call site info.
2099 if (MF.getTarget().Options.EmitCallGraphSection && CB && CB->isIndirectCall())
2100 CSInfo = MachineFunction::CallSiteInfo(*CB);
2101
2102 if (IsIndirectCall && !IsWin64 &&
2103 M->getModuleFlag("import-call-optimization"))
2104 errorUnsupported(DAG, dl,
2105 "Indirect calls must have a normal calling convention if "
2106 "Import Call Optimization is enabled");
2107
2108 // Analyze operands of the call, assigning locations to each operand.
2109 SmallVector<CCValAssign, 16> ArgLocs;
2110 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2111
2112 // Allocate shadow area for Win64.
2113 if (IsWin64)
2114 CCInfo.AllocateStack(32, Align(8));
2115
2116 CCInfo.AnalyzeArguments(Outs, CC_X86);
2117
2118 // In vectorcall calling convention a second pass is required for the HVA
2119 // types.
2120 if (CallingConv::X86_VectorCall == CallConv) {
2121 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2122 }
2123
2124 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2125 bool IsSibcall = false;
2126 if (isTailCall && ShouldGuaranteeTCO) {
2127 // If we need to guarantee TCO for a non-musttail call, we just need to make
2128 // sure the conventions match. If a tail call uses one of the supported TCO
2129 // conventions and the caller and callee match, we can tail call any
2130 // function prototype.
2131 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
2132 isTailCall = (CallConv == CallerCC);
2133 IsSibcall = IsMustTail;
2134 } else if (isTailCall) {
2135 // Check if this tail call is a "sibling" call, which is loosely defined to
2136 // be a tail call that doesn't require heroics like moving the return
2137 // address or swapping byval arguments. We treat some musttail calls as
2138 // sibling calls to avoid unnecessary argument copies.
2139 IsSibcall =
2140 isEligibleForSiblingCallOpt(CLI, CCInfo, ArgLocs, IsCalleePopSRet);
2141 isTailCall = IsSibcall || IsMustTail;
2142 }
2143
2144 if (isTailCall)
2145 ++NumTailCalls;
2146
2147 if (IsMustTail && !isTailCall)
2148 report_fatal_error("failed to perform tail call elimination on a call "
2149 "site marked musttail");
2150
2151 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2152 "Var args not supported with calling convention fastcc, ghc or hipe");
2153
2154 // Get a count of how many bytes are to be pushed on the stack.
2155 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2156 if (IsSibcall)
2157 // This is a sibcall. The memory operands are available in caller's
2158 // own caller's stack.
2159 NumBytes = 0;
2160 else if (ShouldGuaranteeTCO && canGuaranteeTCO(CallConv))
2161 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2162
2163 // A sibcall is ABI-compatible and does not need to adjust the stack pointer.
2164 int FPDiff = 0;
2165 if (isTailCall && ShouldGuaranteeTCO && !IsSibcall) {
2166 // Lower arguments at fp - stackoffset + fpdiff.
2167 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2168
2169 FPDiff = NumBytesCallerPushed - NumBytes;
2170
2171 // Set the delta of movement of the returnaddr stackslot.
2172 // But only set if delta is greater than previous delta.
2173 if (FPDiff < X86Info->getTCReturnAddrDelta())
2174 X86Info->setTCReturnAddrDelta(FPDiff);
2175 }
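// Worked example (editorial, under GuaranteedTailCallOpt): a fastcc caller
// whose own incoming stack arguments occupy 16 bytes tail-calling a callee
// that needs 48 bytes of stack arguments gives FPDiff = 16 - 48 = -32, so the
// return address (and the callee's arguments) must be placed 32 bytes below
// the caller's incoming argument area; the EmitTailCallLoadRetAddr and
// EmitTailCallStoreRetAddr calls below perform that relocation.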
2176
2177 unsigned NumBytesToPush = NumBytes;
2178 unsigned NumBytesToPop = NumBytes;
2179
2180 SDValue StackPtr;
2181 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2182
2183 // If we are doing a tail-call, any byval arguments will be written to stack
2184 // space which was used for incoming arguments. If any of the values being
2185 // used are incoming byval arguments to this function, then they might be
2186 // overwritten by the stores of the outgoing arguments. To avoid this, we
2187 // need to make a temporary copy of them in local stack space, then copy back
2188 // to the argument area.
2189 // FIXME: There's potential to improve the code by using virtual registers for
2190 // temporary storage, and letting the register allocator spill if needed.
2191 SmallVector<SDValue, 8> ByValTemporaries;
2192 SDValue ByValTempChain;
2193 if (isTailCall) {
2194 // Use null SDValue to mean "no temporary recorded for this arg index".
2195 ByValTemporaries.assign(OutVals.size(), SDValue());
2196
2197 SmallVector<SDValue, 8> ByValCopyChains;
2198 for (const CCValAssign &VA : ArgLocs) {
2199 unsigned ArgIdx = VA.getValNo();
2200 SDValue Src = OutVals[ArgIdx];
2201 ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
2202
2203 if (!Flags.isByVal())
2204 continue;
2205
2206 auto PtrVT = getPointerTy(DAG.getDataLayout());
2207
2208 if (!StackPtr.getNode())
2209 StackPtr =
2210 DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), PtrVT);
2211
2212 // Destination: where this byval should live in the callee’s frame
2213 // after the tail call.
2214 int64_t Offset = VA.getLocMemOffset() + FPDiff;
2215 uint64_t Size = VA.getLocVT().getFixedSizeInBits() / 8;
2216 int FI = MF.getFrameInfo().CreateFixedObject(Size, Offset,
2217 /*IsImmutable=*/true);
2218 SDValue Dst = DAG.getFrameIndex(FI, PtrVT);
2219
2220 ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
2221
2222 if (Copy == NoCopy) {
2223 // If the argument is already at the correct offset on the stack
2224 // (because we are forwarding a byval argument from our caller), we
2225 // don't need any copying.
2226 continue;
2227 } else if (Copy == CopyOnce) {
2228 // If the argument is in our local stack frame, no other argument
2229 // preparation can clobber it, so we can copy it to the final location
2230 // later.
2231 ByValTemporaries[ArgIdx] = Src;
2232 } else {
2233 assert(Copy == CopyViaTemp && "unexpected enum value");
2234 // If we might be copying this argument from the outgoing argument
2235 // stack area, we need to copy via a temporary in the local stack
2236 // frame.
2237 MachineFrameInfo &MFI = MF.getFrameInfo();
2238 int TempFrameIdx = MFI.CreateStackObject(Flags.getByValSize(),
2239 Flags.getNonZeroByValAlign(),
2240 /*isSS=*/false);
2241 SDValue Temp =
2242 DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));
2243
2244 SDValue CopyChain =
2245 CreateCopyOfByValArgument(Src, Temp, Chain, Flags, DAG, dl);
2246 ByValCopyChains.push_back(CopyChain);
2247 }
2248 }
2249 if (!ByValCopyChains.empty())
2250 ByValTempChain =
2251 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
2252 }
2253
2254 // If we have an inalloca argument, all stack space has already been allocated
2255 // for us and is right at the top of the stack. We don't support multiple
2256 // arguments passed in memory when using inalloca.
2257 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2258 NumBytesToPush = 0;
2259 if (!ArgLocs.back().isMemLoc())
2260 report_fatal_error("cannot use inalloca attribute on a register "
2261 "parameter");
2262 if (ArgLocs.back().getLocMemOffset() != 0)
2263 report_fatal_error("any parameter with the inalloca attribute must be "
2264 "the only memory argument");
2265 } else if (CLI.IsPreallocated) {
2266 assert(ArgLocs.back().isMemLoc() &&
2267 "cannot use preallocated attribute on a register "
2268 "parameter");
2269 SmallVector<size_t, 4> PreallocatedOffsets;
2270 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2271 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2272 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
2273 }
2274 }
2275 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
2276 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
2277 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
2278 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
2279 NumBytesToPush = 0;
2280 }
2281
2282 if (!IsSibcall && !IsMustTail)
2283 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
2284 NumBytes - NumBytesToPush, dl);
2285
2286 SDValue RetAddrFrIdx;
2287 // Load return address for tail calls.
2288 if (isTailCall && FPDiff)
2289 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2290 Is64Bit, FPDiff, dl);
2291
2292 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2293 SmallVector<SDValue, 8> MemOpChains;
2294
2295 // The next loop assumes that the locations are in the same order as the
2296 // input arguments.
2297 assert(isSortedByValueNo(ArgLocs) &&
2298 "Argument Location list must be sorted before lowering");
2299
2300 // Walk the register/memloc assignments, inserting copies/loads. In the case
2301 // of tail call optimization, arguments are handled later.
2302 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2303 ++I, ++OutIndex) {
2304 assert(OutIndex < Outs.size() && "Invalid Out index");
2305 // Skip inalloca/preallocated arguments, they have already been written.
2306 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2307 if (Flags.isInAlloca() || Flags.isPreallocated())
2308 continue;
2309
2310 CCValAssign &VA = ArgLocs[I];
2311 EVT RegVT = VA.getLocVT();
2312 SDValue Arg = OutVals[OutIndex];
2313 bool isByVal = Flags.isByVal();
2314
2315 // Promote the value if needed.
2316 switch (VA.getLocInfo()) {
2317 default: llvm_unreachable("Unknown loc info!");
2318 case CCValAssign::Full: break;
2319 case CCValAssign::SExt:
2320 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2321 break;
2322 case CCValAssign::ZExt:
2323 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2324 break;
2325 case CCValAssign::AExt:
2326 if (Arg.getValueType().isVector() &&
2327 Arg.getValueType().getVectorElementType() == MVT::i1)
2328 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
2329 else if (RegVT.is128BitVector()) {
2330 // Special case: passing MMX values in XMM registers.
2331 Arg = DAG.getBitcast(MVT::i64, Arg);
2332 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2333 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2334 } else
2335 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2336 break;
2337 case CCValAssign::BCvt:
2338 Arg = DAG.getBitcast(RegVT, Arg);
2339 break;
2340 case CCValAssign::Indirect: {
2341 if (isByVal) {
2342 // Memcpy the argument to a temporary stack slot to prevent
2343 // the caller from seeing any modifications the callee may make
2344 // as guaranteed by the `byval` attribute.
2345 int FrameIdx = MF.getFrameInfo().CreateStackObject(
2346 Flags.getByValSize(),
2347 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
2348 SDValue StackSlot =
2349 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
2350 Chain =
2351 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
2352 // From now on treat this as a regular pointer
2353 Arg = StackSlot;
2354 isByVal = false;
2355 } else {
2356 // Store the argument.
2357 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2358 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2359 Chain = DAG.getStore(
2360 Chain, dl, Arg, SpillSlot,
2361 MachinePointerInfo::getFixedStack(MF, FI));
2362 Arg = SpillSlot;
2363 }
2364 break;
2365 }
2366 }
2367
2368 if (VA.needsCustom()) {
2369 assert(VA.getValVT() == MVT::v64i1 &&
2370 "Currently the only custom case is when we split v64i1 to 2 regs");
2371 // Split v64i1 value into two registers
2372 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
2373 } else if (VA.isRegLoc()) {
2374 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2375 const TargetOptions &Options = DAG.getTarget().Options;
2376 if (Options.EmitCallSiteInfo)
2377 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
2378 if (isVarArg && IsWin64) {
2379 // Win64 ABI requires argument XMM reg to be copied to the corresponding
2380 // shadow reg if callee is a varargs function.
2381 Register ShadowReg;
2382 switch (VA.getLocReg()) {
2383 case X86::XMM0: ShadowReg = X86::RCX; break;
2384 case X86::XMM1: ShadowReg = X86::RDX; break;
2385 case X86::XMM2: ShadowReg = X86::R8; break;
2386 case X86::XMM3: ShadowReg = X86::R9; break;
2387 }
2388 if (ShadowReg)
2389 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2390 }
2391 } else if (!IsSibcall && (!isTailCall || (isByVal && !IsMustTail))) {
2392 assert(VA.isMemLoc());
2393 if (!StackPtr.getNode())
2394 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2395 getPointerTy(DAG.getDataLayout()));
2396 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2397 dl, DAG, VA, Flags, isByVal));
2398 }
2399 }
2400
2401 if (!MemOpChains.empty())
2402 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2403
2404 if (Subtarget.isPICStyleGOT()) {
2405 // ELF / PIC requires GOT in the EBX register before function calls via PLT
2406 // GOT pointer (except regcall).
2407 if (!isTailCall) {
2408 // Indirect call with RegCall calling convention may use up all the
2409 // general registers, so it is not suitable to bind EBX register for the
2410 // GOT address, just let register allocator handle it.
2411 if (CallConv != CallingConv::X86_RegCall)
2412 RegsToPass.push_back(std::make_pair(
2413 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2414 getPointerTy(DAG.getDataLayout()))));
2415 } else {
2416 // If we are tail calling and generating PIC/GOT style code load the
2417 // address of the callee into ECX. The value in ecx is used as target of
2418 // the tail jump. This is done to circumvent the ebx/callee-saved problem
2419 // for tail calls on PIC/GOT architectures. Normally we would just put the
2420 // address of GOT into ebx and then call target@PLT. But for tail calls
2421 // ebx would be restored (since ebx is callee saved) before jumping to the
2422 // target@PLT.
2423
2424 // Note: The actual moving to ECX is done further down.
2425 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2426 if (G && !G->getGlobal()->hasLocalLinkage() &&
2427 G->getGlobal()->hasDefaultVisibility())
2428 Callee = LowerGlobalAddress(Callee, DAG);
2429 else if (isa<ExternalSymbolSDNode>(Callee))
2430 Callee = LowerExternalSymbol(Callee, DAG);
2431 }
2432 }
2433
2434 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2435 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
2436 // From AMD64 ABI document:
2437 // For calls that may call functions that use varargs or stdargs
2438 // (prototype-less calls or calls to functions containing ellipsis (...) in
2439 // the declaration) %al is used as hidden argument to specify the number
2440 // of SSE registers used. The contents of %al do not need to match exactly
2441 // the number of registers, but must be an upper bound on the number of SSE
2442 // registers used and is in the range 0 - 8 inclusive.
2443
2444 // Count the number of XMM registers allocated.
2445 static const MCPhysReg XMMArgRegs[] = {
2446 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2447 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2448 };
2449 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2450 assert((Subtarget.hasSSE1() || !NumXMMRegs)
2451 && "SSE registers cannot be used when SSE is disabled");
2452 RegsToPass.push_back(std::make_pair(Register(X86::AL),
2453 DAG.getConstant(NumXMMRegs, dl,
2454 MVT::i8)));
2455 }
2456
2457 if (isVarArg && IsMustTail) {
2458 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2459 for (const auto &F : Forwards) {
2460 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2461 RegsToPass.push_back(std::make_pair(F.PReg, Val));
2462 }
2463 }
2464
2465 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2466 // don't need this because the eligibility check rejects calls that require
2467 // shuffling arguments passed in memory.
2468 if (isTailCall && !IsSibcall) {
2469 // Force all the incoming stack arguments to be loaded from the stack
2470 // before any new outgoing arguments or the return address are stored to the
2471 // stack, because the outgoing stack slots may alias the incoming argument
2472 // stack slots, and the alias isn't otherwise explicit. This is slightly
2473 // more conservative than necessary, because it means that each store
2474 // effectively depends on every argument instead of just those arguments it
2475 // would clobber.
2476 Chain = DAG.getStackArgumentTokenFactor(Chain);
2477
2478 if (ByValTempChain)
2479 Chain =
2480 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain, ByValTempChain);
2481
2482 SmallVector<SDValue, 8> MemOpChains2;
2483 SDValue FIN;
2484 int FI = 0;
2485 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2486 ++I, ++OutsIndex) {
2487 CCValAssign &VA = ArgLocs[I];
2488
2489 if (VA.isRegLoc()) {
2490 if (VA.needsCustom()) {
2491 assert((CallConv == CallingConv::X86_RegCall) &&
2492 "Expecting custom case only in regcall calling convention");
2493 // This means that we are in special case where one argument was
2494 // passed through two register locations - Skip the next location
2495 ++I;
2496 }
2497
2498 continue;
2499 }
2500
2501 assert(VA.isMemLoc());
2502 SDValue Arg = OutVals[OutsIndex];
2503 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2504 // Skip inalloca/preallocated arguments. They don't require any work.
2505 if (Flags.isInAlloca() || Flags.isPreallocated())
2506 continue;
2507 // Create frame index.
2508 int32_t Offset = VA.getLocMemOffset()+FPDiff;
2509 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2510 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
2511 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2512
2513 if (Flags.isByVal()) {
2514 if (SDValue ByValSrc = ByValTemporaries[OutsIndex]) {
2515 auto PtrVT = getPointerTy(DAG.getDataLayout());
2516 SDValue DstAddr = DAG.getFrameIndex(FI, PtrVT);
2517
2518 MemOpChains2.push_back(CreateCopyOfByValArgument(
2519 ByValSrc, DstAddr, Chain, Flags, DAG, dl));
2520 }
2521 } else {
2522 // Store relative to framepointer.
2523 MemOpChains2.push_back(DAG.getStore(
2524 Chain, dl, Arg, FIN,
2525 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
2526 }
2527 }
2528
2529 if (!MemOpChains2.empty())
2530 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2531
2532 // Store the return address to the appropriate stack slot.
2533 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2534 getPointerTy(DAG.getDataLayout()),
2535 RegInfo->getSlotSize(), FPDiff, dl);
2536 }
2537
2538 // Build a sequence of copy-to-reg nodes chained together with token chain
2539 // and glue operands which copy the outgoing args into registers.
2540 SDValue InGlue;
2541 for (const auto &[Reg, N] : RegsToPass) {
2542 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
2543 InGlue = Chain.getValue(1);
2544 }
2545
2546 bool IsImpCall = false;
2547 bool IsCFGuardCall = false;
2548 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2549 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2550 // In the 64-bit large code model, we have to make all calls
2551 // through a register, since the call instruction's 32-bit
2552 // pc-relative offset may not be large enough to hold the whole
2553 // address.
2554 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2555 Callee->getOpcode() == ISD::ExternalSymbol) {
2556 // Lower direct calls to global addresses and external symbols. Setting
2557 // ForCall to true here has the effect of removing WrapperRIP when possible
2558 // to allow direct calls to be selected without first materializing the
2559 // address into a register.
2560 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true, &IsImpCall);
2561 } else if (Subtarget.isTarget64BitILP32() &&
2562 Callee.getValueType() == MVT::i32) {
2563 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
2564 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2565 } else if (Is64Bit && CB && isCFGuardCall(CB)) {
2566 // We'll use a specific pseudo instruction for tail calls to control flow
2567 // guard functions to guarantee the instruction used for the call. To do
2568 // this we need to unwrap the load now and use the CFG Func GV as the
2569 // callee.
2570 IsCFGuardCall = true;
2571 auto *LoadNode = cast<LoadSDNode>(Callee);
2572 GlobalAddressSDNode *GA =
2573 cast<GlobalAddressSDNode>(unwrapAddress(LoadNode->getBasePtr()));
2575 "CFG Call should be to a guard function");
2576 assert(LoadNode->getOffset()->isUndef() &&
2577 "CFG Function load should not have an offset");
2578 Callee = DAG.getTargetGlobalAddress(
2579 GA->getGlobal(), dl, GA->getValueType(0), 0, X86II::MO_NO_FLAG);
2580 }
2581
2582 SmallVector<SDValue, 8> Ops;
2583
2584 if (!IsSibcall && isTailCall && !IsMustTail) {
2585 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
2586 InGlue = Chain.getValue(1);
2587 }
2588
2589 Ops.push_back(Chain);
2590 Ops.push_back(Callee);
2591
2592 if (isTailCall)
2593 Ops.push_back(DAG.getSignedTargetConstant(FPDiff, dl, MVT::i32));
2594
2595 // Add argument registers to the end of the list so that they are known live
2596 // into the call.
2597 for (const auto &[Reg, N] : RegsToPass)
2598 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2599
2600 // Add a register mask operand representing the call-preserved registers.
2601 const uint32_t *Mask = [&]() {
2602 auto AdaptedCC = CallConv;
2603 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2604 // use X86_INTR calling convention because it has the same CSR mask
2605 // (same preserved registers).
2606 if (HasNCSR)
2607 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
2608 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
2609 // to use the CSR_NoRegs_RegMask.
2610 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
2611 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2612 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2613 }();
2614 assert(Mask && "Missing call preserved mask for calling convention");
2615
2616 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getFramePtr())) {
2617 X86Info->setFPClobberedByCall(true);
2618 if (CLI.CB && isa<InvokeInst>(CLI.CB))
2619 X86Info->setFPClobberedByInvoke(true);
2620 }
2621 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getBaseRegister())) {
2622 X86Info->setBPClobberedByCall(true);
2623 if (CLI.CB && isa<InvokeInst>(CLI.CB))
2624 X86Info->setBPClobberedByInvoke(true);
2625 }
2626
2627 // If this is an invoke in a 32-bit function using a funclet-based
2628 // personality, assume the function clobbers all registers. If an exception
2629 // is thrown, the runtime will not restore CSRs.
2630 // FIXME: Model this more precisely so that we can register allocate across
2631 // the normal edge and spill and fill across the exceptional edge.
2632 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
2633 const Function &CallerFn = MF.getFunction();
2634 EHPersonality Pers =
2635 CallerFn.hasPersonalityFn()
2636 ? classifyEHPersonality(CallerFn.getPersonalityFn())
2637 : EHPersonality::Unknown;
2638 if (isFuncletEHPersonality(Pers))
2639 Mask = RegInfo->getNoPreservedMask();
2640 }
2641
2642 // Define a new register mask from the existing mask.
2643 uint32_t *RegMask = nullptr;
2644
2645 // In some calling conventions we need to remove the used physical registers
2646 // from the reg mask. Create a new RegMask for such calling conventions.
2647 // RegMask for calling conventions that disable only return registers (e.g.
2648 // preserve_most) will be modified later in LowerCallResult.
2649 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
2650 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
2651 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2652
2653 // Allocate a new Reg Mask and copy Mask.
2654 RegMask = MF.allocateRegMask();
2655 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
2656 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
2657
2658 // Make sure all sub registers of the argument registers are reset
2659 // in the RegMask.
2660 if (ShouldDisableArgRegs) {
2661 for (auto const &RegPair : RegsToPass)
2662 for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2663 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2664 }
2665
2666 // Create the RegMask Operand according to our updated mask.
2667 Ops.push_back(DAG.getRegisterMask(RegMask));
2668 } else {
2669 // Create the RegMask Operand according to the static mask.
2670 Ops.push_back(DAG.getRegisterMask(Mask));
2671 }
2672
2673 if (InGlue.getNode())
2674 Ops.push_back(InGlue);
2675
2676 if (isTailCall) {
2677 // We used to do:
2678 //// If this is the first return lowered for this function, add the regs
2679 //// to the liveout set for the function.
2680 // This isn't right, although it's probably harmless on x86; liveouts
2681 // should be computed from returns not tail calls. Consider a void
2682 // function making a tail call to a function returning int.
2683 MF.getFrameInfo().setHasTailCall();
2684 auto Opcode =
2686 SDValue Ret = DAG.getNode(Opcode, dl, MVT::Other, Ops);
2687
2688 if (IsCFICall)
2689 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2690
2691 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2692 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2693 return Ret;
2694 }
2695
2696 // Returns a chain & a glue for retval copy to use.
2697 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2698 if (IsImpCall) {
2699 Chain = DAG.getNode(X86ISD::IMP_CALL, dl, NodeTys, Ops);
2700 } else if (IsNoTrackIndirectCall) {
2701 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
2702 } else if (IsCFGuardCall) {
2703 Chain = DAG.getNode(X86ISD::CALL_GLOBALADDR, dl, NodeTys, Ops);
2704 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
2705 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2706 // expanded to the call, directly followed by a special marker sequence and
2707 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
2708 assert(!isTailCall &&
2709 "tail calls cannot be marked with clang.arc.attachedcall");
2710 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2711
2712 // Add a target global address for the retainRV/claimRV runtime function
2713 // just before the call target.
2713 Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
2715 auto PtrVT = getPointerTy(DAG.getDataLayout());
2716 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
2717 Ops.insert(Ops.begin() + 1, GA);
2718 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
2719 } else {
2720 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
2721 }
2722
2723 if (IsCFICall)
2724 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2725
2726 InGlue = Chain.getValue(1);
2727 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2728 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2729
2730 // Save heapallocsite metadata.
2731 if (CLI.CB)
2732 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
2733 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
2734
2735 // Create the CALLSEQ_END node.
2736 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2737 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2738 MF.getTarget().Options.GuaranteedTailCallOpt))
2739 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2740 else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
2741 // If this call passes a struct-return pointer, the callee
2742 // pops that struct pointer.
2743 NumBytesForCalleeToPop = 4;
2744
2745 // Returns a glue for retval copy to use.
2746 if (!IsSibcall) {
2747 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
2748 InGlue, dl);
2749 InGlue = Chain.getValue(1);
2750 }
2751
2752 if (CallingConv::PreserveNone == CallConv)
2753 for (const ISD::OutputArg &Out : Outs) {
2754 if (Out.Flags.isSwiftSelf() || Out.Flags.isSwiftAsync() ||
2755 Out.Flags.isSwiftError()) {
2756 errorUnsupported(DAG, dl,
2757 "Swift attributes can't be used with preserve_none");
2758 break;
2759 }
2760 }
2761
2762 // Handle result values, copying them out of physregs into vregs that we
2763 // return.
2764 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2765 InVals, RegMask);
2766}
2767
2768//===----------------------------------------------------------------------===//
2769// Fast Calling Convention (tail call) implementation
2770//===----------------------------------------------------------------------===//
2771
2772 // Like stdcall, the callee cleans up the arguments, except that ECX is
2773 // reserved for storing the tail called function address. Only 2 registers are
2774// free for argument passing (inreg). Tail call optimization is performed
2775// provided:
2776// * tailcallopt is enabled
2777// * caller/callee are fastcc
2778// On X86_64 architecture with GOT-style position independent code only local
2779// (within module) calls are supported at the moment.
2780// To keep the stack aligned according to platform abi the function
2781// GetAlignedArgumentStackSize ensures that argument delta is always multiples
2782// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
2783// If a tail called function callee has more arguments than the caller the
2784// caller needs to make sure that there is room to move the RETADDR to. This is
2785// achieved by reserving an area the size of the argument delta right after the
2786// original RETADDR, but before the saved framepointer or the spilled registers
2787// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
2788// stack layout:
2789// arg1
2790// arg2
2791// RETADDR
2792// [ new RETADDR
2793// move area ]
2794// (possible EBP)
2795// ESI
2796// EDI
2797// local1 ..
2798
2799 /// Align the stack size, e.g. to 16n + 12 for a 16-byte alignment
2800 /// requirement.
2801unsigned
2802X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2803 SelectionDAG &DAG) const {
2804 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2805 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2806 assert(StackSize % SlotSize == 0 &&
2807 "StackSize must be a multiple of SlotSize");
2808 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
2809}
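// For instance (editorial note), on 32-bit x86 with SlotSize = 4 and a
// 16-byte stack alignment, StackSize = 16 yields alignTo(16 + 4, 16) - 4 = 28,
// i.e. the 16n + 12 shape mentioned above, so the stack is 16-byte aligned
// again once the return address is pushed.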
2810
2811/// Return true if the given stack call argument is already available in the
2812/// same position (relatively) of the caller's incoming argument stack.
2813static
2814 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2815 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2816 const X86InstrInfo *TII, const CCValAssign &VA) {
2817 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2818
2819 for (;;) {
2820 // Look through nodes that don't alter the bits of the incoming value.
2821 unsigned Op = Arg.getOpcode();
2822 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2823 Op == ISD::AssertZext) {
2824 Arg = Arg.getOperand(0);
2825 continue;
2826 }
2827 if (Op == ISD::TRUNCATE) {
2828 const SDValue &TruncInput = Arg.getOperand(0);
2829 if (TruncInput.getOpcode() == ISD::AssertZext &&
2830 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
2831 Arg.getValueType()) {
2832 Arg = TruncInput.getOperand(0);
2833 continue;
2834 }
2835 }
2836 break;
2837 }
2838
2839 int FI = INT_MAX;
2840 if (Arg.getOpcode() == ISD::CopyFromReg) {
2841 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2842 if (!VR.isVirtual())
2843 return false;
2844 MachineInstr *Def = MRI->getVRegDef(VR);
2845 if (!Def)
2846 return false;
2847 if (!Flags.isByVal()) {
2848 if (!TII->isLoadFromStackSlot(*Def, FI))
2849 return false;
2850 } else {
2851 unsigned Opcode = Def->getOpcode();
2852 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2853 Opcode == X86::LEA64_32r) &&
2854 Def->getOperand(1).isFI()) {
2855 FI = Def->getOperand(1).getIndex();
2856 Bytes = Flags.getByValSize();
2857 } else
2858 return false;
2859 }
2860 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2861 if (Flags.isByVal())
2862 // ByVal argument is passed in as a pointer but it's now being
2863 // dereferenced. e.g.
2864 // define @foo(%struct.X* %A) {
2865 // tail call @bar(%struct.X* byval %A)
2866 // }
2867 return false;
2868 SDValue Ptr = Ld->getBasePtr();
2869 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2870 if (!FINode)
2871 return false;
2872 FI = FINode->getIndex();
2873 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
2874 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
2875 FI = FINode->getIndex();
2876 Bytes = Flags.getByValSize();
2877 } else
2878 return false;
2879
2880 assert(FI != INT_MAX);
2881 if (!MFI.isFixedObjectIndex(FI))
2882 return false;
2883
2884 if (Offset != MFI.getObjectOffset(FI))
2885 return false;
2886
2887 // If this is not byval, check that the argument stack object is immutable.
2888 // inalloca and argument copy elision can create mutable argument stack
2889 // objects. Byval objects can be mutated, but a byval call intends to pass the
2890 // mutated memory.
2891 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
2892 return false;
2893
2894 if (VA.getLocVT().getFixedSizeInBits() >
2895 Arg.getValueSizeInBits().getFixedValue()) {
2896 // If the argument location is wider than the argument type, check that any
2897 // extension flags match.
2898 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
2899 Flags.isSExt() != MFI.isObjectSExt(FI)) {
2900 return false;
2901 }
2902 }
2903
2904 return Bytes == MFI.getObjectSize(FI);
2905}
2906
2907static bool
2908 mayBeSRetTailCallCompatible(const TargetLowering::CallLoweringInfo &CLI,
2909 Register CallerSRetReg) {
2910 const auto &Outs = CLI.Outs;
2911 const auto &OutVals = CLI.OutVals;
2912
2913 // We know the caller has a sret pointer argument (CallerSRetReg). Locate the
2914 // operand index within the callee that may have a sret pointer too.
2915 unsigned Pos = 0;
2916 for (unsigned E = Outs.size(); Pos != E; ++Pos)
2917 if (Outs[Pos].Flags.isSRet())
2918 break;
2919 // Bail out if the callee does not have any sret argument.
2920 if (Pos == Outs.size())
2921 return false;
2922
2923 // At this point, either the caller is forwarding its sret argument to the
2924 // callee, or the callee is being passed a different sret pointer. We now look
2925 // for a CopyToReg, where the callee sret argument is written into a new vreg
2926 // (which should later be %rax/%eax, if this is returned).
2927 SDValue SRetArgVal = OutVals[Pos];
2928 for (SDNode *User : SRetArgVal->users()) {
2929 if (User->getOpcode() != ISD::CopyToReg)
2930 continue;
2931 Register Reg = cast<RegisterSDNode>(User->getOperand(1))->getReg();
2932 if (Reg == CallerSRetReg && User->getOperand(2) == SRetArgVal)
2933 return true;
2934 }
2935
2936 return false;
2937}
2938
2939/// Check whether the call is eligible for sibling call optimization. Sibling
2940/// calls are loosely defined to be simple, profitable tail calls that only
2941 /// require adjusting register parameters. We do not speculatively optimize
2942 /// complex calls that require lots of argument memory operations that may
2943/// alias.
2944///
2945/// Note that LLVM supports multiple ways, such as musttail, to force tail call
2946/// emission. Returning false from this function will not prevent tail call
2947/// emission in all cases.
2948bool X86TargetLowering::isEligibleForSiblingCallOpt(
2949 TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
2950 SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const {
2951 SelectionDAG &DAG = CLI.DAG;
2952 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2953 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2954 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2955 SDValue Callee = CLI.Callee;
2956 CallingConv::ID CalleeCC = CLI.CallConv;
2957 bool isVarArg = CLI.IsVarArg;
2958
2959 if (!mayTailCallThisCC(CalleeCC))
2960 return false;
2961
2962 // If -tailcallopt is specified, make fastcc functions tail-callable.
2963 MachineFunction &MF = DAG.getMachineFunction();
2964 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2965 const Function &CallerF = MF.getFunction();
2966
2967 // If the function return type is x86_fp80 and the callee return type is not,
2968 // then the FP_EXTEND of the call result is not a nop. It's not safe to
2969 // perform a tailcall optimization here.
2970 if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
2971 return false;
2972
2973 // Win64 functions have extra shadow space for argument homing. Don't do the
2974 // sibcall if the caller and callee have mismatched expectations for this
2975 // space.
2976 CallingConv::ID CallerCC = CallerF.getCallingConv();
2977 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
2978 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
2979 if (IsCalleeWin64 != IsCallerWin64)
2980 return false;
2981
2982 // If we are using a GOT, don't generate sibling calls to non-local,
2983 // default-visibility symbols. Tail calling such a symbol requires using a GOT
2984 // relocation, which forces early binding of the symbol. This breaks code that
2985 // requires lazy function symbol resolution. Using musttail or
2986 // GuaranteedTailCallOpt will override this.
2987 if (Subtarget.isPICStyleGOT()) {
2988 if (isa<ExternalSymbolSDNode>(Callee))
2989 return false;
2990 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2991 if (!G->getGlobal()->hasLocalLinkage() &&
2992 G->getGlobal()->hasDefaultVisibility())
2993 return false;
2994 }
2995 }
2996
2997 // Look for obvious safe cases to perform tail call optimization that do not
2998 // require ABI changes. This is what gcc calls sibcall.
2999
3000 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
3001 // emit a special epilogue.
3002 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
3003 if (RegInfo->hasStackRealignment(MF))
3004 return false;
3005
3006 // Avoid the sibcall optimization if we are an sret-returning function and the
3007 // callee is incompatible, unless that premise is proven wrong below. See the
3008 // comment in LowerReturn about why hasStructRetAttr is insufficient.
3009 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
3010 // For a compatible tail call the callee must return our sret pointer. So it
3011 // needs to be (a) an sret function itself and (b) we pass our sret as its
3012 // sret. Condition #b is harder to determine.
3013 if (!mayBeSRetTailCallCompatible(CLI, SRetReg))
3014 return false;
3015 } else if (IsCalleePopSRet)
3016 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
3017 // expect that.
3018 return false;
3019
3020 // Do not sibcall optimize vararg calls unless all arguments are passed via
3021 // registers.
3022 LLVMContext &C = *DAG.getContext();
3023 if (isVarArg && !Outs.empty()) {
3024 // Optimizing for varargs on Win64 is unlikely to be safe without
3025 // additional testing.
3026 if (IsCalleeWin64 || IsCallerWin64)
3027 return false;
3028
3029 for (const auto &VA : ArgLocs)
3030 if (!VA.isRegLoc())
3031 return false;
3032 }
3033
3034 // If the call result is in ST0 / ST1, it needs to be popped off the x87
3035 // stack. Therefore, if the result is unused by the caller, it is not safe to
3036 // optimize this into a sibcall.
3037 bool Unused = false;
3038 for (const auto &In : Ins) {
3039 if (!In.Used) {
3040 Unused = true;
3041 break;
3042 }
3043 }
3044 if (Unused) {
3045 SmallVector<CCValAssign, 16> RVLocs;
3046 CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
3047 RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3048 for (const auto &VA : RVLocs) {
3049 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
3050 return false;
3051 }
3052 }
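// [Editor's note, not part of this file] A hedged illustration of the x87
// constraint checked above: when the callee's result comes back in ST0 but the
// caller never uses it, the caller must still pop the x87 register stack
// (e.g. with an fstp), so the call cannot be lowered to a bare jmp.
//
//   long double callee();          // returns on the x87 stack (ST0)
//   void caller() {
//     (void)callee();              // result unused; ST0 must still be popped
//   }                              // => not eligible for a sibcall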
3053
3054 // Check that the call results are passed in the same way.
3055 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3056 RetCC_X86, RetCC_X86))
3057 return false;
3058 // The callee has to preserve all registers the caller needs to preserve.
3059 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
3060 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3061 if (CallerCC != CalleeCC) {
3062 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3063 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3064 return false;
3065 }
3066
3067 // The caller's stack frame cannot be replaced by the tail callee's if the
3068 // caller is required to preserve all registers. Conservatively prevent tail
3069 // call optimization even if, hypothetically, all registers are used for
3070 // passing formal parameters or returning values.
3071 if (CallerF.hasFnAttribute("no_caller_saved_registers"))
3072 return false;
3073
3074 unsigned StackArgsSize = CCInfo.getStackSize();
3075
3076 // If the callee takes no arguments then go on to check the results of the
3077 // call.
3078 if (!Outs.empty()) {
3079 if (StackArgsSize > 0) {
3080 // Check whether the arguments are already laid out in the same way as the
3081 // caller's fixed stack objects.
3082 MachineFrameInfo &MFI = MF.getFrameInfo();
3083 const MachineRegisterInfo *MRI = &MF.getRegInfo();
3084 const X86InstrInfo *TII = Subtarget.getInstrInfo();
3085 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
3086 const CCValAssign &VA = ArgLocs[I];
3087 SDValue Arg = OutVals[I];
3088 ISD::ArgFlagsTy Flags = Outs[I].Flags;
3089 if (VA.getLocInfo() == CCValAssign::Indirect)
3090 return false;
3091 if (!VA.isRegLoc()) {
3092 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
3093 TII, VA))
3094 return false;
3095 }
3096 }
3097 }
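// [Editor's note, not part of this file] A hedged illustration of the
// MatchingStackOffset requirement above, assuming a 32-bit cdecl-style call
// where both arguments are passed on the stack: a stack argument is only
// acceptable if it is the caller's own incoming argument forwarded unchanged
// from a fixed, immutable stack slot at the same offset and size, so no
// argument stores are needed before the jump.
//
//   int callee(int a, int b);
//   int f(int a, int b) { return callee(a, b); } // slots already line up
//   int g(int a, int b) { return callee(b, a); } // slots would need rewriting,
//                                                // so no sibcall here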
3098
3099 bool PositionIndependent = isPositionIndependent();
3100 // If the tailcall address may be in a register, then make sure it's
3101 // possible to register allocate for it. In 32-bit, the call address can
3102 // only target EAX, EDX, or ECX since the tail call must be scheduled after
3103 // callee-saved registers are restored. These happen to be the same
3104 // registers used to pass 'inreg' arguments so watch out for those.
3105 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
3106 !isa<ExternalSymbolSDNode>(Callee)) ||
3107 PositionIndependent)) {
3108 unsigned NumInRegs = 0;
3109 // In PIC we need an extra register to formulate the address computation
3110 // for the callee.
3111 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
3112
3113 for (const auto &VA : ArgLocs) {
3114 if (!VA.isRegLoc())
3115 continue;
3116 Register Reg = VA.getLocReg();
3117 switch (Reg) {
3118 default: break;
3119 case X86::EAX: case X86::EDX: case X86::ECX:
3120 if (++NumInRegs == MaxInRegs)
3121 return false;
3122 break;
3123 }
3124 }
3125 }
3126
3127 const MachineRegisterInfo &MRI = MF.getRegInfo();
3128 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3129 return false;
3130 }
3131
3132 bool CalleeWillPop =
3133 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
3134 MF.getTarget().Options.GuaranteedTailCallOpt);
3135
3136 if (unsigned BytesToPop = FuncInfo->getBytesToPopOnReturn()) {
3137 // If we have bytes to pop, the callee must pop them.
3138 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
3139 if (!CalleePopMatches)
3140 return false;
3141 } else if (CalleeWillPop && StackArgsSize > 0) {
3142 // If we don't have bytes to pop, make sure the callee doesn't pop any.
3143 return false;
3144 }
3145
3146 return true;
3147}
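// [Editor's note] The helper below is not part of this file; it is a minimal,
// hedged sketch with hypothetical names that restates the callee-pop matching
// rule enforced at the end of isEligibleForSiblingCallOpt above.
static bool sketchCalleePopCompatible(bool CalleeWillPop, unsigned BytesToPop,
                                      unsigned StackArgsSize) {
  // The caller's ret already pops bytes, so the callee's ret must pop exactly
  // the same amount.
  if (BytesToPop)
    return CalleeWillPop && BytesToPop == StackArgsSize;
  // The caller pops nothing, so the callee must not pop stack arguments either.
  return !(CalleeWillPop && StackArgsSize > 0);
}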
3148
3149/// Determines whether the callee is required to pop its own arguments.
3150/// Callee pop is necessary to support tail calls.
3151bool X86::isCalleePop(CallingConv::ID CallingConv,
3152 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
3153 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
3154 // can guarantee TCO.
3155 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
3156 return true;
3157
3158 switch (CallingConv) {
3159 default:
3160 return false;
3161 case CallingConv::X86_StdCall:
3162 case CallingConv::X86_FastCall:
3163 case CallingConv::X86_ThisCall:
3164 case CallingConv::X86_VectorCall:
3165 return !is64Bit;
3166 }
3167}
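// [Editor's note, not part of this file] A hedged usage sketch of the rule
// above: with GuaranteeTCO disabled and no varargs, only the 32-bit
// callee-cleanup conventions (stdcall and friends) report true.
//
//   X86::isCalleePop(CallingConv::X86_StdCall, /*is64Bit=*/false,
//                    /*IsVarArg=*/false, /*GuaranteeTCO=*/false); // true
//   X86::isCalleePop(CallingConv::X86_StdCall, /*is64Bit=*/true,
//                    /*IsVarArg=*/false, /*GuaranteeTCO=*/false); // false
//   X86::isCalleePop(CallingConv::C, /*is64Bit=*/false,
//                    /*IsVarArg=*/false, /*GuaranteeTCO=*/false); // false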