LLVM 23.0.0git
X86ISelLoweringCall.cpp
Go to the documentation of this file.
1//===- llvm/lib/Target/X86/X86ISelCallLowering.cpp - Call lowering --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements the lowering of LLVM calls to DAG nodes.
11//
12//===----------------------------------------------------------------------===//
13
15#include "X86.h"
16#include "X86CallingConv.h"
17#include "X86FrameLowering.h"
18#include "X86ISelLowering.h"
19#include "X86InstrBuilder.h"
21#include "X86TargetMachine.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
29#include "llvm/IR/Module.h"
31
32#define DEBUG_TYPE "x86-isel"
33
34using namespace llvm;
35
36STATISTIC(NumTailCalls, "Number of tail calls");
37
38/// Call this when the user attempts to do something unsupported, like
39/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
40/// report_fatal_error, so calling code should attempt to recover without
41/// crashing.
42static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
43 const char *Msg) {
45 DAG.getContext()->diagnose(
47}
48
49/// Returns true if a CC can dynamically exclude a register from the list of
50/// callee-saved-registers (TargetRegistryInfo::getCalleeSavedRegs()) based on
51/// the return registers.
53 switch (CC) {
54 default:
55 return false;
59 return true;
60 }
61}
62
63/// Returns true if a CC can dynamically exclude a register from the list of
64/// callee-saved-registers (TargetRegistryInfo::getCalleeSavedRegs()) based on
65/// the parameters.
69
70static std::pair<MVT, unsigned>
72 const X86Subtarget &Subtarget) {
73 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
74 // convention is one that uses k registers.
75 if (NumElts == 2)
76 return {MVT::v2i64, 1};
77 if (NumElts == 4)
78 return {MVT::v4i32, 1};
79 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
81 return {MVT::v8i16, 1};
82 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
84 return {MVT::v16i8, 1};
85 // v32i1 passes in ymm unless we have BWI and the calling convention is
86 // regcall.
87 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
88 return {MVT::v32i8, 1};
89 // Split v64i1 vectors if we don't have v64i8 available.
90 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
91 if (Subtarget.useAVX512Regs())
92 return {MVT::v64i8, 1};
93 return {MVT::v32i8, 2};
94 }
95
96 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
97 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
98 NumElts > 64)
99 return {MVT::i8, NumElts};
100
102}
103
106 EVT VT) const {
107 if (VT.isVector()) {
108 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
109 unsigned NumElts = VT.getVectorNumElements();
110
111 MVT RegisterVT;
112 unsigned NumRegisters;
113 std::tie(RegisterVT, NumRegisters) =
114 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
115 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
116 return RegisterVT;
117 }
118
119 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
120 return MVT::v8f16;
121 }
122
123 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
124 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
125 !Subtarget.hasX87())
126 return MVT::i32;
127
128 if (isTypeLegal(MVT::f16)) {
129 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
131 Context, CC, VT.changeVectorElementType(Context, MVT::f16));
132
133 if (VT == MVT::bf16)
134 return MVT::f16;
135 }
136
137 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
138}
139
142 EVT VT) const {
143 if (VT.isVector()) {
144 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
145 unsigned NumElts = VT.getVectorNumElements();
146
147 MVT RegisterVT;
148 unsigned NumRegisters;
149 std::tie(RegisterVT, NumRegisters) =
150 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
151 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
152 return NumRegisters;
153 }
154
155 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
156 return 1;
157 }
158
159 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
160 // x87 is disabled.
161 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
162 if (VT == MVT::f64)
163 return 2;
164 if (VT == MVT::f80)
165 return 3;
166 }
167
168 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
169 isTypeLegal(MVT::f16))
171 Context, CC, VT.changeVectorElementType(Context, MVT::f16));
172
173 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
174}
175
177 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
178 unsigned &NumIntermediates, MVT &RegisterVT) const {
179 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
180 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
181 Subtarget.hasAVX512() &&
183 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
184 VT.getVectorNumElements() > 64)) {
185 RegisterVT = MVT::i8;
186 IntermediateVT = MVT::i1;
187 NumIntermediates = VT.getVectorNumElements();
188 return NumIntermediates;
189 }
190
191 // Split v64i1 vectors if we don't have v64i8 available.
192 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
194 RegisterVT = MVT::v32i8;
195 IntermediateVT = MVT::v32i1;
196 NumIntermediates = 2;
197 return 2;
198 }
199
200 // Split vNbf16 vectors according to vNf16.
201 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
202 isTypeLegal(MVT::f16))
203 VT = VT.changeVectorElementType(Context, MVT::f16);
204
205 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
206 NumIntermediates, RegisterVT);
207}
208
210 LLVMContext& Context,
211 EVT VT) const {
212 if (!VT.isVector())
213 return MVT::i8;
214
215 if (Subtarget.hasAVX512()) {
216 // Figure out what this type will be legalized to.
217 EVT LegalVT = VT;
218 while (getTypeAction(Context, LegalVT) != TypeLegal)
219 LegalVT = getTypeToTransformTo(Context, LegalVT);
220
221 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
222 if (LegalVT.getSimpleVT().is512BitVector())
223 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
224
225 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
226 // If we legalized to less than a 512-bit vector, then we will use a vXi1
227 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
228 // vXi16/vXi8.
229 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
230 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
231 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
232 }
233 }
234
236}
237
239 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
240 const DataLayout &DL) const {
241 // On x86-64 i128 is split into two i64s and needs to be allocated to two
242 // consecutive registers, or spilled to the stack as a whole. On x86-32 i128
243 // is split to four i32s and never actually passed in registers, but we use
244 // the consecutive register mark to match it in TableGen.
245 if (Ty->isIntegerTy(128))
246 return true;
247
248 // On x86-32, fp128 acts the same as i128.
249 if (Subtarget.is32Bit() && Ty->isFP128Ty())
250 return true;
251
252 return false;
253}
254
255/// Helper for getByValTypeAlignment to determine
256/// the desired ByVal argument alignment.
257static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
258 if (MaxAlign == 16)
259 return;
260 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
261 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
262 MaxAlign = Align(16);
263 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
264 Align EltAlign;
265 getMaxByValAlign(ATy->getElementType(), EltAlign);
266 if (EltAlign > MaxAlign)
267 MaxAlign = EltAlign;
268 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
269 for (auto *EltTy : STy->elements()) {
270 Align EltAlign;
271 getMaxByValAlign(EltTy, EltAlign);
272 if (EltAlign > MaxAlign)
273 MaxAlign = EltAlign;
274 if (MaxAlign == 16)
275 break;
276 }
277 }
278}
279
280/// Return the desired alignment for ByVal aggregate
281/// function arguments in the caller parameter area. For X86, aggregates
282/// that contain SSE vectors are placed at 16-byte boundaries while the rest
283/// are at 4-byte boundaries.
285 const DataLayout &DL) const {
286 if (Subtarget.is64Bit())
287 return std::max(DL.getABITypeAlign(Ty), Align::Constant<8>());
288
289 Align Alignment(4);
290 if (Subtarget.hasSSE1())
291 getMaxByValAlign(Ty, Alignment);
292 return Alignment;
293}
294
295/// It returns EVT::Other if the type should be determined using generic
296/// target-independent logic.
297/// For vector ops we check that the overall size isn't larger than our
298/// preferred vector width.
300 LLVMContext &Context, const MemOp &Op,
301 const AttributeList &FuncAttributes) const {
302 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
303 if (Op.size() >= 16 &&
304 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
305 // FIXME: Check if unaligned 64-byte accesses are slow.
306 if (Op.size() >= 64 && Subtarget.hasAVX512() &&
307 (Subtarget.getPreferVectorWidth() >= 512)) {
308 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
309 }
310 // FIXME: Check if unaligned 32-byte accesses are slow.
311 if (Op.size() >= 32 && Subtarget.hasAVX() &&
312 Subtarget.useLight256BitInstructions()) {
313 // Although this isn't a well-supported type for AVX1, we'll let
314 // legalization and shuffle lowering produce the optimal codegen. If we
315 // choose an optimal type with a vector element larger than a byte,
316 // getMemsetStores() may create an intermediate splat (using an integer
317 // multiply) before we splat as a vector.
318 return MVT::v32i8;
319 }
320 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
321 return MVT::v16i8;
322 // TODO: Can SSE1 handle a byte vector?
323 // If we have SSE1 registers we should be able to use them.
324 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
325 (Subtarget.getPreferVectorWidth() >= 128))
326 return MVT::v4f32;
327 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
328 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
329 // Do not use f64 to lower memcpy if source is string constant. It's
330 // better to use i32 to avoid the loads.
331 // Also, do not use f64 to lower memset unless this is a memset of zeros.
332 // The gymnastics of splatting a byte value into an XMM register and then
333 // only using 8-byte stores (because this is a CPU with slow unaligned
334 // 16-byte accesses) makes that a loser.
335 return MVT::f64;
336 }
337 }
338 // This is a compromise. If we reach here, unaligned accesses may be slow on
339 // this target. However, creating smaller, aligned accesses could be even
340 // slower and would certainly be a lot more code.
341 if (Subtarget.is64Bit() && Op.size() >= 8)
342 return MVT::i64;
343 return MVT::i32;
344}
345
347 if (VT == MVT::f32)
348 return Subtarget.hasSSE1();
349 if (VT == MVT::f64)
350 return Subtarget.hasSSE2();
351 return true;
352}
353
354static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
355 return (8 * Alignment.value()) % SizeInBits == 0;
356}
357
359 if (isBitAligned(Alignment, VT.getSizeInBits()))
360 return true;
361 switch (VT.getSizeInBits()) {
362 default:
363 // 8-byte and under are always assumed to be fast.
364 return true;
365 case 128:
366 return !Subtarget.isUnalignedMem16Slow();
367 case 256:
368 return !Subtarget.isUnalignedMem32Slow();
369 // TODO: What about AVX-512 (512-bit) accesses?
370 }
371}
372
374 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
375 unsigned *Fast) const {
376 if (Fast)
377 *Fast = isMemoryAccessFast(VT, Alignment);
378 // NonTemporal vector memory ops must be aligned.
379 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
380 // NT loads can only be vector aligned, so if its less aligned than the
381 // minimum vector size (which we can split the vector down to), we might as
382 // well use a regular unaligned vector load.
383 // We don't have any NT loads pre-SSE41.
384 if (!!(Flags & MachineMemOperand::MOLoad))
385 return (Alignment < 16 || !Subtarget.hasSSE41());
386 return false;
387 }
388 // Misaligned accesses of any size are always allowed.
389 return true;
390}
391
393 const DataLayout &DL, EVT VT,
394 unsigned AddrSpace, Align Alignment,
396 unsigned *Fast) const {
397 if (Fast)
398 *Fast = isMemoryAccessFast(VT, Alignment);
399 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
400 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
401 /*Fast=*/nullptr))
402 return true;
403 // NonTemporal vector memory ops are special, and must be aligned.
404 if (!isBitAligned(Alignment, VT.getSizeInBits()))
405 return false;
406 switch (VT.getSizeInBits()) {
407 case 128:
408 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
409 return true;
410 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
411 return true;
412 return false;
413 case 256:
414 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
415 return true;
416 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
417 return true;
418 return false;
419 case 512:
420 if (Subtarget.hasAVX512())
421 return true;
422 return false;
423 default:
424 return false; // Don't have NonTemporal vector memory ops of this size.
425 }
426 }
427 return true;
428}
429
430/// Return the entry encoding for a jump table in the
431/// current function. The returned value is a member of the
432/// MachineJumpTableInfo::JTEntryKind enum.
434 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
435 // symbol.
436 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
438 if (isPositionIndependent() &&
440 !Subtarget.isTargetCOFF())
442
443 // Otherwise, use the normal jump table encoding heuristics.
445}
446
448 return Subtarget.useSoftFloat();
449}
450
452 ArgListTy &Args) const {
453
454 // Only relabel X86-32 for C / Stdcall CCs.
455 if (Subtarget.is64Bit())
456 return;
457 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
458 return;
459 unsigned ParamRegs = 0;
460 if (auto *M = MF->getFunction().getParent())
461 ParamRegs = M->getNumberRegisterParameters();
462
463 // Mark the first N int arguments as having reg
464 for (auto &Arg : Args) {
465 Type *T = Arg.Ty;
466 if (T->isIntOrPtrTy())
467 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
468 unsigned numRegs = 1;
469 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
470 numRegs = 2;
471 if (ParamRegs < numRegs)
472 return;
473 ParamRegs -= numRegs;
474 Arg.IsInReg = true;
475 }
476 }
477}
478
479const MCExpr *
481 const MachineBasicBlock *MBB,
482 unsigned uid,MCContext &Ctx) const{
483 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
484 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
485 // entries.
486 return MCSymbolRefExpr::create(MBB->getSymbol(), X86::S_GOTOFF, Ctx);
487}
488
489/// Returns relocation base for the given PIC jumptable.
491 SelectionDAG &DAG) const {
492 if (!Subtarget.is64Bit())
493 // This doesn't have SDLoc associated with it, but is not really the
494 // same as a Register.
495 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
497 return Table;
498}
499
500/// This returns the relocation base for the given PIC jumptable,
501/// the same as getPICJumpTableRelocBase, but as an MCExpr.
503getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
504 MCContext &Ctx) const {
505 // X86-64 uses RIP relative addressing based on the jump table label.
506 if (Subtarget.isPICStyleRIPRel() ||
507 (Subtarget.is64Bit() &&
510
511 // Otherwise, the reference is relative to the PIC base.
512 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
513}
514
515std::pair<const TargetRegisterClass *, uint8_t>
517 MVT VT) const {
518 const TargetRegisterClass *RRC = nullptr;
519 uint8_t Cost = 1;
520 switch (VT.SimpleTy) {
521 default:
523 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
524 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
525 break;
526 case MVT::x86mmx:
527 RRC = &X86::VR64RegClass;
528 break;
529 case MVT::f32: case MVT::f64:
530 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
531 case MVT::v4f32: case MVT::v2f64:
532 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
533 case MVT::v8f32: case MVT::v4f64:
534 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
535 case MVT::v16f32: case MVT::v8f64:
536 RRC = &X86::VR128XRegClass;
537 break;
538 }
539 return std::make_pair(RRC, Cost);
540}
541
542unsigned X86TargetLowering::getAddressSpace() const {
543 if (Subtarget.is64Bit())
545 : X86AS::FS;
546 return X86AS::GS;
547}
548
549static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
550 return TargetTriple.isOSGlibc() || TargetTriple.isMusl() ||
551 TargetTriple.isOSFuchsia() || TargetTriple.isAndroid();
552}
553
560
561Value *
563 const LibcallLoweringInfo &Libcalls) const {
564 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
565 // tcbhead_t; use it instead of the usual global variable (see
566 // sysdeps/{i386,x86_64}/nptl/tls.h)
567 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
568 unsigned AddressSpace = getAddressSpace();
569
570 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
571 if (Subtarget.isTargetFuchsia())
572 return SegmentOffset(IRB, 0x10, AddressSpace);
573
574 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
575 // Specially, some users may customize the base reg and offset.
576 int Offset = M->getStackProtectorGuardOffset();
577 // If we don't set -stack-protector-guard-offset value:
578 // %fs:0x28, unless we're using a Kernel code model, in which case
579 // it's %gs:0x28. gs:0x14 on i386.
580 if (Offset == INT_MAX)
581 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
582
583 StringRef GuardReg = M->getStackProtectorGuardReg();
584 if (GuardReg == "fs")
586 else if (GuardReg == "gs")
588
589 // Use symbol guard if user specify.
590 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
591 if (!GuardSymb.empty()) {
592 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
593 if (!GV) {
594 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
595 : Type::getInt32Ty(M->getContext());
596 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
597 nullptr, GuardSymb, nullptr,
599 if (!Subtarget.isTargetDarwin())
600 GV->setDSOLocal(M->getDirectAccessExternalData());
601 }
602 return GV;
603 }
604
605 return SegmentOffset(IRB, Offset, AddressSpace);
606 }
607 return TargetLowering::getIRStackGuard(IRB, Libcalls);
608}
609
611 Module &M, const LibcallLoweringInfo &Libcalls) const {
612 // MSVC CRT provides functionalities for stack protection.
613 RTLIB::LibcallImpl SecurityCheckCookieLibcall =
614 Libcalls.getLibcallImpl(RTLIB::SECURITY_CHECK_COOKIE);
615
616 RTLIB::LibcallImpl SecurityCookieVar =
617 Libcalls.getLibcallImpl(RTLIB::STACK_CHECK_GUARD);
618 if (SecurityCheckCookieLibcall != RTLIB::Unsupported &&
619 SecurityCookieVar != RTLIB::Unsupported) {
620 // MSVC CRT provides functionalities for stack protection.
621 // MSVC CRT has a global variable holding security cookie.
622 M.getOrInsertGlobal(getLibcallImplName(SecurityCookieVar),
623 PointerType::getUnqual(M.getContext()));
624
625 // MSVC CRT has a function to validate security cookie.
626 FunctionCallee SecurityCheckCookie =
627 M.getOrInsertFunction(getLibcallImplName(SecurityCheckCookieLibcall),
628 Type::getVoidTy(M.getContext()),
629 PointerType::getUnqual(M.getContext()));
630
631 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
632 F->setCallingConv(CallingConv::X86_FastCall);
633 F->addParamAttr(0, Attribute::AttrKind::InReg);
634 }
635 return;
636 }
637
638 StringRef GuardMode = M.getStackProtectorGuard();
639
640 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
641 if ((GuardMode == "tls" || GuardMode.empty()) &&
642 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
643 return;
645}
646
648 IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const {
649 // Android provides a fixed TLS slot for the SafeStack pointer. See the
650 // definition of TLS_SLOT_SAFESTACK in
651 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
652 if (Subtarget.isTargetAndroid()) {
653 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs:
654 // %gs:0x24 on i386
655 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
656 return SegmentOffset(IRB, Offset, getAddressSpace());
657 }
658
659 // Fuchsia is similar.
660 if (Subtarget.isTargetFuchsia()) {
661 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
662 return SegmentOffset(IRB, 0x18, getAddressSpace());
663 }
664
666}
667
668//===----------------------------------------------------------------------===//
669// Return Value Calling Convention Implementation
670//===----------------------------------------------------------------------===//
671
672bool X86TargetLowering::CanLowerReturn(
673 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
674 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
675 const Type *RetTy) const {
677 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
678 return CCInfo.CheckReturn(Outs, RetCC_X86);
679}
680
681const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
682 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
683 return ScratchRegs;
684}
685
687 static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
688 return RCRegs;
689}
690
691/// Lowers masks values (v*i1) to the local register values
692/// \returns DAG node after lowering to register type
693static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
694 const SDLoc &DL, SelectionDAG &DAG) {
695 EVT ValVT = ValArg.getValueType();
696
697 if (ValVT == MVT::v1i1)
698 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
699 DAG.getIntPtrConstant(0, DL));
700
701 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
702 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
703 // Two stage lowering might be required
704 // bitcast: v8i1 -> i8 / v16i1 -> i16
705 // anyextend: i8 -> i32 / i16 -> i32
706 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
707 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
708 if (ValLoc == MVT::i32)
709 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
710 return ValToCopy;
711 }
712
713 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
714 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
715 // One stage lowering is required
716 // bitcast: v32i1 -> i32 / v64i1 -> i64
717 return DAG.getBitcast(ValLoc, ValArg);
718 }
719
720 return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
721}
722
723/// Breaks v64i1 value into two registers and adds the new node to the DAG
725 const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
726 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
727 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
728 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
729 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
730 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
731 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
732 "The value should reside in two registers");
733
734 // Before splitting the value we cast it to i64
735 Arg = DAG.getBitcast(MVT::i64, Arg);
736
737 // Splitting the value into two i32 types
738 SDValue Lo, Hi;
739 std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
740
741 // Attach the two i32 types into corresponding registers
742 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
743 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
744}
745
747X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
748 bool isVarArg,
750 const SmallVectorImpl<SDValue> &OutVals,
751 const SDLoc &dl, SelectionDAG &DAG) const {
752 MachineFunction &MF = DAG.getMachineFunction();
753 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
754
755 // In some cases we need to disable registers from the default CSR list.
756 // For example, when they are used as return registers (preserve_* and X86's
757 // regcall) or for argument passing (X86's regcall).
758 bool ShouldDisableCalleeSavedRegister =
759 shouldDisableRetRegFromCSR(CallConv) ||
760 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
761
762 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
763 report_fatal_error("X86 interrupts may not return any value");
764
766 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
767 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
768
770 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
771 ++I, ++OutsIndex) {
772 CCValAssign &VA = RVLocs[I];
773 assert(VA.isRegLoc() && "Can only return in registers!");
774
775 // Add the register to the CalleeSaveDisableRegs list.
776 if (ShouldDisableCalleeSavedRegister)
778
779 SDValue ValToCopy = OutVals[OutsIndex];
780 EVT ValVT = ValToCopy.getValueType();
781
782 // Promote values to the appropriate types.
783 if (VA.getLocInfo() == CCValAssign::SExt)
784 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
785 else if (VA.getLocInfo() == CCValAssign::ZExt)
786 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
787 else if (VA.getLocInfo() == CCValAssign::AExt) {
788 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
789 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
790 else
791 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
792 }
793 else if (VA.getLocInfo() == CCValAssign::BCvt)
794 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
795
797 "Unexpected FP-extend for return value.");
798
799 // Report an error if we have attempted to return a value via an XMM
800 // register and SSE was disabled.
801 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
802 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
803 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
804 } else if (!Subtarget.hasSSE2() &&
805 X86::FR64XRegClass.contains(VA.getLocReg()) &&
806 ValVT == MVT::f64) {
807 // When returning a double via an XMM register, report an error if SSE2 is
808 // not enabled.
809 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
810 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
811 }
812
813 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
814 // the RET instruction and handled by the FP Stackifier.
815 if (VA.getLocReg() == X86::FP0 ||
816 VA.getLocReg() == X86::FP1) {
817 // If this is a copy from an xmm register to ST(0), use an FPExtend to
818 // change the value to the FP stack register class.
820 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
821 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
822 // Don't emit a copytoreg.
823 continue;
824 }
825
826 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
827 // which is returned in RAX / RDX.
828 if (Subtarget.is64Bit()) {
829 if (ValVT == MVT::x86mmx) {
830 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
831 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
832 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
833 ValToCopy);
834 // If we don't have SSE2 available, convert to v4f32 so the generated
835 // register is legal.
836 if (!Subtarget.hasSSE2())
837 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
838 }
839 }
840 }
841
842 if (VA.needsCustom()) {
843 assert(VA.getValVT() == MVT::v64i1 &&
844 "Currently the only custom case is when we split v64i1 to 2 regs");
845
846 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
847 Subtarget);
848
849 // Add the second register to the CalleeSaveDisableRegs list.
850 if (ShouldDisableCalleeSavedRegister)
851 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
852 } else {
853 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
854 }
855 }
856
857 SDValue Glue;
859 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
860 // Operand #1 = Bytes To Pop
861 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
862 MVT::i32));
863
864 // Copy the result values into the output registers.
865 for (auto &RetVal : RetVals) {
866 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
867 RetOps.push_back(RetVal.second);
868 continue; // Don't emit a copytoreg.
869 }
870
871 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
872 Glue = Chain.getValue(1);
873 RetOps.push_back(
874 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
875 }
876
877 // Swift calling convention does not require we copy the sret argument
878 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
879
880 // All x86 ABIs require that for returning structs by value we copy
881 // the sret argument into %rax/%eax (depending on ABI) for the return.
882 // We saved the argument into a virtual register in the entry block,
883 // so now we copy the value out and into %rax/%eax.
884 //
885 // Checking Function.hasStructRetAttr() here is insufficient because the IR
886 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
887 // false, then an sret argument may be implicitly inserted in the SelDAG. In
888 // either case FuncInfo->setSRetReturnReg() will have been called.
889 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
890 // When we have both sret and another return value, we should use the
891 // original Chain stored in RetOps[0], instead of the current Chain updated
892 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
893
894 // For the case of sret and another return value, we have
895 // Chain_0 at the function entry
896 // Chain_1 = getCopyToReg(Chain_0) in the above loop
897 // If we use Chain_1 in getCopyFromReg, we will have
898 // Val = getCopyFromReg(Chain_1)
899 // Chain_2 = getCopyToReg(Chain_1, Val) from below
900
901 // getCopyToReg(Chain_0) will be glued together with
902 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
903 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
904 // Data dependency from Unit B to Unit A due to usage of Val in
905 // getCopyToReg(Chain_1, Val)
906 // Chain dependency from Unit A to Unit B
907
908 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
909 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
911
912 Register RetValReg
913 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
914 X86::RAX : X86::EAX;
915 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
916 Glue = Chain.getValue(1);
917
918 // RAX/EAX now acts like a return value.
919 RetOps.push_back(
920 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
921
922 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
923 // this however for preserve_most/preserve_all to minimize the number of
924 // callee-saved registers for these CCs.
925 if (ShouldDisableCalleeSavedRegister &&
926 CallConv != CallingConv::PreserveAll &&
927 CallConv != CallingConv::PreserveMost)
929 }
930
931 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
932 const MCPhysReg *I =
933 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
934 if (I) {
935 for (; *I; ++I) {
936 if (X86::GR64RegClass.contains(*I))
937 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
938 else
939 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
940 }
941 }
942
943 RetOps[0] = Chain; // Update chain.
944
945 // Add the glue if we have it.
946 if (Glue.getNode())
947 RetOps.push_back(Glue);
948
949 unsigned RetOpcode = X86ISD::RET_GLUE;
950 if (CallConv == CallingConv::X86_INTR)
951 RetOpcode = X86ISD::IRET;
952 return DAG.getNode(RetOpcode, dl, MVT::Other, RetOps);
953}
954
955bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
956 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
957 return false;
958
959 SDValue TCChain = Chain;
960 SDNode *Copy = *N->user_begin();
961 if (Copy->getOpcode() == ISD::CopyToReg) {
962 // If the copy has a glue operand, we conservatively assume it isn't safe to
963 // perform a tail call.
964 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
965 return false;
966 TCChain = Copy->getOperand(0);
967 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
968 return false;
969
970 bool HasRet = false;
971 for (const SDNode *U : Copy->users()) {
972 if (U->getOpcode() != X86ISD::RET_GLUE)
973 return false;
974 // If we are returning more than one value, we can definitely
975 // not make a tail call see PR19530
976 if (U->getNumOperands() > 4)
977 return false;
978 if (U->getNumOperands() == 4 &&
979 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
980 return false;
981 HasRet = true;
982 }
983
984 if (!HasRet)
985 return false;
986
987 Chain = TCChain;
988 return true;
989}
990
991EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
992 ISD::NodeType ExtendKind) const {
993 MVT ReturnMVT = MVT::i32;
994
995 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
996 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
997 // The ABI does not require i1, i8 or i16 to be extended.
998 //
999 // On Darwin, there is code in the wild relying on Clang's old behaviour of
1000 // always extending i8/i16 return values, so keep doing that for now.
1001 // (PR26665).
1002 ReturnMVT = MVT::i8;
1003 }
1004
1005 EVT MinVT = getRegisterType(Context, ReturnMVT);
1006 return VT.bitsLT(MinVT) ? MinVT : VT;
1007}
1008
1009/// Reads two 32 bit registers and creates a 64 bit mask value.
1010/// \param VA The current 32 bit value that need to be assigned.
1011/// \param NextVA The next 32 bit value that need to be assigned.
1012/// \param Root The parent DAG node.
1013/// \param [in,out] InGlue Represents SDvalue in the parent DAG node for
1014/// glue purposes. In the case the DAG is already using
1015/// physical register instead of virtual, we should glue
1016/// our new SDValue to InGlue SDvalue.
1017/// \return a new SDvalue of size 64bit.
1019 SDValue &Root, SelectionDAG &DAG,
1020 const SDLoc &DL, const X86Subtarget &Subtarget,
1021 SDValue *InGlue = nullptr) {
1022 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1023 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1024 assert(VA.getValVT() == MVT::v64i1 &&
1025 "Expecting first location of 64 bit width type");
1026 assert(NextVA.getValVT() == VA.getValVT() &&
1027 "The locations should have the same type");
1028 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1029 "The values should reside in two registers");
1030
1031 SDValue Lo, Hi;
1032 SDValue ArgValueLo, ArgValueHi;
1033
1035 const TargetRegisterClass *RC = &X86::GR32RegClass;
1036
1037 // Read a 32 bit value from the registers.
1038 if (nullptr == InGlue) {
1039 // When no physical register is present,
1040 // create an intermediate virtual register.
1041 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1042 ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1043 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1044 ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1045 } else {
1046 // When a physical register is available read the value from it and glue
1047 // the reads together.
1048 ArgValueLo =
1049 DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1050 *InGlue = ArgValueLo.getValue(2);
1051 ArgValueHi =
1052 DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1053 *InGlue = ArgValueHi.getValue(2);
1054 }
1055
1056 // Convert the i32 type into v32i1 type.
1057 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1058
1059 // Convert the i32 type into v32i1 type.
1060 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1061
1062 // Concatenate the two values together.
1063 return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1064}
1065
1066/// The function will lower a register of various sizes (8/16/32/64)
1067/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
1068/// \returns a DAG node contains the operand after lowering to mask type.
1069static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1070 const EVT &ValLoc, const SDLoc &DL,
1071 SelectionDAG &DAG) {
1072 SDValue ValReturned = ValArg;
1073
1074 if (ValVT == MVT::v1i1)
1075 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1076
1077 if (ValVT == MVT::v64i1) {
1078 // In 32 bit machine, this case is handled by getv64i1Argument
1079 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1080 // In 64 bit machine, There is no need to truncate the value only bitcast
1081 } else {
1082 MVT MaskLenVT;
1083 switch (ValVT.getSimpleVT().SimpleTy) {
1084 case MVT::v8i1:
1085 MaskLenVT = MVT::i8;
1086 break;
1087 case MVT::v16i1:
1088 MaskLenVT = MVT::i16;
1089 break;
1090 case MVT::v32i1:
1091 MaskLenVT = MVT::i32;
1092 break;
1093 default:
1094 llvm_unreachable("Expecting a vector of i1 types");
1095 }
1096
1097 ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1098 }
1099 return DAG.getBitcast(ValVT, ValReturned);
1100}
1101
1103 const SDLoc &dl, Register Reg, EVT VT,
1104 SDValue Glue) {
1105 SDVTList VTs = DAG.getVTList(VT, MVT::Other, MVT::Glue);
1106 SDValue Ops[] = {Chain, DAG.getRegister(Reg, VT), Glue};
1107 return DAG.getNode(X86ISD::POP_FROM_X87_REG, dl, VTs,
1108 ArrayRef(Ops, Glue.getNode() ? 3 : 2));
1109}
1110
1111/// Lower the result values of a call into the
1112/// appropriate copies out of appropriate physical registers.
1113///
1114SDValue X86TargetLowering::LowerCallResult(
1115 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1116 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1118 uint32_t *RegMask) const {
1119
1120 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1121 // Assign locations to each value returned by this call.
1123 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1124 *DAG.getContext());
1125 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1126
1127 // Copy all of the result registers out of their specified physreg.
1128 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1129 ++I, ++InsIndex) {
1130 CCValAssign &VA = RVLocs[I];
1131 EVT CopyVT = VA.getLocVT();
1132
1133 // In some calling conventions we need to remove the used registers
1134 // from the register mask.
1135 if (RegMask) {
1136 for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1137 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1138 }
1139
1140 // Report an error if there was an attempt to return FP values via XMM
1141 // registers.
1142 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1143 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
1144 if (VA.getLocReg() == X86::XMM1)
1145 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1146 else
1147 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1148 } else if (!Subtarget.hasSSE2() &&
1149 X86::FR64XRegClass.contains(VA.getLocReg()) &&
1150 CopyVT == MVT::f64) {
1151 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
1152 if (VA.getLocReg() == X86::XMM1)
1153 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1154 else
1155 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1156 }
1157
1158 // If we prefer to use the value in xmm registers, copy it out as f80 and
1159 // use a truncate to move it from fp stack reg to xmm reg.
1160 bool RoundAfterCopy = false;
1161 bool X87Result = VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1;
1162 if (X87Result && isScalarFPTypeInSSEReg(VA.getValVT())) {
1163 if (!Subtarget.hasX87())
1164 report_fatal_error("X87 register return with X87 disabled");
1165 CopyVT = MVT::f80;
1166 RoundAfterCopy = (CopyVT != VA.getLocVT());
1167 }
1168
1169 SDValue Val;
1170 if (VA.needsCustom()) {
1171 assert(VA.getValVT() == MVT::v64i1 &&
1172 "Currently the only custom case is when we split v64i1 to 2 regs");
1173 Val =
1174 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
1175 } else {
1176 Chain =
1177 X87Result
1178 ? getPopFromX87Reg(DAG, Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1179 .getValue(1)
1180 : DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1181 .getValue(1);
1182 Val = Chain.getValue(0);
1183 InGlue = Chain.getValue(2);
1184 }
1185
1186 if (RoundAfterCopy)
1187 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
1188 // This truncation won't change the value.
1189 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1190
1191 if (VA.isExtInLoc()) {
1192 if (VA.getValVT().isVector() &&
1193 VA.getValVT().getScalarType() == MVT::i1 &&
1194 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1195 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1196 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1197 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1198 } else
1199 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1200 }
1201
1202 if (VA.getLocInfo() == CCValAssign::BCvt)
1203 Val = DAG.getBitcast(VA.getValVT(), Val);
1204
1205 InVals.push_back(Val);
1206 }
1207
1208 return Chain;
1209}
1210
1211/// Determines whether Args, either a set of outgoing arguments to a call, or a
1212/// set of incoming args of a call, contains an sret pointer that the callee
1213/// pops. This happens on most x86-32, System V platforms, unless register
1214/// parameters are in use (-mregparm=1+, regcallcc, etc).
1215template <typename T>
1216static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1217 const SmallVectorImpl<CCValAssign> &ArgLocs,
1218 const X86Subtarget &Subtarget) {
1219 // Not C++20 (yet), so no concepts available.
1220 static_assert(std::is_same_v<T, ISD::OutputArg> ||
1221 std::is_same_v<T, ISD::InputArg>,
1222 "requires ISD::OutputArg or ISD::InputArg");
1223
1224 // Popping the sret pointer only happens on x86-32 System V ABI platforms
1225 // (Linux, Cygwin, BSDs, Mac, etc). That excludes Windows-minus-Cygwin and
1226 // MCU.
1227 const Triple &TT = Subtarget.getTargetTriple();
1228 if (!TT.isX86_32() || TT.isOSMSVCRT() || TT.isOSIAMCU())
1229 return false;
1230
1231 // Check if the first argument is marked sret and if it is passed in memory.
1232 bool IsSRetInMem = false;
1233 if (!Args.empty())
1234 IsSRetInMem = Args.front().Flags.isSRet() && ArgLocs.front().isMemLoc();
1235 return IsSRetInMem;
1236}
1237
1238/// Make a copy of an aggregate at address specified by "Src" to address
1239/// "Dst" with size and alignment information specified by the specific
1240/// parameter attribute. The copy will be passed as a byval function parameter.
1242 SDValue Chain, ISD::ArgFlagsTy Flags,
1243 SelectionDAG &DAG, const SDLoc &dl) {
1244 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1245
1246 return DAG.getMemcpy(
1247 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
1248 /*isVolatile*/ false, /*AlwaysInline=*/true,
1249 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
1250}
1251
1252/// Return true if the calling convention is one that we can guarantee TCO for.
1254 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1257}
1258
1259/// Return true if we might ever do TCO for calls with this calling convention.
1261 switch (CC) {
1262 // C calling conventions:
1263 case CallingConv::C:
1264 case CallingConv::Win64:
1267 // Callee pop conventions:
1272 // Swift:
1273 case CallingConv::Swift:
1274 return true;
1275 default:
1276 return canGuaranteeTCO(CC);
1277 }
1278}
1279
1280/// Return true if the function is being made into a tailcall target by
1281/// changing its ABI.
1282static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1283 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1285}
1286
1287bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1288 if (!CI->isTailCall())
1289 return false;
1290
1291 CallingConv::ID CalleeCC = CI->getCallingConv();
1292 if (!mayTailCallThisCC(CalleeCC))
1293 return false;
1294
1295 return true;
1296}
1297
1298SDValue
1299X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1301 const SDLoc &dl, SelectionDAG &DAG,
1302 const CCValAssign &VA,
1303 MachineFrameInfo &MFI, unsigned i) const {
1304 // Create the nodes corresponding to a load from this parameter slot.
1305 ISD::ArgFlagsTy Flags = Ins[i].Flags;
1306 bool AlwaysUseMutable = shouldGuaranteeTCO(
1307 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1308 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1309 EVT ValVT;
1310 MVT PtrVT = getPointerTy(DAG.getDataLayout());
1311
1312 // If value is passed by pointer we have address passed instead of the value
1313 // itself. No need to extend if the mask value and location share the same
1314 // absolute size.
1315 bool ExtendedInMem =
1316 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1318
1319 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1320 ValVT = VA.getLocVT();
1321 else
1322 ValVT = VA.getValVT();
1323
1324 // FIXME: For now, all byval parameter objects are marked mutable. This can be
1325 // changed with more analysis.
1326 // In case of tail call optimization mark all arguments mutable. Since they
1327 // could be overwritten by lowering of arguments in case of a tail call.
1328 if (Flags.isByVal()) {
1329 unsigned Bytes = Flags.getByValSize();
1330 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1331
1332 // FIXME: For now, all byval parameter objects are marked as aliasing. This
1333 // can be improved with deeper analysis.
1334 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
1335 /*isAliased=*/true);
1336 return DAG.getFrameIndex(FI, PtrVT);
1337 }
1338
1339 EVT ArgVT = Ins[i].ArgVT;
1340
1341 // If this is a vector that has been split into multiple parts, don't elide
1342 // the copy. The layout on the stack may not match the packed in-memory
1343 // layout.
1344 bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1345
1346 // This is an argument in memory. We might be able to perform copy elision.
1347 // If the argument is passed directly in memory without any extension, then we
1348 // can perform copy elision. Large vector types, for example, may be passed
1349 // indirectly by pointer.
1350 if (Flags.isCopyElisionCandidate() &&
1351 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1352 !ScalarizedVector) {
1353 SDValue PartAddr;
1354 if (Ins[i].PartOffset == 0) {
1355 // If this is a one-part value or the first part of a multi-part value,
1356 // create a stack object for the entire argument value type and return a
1357 // load from our portion of it. This assumes that if the first part of an
1358 // argument is in memory, the rest will also be in memory.
1359 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
1360 /*IsImmutable=*/false);
1361 PartAddr = DAG.getFrameIndex(FI, PtrVT);
1362 return DAG.getLoad(
1363 ValVT, dl, Chain, PartAddr,
1365 }
1366
1367 // This is not the first piece of an argument in memory. See if there is
1368 // already a fixed stack object including this offset. If so, assume it
1369 // was created by the PartOffset == 0 branch above and create a load from
1370 // the appropriate offset into it.
1371 int64_t PartBegin = VA.getLocMemOffset();
1372 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1373 int FI = MFI.getObjectIndexBegin();
1374 for (; MFI.isFixedObjectIndex(FI); ++FI) {
1375 int64_t ObjBegin = MFI.getObjectOffset(FI);
1376 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
1377 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1378 break;
1379 }
1380 if (MFI.isFixedObjectIndex(FI)) {
1381 SDValue Addr =
1382 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
1383 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
1384 return DAG.getLoad(ValVT, dl, Chain, Addr,
1386 DAG.getMachineFunction(), FI, Ins[i].PartOffset));
1387 }
1388 }
1389
1390 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1391 VA.getLocMemOffset(), isImmutable);
1392
1393 // Set SExt or ZExt flag.
1394 if (VA.getLocInfo() == CCValAssign::ZExt) {
1395 MFI.setObjectZExt(FI, true);
1396 } else if (VA.getLocInfo() == CCValAssign::SExt) {
1397 MFI.setObjectSExt(FI, true);
1398 }
1399
1400 MaybeAlign Alignment;
1401 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1402 ValVT != MVT::f80)
1403 Alignment = MaybeAlign(4);
1404 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1405 SDValue Val = DAG.getLoad(
1406 ValVT, dl, Chain, FIN,
1408 Alignment);
1409 return ExtendedInMem
1410 ? (VA.getValVT().isVector()
1411 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
1412 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
1413 : Val;
1414}
1415
1416// FIXME: Get this from tablegen.
1418 const X86Subtarget &Subtarget) {
1419 assert(Subtarget.is64Bit());
1420
1421 if (Subtarget.isCallingConvWin64(CallConv)) {
1422 static const MCPhysReg GPR64ArgRegsWin64[] = {
1423 X86::RCX, X86::RDX, X86::R8, X86::R9
1424 };
1425 return GPR64ArgRegsWin64;
1426 }
1427
1428 static const MCPhysReg GPR64ArgRegs64Bit[] = {
1429 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1430 };
1431 return GPR64ArgRegs64Bit;
1432}
1433
1434// FIXME: Get this from tablegen.
1436 CallingConv::ID CallConv,
1437 const X86Subtarget &Subtarget) {
1438 assert(Subtarget.is64Bit());
1439 if (Subtarget.isCallingConvWin64(CallConv)) {
1440 // The XMM registers which might contain var arg parameters are shadowed
1441 // in their paired GPR. So we only need to save the GPR to their home
1442 // slots.
1443 // TODO: __vectorcall will change this.
1444 return {};
1445 }
1446
1447 bool isSoftFloat = Subtarget.useSoftFloat();
1448 if (isSoftFloat || !Subtarget.hasSSE1())
1449 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1450 // registers.
1451 return {};
1452
1453 static const MCPhysReg XMMArgRegs64Bit[] = {
1454 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1455 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1456 };
1457 return XMMArgRegs64Bit;
1458}
1459
1460#ifndef NDEBUG
1462 return llvm::is_sorted(
1463 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
1464 return A.getValNo() < B.getValNo();
1465 });
1466}
1467#endif
1468
namespace {
/// This is a helper class for lowering variable arguments parameters.
class VarArgsLoweringHelper {
public:
  VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
                        SelectionDAG &DAG, const X86Subtarget &Subtarget,
                        CallingConv::ID CallConv, CCState &CCInfo)
      : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
        TheMachineFunction(DAG.getMachineFunction()),
        TheFunction(TheMachineFunction.getFunction()),
        FrameInfo(TheMachineFunction.getFrameInfo()),
        FrameLowering(*Subtarget.getFrameLowering()),
        TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
        CCInfo(CCInfo) {}

  // Lower variable arguments parameters.
  void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);

private:
  // Create the vararg save area and spill unallocated argument registers
  // into it (used when the function contains va_start).
  void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);

  // Copy forwarded registers into virtual registers for musttail calls in
  // vararg functions.
  void forwardMustTailParameters(SDValue &Chain);

  // Convenience queries over the subtarget / calling convention.
  bool is64Bit() const { return Subtarget.is64Bit(); }
  bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }

  X86MachineFunctionInfo *FuncInfo;
  const SDLoc &DL;
  SelectionDAG &DAG;
  const X86Subtarget &Subtarget;
  MachineFunction &TheMachineFunction;
  const Function &TheFunction;
  MachineFrameInfo &FrameInfo;
  const TargetFrameLowering &FrameLowering;
  const TargetLowering &TargLowering;
  CallingConv::ID CallConv;
  CCState &CCInfo;
};
} // namespace
1508
1509void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1510 SDValue &Chain, unsigned StackSize) {
1511 // If the function takes variable number of arguments, make a frame index for
1512 // the start of the first vararg value... for expansion of llvm.va_start. We
1513 // can skip this if there are no va_start calls.
1514 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1515 CallConv != CallingConv::X86_ThisCall)) {
1516 FuncInfo->setVarArgsFrameIndex(
1517 FrameInfo.CreateFixedObject(1, StackSize, true));
1518 }
1519
1520 // 64-bit calling conventions support varargs and register parameters, so we
1521 // have to do extra work to spill them in the prologue.
1522 if (is64Bit()) {
1523 // Find the first unallocated argument registers.
1524 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1525 ArrayRef<MCPhysReg> ArgXMMs =
1526 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
1527 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
1528 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
1529
1530 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1531 "SSE register cannot be used when SSE is disabled!");
1532
1533 if (isWin64()) {
1534 // Get to the caller-allocated home save location. Add 8 to account
1535 // for the return address.
1536 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1537 FuncInfo->setRegSaveFrameIndex(
1538 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
1539 // Fixup to set vararg frame on shadow area (4 x i64).
1540 if (NumIntRegs < 4)
1541 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1542 } else {
1543 // For X86-64, if there are vararg parameters that are passed via
1544 // registers, then we must store them to their spots on the stack so
1545 // they may be loaded by dereferencing the result of va_next.
1546 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1547 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1548 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1549 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
1550 }
1551
1553 LiveGPRs; // list of SDValue for GPR registers keeping live input value
1554 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
1555 // keeping live input value
1556 SDValue ALVal; // if applicable keeps SDValue for %al register
1557
1558 // Gather all the live in physical registers.
1559 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
1560 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1561 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1562 }
1563 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
1564 if (!AvailableXmms.empty()) {
1565 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1566 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1567 for (MCPhysReg Reg : AvailableXmms) {
1568 // FastRegisterAllocator spills virtual registers at basic
1569 // block boundary. That leads to usages of xmm registers
1570 // outside of check for %al. Pass physical registers to
1571 // VASTART_SAVE_XMM_REGS to avoid unneccessary spilling.
1572 TheMachineFunction.getRegInfo().addLiveIn(Reg);
1573 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1574 }
1575 }
1576
1577 // Store the integer parameter registers.
1579 SDValue RSFIN =
1580 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
1581 TargLowering.getPointerTy(DAG.getDataLayout()));
1582 unsigned Offset = FuncInfo->getVarArgsGPOffset();
1583 for (SDValue Val : LiveGPRs) {
1584 SDValue FIN = DAG.getNode(ISD::ADD, DL,
1585 TargLowering.getPointerTy(DAG.getDataLayout()),
1586 RSFIN, DAG.getIntPtrConstant(Offset, DL));
1587 SDValue Store =
1588 DAG.getStore(Val.getValue(1), DL, Val, FIN,
1590 DAG.getMachineFunction(),
1591 FuncInfo->getRegSaveFrameIndex(), Offset));
1592 MemOps.push_back(Store);
1593 Offset += 8;
1594 }
1595
1596 // Now store the XMM (fp + vector) parameter registers.
1597 if (!LiveXMMRegs.empty()) {
1598 SmallVector<SDValue, 12> SaveXMMOps;
1599 SaveXMMOps.push_back(Chain);
1600 SaveXMMOps.push_back(ALVal);
1601 SaveXMMOps.push_back(RSFIN);
1602 SaveXMMOps.push_back(
1603 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1604 llvm::append_range(SaveXMMOps, LiveXMMRegs);
1605 MachineMemOperand *StoreMMO =
1608 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
1609 Offset),
1611 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
1612 DL, DAG.getVTList(MVT::Other),
1613 SaveXMMOps, MVT::i8, StoreMMO));
1614 }
1615
1616 if (!MemOps.empty())
1617 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1618 }
1619}
1620
1621void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1622 // Find the largest legal vector type.
1623 MVT VecVT = MVT::Other;
1624 // FIXME: Only some x86_32 calling conventions support AVX512.
1625 if (Subtarget.useAVX512Regs() &&
1626 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1627 CallConv == CallingConv::Intel_OCL_BI)))
1628 VecVT = MVT::v16f32;
1629 else if (Subtarget.hasAVX())
1630 VecVT = MVT::v8f32;
1631 else if (Subtarget.hasSSE2())
1632 VecVT = MVT::v4f32;
1633
1634 // We forward some GPRs and some vector types.
1635 SmallVector<MVT, 2> RegParmTypes;
1636 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1637 RegParmTypes.push_back(IntVT);
1638 if (VecVT != MVT::Other)
1639 RegParmTypes.push_back(VecVT);
1640
1641 // Compute the set of forwarded registers. The rest are scratch.
1643 FuncInfo->getForwardedMustTailRegParms();
1644 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
1645
1646 // Forward AL for SysV x86_64 targets, since it is used for varargs.
1647 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1648 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1649 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1650 }
1651
1652 // Copy all forwards from physical to virtual registers.
1653 for (ForwardedRegister &FR : Forwards) {
1654 // FIXME: Can we use a less constrained schedule?
1655 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
1656 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1657 TargLowering.getRegClassFor(FR.VT));
1658 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
1659 }
1660}
1661
1662void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1663 unsigned StackSize) {
1664 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
1665 // If necessary, it would be set into the correct value later.
1666 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1667 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1668
1669 if (FrameInfo.hasVAStart())
1670 createVarArgAreaAndStoreRegisters(Chain, StackSize);
1671
1672 if (FrameInfo.hasMustTailInVarArgFunc())
1673 forwardMustTailParameters(Chain);
1674}
1675
1676SDValue X86TargetLowering::LowerFormalArguments(
1677 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1678 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1679 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1680 MachineFunction &MF = DAG.getMachineFunction();
1681 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1682
1683 const Function &F = MF.getFunction();
1684 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1685 F.getName() == "main")
1686 FuncInfo->setForceFramePointer(true);
1687
1688 MachineFrameInfo &MFI = MF.getFrameInfo();
1689 bool Is64Bit = Subtarget.is64Bit();
1690 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
1691
1692 // On x86_64 with x87 disabled, x86_fp80 cannot be handled: the type would
1693 // need to be returned/passed in x87 registers (FP0/FP1) which are
1694 // unavailable. Emit a clear diagnostic instead of crashing later with
1695 // "Cannot select: build_pair".
1696 if (Is64Bit && !Subtarget.hasX87()) {
1697 if (F.getReturnType()->isX86_FP80Ty() ||
1698 any_of(F.args(), [](const Argument &Arg) {
1699 return Arg.getType()->isX86_FP80Ty();
1700 }))
1702 "cannot use x86_fp80 type with x87 disabled on x86_64 target");
1703 }
1704
1705 assert(
1706 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
1707 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
1708
1709 // Assign locations to all of the incoming arguments.
1711 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1712
1713 // Allocate shadow area for Win64.
1714 if (IsWin64)
1715 CCInfo.AllocateStack(32, Align(8));
1716
1717 CCInfo.AnalyzeArguments(Ins, CC_X86);
1718
1719 // In vectorcall calling convention a second pass is required for the HVA
1720 // types.
1721 if (CallingConv::X86_VectorCall == CallConv) {
1722 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
1723 }
1724
1725 // The next loop assumes that the locations are in the same order of the
1726 // input arguments.
1727 assert(isSortedByValueNo(ArgLocs) &&
1728 "Argument Location list must be sorted before lowering");
1729
1730 SDValue ArgValue;
1731 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1732 ++I, ++InsIndex) {
1733 assert(InsIndex < Ins.size() && "Invalid Ins index");
1734 CCValAssign &VA = ArgLocs[I];
1735
1736 if (VA.isRegLoc()) {
1737 EVT RegVT = VA.getLocVT();
1738 if (VA.needsCustom()) {
1739 assert(
1740 VA.getValVT() == MVT::v64i1 &&
1741 "Currently the only custom case is when we split v64i1 to 2 regs");
1742
1743 // v64i1 values, in regcall calling convention, that are
1744 // compiled to 32 bit arch, are split up into two registers.
1745 ArgValue =
1746 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
1747 } else {
1748 const TargetRegisterClass *RC;
1749 if (RegVT == MVT::i8)
1750 RC = &X86::GR8RegClass;
1751 else if (RegVT == MVT::i16)
1752 RC = &X86::GR16RegClass;
1753 else if (RegVT == MVT::i32)
1754 RC = &X86::GR32RegClass;
1755 else if (Is64Bit && RegVT == MVT::i64)
1756 RC = &X86::GR64RegClass;
1757 else if (RegVT == MVT::f16)
1758 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1759 else if (RegVT == MVT::f32)
1760 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1761 else if (RegVT == MVT::f64)
1762 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1763 else if (RegVT == MVT::f80)
1764 RC = &X86::RFP80RegClass;
1765 else if (RegVT == MVT::f128)
1766 RC = &X86::VR128RegClass;
1767 else if (RegVT.is512BitVector())
1768 RC = &X86::VR512RegClass;
1769 else if (RegVT.is256BitVector())
1770 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1771 else if (RegVT.is128BitVector())
1772 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1773 else if (RegVT == MVT::x86mmx)
1774 RC = &X86::VR64RegClass;
1775 else if (RegVT == MVT::v1i1)
1776 RC = &X86::VK1RegClass;
1777 else if (RegVT == MVT::v8i1)
1778 RC = &X86::VK8RegClass;
1779 else if (RegVT == MVT::v16i1)
1780 RC = &X86::VK16RegClass;
1781 else if (RegVT == MVT::v32i1)
1782 RC = &X86::VK32RegClass;
1783 else if (RegVT == MVT::v64i1)
1784 RC = &X86::VK64RegClass;
1785 else
1786 llvm_unreachable("Unknown argument type!");
1787
1788 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1789 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1790 }
1791
1792 // If this is an 8 or 16-bit value, it is really passed promoted to 32
1793 // bits. Insert an assert[sz]ext to capture this, then truncate to the
1794 // right size.
1795 if (VA.getLocInfo() == CCValAssign::SExt)
1796 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1797 DAG.getValueType(VA.getValVT()));
1798 else if (VA.getLocInfo() == CCValAssign::ZExt)
1799 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1800 DAG.getValueType(VA.getValVT()));
1801 else if (VA.getLocInfo() == CCValAssign::BCvt)
1802 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
1803
1804 if (VA.isExtInLoc()) {
1805 // Handle MMX values passed in XMM regs.
1806 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1807 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
1808 else if (VA.getValVT().isVector() &&
1809 VA.getValVT().getScalarType() == MVT::i1 &&
1810 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1811 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1812 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1813 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
1814 } else
1815 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1816 }
1817 } else {
1818 assert(VA.isMemLoc());
1819 ArgValue =
1820 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
1821 }
1822
1823 // If value is passed via pointer - do a load.
1824 if (VA.getLocInfo() == CCValAssign::Indirect &&
1825 !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1826 ArgValue =
1827 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
1828 }
1829
1830 InVals.push_back(ArgValue);
1831 }
1832
1833 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1834 if (Ins[I].Flags.isSwiftAsync()) {
1835 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1836 if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1837 X86FI->setHasSwiftAsyncContext(true);
1838 else {
1839 int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1840 int FI =
1841 MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
1842 X86FI->setSwiftAsyncContextFrameIdx(FI);
1843 SDValue St = DAG.getStore(
1844 DAG.getEntryNode(), dl, InVals[I],
1845 DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1847 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1848 }
1849 }
1850
1851 // Swift calling convention does not require we copy the sret argument
1852 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1853 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1854 continue;
1855
1856 // All x86 ABIs require that for returning structs by value we copy the
1857 // sret argument into %rax/%eax (depending on ABI) for the return. Save
1858 // the argument into a virtual register so that we can access it from the
1859 // return points.
1860 if (Ins[I].Flags.isSRet()) {
1861 assert(!FuncInfo->getSRetReturnReg() &&
1862 "SRet return has already been set");
1863 MVT PtrTy = getPointerTy(DAG.getDataLayout());
1864 Register Reg =
1866 FuncInfo->setSRetReturnReg(Reg);
1867 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
1868 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1869 break;
1870 }
1871 }
1872
1873 unsigned StackSize = CCInfo.getStackSize();
1874 // Align stack specially for tail calls.
1875 if (shouldGuaranteeTCO(CallConv,
1877 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1878
1879 if (IsVarArg)
1880 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1881 .lowerVarArgsParameters(Chain, StackSize);
1882
1883 // Some CCs need callee pop.
1884 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
1886 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1887 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1888 // X86 interrupts must pop the error code (and the alignment padding) if
1889 // present.
1890 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1891 } else {
1892 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1893 // If this is an sret function, the return should pop the hidden pointer.
1894 if (hasCalleePopSRet(Ins, ArgLocs, Subtarget))
1895 FuncInfo->setBytesToPopOnReturn(4);
1896 }
1897
1898 if (!Is64Bit) {
1899 // RegSaveFrameIndex is X86-64 only.
1900 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1901 }
1902
1903 FuncInfo->setArgumentStackSize(StackSize);
1904
1905 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1906 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
1907 if (Personality == EHPersonality::CoreCLR) {
1908 assert(Is64Bit);
1909 // TODO: Add a mechanism to frame lowering that will allow us to indicate
1910 // that we'd prefer this slot be allocated towards the bottom of the frame
1911 // (i.e. near the stack pointer after allocating the frame). Every
1912 // funclet needs a copy of this slot in its (mostly empty) frame, and the
1913 // offset from the bottom of this and each funclet's frame must be the
1914 // same, so the size of funclets' (mostly empty) frames is dictated by
1915 // how far this slot is from the bottom (since they allocate just enough
1916 // space to accommodate holding this slot at the correct offset).
1917 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
1918 EHInfo->PSPSymFrameIdx = PSPSymFI;
1919 }
1920 }
1921
1922 if (shouldDisableArgRegFromCSR(CallConv) ||
1923 F.hasFnAttribute("no_caller_saved_registers")) {
1924 MachineRegisterInfo &MRI = MF.getRegInfo();
1925 for (std::pair<MCRegister, Register> Pair : MRI.liveins())
1926 MRI.disableCalleeSavedRegister(Pair.first);
1927 }
1928
1929 if (CallingConv::PreserveNone == CallConv)
1930 for (const ISD::InputArg &In : Ins) {
1931 if (In.Flags.isSwiftSelf() || In.Flags.isSwiftAsync() ||
1932 In.Flags.isSwiftError()) {
1933 errorUnsupported(DAG, dl,
1934 "Swift attributes can't be used with preserve_none");
1935 break;
1936 }
1937 }
1938
1939 return Chain;
1940}
1941
/// Lower a single outgoing call argument that was assigned a stack location:
/// compute the slot address StackPtr + LocMemOffset, then either memcpy a
/// byval argument into the slot or emit a plain store of the value.
SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
                                            SDValue Arg, const SDLoc &dl,
                                            SelectionDAG &DAG,
                                            const CCValAssign &VA,
                                            ISD::ArgFlagsTy Flags,
                                            bool isByVal) const {
  // Address of this argument's slot in the outgoing argument area.
  unsigned LocMemOffset = VA.getLocMemOffset();
  SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
  PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
                       StackPtr, PtrOff);
  // byval arguments are copied wholesale into the slot rather than stored as
  // a single value.
  if (isByVal)
    return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);

  // On 32-bit Windows/MSVC the stack is only guaranteed 4-byte aligned, so
  // cap the store alignment (f80 excepted).
  MaybeAlign Alignment;
  if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
      Arg.getSimpleValueType() != MVT::f80)
    Alignment = MaybeAlign(4);
  // NOTE(review): the pointer-info operand of this getStore call appears to
  // have been lost in this copy of the file (the store normally carries a
  // MachinePointerInfo for the stack slot) — verify against upstream.
  return DAG.getStore(
      Chain, dl, Arg, PtrOff,
      Alignment);
}
1964
1965/// Emit a load of return address if tail call
1966/// optimization is performed and it is required.
1967SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1968 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1969 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1970 // Adjust the Return address stack slot.
1971 EVT VT = getPointerTy(DAG.getDataLayout());
1972 OutRetAddr = getReturnAddressFrameIndex(DAG);
1973
1974 // Load the "old" Return address.
1975 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
1976 return SDValue(OutRetAddr.getNode(), 1);
1977}
1978
/// Emit a store of the return address if tail call
/// optimization is performed and it is required (FPDiff != 0): the previously
/// loaded return address is written into a new fixed stack slot at the
/// adjusted offset.
/// NOTE(review): the first line of this function's signature appears to be
/// missing in this copy of the file — verify against upstream.
                                 SDValue Chain, SDValue RetAddrFrIdx,
                                 EVT PtrVT, unsigned SlotSize,
                                 int FPDiff, const SDLoc &dl) {
  // Store the return address to the appropriate stack slot.
  if (!FPDiff) return Chain; // No delta: the slot is already in place.
  // Calculate the new stack slot for the return address.
  int NewReturnAddrFI =
    MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
                                        false);
  SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
  // Write the saved return address into the new slot.
  Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
                           DAG.getMachineFunction(), NewReturnAddrFI));
  return Chain;
}
1997
1998/// Returns a vector_shuffle mask for an movs{s|d}, movd
1999/// operation of specified width.
2000SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
2001 SDValue V1, SDValue V2) const {
2002 unsigned NumElems = VT.getVectorNumElements();
2003 SmallVector<int, 8> Mask;
2004 Mask.push_back(NumElems);
2005 for (unsigned i = 1; i != NumElems; ++i)
2006 Mask.push_back(i);
2007 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
2008}
2009
// Returns the type of copying which is required to set up a byval argument to
// a tail-called function. This isn't needed for non-tail calls, because they
// always need the equivalent of CopyOnce, but tail-calls sometimes need two to
// avoid clobbering another argument (CopyViaTemp), and sometimes can be
// optimised to zero copies when forwarding an argument from the caller's
// caller (NoCopy).
X86TargetLowering::ByValCopyKind X86TargetLowering::ByValNeedsCopyForTailCall(
    SelectionDAG &DAG, SDValue Src, SDValue Dst, ISD::ArgFlagsTy Flags) const {
  MachineFrameInfo &MFI = DAG.getMachineFunction().getFrameInfo();

  // Globals are always safe to copy from.
  // NOTE(review): the guarding condition for this early return appears to be
  // missing in this copy of the file — verify against upstream.
    return CopyOnce;

  // Can only analyse frame index nodes, conservatively assume we need a
  // temporary.
  auto *SrcFrameIdxNode = dyn_cast<FrameIndexSDNode>(Src);
  auto *DstFrameIdxNode = dyn_cast<FrameIndexSDNode>(Dst);
  if (!SrcFrameIdxNode || !DstFrameIdxNode)
    return CopyViaTemp;

  int SrcFI = SrcFrameIdxNode->getIndex();
  int DstFI = DstFrameIdxNode->getIndex();
  // The destination is expected to be a fixed object — the callee's incoming
  // argument slot in the stack argument area.
  assert(MFI.isFixedObjectIndex(DstFI) &&
         "byval passed in non-fixed stack slot");

  int64_t SrcOffset = MFI.getObjectOffset(SrcFI);
  int64_t DstOffset = MFI.getObjectOffset(DstFI);

  // If the source is in the local frame, then the copy to the argument
  // memory is always valid.
  bool FixedSrc = MFI.isFixedObjectIndex(SrcFI);
  if (!FixedSrc || (FixedSrc && SrcOffset < 0))
    return CopyOnce;

  // If the value is already in the correct location, then no copying is
  // needed. If not, then we need to copy via a temporary.
  if (SrcOffset == DstOffset)
    return NoCopy;
  else
    return CopyViaTemp;
}
2052
2053SDValue
2054X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2055 SmallVectorImpl<SDValue> &InVals) const {
2056 SelectionDAG &DAG = CLI.DAG;
2057 SDLoc &dl = CLI.DL;
2058 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2059 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2060 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2061 SDValue Chain = CLI.Chain;
2062 SDValue Callee = CLI.Callee;
2063 CallingConv::ID CallConv = CLI.CallConv;
2064 bool &isTailCall = CLI.IsTailCall;
2065 bool isVarArg = CLI.IsVarArg;
2066 const auto *CB = CLI.CB;
2067
2068 MachineFunction &MF = DAG.getMachineFunction();
2069 bool Is64Bit = Subtarget.is64Bit();
2070 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2071 bool ShouldGuaranteeTCO = shouldGuaranteeTCO(
2072 CallConv, MF.getTarget().Options.GuaranteedTailCallOpt);
2073 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2074 bool HasNCSR = (CB && isa<CallInst>(CB) &&
2075 CB->hasFnAttr("no_caller_saved_registers"));
2076 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
2077 bool IsCFICall = IsIndirectCall && CLI.CFIType;
2078 const Module *M = MF.getFunction().getParent();
2079
2080 // If the indirect call target has the nocf_check attribute, the call needs
2081 // the NOTRACK prefix. For simplicity just disable tail calls as there are
2082 // so many variants.
2083 // FIXME: This will cause backend errors if the user forces the issue.
2084 bool IsNoTrackIndirectCall = IsIndirectCall && CB->doesNoCfCheck() &&
2085 M->getModuleFlag("cf-protection-branch");
2086 if (IsNoTrackIndirectCall)
2087 isTailCall = false;
2088
2089 MachineFunction::CallSiteInfo CSInfo;
2090 if (CallConv == CallingConv::X86_INTR)
2091 report_fatal_error("X86 interrupts may not be called directly");
2092
2093 // Set type id for call site info.
2094 setTypeIdForCallsiteInfo(CB, MF, CSInfo);
2095
2096 if (IsIndirectCall && !IsWin64 &&
2097 M->getModuleFlag("import-call-optimization"))
2098 errorUnsupported(DAG, dl,
2099 "Indirect calls must have a normal calling convention if "
2100 "Import Call Optimization is enabled");
2101
2102 // Analyze operands of the call, assigning locations to each operand.
2104 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2105
2106 // Allocate shadow area for Win64.
2107 if (IsWin64)
2108 CCInfo.AllocateStack(32, Align(8));
2109
2110 CCInfo.AnalyzeArguments(Outs, CC_X86);
2111
2112 // In vectorcall calling convention a second pass is required for the HVA
2113 // types.
2114 if (CallingConv::X86_VectorCall == CallConv) {
2115 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2116 }
2117
2118 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2119 bool IsSibcall = false;
2120 if (isTailCall && ShouldGuaranteeTCO) {
2121 // If we need to guarantee TCO for a non-musttail call, we just need to make
2122 // sure the conventions match. If a tail call uses one of the supported TCO
2123 // conventions and the caller and callee match, we can tail call any
2124 // function prototype.
2125 CallingConv::ID CallerCC = MF.getFunction().getCallingConv();
2126 isTailCall = (CallConv == CallerCC);
2127 IsSibcall = IsMustTail;
2128 } else if (isTailCall) {
2129 // Check if this tail call is a "sibling" call, which is loosely defined to
2130 // be a tail call that doesn't require heroics like moving the return
2131 // address or swapping byval arguments. We treat some musttail calls as
2132 // sibling calls to avoid unnecessary argument copies.
2133 IsSibcall = isEligibleForSiblingCallOpt(CLI, CCInfo, ArgLocs);
2134 isTailCall = IsSibcall || IsMustTail;
2135 }
2136
2137 if (isTailCall)
2138 ++NumTailCalls;
2139
2140 if (IsMustTail && !isTailCall)
2141 report_fatal_error("failed to perform tail call elimination on a call "
2142 "site marked musttail");
2143
2144 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2145 "Var args not supported with calling convention fastcc, ghc or hipe");
2146
2147 // Get a count of how many bytes are to be pushed on the stack.
2148 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2149 if (IsSibcall)
2150 // This is a sibcall. The memory operands are available in caller's
2151 // own caller's stack.
2152 NumBytes = 0;
2153 else if (ShouldGuaranteeTCO && canGuaranteeTCO(CallConv))
2154 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2155
2156 // A sibcall is ABI-compatible and does not need to adjust the stack pointer.
2157 int FPDiff = 0;
2158 if (isTailCall && ShouldGuaranteeTCO && !IsSibcall) {
2159 // Lower arguments at fp - stackoffset + fpdiff.
2160 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2161
2162 FPDiff = NumBytesCallerPushed - NumBytes;
2163
2164 // Set the delta of movement of the returnaddr stackslot.
2165 // But only set if delta is greater than previous delta.
2166 if (FPDiff < X86Info->getTCReturnAddrDelta())
2167 X86Info->setTCReturnAddrDelta(FPDiff);
2168 }
2169
2170 unsigned NumBytesToPush = NumBytes;
2171 unsigned NumBytesToPop = NumBytes;
2172
2174 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2175
2176 // If we are doing a tail-call, any byval arguments will be written to stack
  // space which was used for incoming arguments. If any of the values being
2178 // are incoming byval arguments to this function, then they might be
2179 // overwritten by the stores of the outgoing arguments. To avoid this, we
2180 // need to make a temporary copy of them in local stack space, then copy back
2181 // to the argument area.
2182 // FIXME: There's potential to improve the code by using virtual registers for
2183 // temporary storage, and letting the register allocator spill if needed.
2184 SmallVector<SDValue, 8> ByValTemporaries;
2185 SDValue ByValTempChain;
2186 if (isTailCall) {
2187 // Use null SDValue to mean "no temporary recorded for this arg index".
2188 ByValTemporaries.assign(OutVals.size(), SDValue());
2189
2190 SmallVector<SDValue, 8> ByValCopyChains;
2191 for (const CCValAssign &VA : ArgLocs) {
2192 unsigned ArgIdx = VA.getValNo();
2193 SDValue Src = OutVals[ArgIdx];
2194 ISD::ArgFlagsTy Flags = Outs[ArgIdx].Flags;
2195
2196 if (!Flags.isByVal())
2197 continue;
2198
2199 auto PtrVT = getPointerTy(DAG.getDataLayout());
2200
2201 if (!StackPtr.getNode())
2202 StackPtr =
2203 DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(), PtrVT);
2204
2205 // Destination: where this byval should live in the callee’s frame
2206 // after the tail call.
2207 int64_t Offset = VA.getLocMemOffset() + FPDiff;
2208 uint64_t Size = VA.getLocVT().getFixedSizeInBits() / 8;
2210 /*IsImmutable=*/true);
2211 SDValue Dst = DAG.getFrameIndex(FI, PtrVT);
2212
2213 ByValCopyKind Copy = ByValNeedsCopyForTailCall(DAG, Src, Dst, Flags);
2214
2215 if (Copy == NoCopy) {
2216 // If the argument is already at the correct offset on the stack
2217 // (because we are forwarding a byval argument from our caller), we
2218 // don't need any copying.
2219 continue;
2220 } else if (Copy == CopyOnce) {
2221 // If the argument is in our local stack frame, no other argument
2222 // preparation can clobber it, so we can copy it to the final location
2223 // later.
2224 ByValTemporaries[ArgIdx] = Src;
2225 } else {
2226 assert(Copy == CopyViaTemp && "unexpected enum value");
2227 // If we might be copying this argument from the outgoing argument
2228 // stack area, we need to copy via a temporary in the local stack
2229 // frame.
2230 MachineFrameInfo &MFI = MF.getFrameInfo();
2231 int TempFrameIdx = MFI.CreateStackObject(Flags.getByValSize(),
2232 Flags.getNonZeroByValAlign(),
2233 /*isSS=*/false);
2234 SDValue Temp =
2235 DAG.getFrameIndex(TempFrameIdx, getPointerTy(DAG.getDataLayout()));
2236
2237 SDValue CopyChain =
2238 CreateCopyOfByValArgument(Src, Temp, Chain, Flags, DAG, dl);
2239 ByValCopyChains.push_back(CopyChain);
2240 ByValTemporaries[ArgIdx] = Temp;
2241 }
2242 }
2243 if (!ByValCopyChains.empty())
2244 ByValTempChain =
2245 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, ByValCopyChains);
2246 }
2247
2248 // If we have an inalloca argument, all stack space has already been allocated
2249 // for us and be right at the top of the stack. We don't support multiple
2250 // arguments passed in memory when using inalloca.
2251 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2252 NumBytesToPush = 0;
2253 if (!ArgLocs.back().isMemLoc())
2254 report_fatal_error("cannot use inalloca attribute on a register "
2255 "parameter");
2256 if (ArgLocs.back().getLocMemOffset() != 0)
2257 report_fatal_error("any parameter with the inalloca attribute must be "
2258 "the only memory argument");
2259 } else if (CLI.IsPreallocated) {
2260 assert(ArgLocs.back().isMemLoc() &&
2261 "cannot use preallocated attribute on a register "
2262 "parameter");
2263 SmallVector<size_t, 4> PreallocatedOffsets;
2264 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2265 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2266 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
2267 }
2268 }
2269 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
2270 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
2271 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
2272 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
2273 NumBytesToPush = 0;
2274 }
2275
2276 if (!IsSibcall && !IsMustTail)
2277 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
2278 NumBytes - NumBytesToPush, dl);
2279
2280 SDValue RetAddrFrIdx;
2281 // Load return address for tail calls.
2282 if (isTailCall && FPDiff)
2283 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2284 Is64Bit, FPDiff, dl);
2285
2287 SmallVector<SDValue, 8> MemOpChains;
2288
2289 // The next loop assumes that the locations are in the same order of the
2290 // input arguments.
2291 assert(isSortedByValueNo(ArgLocs) &&
2292 "Argument Location list must be sorted before lowering");
2293
2294 // Walk the register/memloc assignments, inserting copies/loads. In the case
  // of tail call optimization, arguments are handled later.
2296 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2297 ++I, ++OutIndex) {
2298 assert(OutIndex < Outs.size() && "Invalid Out index");
2299 // Skip inalloca/preallocated arguments, they have already been written.
2300 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2301 if (Flags.isInAlloca() || Flags.isPreallocated())
2302 continue;
2303
2304 CCValAssign &VA = ArgLocs[I];
2305 EVT RegVT = VA.getLocVT();
2306 SDValue Arg = OutVals[OutIndex];
2307 bool isByVal = Flags.isByVal();
2308
2309 // Promote the value if needed.
2310 switch (VA.getLocInfo()) {
2311 default: llvm_unreachable("Unknown loc info!");
2312 case CCValAssign::Full: break;
2313 case CCValAssign::SExt:
2314 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2315 break;
2316 case CCValAssign::ZExt:
2317 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2318 break;
2319 case CCValAssign::AExt:
2320 if (Arg.getValueType().isVector() &&
2321 Arg.getValueType().getVectorElementType() == MVT::i1)
2322 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
2323 else if (RegVT.is128BitVector()) {
2324 // Special case: passing MMX values in XMM registers.
2325 Arg = DAG.getBitcast(MVT::i64, Arg);
2326 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2327 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2328 } else
2329 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2330 break;
2331 case CCValAssign::BCvt:
2332 Arg = DAG.getBitcast(RegVT, Arg);
2333 break;
2334 case CCValAssign::Indirect: {
2335 if (isByVal) {
2336 // Memcpy the argument to a temporary stack slot to prevent
2337 // the caller from seeing any modifications the callee may make
2338 // as guaranteed by the `byval` attribute.
2339 int FrameIdx = MF.getFrameInfo().CreateStackObject(
2340 Flags.getByValSize(),
2341 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
2342 SDValue StackSlot =
2343 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
2344 Chain =
2345 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
2346 // From now on treat this as a regular pointer
2347 Arg = StackSlot;
2348 isByVal = false;
2349 } else {
2350 // Store the argument.
2351 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2352 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2353 Chain = DAG.getStore(
2354 Chain, dl, Arg, SpillSlot,
2356 Arg = SpillSlot;
2357 }
2358 break;
2359 }
2360 }
2361
2362 if (VA.needsCustom()) {
2363 assert(VA.getValVT() == MVT::v64i1 &&
2364 "Currently the only custom case is when we split v64i1 to 2 regs");
2365 // Split v64i1 value into two registers
2366 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
2367 } else if (VA.isRegLoc()) {
2368 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2369 const TargetOptions &Options = DAG.getTarget().Options;
2370 if (Options.EmitCallSiteInfo)
2371 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
2372 if (isVarArg && IsWin64) {
2373 // Win64 ABI requires argument XMM reg to be copied to the corresponding
2374 // shadow reg if callee is a varargs function.
2375 Register ShadowReg;
2376 switch (VA.getLocReg()) {
2377 case X86::XMM0: ShadowReg = X86::RCX; break;
2378 case X86::XMM1: ShadowReg = X86::RDX; break;
2379 case X86::XMM2: ShadowReg = X86::R8; break;
2380 case X86::XMM3: ShadowReg = X86::R9; break;
2381 }
2382 if (ShadowReg)
2383 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2384 }
2385 } else if (!IsSibcall && (!isTailCall || (isByVal && !IsMustTail))) {
2386 assert(VA.isMemLoc());
2387 if (!StackPtr.getNode())
2388 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2390 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2391 dl, DAG, VA, Flags, isByVal));
2392 }
2393 }
2394
2395 if (!MemOpChains.empty())
2396 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2397
2398 if (Subtarget.isPICStyleGOT()) {
2399 // ELF / PIC requires GOT in the EBX register before function calls via PLT
2400 // GOT pointer (except regcall).
2401 if (!isTailCall) {
      // Indirect call with RegCall calling convention may use up all the
      // general registers, so it is not suitable to bind EBX register for
      // GOT address, just let the register allocator handle it.
2405 if (CallConv != CallingConv::X86_RegCall)
2406 RegsToPass.push_back(std::make_pair(
2407 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2408 getPointerTy(DAG.getDataLayout()))));
2409 } else {
2410 // If we are tail calling and generating PIC/GOT style code load the
2411 // address of the callee into ECX. The value in ecx is used as target of
2412 // the tail jump. This is done to circumvent the ebx/callee-saved problem
2413 // for tail calls on PIC/GOT architectures. Normally we would just put the
2414 // address of GOT into ebx and then call target@PLT. But for tail calls
2415 // ebx would be restored (since ebx is callee saved) before jumping to the
2416 // target@PLT.
2417
2418 // Note: The actual moving to ECX is done further down.
2419 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2420 if (G && !G->getGlobal()->hasLocalLinkage() &&
2421 G->getGlobal()->hasDefaultVisibility())
2422 Callee = LowerGlobalAddress(Callee, DAG);
2423 else if (isa<ExternalSymbolSDNode>(Callee))
2424 Callee = LowerExternalSymbol(Callee, DAG);
2425 }
2426 }
2427
2428 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2429 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
2430 // From AMD64 ABI document:
2431 // For calls that may call functions that use varargs or stdargs
2432 // (prototype-less calls or calls to functions containing ellipsis (...) in
2433 // the declaration) %al is used as hidden argument to specify the number
2434 // of SSE registers used. The contents of %al do not need to match exactly
    // the number of registers, but must be an upper bound on the number of SSE
2436 // registers used and is in the range 0 - 8 inclusive.
2437
2438 // Count the number of XMM registers allocated.
2439 static const MCPhysReg XMMArgRegs[] = {
2440 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2441 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2442 };
2443 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2444 assert((Subtarget.hasSSE1() || !NumXMMRegs)
2445 && "SSE registers cannot be used when SSE is disabled");
2446 RegsToPass.push_back(std::make_pair(Register(X86::AL),
2447 DAG.getConstant(NumXMMRegs, dl,
2448 MVT::i8)));
2449 }
2450
2451 if (isVarArg && IsMustTail) {
2452 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2453 for (const auto &F : Forwards) {
2454 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2455 RegsToPass.push_back(std::make_pair(F.PReg, Val));
2456 }
2457 }
2458
2459 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2460 // don't need this because the eligibility check rejects calls that require
2461 // shuffling arguments passed in memory.
2462 if (isTailCall && !IsSibcall) {
2463 // Force all the incoming stack arguments to be loaded from the stack
2464 // before any new outgoing arguments or the return address are stored to the
2465 // stack, because the outgoing stack slots may alias the incoming argument
2466 // stack slots, and the alias isn't otherwise explicit. This is slightly
2467 // more conservative than necessary, because it means that each store
2468 // effectively depends on every argument instead of just those arguments it
2469 // would clobber.
2470 Chain = DAG.getStackArgumentTokenFactor(Chain);
2471
2472 if (ByValTempChain)
2473 Chain =
2474 DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Chain, ByValTempChain);
2475
2476 SmallVector<SDValue, 8> MemOpChains2;
2477 SDValue FIN;
2478 int FI = 0;
2479 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2480 ++I, ++OutsIndex) {
2481 CCValAssign &VA = ArgLocs[I];
2482
2483 if (VA.isRegLoc()) {
2484 if (VA.needsCustom()) {
2485 assert((CallConv == CallingConv::X86_RegCall) &&
2486 "Expecting custom case only in regcall calling convention");
2487 // This means that we are in special case where one argument was
2488 // passed through two register locations - Skip the next location
2489 ++I;
2490 }
2491
2492 continue;
2493 }
2494
2495 assert(VA.isMemLoc());
2496 SDValue Arg = OutVals[OutsIndex];
2497 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2498 // Skip inalloca/preallocated arguments. They don't require any work.
2499 if (Flags.isInAlloca() || Flags.isPreallocated())
2500 continue;
2501 // Create frame index.
2502 int32_t Offset = VA.getLocMemOffset()+FPDiff;
2503 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2504 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
2505 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2506
2507 if (Flags.isByVal()) {
2508 if (SDValue ByValSrc = ByValTemporaries[OutsIndex]) {
2509 auto PtrVT = getPointerTy(DAG.getDataLayout());
2510 SDValue DstAddr = DAG.getFrameIndex(FI, PtrVT);
2511
2513 ByValSrc, DstAddr, Chain, Flags, DAG, dl));
2514 }
2515 } else {
2516 // Store relative to framepointer.
2517 MemOpChains2.push_back(DAG.getStore(
2518 Chain, dl, Arg, FIN,
2520 }
2521 }
2522
2523 if (!MemOpChains2.empty())
2524 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2525
2526 // Store the return address to the appropriate stack slot.
2527 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2529 RegInfo->getSlotSize(), FPDiff, dl);
2530 }
2531
2532 // Build a sequence of copy-to-reg nodes chained together with token chain
2533 // and glue operands which copy the outgoing args into registers.
2534 SDValue InGlue;
2535 for (const auto &[Reg, N] : RegsToPass) {
2536 Chain = DAG.getCopyToReg(Chain, dl, Reg, N, InGlue);
2537 InGlue = Chain.getValue(1);
2538 }
2539
2540 bool IsImpCall = false;
2541 bool IsCFGuardCall = false;
2542 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2543 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2544 // In the 64-bit large code model, we have to make all calls
2545 // through a register, since the call instruction's 32-bit
2546 // pc-relative offset may not be large enough to hold the whole
2547 // address.
2548 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2549 Callee->getOpcode() == ISD::ExternalSymbol) {
2550 // Lower direct calls to global addresses and external symbols. Setting
2551 // ForCall to true here has the effect of removing WrapperRIP when possible
2552 // to allow direct calls to be selected without first materializing the
2553 // address into a register.
2554 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true, &IsImpCall);
2555 } else if (Subtarget.isTarget64BitILP32() &&
2556 Callee.getValueType() == MVT::i32) {
2557 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
2558 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2559 } else if (Is64Bit && CB && isCFGuardCall(CB)) {
    // We'll use a specific pseudo instruction for tail calls to control flow
2561 // guard functions to guarantee the instruction used for the call. To do
2562 // this we need to unwrap the load now and use the CFG Func GV as the
2563 // callee.
2564 IsCFGuardCall = true;
2565 auto *LoadNode = cast<LoadSDNode>(Callee);
2566 GlobalAddressSDNode *GA =
2567 cast<GlobalAddressSDNode>(unwrapAddress(LoadNode->getBasePtr()));
2569 "CFG Call should be to a guard function");
2570 assert(LoadNode->getOffset()->isUndef() &&
2571 "CFG Function load should not have an offset");
2573 GA->getGlobal(), dl, GA->getValueType(0), 0, X86II::MO_NO_FLAG);
2574 }
2575
2577
2578 if (!IsSibcall && isTailCall && !IsMustTail) {
2579 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
2580 InGlue = Chain.getValue(1);
2581 }
2582
2583 Ops.push_back(Chain);
2584 Ops.push_back(Callee);
2585
2586 if (isTailCall)
2587 Ops.push_back(DAG.getSignedTargetConstant(FPDiff, dl, MVT::i32));
2588
2589 // Add argument registers to the end of the list so that they are known live
2590 // into the call.
2591 for (const auto &[Reg, N] : RegsToPass)
2592 Ops.push_back(DAG.getRegister(Reg, N.getValueType()));
2593
2594 // Add a register mask operand representing the call-preserved registers.
2595 const uint32_t *Mask = [&]() {
2596 auto AdaptedCC = CallConv;
2597 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2598 // use X86_INTR calling convention because it has the same CSR mask
2599 // (same preserved registers).
2600 if (HasNCSR)
2602 // If NoCalleeSavedRegisters is requested, than use GHC since it happens
2603 // to use the CSR_NoRegs_RegMask.
2604 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
2605 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2606 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2607 }();
2608 assert(Mask && "Missing call preserved mask for calling convention");
2609
2610 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getFramePtr())) {
2611 X86Info->setFPClobberedByCall(true);
2612 if (CLI.CB && isa<InvokeInst>(CLI.CB))
2613 X86Info->setFPClobberedByInvoke(true);
2614 }
2615 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getBaseRegister())) {
2616 X86Info->setBPClobberedByCall(true);
2617 if (CLI.CB && isa<InvokeInst>(CLI.CB))
2618 X86Info->setBPClobberedByInvoke(true);
2619 }
2620
2621 // If this is an invoke in a 32-bit function using a funclet-based
2622 // personality, assume the function clobbers all registers. If an exception
2623 // is thrown, the runtime will not restore CSRs.
2624 // FIXME: Model this more precisely so that we can register allocate across
2625 // the normal edge and spill and fill across the exceptional edge.
2626 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
2627 const Function &CallerFn = MF.getFunction();
2628 EHPersonality Pers =
2629 CallerFn.hasPersonalityFn()
2632 if (isFuncletEHPersonality(Pers))
2633 Mask = RegInfo->getNoPreservedMask();
2634 }
2635
2636 // Define a new register mask from the existing mask.
2637 uint32_t *RegMask = nullptr;
2638
2639 // In some calling conventions we need to remove the used physical registers
2640 // from the reg mask. Create a new RegMask for such calling conventions.
2641 // RegMask for calling conventions that disable only return registers (e.g.
2642 // preserve_most) will be modified later in LowerCallResult.
2643 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
2644 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
2645 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2646
2647 // Allocate a new Reg Mask and copy Mask.
2648 RegMask = MF.allocateRegMask();
2649 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
2650 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
2651
2652 // Make sure all sub registers of the argument registers are reset
2653 // in the RegMask.
2654 if (ShouldDisableArgRegs) {
2655 for (auto const &RegPair : RegsToPass)
2656 for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2657 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2658 }
2659
2660 // Create the RegMask Operand according to our updated mask.
2661 Ops.push_back(DAG.getRegisterMask(RegMask));
2662 } else {
2663 // Create the RegMask Operand according to the static mask.
2664 Ops.push_back(DAG.getRegisterMask(Mask));
2665 }
2666
2667 if (InGlue.getNode())
2668 Ops.push_back(InGlue);
2669
2670 if (isTailCall) {
2671 // We used to do:
2672 //// If this is the first return lowered for this function, add the regs
2673 //// to the liveout set for the function.
2674 // This isn't right, although it's probably harmless on x86; liveouts
2675 // should be computed from returns not tail calls. Consider a void
2676 // function making a tail call to a function returning int.
2678 auto Opcode =
2679 IsCFGuardCall ? X86ISD::TC_RETURN_GLOBALADDR : X86ISD::TC_RETURN;
2680 SDValue Ret = DAG.getNode(Opcode, dl, MVT::Other, Ops);
2681
2682 if (IsCFICall)
2683 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2684
2685 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2686 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2687 return Ret;
2688 }
2689
2690 // Returns a chain & a glue for retval copy to use.
2691 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2692 if (IsImpCall) {
2693 Chain = DAG.getNode(X86ISD::IMP_CALL, dl, NodeTys, Ops);
2694 } else if (IsNoTrackIndirectCall) {
2695 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
2696 } else if (IsCFGuardCall) {
2697 Chain = DAG.getNode(X86ISD::CALL_GLOBALADDR, dl, NodeTys, Ops);
2698 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
2699 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2700 // expanded to the call, directly followed by a special marker sequence and
2701 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
2702 assert(!isTailCall &&
2703 "tail calls cannot be marked with clang.arc.attachedcall");
2704 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2705
2706 // Add a target global address for the retainRV/claimRV runtime function
2707 // just before the call target.
2709 auto PtrVT = getPointerTy(DAG.getDataLayout());
2710 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
2711 Ops.insert(Ops.begin() + 1, GA);
2712 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
2713 } else {
2714 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
2715 }
2716
2717 if (IsCFICall)
2718 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2719
2720 InGlue = Chain.getValue(1);
2721 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2722 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2723
2724 // Save heapallocsite metadata.
2725 if (CLI.CB)
2726 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
2727 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
2728
2729 // Create the CALLSEQ_END node.
2730 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2731 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2733 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2734 } else if (hasCalleePopSRet(Outs, ArgLocs, Subtarget)) {
2735 // If this call passes a struct-return pointer, the callee
2736 // pops that struct pointer.
2737 NumBytesForCalleeToPop = 4;
2738 }
2739
2740 // Returns a glue for retval copy to use.
2741 if (!IsSibcall) {
2742 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
2743 InGlue, dl);
2744 InGlue = Chain.getValue(1);
2745 }
2746
2747 if (CallingConv::PreserveNone == CallConv)
2748 for (const ISD::OutputArg &Out : Outs) {
2749 if (Out.Flags.isSwiftSelf() || Out.Flags.isSwiftAsync() ||
2750 Out.Flags.isSwiftError()) {
2751 errorUnsupported(DAG, dl,
2752 "Swift attributes can't be used with preserve_none");
2753 break;
2754 }
2755 }
2756
2757 // Handle result values, copying them out of physregs into vregs that we
2758 // return.
2759 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2760 InVals, RegMask);
2761}
2762
2763//===----------------------------------------------------------------------===//
2764// Fast Calling Convention (tail call) implementation
2765//===----------------------------------------------------------------------===//
2766
// Like stdcall, the callee cleans up the arguments, except that ECX is
2768// reserved for storing the tail called function address. Only 2 registers are
2769// free for argument passing (inreg). Tail call optimization is performed
2770// provided:
2771// * tailcallopt is enabled
2772// * caller/callee are fastcc
2773// On X86_64 architecture with GOT-style position independent code only local
2774// (within module) calls are supported at the moment.
2775// To keep the stack aligned according to platform abi the function
2776// GetAlignedArgumentStackSize ensures that argument delta is always multiples
2777// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
2778// If a tail called function callee has more arguments than the caller the
2779// caller needs to make sure that there is room to move the RETADDR to. This is
2780// achieved by reserving an area the size of the argument delta right after the
2781// original RETADDR, but before the saved framepointer or the spilled registers
2782// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
2783// stack layout:
2784// arg1
2785// arg2
2786// RETADDR
2787// [ new RETADDR
2788// move area ]
2789// (possible EBP)
2790// ESI
2791// EDI
2792// local1 ..
2793
/// Align the argument stack size while reserving a slot for the return
/// address, e.g. producing 16n + 12 for a 16-byte alignment requirement
/// with 4-byte slots.
2796unsigned
2797X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2798 SelectionDAG &DAG) const {
2799 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2800 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2801 assert(StackSize % SlotSize == 0 &&
2802 "StackSize must be a multiple of SlotSize");
2803 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
2804}
2805
2806/// Return true if the given stack call argument is already available in the
2807/// same position (relatively) of the caller's incoming argument stack.
2808static
2810 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2811 const X86InstrInfo *TII, const CCValAssign &VA) {
2812 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2813
2814 for (;;) {
2815 // Look through nodes that don't alter the bits of the incoming value.
2816 unsigned Op = Arg.getOpcode();
2817 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2818 Op == ISD::AssertZext) {
2819 Arg = Arg.getOperand(0);
2820 continue;
2821 }
2822 if (Op == ISD::TRUNCATE) {
2823 const SDValue &TruncInput = Arg.getOperand(0);
2824 if (TruncInput.getOpcode() == ISD::AssertZext &&
2825 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
2826 Arg.getValueType()) {
2827 Arg = TruncInput.getOperand(0);
2828 continue;
2829 }
2830 }
2831 break;
2832 }
2833
2834 int FI = INT_MAX;
2835 if (Arg.getOpcode() == ISD::CopyFromReg) {
2836 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2837 if (!VR.isVirtual())
2838 return false;
2839 MachineInstr *Def = MRI->getVRegDef(VR);
2840 if (!Def)
2841 return false;
2842 if (!Flags.isByVal()) {
2843 if (!TII->isLoadFromStackSlot(*Def, FI))
2844 return false;
2845 } else {
2846 unsigned Opcode = Def->getOpcode();
2847 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2848 Opcode == X86::LEA64_32r) &&
2849 Def->getOperand(1).isFI()) {
2850 FI = Def->getOperand(1).getIndex();
2851 Bytes = Flags.getByValSize();
2852 } else
2853 return false;
2854 }
2855 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2856 if (Flags.isByVal())
2857 // ByVal argument is passed in as a pointer but it's now being
2858 // dereferenced. e.g.
2859 // define @foo(%struct.X* %A) {
2860 // tail call @bar(%struct.X* byval %A)
2861 // }
2862 return false;
2863 SDValue Ptr = Ld->getBasePtr();
2865 if (!FINode)
2866 return false;
2867 FI = FINode->getIndex();
2868 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
2870 FI = FINode->getIndex();
2871 Bytes = Flags.getByValSize();
2872 } else
2873 return false;
2874
2875 assert(FI != INT_MAX);
2876 if (!MFI.isFixedObjectIndex(FI))
2877 return false;
2878
2879 if (Offset != MFI.getObjectOffset(FI))
2880 return false;
2881
2882 // If this is not byval, check that the argument stack object is immutable.
2883 // inalloca and argument copy elision can create mutable argument stack
2884 // objects. Byval objects can be mutated, but a byval call intends to pass the
2885 // mutated memory.
2886 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
2887 return false;
2888
2889 if (VA.getLocVT().getFixedSizeInBits() >
2891 // If the argument location is wider than the argument type, check that any
2892 // extension flags match.
2893 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
2894 Flags.isSExt() != MFI.isObjectSExt(FI)) {
2895 return false;
2896 }
2897 }
2898
2899 return Bytes == MFI.getObjectSize(FI);
2900}
2901
2902static bool
2904 Register CallerSRetReg) {
2905 const auto &Outs = CLI.Outs;
2906 const auto &OutVals = CLI.OutVals;
2907
2908 // We know the caller has a sret pointer argument (CallerSRetReg). Locate the
2909 // operand index within the callee that may have a sret pointer too.
2910 unsigned Pos = 0;
2911 for (unsigned E = Outs.size(); Pos != E; ++Pos)
2912 if (Outs[Pos].Flags.isSRet())
2913 break;
2914 // Bail out if the callee has not any sret argument.
2915 if (Pos == Outs.size())
2916 return false;
2917
2918 // At this point, either the caller is forwarding its sret argument to the
2919 // callee, or the callee is being passed a different sret pointer. We now look
2920 // for a CopyToReg, where the callee sret argument is written into a new vreg
2921 // (which should later be %rax/%eax, if this is returned).
2922 SDValue SRetArgVal = OutVals[Pos];
2923 for (SDNode *User : SRetArgVal->users()) {
2924 if (User->getOpcode() != ISD::CopyToReg)
2925 continue;
2927 if (Reg == CallerSRetReg && User->getOperand(2) == SRetArgVal)
2928 return true;
2929 }
2930
2931 return false;
2932}
2933
2934/// Check whether the call is eligible for sibling call optimization. Sibling
2935/// calls are loosely defined to be simple, profitable tail calls that only
/// require adjusting register parameters. We do not speculatively optimize
2937/// complex calls that require lots of argument memory operations that may
2938/// alias.
2939///
2940/// Note that LLVM supports multiple ways, such as musttail, to force tail call
2941/// emission. Returning false from this function will not prevent tail call
2942/// emission in all cases.
2943bool X86TargetLowering::isEligibleForSiblingCallOpt(
2945 SmallVectorImpl<CCValAssign> &ArgLocs) const {
2946 SelectionDAG &DAG = CLI.DAG;
2947 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2948 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2949 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2950 SDValue Callee = CLI.Callee;
2951 CallingConv::ID CalleeCC = CLI.CallConv;
2952 bool isVarArg = CLI.IsVarArg;
2953
2954 if (!mayTailCallThisCC(CalleeCC))
2955 return false;
2956
2957 // If -tailcallopt is specified, make fastcc functions tail-callable.
2958 MachineFunction &MF = DAG.getMachineFunction();
2959 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
2960 const Function &CallerF = MF.getFunction();
2961
2962 // If the function return type is x86_fp80 and the callee return type is not,
2963 // then the FP_EXTEND of the call result is not a nop. It's not safe to
2964 // perform a tailcall optimization here.
2965 if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
2966 return false;
2967
2968 // Win64 functions have extra shadow space for argument homing. Don't do the
2969 // sibcall if the caller and callee have mismatched expectations for this
2970 // space.
2971 CallingConv::ID CallerCC = CallerF.getCallingConv();
2972 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
2973 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
2974 if (IsCalleeWin64 != IsCallerWin64)
2975 return false;
2976
2977 // If we are using a GOT, don't generate sibling calls to non-local,
2978 // default-visibility symbols. Tail calling such a symbol requires using a GOT
2979 // relocation, which forces early binding of the symbol. This breaks code that
2980 // require lazy function symbol resolution. Using musttail or
2981 // GuaranteedTailCallOpt will override this.
2982 if (Subtarget.isPICStyleGOT()) {
2983 if (isa<ExternalSymbolSDNode>(Callee))
2984 return false;
2985 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
2986 if (!G->getGlobal()->hasLocalLinkage() &&
2987 G->getGlobal()->hasDefaultVisibility())
2988 return false;
2989 }
2990 }
2991
2992 // Look for obvious safe cases to perform tail call optimization that do not
2993 // require ABI changes. This is what gcc calls sibcall.
2994
2995 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
2996 // emit a special epilogue.
2997 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2998 if (RegInfo->hasStackRealignment(MF))
2999 return false;
3000
3001 // Avoid sibcall optimization if we are an sret return function and the callee
3002 // is incompatible, unless such premises are proven wrong. See comment in
3003 // LowerReturn about why hasStructRetAttr is insufficient.
3004 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
3005 // For a compatible tail call the callee must return our sret pointer. So it
3006 // needs to be (a) an sret function itself and (b) we pass our sret as its
3007 // sret. Condition #b is harder to determine.
3008 if (!mayBeSRetTailCallCompatible(CLI, SRetReg))
3009 return false;
3010 } else if (hasCalleePopSRet(Outs, ArgLocs, Subtarget))
3011 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
3012 // expect that.
3013 return false;
3014
3015 // Do not sibcall optimize vararg calls unless all arguments are passed via
3016 // registers.
3017 LLVMContext &C = *DAG.getContext();
3018 if (isVarArg && !Outs.empty()) {
3019 // Optimizing for varargs on Win64 is unlikely to be safe without
3020 // additional testing.
3021 if (IsCalleeWin64 || IsCallerWin64)
3022 return false;
3023
3024 for (const auto &VA : ArgLocs)
3025 if (!VA.isRegLoc())
3026 return false;
3027 }
3028
3029 // If the call result is in ST0 / ST1, it needs to be popped off the x87
3030 // stack. Therefore, if it's not used by the call it is not safe to optimize
3031 // this into a sibcall.
3032 bool Unused = false;
3033 for (const auto &In : Ins) {
3034 if (!In.Used) {
3035 Unused = true;
3036 break;
3037 }
3038 }
3039 if (Unused) {
3041 CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
3042 RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
3043 for (const auto &VA : RVLocs) {
3044 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
3045 return false;
3046 }
3047 }
3048
3049 // Check that the call results are passed in the same way.
3050 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
3052 return false;
3053 // The callee has to preserve all registers the caller needs to preserve.
3054 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
3055 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
3056 if (CallerCC != CalleeCC) {
3057 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
3058 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
3059 return false;
3060 }
3061
3062 // The stack frame of the caller cannot be replaced by the tail-callee one's
3063 // if the function is required to preserve all the registers. Conservatively
3064 // prevent tail optimization even if hypothetically all the registers are used
3065 // for passing formal parameters or returning values.
3066 if (CallerF.hasFnAttribute("no_caller_saved_registers"))
3067 return false;
3068
3069 unsigned StackArgsSize = CCInfo.getStackSize();
3070
3071 // If the callee takes no arguments then go on to check the results of the
3072 // call.
3073 if (!Outs.empty()) {
3074 if (StackArgsSize > 0) {
3075 // Check if the arguments are already laid out in the right way as
3076 // the caller's fixed stack objects.
3077 MachineFrameInfo &MFI = MF.getFrameInfo();
3078 const MachineRegisterInfo *MRI = &MF.getRegInfo();
3079 const X86InstrInfo *TII = Subtarget.getInstrInfo();
3080 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
3081 const CCValAssign &VA = ArgLocs[I];
3082 SDValue Arg = OutVals[I];
3083 ISD::ArgFlagsTy Flags = Outs[I].Flags;
3085 return false;
3086 if (!VA.isRegLoc()) {
3087 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
3088 TII, VA))
3089 return false;
3090 }
3091 }
3092 }
3093
3094 bool PositionIndependent = isPositionIndependent();
3095 // If the tailcall address may be in a register, then make sure it's
3096 // possible to register allocate for it. In 32-bit, the call address can
3097 // only target EAX, EDX, or ECX since the tail call must be scheduled after
3098 // callee-saved registers are restored. These happen to be the same
3099 // registers used to pass 'inreg' arguments so watch out for those.
3100 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
3101 !isa<ExternalSymbolSDNode>(Callee)) ||
3102 PositionIndependent)) {
3103 unsigned NumInRegs = 0;
3104 // In PIC we need an extra register to formulate the address computation
3105 // for the callee.
3106 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
3107
3108 for (const auto &VA : ArgLocs) {
3109 if (!VA.isRegLoc())
3110 continue;
3111 Register Reg = VA.getLocReg();
3112 switch (Reg) {
3113 default: break;
3114 case X86::EAX: case X86::EDX: case X86::ECX:
3115 if (++NumInRegs == MaxInRegs)
3116 return false;
3117 break;
3118 }
3119 }
3120 }
3121
3122 const MachineRegisterInfo &MRI = MF.getRegInfo();
3123 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
3124 return false;
3125 }
3126
3127 bool CalleeWillPop =
3128 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
3130
3131 if (unsigned BytesToPop = FuncInfo->getBytesToPopOnReturn()) {
3132 // If we have bytes to pop, the callee must pop them.
3133 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
3134 if (!CalleePopMatches)
3135 return false;
3136 } else if (CalleeWillPop && StackArgsSize > 0) {
3137 // If we don't have bytes to pop, make sure the callee doesn't pop any.
3138 return false;
3139 }
3140
3141 return true;
3142}
3143
3144/// Determines whether the callee is required to pop its own arguments.
3145/// Callee pop is necessary to support tail calls.
3147 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
3148 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
3149 // can guarantee TCO.
3150 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
3151 return true;
3152
3153 switch (CallingConv) {
3154 default:
3155 return false;
3160 return !is64Bit;
3161 }
3162}
return SDValue()
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
const HexagonInstrInfo * TII
static bool IsIndirectCall(const MachineInstr *MI)
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static LVOptions Options
Definition LVOptions.cpp:25
const MCPhysReg ArgGPRs[]
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt)
Return true if the function is being made into a tailcall target by changing its ABI.
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const M68kInstrInfo *TII, const CCValAssign &VA)
Return true if the given stack call argument is already available in the same position (relatively) o...
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define G(x, y, z)
Definition MD5.cpp:55
Machine Check Debug Module
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
This file defines ARC utility functions which are used by various parts of the compiler.
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
static Function * getFunction(FunctionType *Ty, const Twine &Name, Module *M)
static bool is64Bit(const char *name)
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
Lowers masks values (v*i1) to the local register values.
static void Passv64i1ArgInRegs(const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg, SmallVectorImpl< std::pair< Register, SDValue > > &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, const X86Subtarget &Subtarget)
Breaks v64i1 value into two registers and adds the new node to the DAG.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget, SDValue *InGlue=nullptr)
Reads two 32 bit registers and creates a 64 bit mask value.
static ArrayRef< MCPhysReg > get64BitArgumentXMMs(MachineFunction &MF, CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static bool isSortedByValueNo(ArrayRef< CCValAssign > ArgLocs)
static ArrayRef< MCPhysReg > get64BitArgumentGPRs(CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static SDValue getPopFromX87Reg(SelectionDAG &DAG, SDValue Chain, const SDLoc &dl, Register Reg, EVT VT, SDValue Glue)
static bool mayBeSRetTailCallCompatible(const TargetLowering::CallLoweringInfo &CLI, Register CallerSRetReg)
static std::pair< MVT, unsigned > handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, const X86Subtarget &Subtarget)
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, const char *Msg)
Call this when the user attempts to do something unsupported, like returning a double without SSE2 en...
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, unsigned SlotSize, int FPDiff, const SDLoc &dl)
Emit a store of the return address if tail call optimization is performed and it is required (FPDiff!...
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static bool hasStackGuardSlotTLS(const Triple &TargetTriple)
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
The function will lower a register of various sizes (8/16/32/64) to a mask value of the expected size...
static Constant * SegmentOffset(IRBuilderBase &IRB, int Offset, unsigned AddressSpace)
static bool hasCalleePopSRet(const SmallVectorImpl< T > &Args, const SmallVectorImpl< CCValAssign > &ArgLocs, const X86Subtarget &Subtarget)
Determines whether Args, either a set of outgoing arguments to a call, or a set of incoming args of a...
static bool isBitAligned(Align Alignment, uint64_t SizeInBits)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition ArrayRef.h:186
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
CCState - This class holds information needed while lowering arguments and return values.
static LLVM_ABI bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
void convertToReg(MCRegister Reg)
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
unsigned getValNo() const
CallingConv::ID getCallingConv() const
LLVM_ABI bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
LLVM_ABI bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static ConstantInt * getSigned(IntegerType *Ty, int64_t V, bool ImplicitTrunc=false)
Return a ConstantInt with the specified value for the specified type.
Definition Constants.h:135
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
This is an important base class in LLVM.
Definition Constant.h:43
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
LLVM_ABI TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Diagnostic information for unsupported feature in backend.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition Function.h:272
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition Function.h:905
Constant * getPersonalityFn() const
Get the personality function associated with this function.
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition Function.cpp:728
const GlobalValue * getGlobal() const
Module * getParent()
Get the module that this global value is contained inside of...
void setDSOLocal(bool Local)
@ ExternalLinkage
Externally visible function.
Definition GlobalValue.h:53
Common base class shared among various IRBuilders.
Definition IRBuilder.h:114
BasicBlock * GetInsertBlock() const
Definition IRBuilder.h:201
LLVMContext & getContext() const
Definition IRBuilder.h:203
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:622
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
Tracks which library functions to use for a particular subtarget.
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition MCExpr.h:34
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx, SMLoc Loc=SMLoc())
Definition MCExpr.h:214
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
LLVM_ABI int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setObjectZExt(int ObjectIdx, bool IsZExt)
LLVM_ABI int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setObjectSExt(int ObjectIdx, bool IsSExt)
bool isImmutableObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to an immutable object.
void setHasTailCall(bool V=true)
bool isObjectZExt(int ObjectIdx) const
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isObjectSExt(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
uint32_t * allocateRegMask()
Allocate and initialize a register mask with NumRegister bits.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Representation of each machine instruction.
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
@ EK_LabelDifference64
EK_LabelDifference64 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOStore
The memory access writes data.
static unsigned getRegMaskSize(unsigned NumRegs)
Returns number of elements needed for a regmask array.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
LLVM_ABI Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
ArrayRef< std::pair< MCRegister, Register > > liveins() const
LLVM_ABI void disableCalleeSavedRegister(MCRegister Reg)
Disables the register from the list of CSRs.
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Wrapper class representing virtual and physical registers.
Definition Register.h:20
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
EVT getValueType(unsigned ResNo) const
Return the type of a specified result.
void setCFIType(uint32_t Type)
iterator_range< user_iterator > users()
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
LLVM_ABI SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
LLVM_ABI SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
LLVM_ABI SDValue getRegister(Register Reg, EVT VT)
LLVM_ABI SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
LLVM_ABI SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=LocationSize::precise(0), const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
LLVM_ABI SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
LLVM_ABI SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
const DataLayout & getDataLayout() const
void addHeapAllocSite(const SDNode *Node, MDNode *MD)
Set HeapAllocSite to be associated with Node.
LLVM_ABI SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
LLVM_ABI SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
const TargetMachine & getTarget() const
LLVM_ABI SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
LLVM_ABI SDValue getValueType(EVT)
LLVM_ABI SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
MachineFunction & getMachineFunction() const
LLVM_ABI SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
LLVM_ABI SDValue getRegisterMask(const uint32_t *RegMask)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVMContext * getContext() const
LLVM_ABI SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
LLVM_ABI std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
LLVM_ABI SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void assign(size_type NumElts, ValueParamT Elt)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
Class to represent struct types.
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual Value * getIRStackGuard(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
virtual void insertSSPDeclarations(Module &M, const LibcallLoweringInfo &Libcalls) const
Inserts necessary declarations for SSP (stack protection) purpose.
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
static StringRef getLibcallImplName(RTLIB::LibcallImpl Call)
Get the libcall routine name for the specified libcall implementation.
virtual Value * getSafeStackPointerLocation(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const
Returns the target-specific address of the unsafe stack pointer.
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
bool isPositionIndependent() const
virtual ArrayRef< MCPhysReg > getRoundingControlRegisters() const
Returns a 0 terminated array of rounding control registers that can be attached into strict FP call.
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
void setTypeIdForCallsiteInfo(const CallBase *CB, MachineFunction &MF, MachineFunction::CallSiteInfo &CSInfo) const
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
bool isAndroid() const
Tests whether the target is Android.
Definition Triple.h:866
bool isMusl() const
Tests whether the environment is musl-libc.
Definition Triple.h:881
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition Triple.h:791
bool isOSFuchsia() const
Definition Triple.h:679
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition Type.h:161
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
static LLVM_ABI Type * getVoidTy(LLVMContext &C)
Definition Type.cpp:286
Value * getOperand(unsigned i) const
Definition User.h:207
LLVM Value Representation.
Definition Value.h:75
void setBytesToPopOnReturn(unsigned bytes)
void setVarArgsGPOffset(unsigned Offset)
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
void setVarArgsFPOffset(unsigned Offset)
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
Register getStackRegister() const
unsigned getSlotSize() const
Register getFramePtr() const
Returns physical register used as frame pointer.
Register getBaseRegister() const
const uint32_t * getNoPreservedMask() const override
bool hasSSE1() const
const Triple & getTargetTriple() const
bool useAVX512Regs() const
bool isCallingConvWin64(CallingConv::ID CC) const
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMemoryAccessFast(EVT VT, Align Alignment) const
Value * getIRStackGuard(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
bool useSoftFloat() const override
Value * getSafeStackPointerLocation(IRBuilderBase &IRB, const LibcallLoweringInfo &Libcalls) const override
Return true if the target stores SafeStack pointer at a fixed offset in some non-standard address spa...
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool isSafeMemOpType(MVT VT) const override
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
For some targets, an LLVM struct type must be broken down into multiple simple types,...
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
Return the desired alignment for ByVal aggregate function arguments in the caller parameter area.
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Returns true if the target allows unaligned memory accesses of the specified type.
EVT getOptimalMemOpType(LLVMContext &Context, const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void markLibCallAttributes(MachineFunction *MF, unsigned CC, ArgListTy &Args) const override
void insertSSPDeclarations(Module &M, const LibcallLoweringInfo &Libcalls) const override
Inserts necessary declarations for SSP (stack protection) purpose.
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
This function returns true if the memory access is aligned or if the target allows this specific unal...
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const
SDValue unwrapAddress(SDValue N) const override
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the value type to use for ISD::SETCC.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override
For types supported by the target, this is an identity function.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr char Align[]
Key for Kernel::Arg::Metadata::mAlign.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
CallingConv Namespace - This namespace contains an enum with a value for the well-known calling conve...
Definition CallingConv.h:21
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ X86_64_SysV
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition CallingConv.h:53
@ Swift
Calling convention for Swift.
Definition CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserves most registers.
Definition CallingConv.h:63
@ X86_INTR
x86 hardware interrupt context.
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition CallingConv.h:50
@ X86_ThisCall
Similar to X86_StdCall.
@ PreserveAll
Used for runtime calls that preserves (almost) all registers.
Definition CallingConv.h:66
@ X86_StdCall
stdcall is mostly used by the Win32 API.
Definition CallingConv.h:99
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ X86_VectorCall
MSVC calling convention that passes vectors and vector aggregates in SSE registers.
@ Intel_OCL_BI
Used for Intel OpenCL built-ins.
@ PreserveNone
Used for runtime calls that preserves none general registers.
Definition CallingConv.h:90
@ Tail
Attemps to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition CallingConv.h:76
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition CallingConv.h:87
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ X86_FastCall
'fast' analog of X86_StdCall.
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition ISDOpcodes.h:41
@ ADD
Simple integer binary arithmetic operators.
Definition ISDOpcodes.h:264
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition ISDOpcodes.h:853
@ GlobalAddress
Definition ISDOpcodes.h:88
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition ISDOpcodes.h:584
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition ISDOpcodes.h:993
@ SIGN_EXTEND
Conversion operators.
Definition ISDOpcodes.h:844
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition ISDOpcodes.h:665
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition ISDOpcodes.h:230
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition ISDOpcodes.h:576
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition ISDOpcodes.h:224
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition ISDOpcodes.h:850
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition ISDOpcodes.h:978
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition ISDOpcodes.h:53
@ ExternalSymbol
Definition ISDOpcodes.h:93
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition ISDOpcodes.h:959
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition ISDOpcodes.h:856
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition ISDOpcodes.h:62
LLVM_ABI LegalityPredicate isVector(unsigned TypeIdx)
True iff the specified type index is a vector.
@ MO_NO_FLAG
MO_NO_FLAG - No flag for the operand.
@ GlobalBaseReg
On Darwin, this node represents the result of the popl at function entry, used for PIC code.
@ POP_FROM_X87_REG
The same as ISD::CopyFromReg except that this node makes it explicit that it may lower to an x87 FPU ...
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget, const MachineFunction &MF)
True if the target supports the extended frame for async Swift functions.
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
std::optional< Function * > getAttachedARCFunction(const CallBase *CB)
This function returns operand bundle clang_arc_attachedcall's argument, which is the address of the A...
Definition ObjCARCUtil.h:43
bool hasAttachedCallOpBundle(const CallBase *CB)
Definition ObjCARCUtil.h:29
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
LLVM_ABI bool isCFGuardCall(const CallBase *CB)
Definition CFGuard.cpp:314
InstructionCost Cost
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isCFGuardFunction(const GlobalValue *GV)
Definition CFGuard.cpp:319
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
constexpr uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition Alignment.h:144
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition STLExtras.h:1970
LLVM_ABI EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition MCRegister.h:21
DWARFExpression::Operation Op
ArrayRef(const T &OneElt) -> ArrayRef< T >
bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, Type *OrigTy, CCState &State)
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
constexpr uint64_t value() const
This is a hole in the type system and should not be abused.
Definition Alignment.h:77
static constexpr Align Constant()
Allow constructions of constexpr Align.
Definition Alignment.h:88
Extended Value Type.
Definition ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition ValueTypes.h:90
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition ValueTypes.h:403
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition ValueTypes.h:70
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition ValueTypes.h:308
ElementCount getVectorElementCount() const
Definition ValueTypes.h:358
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition ValueTypes.h:381
EVT changeVectorElementType(LLVMContext &Context, EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition ValueTypes.h:98
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition ValueTypes.h:324
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition ValueTypes.h:215
bool is512BitVector() const
Return true if this is a 512-bit vector type.
Definition ValueTypes.h:225
bool isVector() const
Return true if this is a vector value type.
Definition ValueTypes.h:176
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition ValueTypes.h:220
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition ValueTypes.h:336
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition ValueTypes.h:344
Describes a register that needs to be forwarded from the prologue to a musttail call.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static LLVM_ABI MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static LLVM_ABI MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
Type * RetTy
Same as OrigRetTy, or partially legalized for soft float libcalls.