LLVM 20.0.0git
X86ISelLoweringCall.cpp
Go to the documentation of this file.
1//===- llvm/lib/Target/X86/X86ISelLoweringCall.cpp - Call lowering --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements the lowering of LLVM calls to DAG nodes.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86.h"
15#include "X86CallingConv.h"
16#include "X86FrameLowering.h"
17#include "X86ISelLowering.h"
18#include "X86InstrBuilder.h"
20#include "X86TargetMachine.h"
21#include "X86TargetObjectFile.h"
22#include "llvm/ADT/Statistic.h"
28#include "llvm/IR/IRBuilder.h"
29#include "llvm/IR/Module.h"
30
31#define DEBUG_TYPE "x86-isel"
32
33using namespace llvm;
34
35STATISTIC(NumTailCalls, "Number of tail calls");
36
37/// Call this when the user attempts to do something unsupported, like
38/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
39/// report_fatal_error, so calling code should attempt to recover without
40/// crashing.
41static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
42 const char *Msg) {
44 DAG.getContext()->diagnose(
46}
47
48/// Returns true if a CC can dynamically exclude a register from the list of
49/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
50/// the return registers.
52 switch (CC) {
53 default:
54 return false;
58 return true;
59 }
60}
61
62/// Returns true if a CC can dynamically exclude a register from the list of
63/// callee-saved-registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
64/// the parameters.
67}
68
69static std::pair<MVT, unsigned>
71 const X86Subtarget &Subtarget) {
72 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
73 // convention is one that uses k registers.
74 if (NumElts == 2)
75 return {MVT::v2i64, 1};
76 if (NumElts == 4)
77 return {MVT::v4i32, 1};
78 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
80 return {MVT::v8i16, 1};
81 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
83 return {MVT::v16i8, 1};
84 // v32i1 passes in ymm unless we have BWI and the calling convention is
85 // regcall.
86 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
87 return {MVT::v32i8, 1};
88 // Split v64i1 vectors if we don't have v64i8 available.
89 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
90 if (Subtarget.useAVX512Regs())
91 return {MVT::v64i8, 1};
92 return {MVT::v32i8, 2};
93 }
94
95 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
96 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
97 NumElts > 64)
98 return {MVT::i8, NumElts};
99
101}
102
105 EVT VT) const {
106 if (VT.isVector()) {
107 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
108 unsigned NumElts = VT.getVectorNumElements();
109
110 MVT RegisterVT;
111 unsigned NumRegisters;
112 std::tie(RegisterVT, NumRegisters) =
113 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
114 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
115 return RegisterVT;
116 }
117
118 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
119 return MVT::v8f16;
120 }
121
122 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
123 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
124 !Subtarget.hasX87())
125 return MVT::i32;
126
127 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
128 return getRegisterTypeForCallingConv(Context, CC,
129 VT.changeVectorElementType(MVT::f16));
130
131 if (VT == MVT::bf16)
132 return MVT::f16;
133
135}
136
139 EVT VT) const {
140 if (VT.isVector()) {
141 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
142 unsigned NumElts = VT.getVectorNumElements();
143
144 MVT RegisterVT;
145 unsigned NumRegisters;
146 std::tie(RegisterVT, NumRegisters) =
147 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
148 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
149 return NumRegisters;
150 }
151
152 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
153 return 1;
154 }
155
156 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
157 // x87 is disabled.
158 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
159 if (VT == MVT::f64)
160 return 2;
161 if (VT == MVT::f80)
162 return 3;
163 }
164
165 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
166 return getNumRegistersForCallingConv(Context, CC,
167 VT.changeVectorElementType(MVT::f16));
168
170}
171
173 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
174 unsigned &NumIntermediates, MVT &RegisterVT) const {
175 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
176 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
177 Subtarget.hasAVX512() &&
179 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
180 VT.getVectorNumElements() > 64)) {
181 RegisterVT = MVT::i8;
182 IntermediateVT = MVT::i1;
183 NumIntermediates = VT.getVectorNumElements();
184 return NumIntermediates;
185 }
186
187 // Split v64i1 vectors if we don't have v64i8 available.
188 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
190 RegisterVT = MVT::v32i8;
191 IntermediateVT = MVT::v32i1;
192 NumIntermediates = 2;
193 return 2;
194 }
195
196 // Split vNbf16 vectors according to vNf16.
197 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
198 VT = VT.changeVectorElementType(MVT::f16);
199
200 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
201 NumIntermediates, RegisterVT);
202}
203
205 LLVMContext& Context,
206 EVT VT) const {
207 if (!VT.isVector())
208 return MVT::i8;
209
210 if (Subtarget.hasAVX512()) {
211 // Figure out what this type will be legalized to.
212 EVT LegalVT = VT;
213 while (getTypeAction(Context, LegalVT) != TypeLegal)
214 LegalVT = getTypeToTransformTo(Context, LegalVT);
215
216 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
217 if (LegalVT.getSimpleVT().is512BitVector())
218 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
219
220 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
221 // If we legalized to less than a 512-bit vector, then we will use a vXi1
222 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
223 // vXi16/vXi8.
224 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
225 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
226 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
227 }
228 }
229
231}
232
233/// Helper for getByValTypeAlignment to determine
234/// the desired ByVal argument alignment.
235static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
236 if (MaxAlign == 16)
237 return;
238 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
239 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
240 MaxAlign = Align(16);
241 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
242 Align EltAlign;
243 getMaxByValAlign(ATy->getElementType(), EltAlign);
244 if (EltAlign > MaxAlign)
245 MaxAlign = EltAlign;
246 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
247 for (auto *EltTy : STy->elements()) {
248 Align EltAlign;
249 getMaxByValAlign(EltTy, EltAlign);
250 if (EltAlign > MaxAlign)
251 MaxAlign = EltAlign;
252 if (MaxAlign == 16)
253 break;
254 }
255 }
256}
257
258/// Return the desired alignment for ByVal aggregate
259/// function arguments in the caller parameter area. For X86, aggregates
260/// that contain SSE vectors are placed at 16-byte boundaries while the rest
261/// are at 4-byte boundaries.
263 const DataLayout &DL) const {
264 if (Subtarget.is64Bit()) {
265 // Max of 8 and alignment of type.
266 Align TyAlign = DL.getABITypeAlign(Ty);
267 if (TyAlign > 8)
268 return TyAlign.value();
269 return 8;
270 }
271
272 Align Alignment(4);
273 if (Subtarget.hasSSE1())
274 getMaxByValAlign(Ty, Alignment);
275 return Alignment.value();
276}
277
278/// It returns MVT::Other if the type should be determined using generic
279/// target-independent logic.
280/// For vector ops we check that the overall size isn't larger than our
281/// preferred vector width.
283 const MemOp &Op, const AttributeList &FuncAttributes) const {
284 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
285 if (Op.size() >= 16 &&
286 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
287 // FIXME: Check if unaligned 64-byte accesses are slow.
288 if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
289 (Subtarget.getPreferVectorWidth() >= 512)) {
290 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
291 }
292 // FIXME: Check if unaligned 32-byte accesses are slow.
293 if (Op.size() >= 32 && Subtarget.hasAVX() &&
294 Subtarget.useLight256BitInstructions()) {
295 // Although this isn't a well-supported type for AVX1, we'll let
296 // legalization and shuffle lowering produce the optimal codegen. If we
297 // choose an optimal type with a vector element larger than a byte,
298 // getMemsetStores() may create an intermediate splat (using an integer
299 // multiply) before we splat as a vector.
300 return MVT::v32i8;
301 }
302 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
303 return MVT::v16i8;
304 // TODO: Can SSE1 handle a byte vector?
305 // If we have SSE1 registers we should be able to use them.
306 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
307 (Subtarget.getPreferVectorWidth() >= 128))
308 return MVT::v4f32;
309 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
310 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
311 // Do not use f64 to lower memcpy if source is string constant. It's
312 // better to use i32 to avoid the loads.
313 // Also, do not use f64 to lower memset unless this is a memset of zeros.
314 // The gymnastics of splatting a byte value into an XMM register and then
315 // only using 8-byte stores (because this is a CPU with slow unaligned
316 // 16-byte accesses) makes that a loser.
317 return MVT::f64;
318 }
319 }
320 // This is a compromise. If we reach here, unaligned accesses may be slow on
321 // this target. However, creating smaller, aligned accesses could be even
322 // slower and would certainly be a lot more code.
323 if (Subtarget.is64Bit() && Op.size() >= 8)
324 return MVT::i64;
325 return MVT::i32;
326}
327
329 if (VT == MVT::f32)
330 return Subtarget.hasSSE1();
331 if (VT == MVT::f64)
332 return Subtarget.hasSSE2();
333 return true;
334}
335
336static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
337 return (8 * Alignment.value()) % SizeInBits == 0;
338}
339
341 if (isBitAligned(Alignment, VT.getSizeInBits()))
342 return true;
343 switch (VT.getSizeInBits()) {
344 default:
345 // 8-byte and under are always assumed to be fast.
346 return true;
347 case 128:
348 return !Subtarget.isUnalignedMem16Slow();
349 case 256:
350 return !Subtarget.isUnalignedMem32Slow();
351 // TODO: What about AVX-512 (512-bit) accesses?
352 }
353}
354
356 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
357 unsigned *Fast) const {
358 if (Fast)
359 *Fast = isMemoryAccessFast(VT, Alignment);
360 // NonTemporal vector memory ops must be aligned.
361 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
362 // NT loads can only be vector aligned, so if it's less aligned than the
363 // minimum vector size (which we can split the vector down to), we might as
364 // well use a regular unaligned vector load.
365 // We don't have any NT loads pre-SSE41.
366 if (!!(Flags & MachineMemOperand::MOLoad))
367 return (Alignment < 16 || !Subtarget.hasSSE41());
368 return false;
369 }
370 // Misaligned accesses of any size are always allowed.
371 return true;
372}
373
375 const DataLayout &DL, EVT VT,
376 unsigned AddrSpace, Align Alignment,
378 unsigned *Fast) const {
379 if (Fast)
380 *Fast = isMemoryAccessFast(VT, Alignment);
381 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
382 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
383 /*Fast=*/nullptr))
384 return true;
385 // NonTemporal vector memory ops are special, and must be aligned.
386 if (!isBitAligned(Alignment, VT.getSizeInBits()))
387 return false;
388 switch (VT.getSizeInBits()) {
389 case 128:
390 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
391 return true;
392 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
393 return true;
394 return false;
395 case 256:
396 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
397 return true;
398 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
399 return true;
400 return false;
401 case 512:
402 if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
403 return true;
404 return false;
405 default:
406 return false; // Don't have NonTemporal vector memory ops of this size.
407 }
408 }
409 return true;
410}
411
412/// Return the entry encoding for a jump table in the
413/// current function. The returned value is a member of the
414/// MachineJumpTableInfo::JTEntryKind enum.
416 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
417 // symbol.
418 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
420 if (isPositionIndependent() &&
422 !Subtarget.isTargetCOFF())
424
425 // Otherwise, use the normal jump table encoding heuristics.
427}
428
430 return Subtarget.useSoftFloat();
431}
432
434 ArgListTy &Args) const {
435
436 // Only relabel X86-32 for C / Stdcall CCs.
437 if (Subtarget.is64Bit())
438 return;
440 return;
441 unsigned ParamRegs = 0;
442 if (auto *M = MF->getFunction().getParent())
443 ParamRegs = M->getNumberRegisterParameters();
444
445 // Mark the first N int arguments as having reg
446 for (auto &Arg : Args) {
447 Type *T = Arg.Ty;
448 if (T->isIntOrPtrTy())
449 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
450 unsigned numRegs = 1;
451 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
452 numRegs = 2;
453 if (ParamRegs < numRegs)
454 return;
455 ParamRegs -= numRegs;
456 Arg.IsInReg = true;
457 }
458 }
459}
460
461const MCExpr *
463 const MachineBasicBlock *MBB,
464 unsigned uid,MCContext &Ctx) const{
466 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
467 // entries.
470}
471
472/// Returns relocation base for the given PIC jumptable.
474 SelectionDAG &DAG) const {
475 if (!Subtarget.is64Bit())
476 // This doesn't have SDLoc associated with it, but is not really the
477 // same as a Register.
478 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
480 return Table;
481}
482
483/// This returns the relocation base for the given PIC jumptable,
484/// the same as getPICJumpTableRelocBase, but as an MCExpr.
486getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
487 MCContext &Ctx) const {
488 // X86-64 uses RIP relative addressing based on the jump table label.
489 if (Subtarget.isPICStyleRIPRel() ||
490 (Subtarget.is64Bit() &&
493
494 // Otherwise, the reference is relative to the PIC base.
495 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
496}
497
498std::pair<const TargetRegisterClass *, uint8_t>
500 MVT VT) const {
501 const TargetRegisterClass *RRC = nullptr;
502 uint8_t Cost = 1;
503 switch (VT.SimpleTy) {
504 default:
506 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
507 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
508 break;
509 case MVT::x86mmx:
510 RRC = &X86::VR64RegClass;
511 break;
512 case MVT::f32: case MVT::f64:
513 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
514 case MVT::v4f32: case MVT::v2f64:
515 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
516 case MVT::v8f32: case MVT::v4f64:
517 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
518 case MVT::v16f32: case MVT::v8f64:
519 RRC = &X86::VR128XRegClass;
520 break;
521 }
522 return std::make_pair(RRC, Cost);
523}
524
525unsigned X86TargetLowering::getAddressSpace() const {
526 if (Subtarget.is64Bit())
527 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? 256 : 257;
528 return 256;
529}
530
531static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
532 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
533 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
534}
535
537 int Offset, unsigned AddressSpace) {
539 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
541}
542
544 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
545 // tcbhead_t; use it instead of the usual global variable (see
546 // sysdeps/{i386,x86_64}/nptl/tls.h)
547 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
548 unsigned AddressSpace = getAddressSpace();
549
550 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
551 if (Subtarget.isTargetFuchsia())
552 return SegmentOffset(IRB, 0x10, AddressSpace);
553
554 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
555 // Specially, some users may customize the base reg and offset.
556 int Offset = M->getStackProtectorGuardOffset();
557 // If we don't set -stack-protector-guard-offset value:
558 // %fs:0x28, unless we're using a Kernel code model, in which case
559 // it's %gs:0x28. gs:0x14 on i386.
560 if (Offset == INT_MAX)
561 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
562
563 StringRef GuardReg = M->getStackProtectorGuardReg();
564 if (GuardReg == "fs")
566 else if (GuardReg == "gs")
568
569 // Use the symbol guard if the user specified one.
570 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
571 if (!GuardSymb.empty()) {
572 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
573 if (!GV) {
574 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
575 : Type::getInt32Ty(M->getContext());
576 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
577 nullptr, GuardSymb, nullptr,
579 if (!Subtarget.isTargetDarwin())
580 GV->setDSOLocal(M->getDirectAccessExternalData());
581 }
582 return GV;
583 }
584
585 return SegmentOffset(IRB, Offset, AddressSpace);
586 }
588}
589
591 // MSVC CRT provides functionalities for stack protection.
592 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
594 // MSVC CRT has a global variable holding security cookie.
595 M.getOrInsertGlobal("__security_cookie",
596 PointerType::getUnqual(M.getContext()));
597
598 // MSVC CRT has a function to validate security cookie.
599 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
600 "__security_check_cookie", Type::getVoidTy(M.getContext()),
601 PointerType::getUnqual(M.getContext()));
602 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
603 F->setCallingConv(CallingConv::X86_FastCall);
604 F->addParamAttr(0, Attribute::AttrKind::InReg);
605 }
606 return;
607 }
608
609 StringRef GuardMode = M.getStackProtectorGuard();
610
611 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
612 if ((GuardMode == "tls" || GuardMode.empty()) &&
614 return;
616}
617
619 // MSVC CRT has a global variable holding security cookie.
620 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
622 return M.getGlobalVariable("__security_cookie");
623 }
625}
626
628 // MSVC CRT has a function to validate security cookie.
629 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
631 return M.getFunction("__security_check_cookie");
632 }
634}
635
636Value *
638 // Android provides a fixed TLS slot for the SafeStack pointer. See the
639 // definition of TLS_SLOT_SAFESTACK in
640 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
641 if (Subtarget.isTargetAndroid()) {
642 // %fs:0x48, unless we're using a Kernel code model, in which case it's
643 // %gs:0x48. %gs:0x24 on i386.
644 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
645 return SegmentOffset(IRB, Offset, getAddressSpace());
646 }
647
648 // Fuchsia is similar.
649 if (Subtarget.isTargetFuchsia()) {
650 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
651 return SegmentOffset(IRB, 0x18, getAddressSpace());
652 }
653
655}
656
657//===----------------------------------------------------------------------===//
658// Return Value Calling Convention Implementation
659//===----------------------------------------------------------------------===//
660
661bool X86TargetLowering::CanLowerReturn(
662 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
663 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context) const {
665 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
666 return CCInfo.CheckReturn(Outs, RetCC_X86);
667}
668
669const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
670 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
671 return ScratchRegs;
672}
673
674ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
675 static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
676 return RCRegs;
677}
678
679/// Lowers masks values (v*i1) to the local register values
680/// \returns DAG node after lowering to register type
681static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
682 const SDLoc &DL, SelectionDAG &DAG) {
683 EVT ValVT = ValArg.getValueType();
684
685 if (ValVT == MVT::v1i1)
686 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
687 DAG.getIntPtrConstant(0, DL));
688
689 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
690 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
691 // Two stage lowering might be required
692 // bitcast: v8i1 -> i8 / v16i1 -> i16
693 // anyextend: i8 -> i32 / i16 -> i32
694 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
695 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
696 if (ValLoc == MVT::i32)
697 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
698 return ValToCopy;
699 }
700
701 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
702 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
703 // One stage lowering is required
704 // bitcast: v32i1 -> i32 / v64i1 -> i64
705 return DAG.getBitcast(ValLoc, ValArg);
706 }
707
708 return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
709}
710
711/// Breaks v64i1 value into two registers and adds the new node to the DAG
713 const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
714 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
715 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
716 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
717 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
718 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
719 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
720 "The value should reside in two registers");
721
722 // Before splitting the value we cast it to i64
723 Arg = DAG.getBitcast(MVT::i64, Arg);
724
725 // Splitting the value into two i32 types
726 SDValue Lo, Hi;
727 std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
728
729 // Attach the two i32 types into corresponding registers
730 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
731 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
732}
733
735X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
736 bool isVarArg,
738 const SmallVectorImpl<SDValue> &OutVals,
739 const SDLoc &dl, SelectionDAG &DAG) const {
742
743 // In some cases we need to disable registers from the default CSR list.
744 // For example, when they are used as return registers (preserve_* and X86's
745 // regcall) or for argument passing (X86's regcall).
746 bool ShouldDisableCalleeSavedRegister =
747 shouldDisableRetRegFromCSR(CallConv) ||
748 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
749
750 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
751 report_fatal_error("X86 interrupts may not return any value");
752
754 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
755 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
756
758 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
759 ++I, ++OutsIndex) {
760 CCValAssign &VA = RVLocs[I];
761 assert(VA.isRegLoc() && "Can only return in registers!");
762
763 // Add the register to the CalleeSaveDisableRegs list.
764 if (ShouldDisableCalleeSavedRegister)
766
767 SDValue ValToCopy = OutVals[OutsIndex];
768 EVT ValVT = ValToCopy.getValueType();
769
770 // Promote values to the appropriate types.
771 if (VA.getLocInfo() == CCValAssign::SExt)
772 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
773 else if (VA.getLocInfo() == CCValAssign::ZExt)
774 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
775 else if (VA.getLocInfo() == CCValAssign::AExt) {
776 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
777 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
778 else
779 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
780 }
781 else if (VA.getLocInfo() == CCValAssign::BCvt)
782 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
783
785 "Unexpected FP-extend for return value.");
786
787 // Report an error if we have attempted to return a value via an XMM
788 // register and SSE was disabled.
789 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
790 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
791 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
792 } else if (!Subtarget.hasSSE2() &&
793 X86::FR64XRegClass.contains(VA.getLocReg()) &&
794 ValVT == MVT::f64) {
795 // When returning a double via an XMM register, report an error if SSE2 is
796 // not enabled.
797 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
798 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
799 }
800
801 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
802 // the RET instruction and handled by the FP Stackifier.
803 if (VA.getLocReg() == X86::FP0 ||
804 VA.getLocReg() == X86::FP1) {
805 // If this is a copy from an xmm register to ST(0), use an FPExtend to
806 // change the value to the FP stack register class.
808 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
809 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
810 // Don't emit a copytoreg.
811 continue;
812 }
813
814 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
815 // which is returned in RAX / RDX.
816 if (Subtarget.is64Bit()) {
817 if (ValVT == MVT::x86mmx) {
818 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
819 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
820 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
821 ValToCopy);
822 // If we don't have SSE2 available, convert to v4f32 so the generated
823 // register is legal.
824 if (!Subtarget.hasSSE2())
825 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
826 }
827 }
828 }
829
830 if (VA.needsCustom()) {
831 assert(VA.getValVT() == MVT::v64i1 &&
832 "Currently the only custom case is when we split v64i1 to 2 regs");
833
834 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
835 Subtarget);
836
837 // Add the second register to the CalleeSaveDisableRegs list.
838 if (ShouldDisableCalleeSavedRegister)
839 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
840 } else {
841 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
842 }
843 }
844
845 SDValue Glue;
847 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
848 // Operand #1 = Bytes To Pop
849 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
850 MVT::i32));
851
852 // Copy the result values into the output registers.
853 for (auto &RetVal : RetVals) {
854 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
855 RetOps.push_back(RetVal.second);
856 continue; // Don't emit a copytoreg.
857 }
858
859 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
860 Glue = Chain.getValue(1);
861 RetOps.push_back(
862 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
863 }
864
865 // Swift calling convention does not require we copy the sret argument
866 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
867
868 // All x86 ABIs require that for returning structs by value we copy
869 // the sret argument into %rax/%eax (depending on ABI) for the return.
870 // We saved the argument into a virtual register in the entry block,
871 // so now we copy the value out and into %rax/%eax.
872 //
873 // Checking Function.hasStructRetAttr() here is insufficient because the IR
874 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
875 // false, then an sret argument may be implicitly inserted in the SelDAG. In
876 // either case FuncInfo->setSRetReturnReg() will have been called.
877 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
878 // When we have both sret and another return value, we should use the
879 // original Chain stored in RetOps[0], instead of the current Chain updated
880 // in the above loop. If we only have sret, RetOps[0] equals Chain.
881
882 // For the case of sret and another return value, we have
883 // Chain_0 at the function entry
884 // Chain_1 = getCopyToReg(Chain_0) in the above loop
885 // If we use Chain_1 in getCopyFromReg, we will have
886 // Val = getCopyFromReg(Chain_1)
887 // Chain_2 = getCopyToReg(Chain_1, Val) from below
888
889 // getCopyToReg(Chain_0) will be glued together with
890 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
891 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
892 // Data dependency from Unit B to Unit A due to usage of Val in
893 // getCopyToReg(Chain_1, Val)
894 // Chain dependency from Unit A to Unit B
895
896 // So here, we use RetOps[0] (i.e. Chain_0) for getCopyFromReg.
897 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
899
900 Register RetValReg
901 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
902 X86::RAX : X86::EAX;
903 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
904 Glue = Chain.getValue(1);
905
906 // RAX/EAX now acts like a return value.
907 RetOps.push_back(
908 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
909
910 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
911 // this however for preserve_most/preserve_all to minimize the number of
912 // callee-saved registers for these CCs.
913 if (ShouldDisableCalleeSavedRegister &&
914 CallConv != CallingConv::PreserveAll &&
915 CallConv != CallingConv::PreserveMost)
917 }
918
919 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
920 const MCPhysReg *I =
921 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
922 if (I) {
923 for (; *I; ++I) {
924 if (X86::GR64RegClass.contains(*I))
925 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
926 else
927 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
928 }
929 }
930
931 RetOps[0] = Chain; // Update chain.
932
933 // Add the glue if we have it.
934 if (Glue.getNode())
935 RetOps.push_back(Glue);
936
938 if (CallConv == CallingConv::X86_INTR)
939 opcode = X86ISD::IRET;
940 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
941}
942
943bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
944 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
945 return false;
946
947 SDValue TCChain = Chain;
948 SDNode *Copy = *N->use_begin();
949 if (Copy->getOpcode() == ISD::CopyToReg) {
950 // If the copy has a glue operand, we conservatively assume it isn't safe to
951 // perform a tail call.
952 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
953 return false;
954 TCChain = Copy->getOperand(0);
955 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
956 return false;
957
958 bool HasRet = false;
959 for (const SDNode *U : Copy->uses()) {
960 if (U->getOpcode() != X86ISD::RET_GLUE)
961 return false;
962 // If we are returning more than one value, we can definitely
963 // not make a tail call see PR19530
964 if (U->getNumOperands() > 4)
965 return false;
966 if (U->getNumOperands() == 4 &&
967 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
968 return false;
969 HasRet = true;
970 }
971
972 if (!HasRet)
973 return false;
974
975 Chain = TCChain;
976 return true;
977}
978
979EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
980 ISD::NodeType ExtendKind) const {
981 MVT ReturnMVT = MVT::i32;
982
983 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
984 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
985 // The ABI does not require i1, i8 or i16 to be extended.
986 //
987 // On Darwin, there is code in the wild relying on Clang's old behaviour of
988 // always extending i8/i16 return values, so keep doing that for now.
989 // (PR26665).
990 ReturnMVT = MVT::i8;
991 }
992
993 EVT MinVT = getRegisterType(Context, ReturnMVT);
994 return VT.bitsLT(MinVT) ? MinVT : VT;
995}
996
997/// Reads two 32 bit registers and creates a 64 bit mask value.
998/// \param VA The current 32 bit value that need to be assigned.
999/// \param NextVA The next 32 bit value that need to be assigned.
1000/// \param Root The parent DAG node.
1001/// \param [in,out] InGlue Represents SDvalue in the parent DAG node for
1002/// glue purposes. In the case the DAG is already using
1003/// physical register instead of virtual, we should glue
1004/// our new SDValue to InGlue SDvalue.
1005/// \return a new SDvalue of size 64bit.
1007 SDValue &Root, SelectionDAG &DAG,
1008 const SDLoc &DL, const X86Subtarget &Subtarget,
1009 SDValue *InGlue = nullptr) {
1010 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1011 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1012 assert(VA.getValVT() == MVT::v64i1 &&
1013 "Expecting first location of 64 bit width type");
1014 assert(NextVA.getValVT() == VA.getValVT() &&
1015 "The locations should have the same type");
1016 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1017 "The values should reside in two registers");
1018
1019 SDValue Lo, Hi;
1020 SDValue ArgValueLo, ArgValueHi;
1021
1023 const TargetRegisterClass *RC = &X86::GR32RegClass;
1024
1025 // Read a 32 bit value from the registers.
1026 if (nullptr == InGlue) {
1027 // When no physical register is present,
1028 // create an intermediate virtual register.
1029 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1030 ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1031 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1032 ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1033 } else {
1034 // When a physical register is available read the value from it and glue
1035 // the reads together.
1036 ArgValueLo =
1037 DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1038 *InGlue = ArgValueLo.getValue(2);
1039 ArgValueHi =
1040 DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1041 *InGlue = ArgValueHi.getValue(2);
1042 }
1043
1044 // Convert the i32 type into v32i1 type.
1045 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1046
1047 // Convert the i32 type into v32i1 type.
1048 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1049
1050 // Concatenate the two values together.
1051 return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1052}
1053
1054/// The function will lower a register of various sizes (8/16/32/64)
1055/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1)
1056/// \returns a DAG node contains the operand after lowering to mask type.
1057static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1058 const EVT &ValLoc, const SDLoc &DL,
1059 SelectionDAG &DAG) {
1060 SDValue ValReturned = ValArg;
1061
1062 if (ValVT == MVT::v1i1)
1063 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1064
1065 if (ValVT == MVT::v64i1) {
1066 // In 32 bit machine, this case is handled by getv64i1Argument
1067 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1068 // In 64 bit machine, There is no need to truncate the value only bitcast
1069 } else {
1070 MVT MaskLenVT;
1071 switch (ValVT.getSimpleVT().SimpleTy) {
1072 case MVT::v8i1:
1073 MaskLenVT = MVT::i8;
1074 break;
1075 case MVT::v16i1:
1076 MaskLenVT = MVT::i16;
1077 break;
1078 case MVT::v32i1:
1079 MaskLenVT = MVT::i32;
1080 break;
1081 default:
1082 llvm_unreachable("Expecting a vector of i1 types");
1083 }
1084
1085 ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1086 }
1087 return DAG.getBitcast(ValVT, ValReturned);
1088}
1089
1090/// Lower the result values of a call into the
1091/// appropriate copies out of appropriate physical registers.
1092///
1093SDValue X86TargetLowering::LowerCallResult(
1094 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1095 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1097 uint32_t *RegMask) const {
1098
1099 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1100 // Assign locations to each value returned by this call.
1102 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1103 *DAG.getContext());
1104 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1105
1106 // Copy all of the result registers out of their specified physreg.
1107 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1108 ++I, ++InsIndex) {
1109 CCValAssign &VA = RVLocs[I];
1110 EVT CopyVT = VA.getLocVT();
1111
1112 // In some calling conventions we need to remove the used registers
1113 // from the register mask.
1114 if (RegMask) {
1115 for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1116 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1117 }
1118
1119 // Report an error if there was an attempt to return FP values via XMM
1120 // registers.
1121 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1122 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
1123 if (VA.getLocReg() == X86::XMM1)
1124 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1125 else
1126 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1127 } else if (!Subtarget.hasSSE2() &&
1128 X86::FR64XRegClass.contains(VA.getLocReg()) &&
1129 CopyVT == MVT::f64) {
1130 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
1131 if (VA.getLocReg() == X86::XMM1)
1132 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1133 else
1134 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1135 }
1136
1137 // If we prefer to use the value in xmm registers, copy it out as f80 and
1138 // use a truncate to move it from fp stack reg to xmm reg.
1139 bool RoundAfterCopy = false;
1140 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
1142 if (!Subtarget.hasX87())
1143 report_fatal_error("X87 register return with X87 disabled");
1144 CopyVT = MVT::f80;
1145 RoundAfterCopy = (CopyVT != VA.getLocVT());
1146 }
1147
1148 SDValue Val;
1149 if (VA.needsCustom()) {
1150 assert(VA.getValVT() == MVT::v64i1 &&
1151 "Currently the only custom case is when we split v64i1 to 2 regs");
1152 Val =
1153 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
1154 } else {
1155 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1156 .getValue(1);
1157 Val = Chain.getValue(0);
1158 InGlue = Chain.getValue(2);
1159 }
1160
1161 if (RoundAfterCopy)
1162 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
1163 // This truncation won't change the value.
1164 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1165
1166 if (VA.isExtInLoc()) {
1167 if (VA.getValVT().isVector() &&
1168 VA.getValVT().getScalarType() == MVT::i1 &&
1169 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1170 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1171 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1172 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1173 } else
1174 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1175 }
1176
1177 if (VA.getLocInfo() == CCValAssign::BCvt)
1178 Val = DAG.getBitcast(VA.getValVT(), Val);
1179
1180 InVals.push_back(Val);
1181 }
1182
1183 return Chain;
1184}
1185
1186//===----------------------------------------------------------------------===//
1187// C & StdCall & Fast Calling Convention implementation
1188//===----------------------------------------------------------------------===//
1189// StdCall calling convention seems to be standard for many Windows' API
1190// routines and around. It differs from C calling convention just a little:
1191// callee should clean up the stack, not caller. Symbols should be also
1192// decorated in some fancy way :) It doesn't support any vector arguments.
1193// For info on fast calling convention see Fast Calling Convention (tail call)
1194// implementation LowerX86_32FastCCCallTo.
1195
1196/// Determines whether Args, either a set of outgoing arguments to a call, or a
1197/// set of incoming args of a call, contains an sret pointer that the callee
1198/// pops
1199template <typename T>
1200static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1201 const X86Subtarget &Subtarget) {
1202 // Not C++20 (yet), so no concepts available.
1203 static_assert(std::is_same_v<T, ISD::OutputArg> ||
1204 std::is_same_v<T, ISD::InputArg>,
1205 "requires ISD::OutputArg or ISD::InputArg");
1206
1207 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
1208 // for most compilations.
1209 if (!Subtarget.is32Bit())
1210 return false;
1211
1212 if (Args.empty())
1213 return false;
1214
1215 // Most calls do not have an sret argument, check the arg next.
1216 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1217 if (!Flags.isSRet() || Flags.isInReg())
1218 return false;
1219
1220 // The MSVCabi does not pop the sret.
1221 if (Subtarget.getTargetTriple().isOSMSVCRT())
1222 return false;
1223
1224 // MCUs don't pop the sret
1225 if (Subtarget.isTargetMCU())
1226 return false;
1227
1228 // Callee pops argument
1229 return true;
1230}
1231
1232/// Make a copy of an aggregate at address specified by "Src" to address
1233/// "Dst" with size and alignment information specified by the specific
1234/// parameter attribute. The copy will be passed as a byval function parameter.
1236 SDValue Chain, ISD::ArgFlagsTy Flags,
1237 SelectionDAG &DAG, const SDLoc &dl) {
1238 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1239
1240 return DAG.getMemcpy(
1241 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
1242 /*isVolatile*/ false, /*AlwaysInline=*/true,
1243 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
1244}
1245
1246/// Return true if the calling convention is one that we can guarantee TCO for.
1248 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1251}
1252
1253/// Return true if we might ever do TCO for calls with this calling convention.
1255 switch (CC) {
1256 // C calling conventions:
1257 case CallingConv::C:
1258 case CallingConv::Win64:
1261 // Callee pop conventions:
1266 // Swift:
1267 case CallingConv::Swift:
1268 return true;
1269 default:
1270 return canGuaranteeTCO(CC);
1271 }
1272}
1273
1274/// Return true if the function is being made into a tailcall target by
1275/// changing its ABI.
1276static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1277 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1279}
1280
1281bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1282 if (!CI->isTailCall())
1283 return false;
1284
1285 CallingConv::ID CalleeCC = CI->getCallingConv();
1286 if (!mayTailCallThisCC(CalleeCC))
1287 return false;
1288
1289 return true;
1290}
1291
1292SDValue
1293X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1295 const SDLoc &dl, SelectionDAG &DAG,
1296 const CCValAssign &VA,
1297 MachineFrameInfo &MFI, unsigned i) const {
1298 // Create the nodes corresponding to a load from this parameter slot.
1299 ISD::ArgFlagsTy Flags = Ins[i].Flags;
1300 bool AlwaysUseMutable = shouldGuaranteeTCO(
1301 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1302 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1303 EVT ValVT;
1304 MVT PtrVT = getPointerTy(DAG.getDataLayout());
1305
1306 // If value is passed by pointer we have address passed instead of the value
1307 // itself. No need to extend if the mask value and location share the same
1308 // absolute size.
1309 bool ExtendedInMem =
1310 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1312
1313 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1314 ValVT = VA.getLocVT();
1315 else
1316 ValVT = VA.getValVT();
1317
1318 // FIXME: For now, all byval parameter objects are marked mutable. This can be
1319 // changed with more analysis.
1320 // In case of tail call optimization mark all arguments mutable. Since they
1321 // could be overwritten by lowering of arguments in case of a tail call.
1322 if (Flags.isByVal()) {
1323 unsigned Bytes = Flags.getByValSize();
1324 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1325
1326 // FIXME: For now, all byval parameter objects are marked as aliasing. This
1327 // can be improved with deeper analysis.
1328 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
1329 /*isAliased=*/true);
1330 return DAG.getFrameIndex(FI, PtrVT);
1331 }
1332
1333 EVT ArgVT = Ins[i].ArgVT;
1334
1335 // If this is a vector that has been split into multiple parts, don't elide
1336 // the copy. The layout on the stack may not match the packed in-memory
1337 // layout.
1338 bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1339
1340 // This is an argument in memory. We might be able to perform copy elision.
1341 // If the argument is passed directly in memory without any extension, then we
1342 // can perform copy elision. Large vector types, for example, may be passed
1343 // indirectly by pointer.
1344 if (Flags.isCopyElisionCandidate() &&
1345 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1346 !ScalarizedVector) {
1347 SDValue PartAddr;
1348 if (Ins[i].PartOffset == 0) {
1349 // If this is a one-part value or the first part of a multi-part value,
1350 // create a stack object for the entire argument value type and return a
1351 // load from our portion of it. This assumes that if the first part of an
1352 // argument is in memory, the rest will also be in memory.
1353 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
1354 /*IsImmutable=*/false);
1355 PartAddr = DAG.getFrameIndex(FI, PtrVT);
1356 return DAG.getLoad(
1357 ValVT, dl, Chain, PartAddr,
1359 }
1360
1361 // This is not the first piece of an argument in memory. See if there is
1362 // already a fixed stack object including this offset. If so, assume it
1363 // was created by the PartOffset == 0 branch above and create a load from
1364 // the appropriate offset into it.
1365 int64_t PartBegin = VA.getLocMemOffset();
1366 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1367 int FI = MFI.getObjectIndexBegin();
1368 for (; MFI.isFixedObjectIndex(FI); ++FI) {
1369 int64_t ObjBegin = MFI.getObjectOffset(FI);
1370 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
1371 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1372 break;
1373 }
1374 if (MFI.isFixedObjectIndex(FI)) {
1375 SDValue Addr =
1376 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
1377 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
1378 return DAG.getLoad(ValVT, dl, Chain, Addr,
1380 DAG.getMachineFunction(), FI, Ins[i].PartOffset));
1381 }
1382 }
1383
1384 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1385 VA.getLocMemOffset(), isImmutable);
1386
1387 // Set SExt or ZExt flag.
1388 if (VA.getLocInfo() == CCValAssign::ZExt) {
1389 MFI.setObjectZExt(FI, true);
1390 } else if (VA.getLocInfo() == CCValAssign::SExt) {
1391 MFI.setObjectSExt(FI, true);
1392 }
1393
1394 MaybeAlign Alignment;
1395 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1396 ValVT != MVT::f80)
1397 Alignment = MaybeAlign(4);
1398 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1399 SDValue Val = DAG.getLoad(
1400 ValVT, dl, Chain, FIN,
1402 Alignment);
1403 return ExtendedInMem
1404 ? (VA.getValVT().isVector()
1405 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
1406 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
1407 : Val;
1408}
1409
1410// FIXME: Get this from tablegen.
1412 const X86Subtarget &Subtarget) {
1413 assert(Subtarget.is64Bit());
1414
1415 if (Subtarget.isCallingConvWin64(CallConv)) {
1416 static const MCPhysReg GPR64ArgRegsWin64[] = {
1417 X86::RCX, X86::RDX, X86::R8, X86::R9
1418 };
1419 return ArrayRef(std::begin(GPR64ArgRegsWin64), std::end(GPR64ArgRegsWin64));
1420 }
1421
1422 static const MCPhysReg GPR64ArgRegs64Bit[] = {
1423 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1424 };
1425 return ArrayRef(std::begin(GPR64ArgRegs64Bit), std::end(GPR64ArgRegs64Bit));
1426}
1427
1428// FIXME: Get this from tablegen.
1430 CallingConv::ID CallConv,
1431 const X86Subtarget &Subtarget) {
1432 assert(Subtarget.is64Bit());
1433 if (Subtarget.isCallingConvWin64(CallConv)) {
1434 // The XMM registers which might contain var arg parameters are shadowed
1435 // in their paired GPR. So we only need to save the GPR to their home
1436 // slots.
1437 // TODO: __vectorcall will change this.
1438 return std::nullopt;
1439 }
1440
1441 bool isSoftFloat = Subtarget.useSoftFloat();
1442 if (isSoftFloat || !Subtarget.hasSSE1())
1443 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1444 // registers.
1445 return std::nullopt;
1446
1447 static const MCPhysReg XMMArgRegs64Bit[] = {
1448 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1449 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1450 };
1451 return ArrayRef(std::begin(XMMArgRegs64Bit), std::end(XMMArgRegs64Bit));
1452}
1453
1454#ifndef NDEBUG
1456 return llvm::is_sorted(
1457 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
1458 return A.getValNo() < B.getValNo();
1459 });
1460}
1461#endif
1462
1463namespace {
1464/// This is a helper class for lowering variable arguments parameters.
1465class VarArgsLoweringHelper {
1466public:
1467 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1468 SelectionDAG &DAG, const X86Subtarget &Subtarget,
1469 CallingConv::ID CallConv, CCState &CCInfo)
1470 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1471 TheMachineFunction(DAG.getMachineFunction()),
1472 TheFunction(TheMachineFunction.getFunction()),
1473 FrameInfo(TheMachineFunction.getFrameInfo()),
1474 FrameLowering(*Subtarget.getFrameLowering()),
1475 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1476 CCInfo(CCInfo) {}
1477
1478 // Lower variable arguments parameters.
1479 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1480
1481private:
1482 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1483
1484 void forwardMustTailParameters(SDValue &Chain);
1485
1486 bool is64Bit() const { return Subtarget.is64Bit(); }
1487 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
1488
1489 X86MachineFunctionInfo *FuncInfo;
1490 const SDLoc &DL;
1491 SelectionDAG &DAG;
1492 const X86Subtarget &Subtarget;
1493 MachineFunction &TheMachineFunction;
1494 const Function &TheFunction;
1495 MachineFrameInfo &FrameInfo;
1496 const TargetFrameLowering &FrameLowering;
1497 const TargetLowering &TargLowering;
1498 CallingConv::ID CallConv;
1499 CCState &CCInfo;
1500};
1501} // namespace
1502
1503void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1504 SDValue &Chain, unsigned StackSize) {
1505 // If the function takes variable number of arguments, make a frame index for
1506 // the start of the first vararg value... for expansion of llvm.va_start. We
1507 // can skip this if there are no va_start calls.
1508 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1509 CallConv != CallingConv::X86_ThisCall)) {
1510 FuncInfo->setVarArgsFrameIndex(
1511 FrameInfo.CreateFixedObject(1, StackSize, true));
1512 }
1513
1514 // 64-bit calling conventions support varargs and register parameters, so we
1515 // have to do extra work to spill them in the prologue.
1516 if (is64Bit()) {
1517 // Find the first unallocated argument registers.
1518 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1519 ArrayRef<MCPhysReg> ArgXMMs =
1520 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
1521 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
1522 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
1523
1524 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1525 "SSE register cannot be used when SSE is disabled!");
1526
1527 if (isWin64()) {
1528 // Get to the caller-allocated home save location. Add 8 to account
1529 // for the return address.
1530 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1531 FuncInfo->setRegSaveFrameIndex(
1532 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
1533 // Fixup to set vararg frame on shadow area (4 x i64).
1534 if (NumIntRegs < 4)
1535 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1536 } else {
1537 // For X86-64, if there are vararg parameters that are passed via
1538 // registers, then we must store them to their spots on the stack so
1539 // they may be loaded by dereferencing the result of va_next.
1540 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1541 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1542 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1543 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
1544 }
1545
1547 LiveGPRs; // list of SDValue for GPR registers keeping live input value
1548 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
1549 // keeping live input value
1550 SDValue ALVal; // if applicable keeps SDValue for %al register
1551
1552 // Gather all the live in physical registers.
1553 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
1554 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1555 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1556 }
1557 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
1558 if (!AvailableXmms.empty()) {
1559 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1560 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1561 for (MCPhysReg Reg : AvailableXmms) {
1562 // FastRegisterAllocator spills virtual registers at basic
1563 // block boundary. That leads to usages of xmm registers
1564 // outside of check for %al. Pass physical registers to
1565 // VASTART_SAVE_XMM_REGS to avoid unneccessary spilling.
1566 TheMachineFunction.getRegInfo().addLiveIn(Reg);
1567 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1568 }
1569 }
1570
1571 // Store the integer parameter registers.
1573 SDValue RSFIN =
1574 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
1575 TargLowering.getPointerTy(DAG.getDataLayout()));
1576 unsigned Offset = FuncInfo->getVarArgsGPOffset();
1577 for (SDValue Val : LiveGPRs) {
1578 SDValue FIN = DAG.getNode(ISD::ADD, DL,
1579 TargLowering.getPointerTy(DAG.getDataLayout()),
1580 RSFIN, DAG.getIntPtrConstant(Offset, DL));
1581 SDValue Store =
1582 DAG.getStore(Val.getValue(1), DL, Val, FIN,
1584 DAG.getMachineFunction(),
1585 FuncInfo->getRegSaveFrameIndex(), Offset));
1586 MemOps.push_back(Store);
1587 Offset += 8;
1588 }
1589
1590 // Now store the XMM (fp + vector) parameter registers.
1591 if (!LiveXMMRegs.empty()) {
1592 SmallVector<SDValue, 12> SaveXMMOps;
1593 SaveXMMOps.push_back(Chain);
1594 SaveXMMOps.push_back(ALVal);
1595 SaveXMMOps.push_back(RSFIN);
1596 SaveXMMOps.push_back(
1597 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1598 llvm::append_range(SaveXMMOps, LiveXMMRegs);
1599 MachineMemOperand *StoreMMO =
1602 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
1603 Offset),
1606 DL, DAG.getVTList(MVT::Other),
1607 SaveXMMOps, MVT::i8, StoreMMO));
1608 }
1609
1610 if (!MemOps.empty())
1611 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1612 }
1613}
1614
1615void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1616 // Find the largest legal vector type.
1617 MVT VecVT = MVT::Other;
1618 // FIXME: Only some x86_32 calling conventions support AVX512.
1619 if (Subtarget.useAVX512Regs() &&
1620 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1621 CallConv == CallingConv::Intel_OCL_BI)))
1622 VecVT = MVT::v16f32;
1623 else if (Subtarget.hasAVX())
1624 VecVT = MVT::v8f32;
1625 else if (Subtarget.hasSSE2())
1626 VecVT = MVT::v4f32;
1627
1628 // We forward some GPRs and some vector types.
1629 SmallVector<MVT, 2> RegParmTypes;
1630 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1631 RegParmTypes.push_back(IntVT);
1632 if (VecVT != MVT::Other)
1633 RegParmTypes.push_back(VecVT);
1634
1635 // Compute the set of forwarded registers. The rest are scratch.
1637 FuncInfo->getForwardedMustTailRegParms();
1638 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
1639
1640 // Forward AL for SysV x86_64 targets, since it is used for varargs.
1641 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1642 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1643 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1644 }
1645
1646 // Copy all forwards from physical to virtual registers.
1647 for (ForwardedRegister &FR : Forwards) {
1648 // FIXME: Can we use a less constrained schedule?
1649 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
1650 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1651 TargLowering.getRegClassFor(FR.VT));
1652 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
1653 }
1654}
1655
1656void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1657 unsigned StackSize) {
1658 // Set FrameIndex to the 0xAAAAAAA value to mark unset state.
1659 // If necessary, it would be set into the correct value later.
1660 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1661 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1662
1663 if (FrameInfo.hasVAStart())
1664 createVarArgAreaAndStoreRegisters(Chain, StackSize);
1665
1666 if (FrameInfo.hasMustTailInVarArgFunc())
1667 forwardMustTailParameters(Chain);
1668}
1669
1670SDValue X86TargetLowering::LowerFormalArguments(
1671 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1672 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1673 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1676
1677 const Function &F = MF.getFunction();
1678 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1679 F.getName() == "main")
1680 FuncInfo->setForceFramePointer(true);
1681
1682 MachineFrameInfo &MFI = MF.getFrameInfo();
1683 bool Is64Bit = Subtarget.is64Bit();
1684 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
1685
1686 assert(
1687 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
1688 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
1689
1690 // Assign locations to all of the incoming arguments.
1692 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1693
1694 // Allocate shadow area for Win64.
1695 if (IsWin64)
1696 CCInfo.AllocateStack(32, Align(8));
1697
1698 CCInfo.AnalyzeArguments(Ins, CC_X86);
1699
1700 // In vectorcall calling convention a second pass is required for the HVA
1701 // types.
1702 if (CallingConv::X86_VectorCall == CallConv) {
1703 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
1704 }
1705
1706 // The next loop assumes that the locations are in the same order of the
1707 // input arguments.
1708 assert(isSortedByValueNo(ArgLocs) &&
1709 "Argument Location list must be sorted before lowering");
1710
1711 SDValue ArgValue;
1712 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1713 ++I, ++InsIndex) {
1714 assert(InsIndex < Ins.size() && "Invalid Ins index");
1715 CCValAssign &VA = ArgLocs[I];
1716
1717 if (VA.isRegLoc()) {
1718 EVT RegVT = VA.getLocVT();
1719 if (VA.needsCustom()) {
1720 assert(
1721 VA.getValVT() == MVT::v64i1 &&
1722 "Currently the only custom case is when we split v64i1 to 2 regs");
1723
1724 // v64i1 values, in regcall calling convention, that are
1725 // compiled to 32 bit arch, are split up into two registers.
1726 ArgValue =
1727 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
1728 } else {
1729 const TargetRegisterClass *RC;
1730 if (RegVT == MVT::i8)
1731 RC = &X86::GR8RegClass;
1732 else if (RegVT == MVT::i16)
1733 RC = &X86::GR16RegClass;
1734 else if (RegVT == MVT::i32)
1735 RC = &X86::GR32RegClass;
1736 else if (Is64Bit && RegVT == MVT::i64)
1737 RC = &X86::GR64RegClass;
1738 else if (RegVT == MVT::f16)
1739 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1740 else if (RegVT == MVT::f32)
1741 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1742 else if (RegVT == MVT::f64)
1743 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1744 else if (RegVT == MVT::f80)
1745 RC = &X86::RFP80RegClass;
1746 else if (RegVT == MVT::f128)
1747 RC = &X86::VR128RegClass;
1748 else if (RegVT.is512BitVector())
1749 RC = &X86::VR512RegClass;
1750 else if (RegVT.is256BitVector())
1751 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1752 else if (RegVT.is128BitVector())
1753 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1754 else if (RegVT == MVT::x86mmx)
1755 RC = &X86::VR64RegClass;
1756 else if (RegVT == MVT::v1i1)
1757 RC = &X86::VK1RegClass;
1758 else if (RegVT == MVT::v8i1)
1759 RC = &X86::VK8RegClass;
1760 else if (RegVT == MVT::v16i1)
1761 RC = &X86::VK16RegClass;
1762 else if (RegVT == MVT::v32i1)
1763 RC = &X86::VK32RegClass;
1764 else if (RegVT == MVT::v64i1)
1765 RC = &X86::VK64RegClass;
1766 else
1767 llvm_unreachable("Unknown argument type!");
1768
1769 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1770 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1771 }
1772
1773 // If this is an 8 or 16-bit value, it is really passed promoted to 32
1774 // bits. Insert an assert[sz]ext to capture this, then truncate to the
1775 // right size.
1776 if (VA.getLocInfo() == CCValAssign::SExt)
1777 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1778 DAG.getValueType(VA.getValVT()));
1779 else if (VA.getLocInfo() == CCValAssign::ZExt)
1780 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1781 DAG.getValueType(VA.getValVT()));
1782 else if (VA.getLocInfo() == CCValAssign::BCvt)
1783 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
1784
1785 if (VA.isExtInLoc()) {
1786 // Handle MMX values passed in XMM regs.
1787 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1788 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
1789 else if (VA.getValVT().isVector() &&
1790 VA.getValVT().getScalarType() == MVT::i1 &&
1791 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1792 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1793 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1794 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
1795 } else
1796 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1797 }
1798 } else {
1799 assert(VA.isMemLoc());
1800 ArgValue =
1801 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
1802 }
1803
1804 // If value is passed via pointer - do a load.
1805 if (VA.getLocInfo() == CCValAssign::Indirect &&
1806 !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1807 ArgValue =
1808 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
1809 }
1810
1811 InVals.push_back(ArgValue);
1812 }
1813
1814 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1815 if (Ins[I].Flags.isSwiftAsync()) {
1816 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1817 if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1818 X86FI->setHasSwiftAsyncContext(true);
1819 else {
1820 int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1821 int FI =
1822 MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
1823 X86FI->setSwiftAsyncContextFrameIdx(FI);
1824 SDValue St = DAG.getStore(
1825 DAG.getEntryNode(), dl, InVals[I],
1826 DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1828 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1829 }
1830 }
1831
1832 // Swift calling convention does not require we copy the sret argument
1833 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1834 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1835 continue;
1836
1837 // All x86 ABIs require that for returning structs by value we copy the
1838 // sret argument into %rax/%eax (depending on ABI) for the return. Save
1839 // the argument into a virtual register so that we can access it from the
1840 // return points.
1841 if (Ins[I].Flags.isSRet()) {
1842 assert(!FuncInfo->getSRetReturnReg() &&
1843 "SRet return has already been set");
1844 MVT PtrTy = getPointerTy(DAG.getDataLayout());
1845 Register Reg =
1847 FuncInfo->setSRetReturnReg(Reg);
1848 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
1849 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1850 break;
1851 }
1852 }
1853
1854 unsigned StackSize = CCInfo.getStackSize();
1855 // Align stack specially for tail calls.
1856 if (shouldGuaranteeTCO(CallConv,
1858 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1859
1860 if (IsVarArg)
1861 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1862 .lowerVarArgsParameters(Chain, StackSize);
1863
1864 // Some CCs need callee pop.
1865 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
1867 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1868 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1869 // X86 interrupts must pop the error code (and the alignment padding) if
1870 // present.
1871 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1872 } else {
1873 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1874 // If this is an sret function, the return should pop the hidden pointer.
1875 if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
1876 FuncInfo->setBytesToPopOnReturn(4);
1877 }
1878
1879 if (!Is64Bit) {
1880 // RegSaveFrameIndex is X86-64 only.
1881 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1882 }
1883
1884 FuncInfo->setArgumentStackSize(StackSize);
1885
1886 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1887 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
1888 if (Personality == EHPersonality::CoreCLR) {
1889 assert(Is64Bit);
1890 // TODO: Add a mechanism to frame lowering that will allow us to indicate
1891 // that we'd prefer this slot be allocated towards the bottom of the frame
1892 // (i.e. near the stack pointer after allocating the frame). Every
1893 // funclet needs a copy of this slot in its (mostly empty) frame, and the
1894 // offset from the bottom of this and each funclet's frame must be the
1895 // same, so the size of funclets' (mostly empty) frames is dictated by
1896 // how far this slot is from the bottom (since they allocate just enough
1897 // space to accommodate holding this slot at the correct offset).
1898 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
1899 EHInfo->PSPSymFrameIdx = PSPSymFI;
1900 }
1901 }
1902
1903 if (shouldDisableArgRegFromCSR(CallConv) ||
1904 F.hasFnAttribute("no_caller_saved_registers")) {
1906 for (std::pair<Register, Register> Pair : MRI.liveins())
1907 MRI.disableCalleeSavedRegister(Pair.first);
1908 }
1909
1910 if (CallingConv::PreserveNone == CallConv)
1911 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1912 if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() ||
1913 Ins[I].Flags.isSwiftError()) {
1914 errorUnsupported(DAG, dl,
1915 "Swift attributes can't be used with preserve_none");
1916 break;
1917 }
1918 }
1919
1920 return Chain;
1921}
1922
1923SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1924 SDValue Arg, const SDLoc &dl,
1925 SelectionDAG &DAG,
1926 const CCValAssign &VA,
1927 ISD::ArgFlagsTy Flags,
1928 bool isByVal) const {
1929 unsigned LocMemOffset = VA.getLocMemOffset();
1930 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1931 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1932 StackPtr, PtrOff);
1933 if (isByVal)
1934 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1935
1936 MaybeAlign Alignment;
1937 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1938 Arg.getSimpleValueType() != MVT::f80)
1939 Alignment = MaybeAlign(4);
1940 return DAG.getStore(
1941 Chain, dl, Arg, PtrOff,
1943 Alignment);
1944}
1945
1946/// Emit a load of return address if tail call
1947/// optimization is performed and it is required.
1948SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1949 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1950 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1951 // Adjust the Return address stack slot.
1952 EVT VT = getPointerTy(DAG.getDataLayout());
1953 OutRetAddr = getReturnAddressFrameIndex(DAG);
1954
1955 // Load the "old" Return address.
1956 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
1957 return SDValue(OutRetAddr.getNode(), 1);
1958}
1959
1960/// Emit a store of the return address if tail call
1961/// optimization is performed and it is required (FPDiff!=0).
1963 SDValue Chain, SDValue RetAddrFrIdx,
1964 EVT PtrVT, unsigned SlotSize,
1965 int FPDiff, const SDLoc &dl) {
1966 // Store the return address to the appropriate stack slot.
1967 if (!FPDiff) return Chain;
1968 // Calculate the new stack slot for the return address.
1969 int NewReturnAddrFI =
1970 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
1971 false);
1972 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
1973 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
1975 DAG.getMachineFunction(), NewReturnAddrFI));
1976 return Chain;
1977}
1978
1979/// Returns a vector_shuffle mask for an movs{s|d}, movd
1980/// operation of specified width.
1981SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
1982 SDValue V1, SDValue V2) const {
1983 unsigned NumElems = VT.getVectorNumElements();
1985 Mask.push_back(NumElems);
1986 for (unsigned i = 1; i != NumElems; ++i)
1987 Mask.push_back(i);
1988 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
1989}
1990
1991SDValue
1992X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
1993 SmallVectorImpl<SDValue> &InVals) const {
1994 SelectionDAG &DAG = CLI.DAG;
1995 SDLoc &dl = CLI.DL;
1997 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
1999 SDValue Chain = CLI.Chain;
2000 SDValue Callee = CLI.Callee;
2001 CallingConv::ID CallConv = CLI.CallConv;
2002 bool &isTailCall = CLI.IsTailCall;
2003 bool isVarArg = CLI.IsVarArg;
2004 const auto *CB = CLI.CB;
2005
2007 bool Is64Bit = Subtarget.is64Bit();
2008 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2009 bool IsSibcall = false;
2010 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
2011 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
2012 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
2014 bool HasNCSR = (CB && isa<CallInst>(CB) &&
2015 CB->hasFnAttr("no_caller_saved_registers"));
2016 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
2017 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
2018 bool IsCFICall = IsIndirectCall && CLI.CFIType;
2019 const Module *M = MF.getFunction().getParent();
2020 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
2021
2023 if (CallConv == CallingConv::X86_INTR)
2024 report_fatal_error("X86 interrupts may not be called directly");
2025
2026 // Analyze operands of the call, assigning locations to each operand.
2028 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2029
2030 // Allocate shadow area for Win64.
2031 if (IsWin64)
2032 CCInfo.AllocateStack(32, Align(8));
2033
2034 CCInfo.AnalyzeArguments(Outs, CC_X86);
2035
2036 // In vectorcall calling convention a second pass is required for the HVA
2037 // types.
2038 if (CallingConv::X86_VectorCall == CallConv) {
2039 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2040 }
2041
2042 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2043 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
2044 // If we are using a GOT, disable tail calls to external symbols with
2045 // default visibility. Tail calling such a symbol requires using a GOT
2046 // relocation, which forces early binding of the symbol. This breaks code
2047 // that requires lazy function symbol resolution. Using musttail or
2048 // GuaranteedTailCallOpt will override this.
2049 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2050 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2051 G->getGlobal()->hasDefaultVisibility()))
2052 isTailCall = false;
2053 }
2054
2055 if (isTailCall && !IsMustTail) {
2056 // Check if it's really possible to do a tail call.
2057 isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
2058 IsCalleePopSRet);
2059
2060 // Sibcalls are automatically detected tailcalls which do not require
2061 // ABI changes.
2062 if (!IsGuaranteeTCO && isTailCall)
2063 IsSibcall = true;
2064
2065 if (isTailCall)
2066 ++NumTailCalls;
2067 }
2068
2069 if (IsMustTail && !isTailCall)
2070 report_fatal_error("failed to perform tail call elimination on a call "
2071 "site marked musttail");
2072
2073 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2074 "Var args not supported with calling convention fastcc, ghc or hipe");
2075
2076 // Get a count of how many bytes are to be pushed on the stack.
2077 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2078 if (IsSibcall)
2079 // This is a sibcall. The memory operands are available in caller's
2080 // own caller's stack.
2081 NumBytes = 0;
2082 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
2083 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2084
2085 int FPDiff = 0;
2086 if (isTailCall &&
2087 shouldGuaranteeTCO(CallConv,
2089 // Lower arguments at fp - stackoffset + fpdiff.
2090 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2091
2092 FPDiff = NumBytesCallerPushed - NumBytes;
2093
2094 // Set the delta of movement of the returnaddr stackslot.
2095 // But only set if delta is greater than previous delta.
2096 if (FPDiff < X86Info->getTCReturnAddrDelta())
2097 X86Info->setTCReturnAddrDelta(FPDiff);
2098 }
2099
2100 unsigned NumBytesToPush = NumBytes;
2101 unsigned NumBytesToPop = NumBytes;
2102
2103 // If we have an inalloca argument, all stack space has already been allocated
2104 // for us and is right at the top of the stack. We don't support multiple
2105 // arguments passed in memory when using inalloca.
2106 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2107 NumBytesToPush = 0;
2108 if (!ArgLocs.back().isMemLoc())
2109 report_fatal_error("cannot use inalloca attribute on a register "
2110 "parameter");
2111 if (ArgLocs.back().getLocMemOffset() != 0)
2112 report_fatal_error("any parameter with the inalloca attribute must be "
2113 "the only memory argument");
2114 } else if (CLI.IsPreallocated) {
2115 assert(ArgLocs.back().isMemLoc() &&
2116 "cannot use preallocated attribute on a register "
2117 "parameter");
2118 SmallVector<size_t, 4> PreallocatedOffsets;
2119 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2120 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2121 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
2122 }
2123 }
2125 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
2126 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
2127 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
2128 NumBytesToPush = 0;
2129 }
2130
2131 if (!IsSibcall && !IsMustTail)
2132 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
2133 NumBytes - NumBytesToPush, dl);
2134
2135 SDValue RetAddrFrIdx;
2136 // Load return address for tail calls.
2137 if (isTailCall && FPDiff)
2138 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2139 Is64Bit, FPDiff, dl);
2140
2142 SmallVector<SDValue, 8> MemOpChains;
2144
2145 // The next loop assumes that the locations are in the same order of the
2146 // input arguments.
2147 assert(isSortedByValueNo(ArgLocs) &&
2148 "Argument Location list must be sorted before lowering");
2149
2150 // Walk the register/memloc assignments, inserting copies/loads. In the case
2151 // of tail call optimization, arguments are handled later.
2152 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2153 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2154 ++I, ++OutIndex) {
2155 assert(OutIndex < Outs.size() && "Invalid Out index");
2156 // Skip inalloca/preallocated arguments, they have already been written.
2157 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2158 if (Flags.isInAlloca() || Flags.isPreallocated())
2159 continue;
2160
2161 CCValAssign &VA = ArgLocs[I];
2162 EVT RegVT = VA.getLocVT();
2163 SDValue Arg = OutVals[OutIndex];
2164 bool isByVal = Flags.isByVal();
2165
2166 // Promote the value if needed.
2167 switch (VA.getLocInfo()) {
2168 default: llvm_unreachable("Unknown loc info!");
2169 case CCValAssign::Full: break;
2170 case CCValAssign::SExt:
2171 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2172 break;
2173 case CCValAssign::ZExt:
2174 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2175 break;
2176 case CCValAssign::AExt:
2177 if (Arg.getValueType().isVector() &&
2178 Arg.getValueType().getVectorElementType() == MVT::i1)
2179 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
2180 else if (RegVT.is128BitVector()) {
2181 // Special case: passing MMX values in XMM registers.
2182 Arg = DAG.getBitcast(MVT::i64, Arg);
2183 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2184 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2185 } else
2186 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2187 break;
2188 case CCValAssign::BCvt:
2189 Arg = DAG.getBitcast(RegVT, Arg);
2190 break;
2191 case CCValAssign::Indirect: {
2192 if (isByVal) {
2193 // Memcpy the argument to a temporary stack slot to prevent
2194 // the caller from seeing any modifications the callee may make
2195 // as guaranteed by the `byval` attribute.
2196 int FrameIdx = MF.getFrameInfo().CreateStackObject(
2197 Flags.getByValSize(),
2198 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
2199 SDValue StackSlot =
2200 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
2201 Chain =
2202 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
2203 // From now on treat this as a regular pointer
2204 Arg = StackSlot;
2205 isByVal = false;
2206 } else {
2207 // Store the argument.
2208 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2209 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2210 Chain = DAG.getStore(
2211 Chain, dl, Arg, SpillSlot,
2213 Arg = SpillSlot;
2214 }
2215 break;
2216 }
2217 }
2218
2219 if (VA.needsCustom()) {
2220 assert(VA.getValVT() == MVT::v64i1 &&
2221 "Currently the only custom case is when we split v64i1 to 2 regs");
2222 // Split v64i1 value into two registers
2223 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
2224 } else if (VA.isRegLoc()) {
2225 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2226 const TargetOptions &Options = DAG.getTarget().Options;
2227 if (Options.EmitCallSiteInfo)
2228 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
2229 if (isVarArg && IsWin64) {
2230 // Win64 ABI requires argument XMM reg to be copied to the corresponding
2231 // shadow reg if callee is a varargs function.
2232 Register ShadowReg;
2233 switch (VA.getLocReg()) {
2234 case X86::XMM0: ShadowReg = X86::RCX; break;
2235 case X86::XMM1: ShadowReg = X86::RDX; break;
2236 case X86::XMM2: ShadowReg = X86::R8; break;
2237 case X86::XMM3: ShadowReg = X86::R9; break;
2238 }
2239 if (ShadowReg)
2240 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2241 }
2242 } else if (!IsSibcall && (!isTailCall || isByVal)) {
2243 assert(VA.isMemLoc());
2244 if (!StackPtr.getNode())
2245 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2247 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2248 dl, DAG, VA, Flags, isByVal));
2249 }
2250 }
2251
2252 if (!MemOpChains.empty())
2253 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2254
2255 if (Subtarget.isPICStyleGOT()) {
2256 // ELF / PIC requires GOT in the EBX register before function calls via PLT
2257 // GOT pointer (except regcall).
2258 if (!isTailCall) {
2259 // Indirect call with RegCall calling convention may use up all the
2260 // general registers, so it is not suitable to bind EBX register for
2261 // GOT address, just let register allocator handle it.
2262 if (CallConv != CallingConv::X86_RegCall)
2263 RegsToPass.push_back(std::make_pair(
2264 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2265 getPointerTy(DAG.getDataLayout()))));
2266 } else {
2267 // If we are tail calling and generating PIC/GOT style code load the
2268 // address of the callee into ECX. The value in ecx is used as target of
2269 // the tail jump. This is done to circumvent the ebx/callee-saved problem
2270 // for tail calls on PIC/GOT architectures. Normally we would just put the
2271 // address of GOT into ebx and then call target@PLT. But for tail calls
2272 // ebx would be restored (since ebx is callee saved) before jumping to the
2273 // target@PLT.
2274
2275 // Note: The actual moving to ECX is done further down.
2276 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2277 if (G && !G->getGlobal()->hasLocalLinkage() &&
2278 G->getGlobal()->hasDefaultVisibility())
2279 Callee = LowerGlobalAddress(Callee, DAG);
2280 else if (isa<ExternalSymbolSDNode>(Callee))
2281 Callee = LowerExternalSymbol(Callee, DAG);
2282 }
2283 }
2284
2285 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2286 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
2287 // From AMD64 ABI document:
2288 // For calls that may call functions that use varargs or stdargs
2289 // (prototype-less calls or calls to functions containing ellipsis (...) in
2290 // the declaration) %al is used as hidden argument to specify the number
2291 // of SSE registers used. The contents of %al do not need to match exactly
2292 // the number of registers, but must be an ubound on the number of SSE
2293 // registers used and is in the range 0 - 8 inclusive.
2294
2295 // Count the number of XMM registers allocated.
2296 static const MCPhysReg XMMArgRegs[] = {
2297 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2298 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2299 };
2300 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2301 assert((Subtarget.hasSSE1() || !NumXMMRegs)
2302 && "SSE registers cannot be used when SSE is disabled");
2303 RegsToPass.push_back(std::make_pair(Register(X86::AL),
2304 DAG.getConstant(NumXMMRegs, dl,
2305 MVT::i8)));
2306 }
2307
2308 if (isVarArg && IsMustTail) {
2309 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2310 for (const auto &F : Forwards) {
2311 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2312 RegsToPass.push_back(std::make_pair(F.PReg, Val));
2313 }
2314 }
2315
2316 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2317 // don't need this because the eligibility check rejects calls that require
2318 // shuffling arguments passed in memory.
2319 if (!IsSibcall && isTailCall) {
2320 // Force all the incoming stack arguments to be loaded from the stack
2321 // before any new outgoing arguments are stored to the stack, because the
2322 // outgoing stack slots may alias the incoming argument stack slots, and
2323 // the alias isn't otherwise explicit. This is slightly more conservative
2324 // than necessary, because it means that each store effectively depends
2325 // on every argument instead of just those arguments it would clobber.
2326 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
2327
2328 SmallVector<SDValue, 8> MemOpChains2;
2329 SDValue FIN;
2330 int FI = 0;
2331 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2332 ++I, ++OutsIndex) {
2333 CCValAssign &VA = ArgLocs[I];
2334
2335 if (VA.isRegLoc()) {
2336 if (VA.needsCustom()) {
2337 assert((CallConv == CallingConv::X86_RegCall) &&
2338 "Expecting custom case only in regcall calling convention");
2339 // This means that we are in special case where one argument was
2340 // passed through two register locations - Skip the next location
2341 ++I;
2342 }
2343
2344 continue;
2345 }
2346
2347 assert(VA.isMemLoc());
2348 SDValue Arg = OutVals[OutsIndex];
2349 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2350 // Skip inalloca/preallocated arguments. They don't require any work.
2351 if (Flags.isInAlloca() || Flags.isPreallocated())
2352 continue;
2353 // Create frame index.
2354 int32_t Offset = VA.getLocMemOffset()+FPDiff;
2355 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2356 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
2357 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2358
2359 if (Flags.isByVal()) {
2360 // Copy relative to framepointer.
2362 if (!StackPtr.getNode())
2363 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2366 StackPtr, Source);
2367
2368 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
2369 ArgChain,
2370 Flags, DAG, dl));
2371 } else {
2372 // Store relative to framepointer.
2373 MemOpChains2.push_back(DAG.getStore(
2374 ArgChain, dl, Arg, FIN,
2376 }
2377 }
2378
2379 if (!MemOpChains2.empty())
2380 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2381
2382 // Store the return address to the appropriate stack slot.
2383 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2385 RegInfo->getSlotSize(), FPDiff, dl);
2386 }
2387
2388 // Build a sequence of copy-to-reg nodes chained together with token chain
2389 // and glue operands which copy the outgoing args into registers.
2390 SDValue InGlue;
2391 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2392 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2393 RegsToPass[i].second, InGlue);
2394 InGlue = Chain.getValue(1);
2395 }
2396
2397 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2398 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2399 // In the 64-bit large code model, we have to make all calls
2400 // through a register, since the call instruction's 32-bit
2401 // pc-relative offset may not be large enough to hold the whole
2402 // address.
2403 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2404 Callee->getOpcode() == ISD::ExternalSymbol) {
2405 // Lower direct calls to global addresses and external symbols. Setting
2406 // ForCall to true here has the effect of removing WrapperRIP when possible
2407 // to allow direct calls to be selected without first materializing the
2408 // address into a register.
2409 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
2410 } else if (Subtarget.isTarget64BitILP32() &&
2411 Callee.getValueType() == MVT::i32) {
2412 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
2413 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2414 }
2415
2416 // Returns a chain & a glue for retval copy to use.
2417 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2419
2420 if (!IsSibcall && isTailCall && !IsMustTail) {
2421 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
2422 InGlue = Chain.getValue(1);
2423 }
2424
2425 Ops.push_back(Chain);
2426 Ops.push_back(Callee);
2427
2428 if (isTailCall)
2429 Ops.push_back(DAG.getTargetConstant(FPDiff, dl, MVT::i32));
2430
2431 // Add argument registers to the end of the list so that they are known live
2432 // into the call.
2433 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2434 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2435 RegsToPass[i].second.getValueType()));
2436
2437 // Add a register mask operand representing the call-preserved registers.
2438 const uint32_t *Mask = [&]() {
2439 auto AdaptedCC = CallConv;
2440 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2441 // use X86_INTR calling convention because it has the same CSR mask
2442 // (same preserved registers).
2443 if (HasNCSR)
2445 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
2446 // to use the CSR_NoRegs_RegMask.
2447 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
2448 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2449 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2450 }();
2451 assert(Mask && "Missing call preserved mask for calling convention");
2452
2453 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getFrameRegister(MF)))
2454 X86Info->setFPClobberedByCall(true);
2455 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getBaseRegister()))
2456 X86Info->setBPClobberedByCall(true);
2457
2458 // If this is an invoke in a 32-bit function using a funclet-based
2459 // personality, assume the function clobbers all registers. If an exception
2460 // is thrown, the runtime will not restore CSRs.
2461 // FIXME: Model this more precisely so that we can register allocate across
2462 // the normal edge and spill and fill across the exceptional edge.
2463 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
2464 const Function &CallerFn = MF.getFunction();
2465 EHPersonality Pers =
2466 CallerFn.hasPersonalityFn()
2469 if (isFuncletEHPersonality(Pers))
2470 Mask = RegInfo->getNoPreservedMask();
2471 }
2472
2473 // Define a new register mask from the existing mask.
2474 uint32_t *RegMask = nullptr;
2475
2476 // In some calling conventions we need to remove the used physical registers
2477 // from the reg mask. Create a new RegMask for such calling conventions.
2478 // RegMask for calling conventions that disable only return registers (e.g.
2479 // preserve_most) will be modified later in LowerCallResult.
2480 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
2481 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
2482 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2483
2484 // Allocate a new Reg Mask and copy Mask.
2485 RegMask = MF.allocateRegMask();
2486 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
2487 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
2488
2489 // Make sure all sub registers of the argument registers are reset
2490 // in the RegMask.
2491 if (ShouldDisableArgRegs) {
2492 for (auto const &RegPair : RegsToPass)
2493 for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2494 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2495 }
2496
2497 // Create the RegMask Operand according to our updated mask.
2498 Ops.push_back(DAG.getRegisterMask(RegMask));
2499 } else {
2500 // Create the RegMask Operand according to the static mask.
2501 Ops.push_back(DAG.getRegisterMask(Mask));
2502 }
2503
2504 if (InGlue.getNode())
2505 Ops.push_back(InGlue);
2506
2507 if (isTailCall) {
2508 // We used to do:
2509 //// If this is the first return lowered for this function, add the regs
2510 //// to the liveout set for the function.
2511 // This isn't right, although it's probably harmless on x86; liveouts
2512 // should be computed from returns not tail calls. Consider a void
2513 // function making a tail call to a function returning int.
2515 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, NodeTys, Ops);
2516
2517 if (IsCFICall)
2518 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2519
2520 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2521 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2522 return Ret;
2523 }
2524
2525 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
2526 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
2527 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
2528 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2529 // expanded to the call, directly followed by a special marker sequence and
2530 // a call to a ObjC library function. Use the CALL_RVMARKER to do that.
2531 assert(!isTailCall &&
2532 "tail calls cannot be marked with clang.arc.attachedcall");
2533 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2534
2535 // Add a target global address for the retainRV/claimRV runtime function
2536 // just before the call target.
2538 auto PtrVT = getPointerTy(DAG.getDataLayout());
2539 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
2540 Ops.insert(Ops.begin() + 1, GA);
2541 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
2542 } else {
2543 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
2544 }
2545
2546 if (IsCFICall)
2547 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2548
2549 InGlue = Chain.getValue(1);
2550 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2551 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2552
2553 // Save heapallocsite metadata.
2554 if (CLI.CB)
2555 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
2556 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
2557
2558 // Create the CALLSEQ_END node.
2559 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2560 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2562 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2563 else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
2564 // If this call passes a struct-return pointer, the callee
2565 // pops that struct pointer.
2566 NumBytesForCalleeToPop = 4;
2567
2568 // Returns a glue for retval copy to use.
2569 if (!IsSibcall) {
2570 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
2571 InGlue, dl);
2572 InGlue = Chain.getValue(1);
2573 }
2574
2575 if (CallingConv::PreserveNone == CallConv)
2576 for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
2577 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() ||
2578 Outs[I].Flags.isSwiftError()) {
2579 errorUnsupported(DAG, dl,
2580 "Swift attributes can't be used with preserve_none");
2581 break;
2582 }
2583 }
2584
2585 // Handle result values, copying them out of physregs into vregs that we
2586 // return.
2587 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2588 InVals, RegMask);
2589}
2590
2591//===----------------------------------------------------------------------===//
2592// Fast Calling Convention (tail call) implementation
2593//===----------------------------------------------------------------------===//
2594
2595// Like the stdcall convention (callee cleans arguments), except that ECX is
2596// reserved for storing the tail called function address. Only 2 registers are
2597// free for argument passing (inreg). Tail call optimization is performed
2598// provided:
2599// * tailcallopt is enabled
2600// * caller/callee are fastcc
2601// On X86_64 architecture with GOT-style position independent code only local
2602// (within module) calls are supported at the moment.
2603// To keep the stack aligned according to platform abi the function
2604// GetAlignedArgumentStackSize ensures that argument delta is always multiples
2605// of stack alignment. (Dynamic linkers need this - Darwin's dyld for example)
2606// If a tail-called callee has more arguments than the caller, the
2607// caller needs to make sure that there is room to move the RETADDR to. This is
2608// achieved by reserving an area the size of the argument delta right after the
2609// original RETADDR, but before the saved framepointer or the spilled registers
2610// e.g. caller(arg1, arg2) calls callee(arg1, arg2,arg3,arg4)
2611// stack layout:
2612// arg1
2613// arg2
2614// RETADDR
2615// [ new RETADDR
2616// move area ]
2617// (possible EBP)
2618// ESI
2619// EDI
2620// local1 ..
2621
2622/// Pad the argument stack size so that the returned size plus one stack slot
2623/// is a multiple of the stack alignment.
    ///
    /// For example, with a 4-byte slot and a 16-byte alignment requirement the
    /// result has the form 16n + 12. \p StackSize must already be a multiple of
    /// the slot size (asserted below). \p DAG is not referenced by the body.
2624unsigned
2625X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2626 SelectionDAG &DAG) const {
2627 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2628 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2629 assert(StackSize % SlotSize == 0 &&
2630 "StackSize must be a multiple of SlotSize");
     // Add one slot before aligning and remove it afterwards, so that
     // (result + SlotSize) is what ends up aligned to StackAlignment.
2631 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
2632}
2633
2634/// Return true if the given stack call argument is already available in the
2635/// same position (relatively) of the caller's incoming argument stack.
    ///
    /// The body works in three steps: (1) strip wrappers around \p Arg that do
    /// not alter the incoming bits, (2) resolve what remains to a frame index,
    /// and (3) check that the frame object is a fixed object located at
    /// \p Offset whose mutability, extension kind and size are acceptable.
    /// Any failure along the way returns false.
    /// NOTE(review): the listing jumps from line 2636 to 2639 and from 2717 to
    /// 2719, so the function name, its leading parameters and part of one
    /// condition are not visible here.
2636static
2639 const X86InstrInfo *TII, const CCValAssign &VA) {
     // Size in bytes of the value being passed; replaced by the byval size in
     // the byval paths below.
2640 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2641
2642 for (;;) {
2643 // Look through nodes that don't alter the bits of the incoming value.
2644 unsigned Op = Arg.getOpcode();
2645 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2646 Op == ISD::AssertZext) {
2647 Arg = Arg.getOperand(0);
2648 continue;
2649 }
     // A TRUNCATE is only looked through when its input is an AssertZext to
     // the truncated type; both nodes are then skipped at once.
2650 if (Op == ISD::TRUNCATE) {
2651 const SDValue &TruncInput = Arg.getOperand(0);
2652 if (TruncInput.getOpcode() == ISD::AssertZext &&
2653 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
2654 Arg.getValueType()) {
2655 Arg = TruncInput.getOperand(0);
2656 continue;
2657 }
2658 }
2659 break;
2660 }
2661
     // INT_MAX is a sentinel meaning "no frame index found"; every path that
     // does not return false must overwrite it (asserted below).
2662 int FI = INT_MAX;
2663 if (Arg.getOpcode() == ISD::CopyFromReg) {
     // The value comes from a register: only virtual registers with a known
     // defining instruction can be traced back to a stack slot.
2664 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2665 if (!VR.isVirtual())
2666 return false;
2667 MachineInstr *Def = MRI->getVRegDef(VR);
2668 if (!Def)
2669 return false;
2670 if (!Flags.isByVal()) {
     // Non-byval: the def must be a load from a stack slot; the slot's
     // frame index is written to FI.
2671 if (!TII->isLoadFromStackSlot(*Def, FI))
2672 return false;
2673 } else {
     // Byval: the def must be an LEA whose operand 1 is a frame index.
2674 unsigned Opcode = Def->getOpcode();
2675 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2676 Opcode == X86::LEA64_32r) &&
2677 Def->getOperand(1).isFI()) {
2678 FI = Def->getOperand(1).getIndex();
2679 Bytes = Flags.getByValSize();
2680 } else
2681 return false;
2682 }
2683 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2684 if (Flags.isByVal())
2685 // ByVal argument is passed in as a pointer but it's now being
2686 // dereferenced. e.g.
2687 // define @foo(%struct.X* %A) {
2688 // tail call @bar(%struct.X* byval %A)
2689 // }
2690 return false;
     // A plain load qualifies only when its base pointer is directly a
     // frame index node.
2691 SDValue Ptr = Ld->getBasePtr();
2692 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2693 if (!FINode)
2694 return false;
2695 FI = FINode->getIndex();
2696 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
     // Byval argument given directly as a frame index.
2697 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
2698 FI = FINode->getIndex();
2699 Bytes = Flags.getByValSize();
2700 } else
2701 return false;
2702
2703 assert(FI != INT_MAX);
2704 if (!MFI.isFixedObjectIndex(FI))
2705 return false;
2706
     // The object must sit at exactly the offset the call wants to use.
2707 if (Offset != MFI.getObjectOffset(FI))
2708 return false;
2709
2710 // If this is not byval, check that the argument stack object is immutable.
2711 // inalloca and argument copy elision can create mutable argument stack
2712 // objects. Byval objects can be mutated, but a byval call intends to pass the
2713 // mutated memory.
2714 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
2715 return false;
2716
2717 if (VA.getLocVT().getFixedSizeInBits() >
2719 // If the argument location is wider than the argument type, check that any
2720 // extension flags match.
2721 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
2722 Flags.isSExt() != MFI.isObjectSExt(FI)) {
2723 return false;
2724 }
2725 }
2726
     // Finally, the fixed object must be exactly as large as the value passed.
2727 return Bytes == MFI.getObjectSize(FI);
2728}
2729
2730/// Check whether the call is eligible for tail call optimization. Targets
2731/// that want to do tail call optimization should implement this function.
2732/// Note that the x86 backend does not check musttail calls for eligibility! The
2733/// rest of x86 tail call lowering must be prepared to forward arguments of any
2734/// type.
    ///
    /// Order of the checks visible below: convention eligibility
    /// (mayTailCallThisCC), the x86_fp80 return-type mismatch and the Win64
    /// caller/callee mismatch are tested first. When IsGuaranteeTCO is set the
    /// answer is then canGuaranteeTCO(CalleeCC) && CCMatch and nothing else is
    /// examined; otherwise every remaining sibcall check must pass for the
    /// function to return true.
    /// NOTE(review): listing lines 2736, 2741, 2750, 2790, 2825, 2836, 2862,
    /// 2907 and 2910 are absent, so part of the signature, some declarations
    /// and some conditions cannot be seen here.
2735bool X86TargetLowering::IsEligibleForTailCallOptimization(
2737 SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const {
2738 SelectionDAG &DAG = CLI.DAG;
2739 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2740 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2742 SDValue Callee = CLI.Callee;
2743 CallingConv::ID CalleeCC = CLI.CallConv;
2744 bool isVarArg = CLI.IsVarArg;
2745
2746 if (!mayTailCallThisCC(CalleeCC))
2747 return false;
2748
2749 // If -tailcallopt is specified, make fastcc functions tail-callable.
2751 const Function &CallerF = MF.getFunction();
2752
2753 // If the function return type is x86_fp80 and the callee return type is not,
2754 // then the FP_EXTEND of the call result is not a nop. It's not safe to
2755 // perform a tailcall optimization here.
2756 if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
2757 return false;
2758
2759 CallingConv::ID CallerCC = CallerF.getCallingConv();
2760 bool CCMatch = CallerCC == CalleeCC;
2761 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
2762 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
     // Guaranteed TCO is requested either globally through the target option
     // or per call through the tail / swifttail conventions.
2763 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
2764 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
2765
2766 // Win64 functions have extra shadow space for argument homing. Don't do the
2767 // sibcall if the caller and callee have mismatched expectations for this
2768 // space.
2769 if (IsCalleeWin64 != IsCallerWin64)
2770 return false;
2771
     // Guaranteed TCO short-circuits here; none of the sibcall checks below
     // run in that mode.
2772 if (IsGuaranteeTCO) {
2773 if (canGuaranteeTCO(CalleeCC) && CCMatch)
2774 return true;
2775 return false;
2776 }
2777
2778 // Look for obvious safe cases to perform tail call optimization that do not
2779 // require ABI changes. This is what gcc calls sibcall.
2780
2781 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
2782 // emit a special epilogue.
2783 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2784 if (RegInfo->hasStackRealignment(MF))
2785 return false;
2786
2787 // Also avoid sibcall optimization if we're an sret return fn and the callee
2788 // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
2789 // insufficient.
2791 // For a compatible tail call the callee must return our sret pointer. So it
2792 // needs to be (a) an sret function itself and (b) we pass our sret as its
2793 // sret. Condition #b is harder to determine.
2794 return false;
2795 } else if (IsCalleePopSRet)
2796 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
2797 // expect that.
2798 return false;
2799
2800 // Do not sibcall optimize vararg calls unless all arguments are passed via
2801 // registers.
2802 LLVMContext &C = *DAG.getContext();
2803 if (isVarArg && !Outs.empty()) {
2804 // Optimizing for varargs on Win64 is unlikely to be safe without
2805 // additional testing.
2806 if (IsCalleeWin64 || IsCallerWin64)
2807 return false;
2808
2809 for (const auto &VA : ArgLocs)
2810 if (!VA.isRegLoc())
2811 return false;
2812 }
2813
2814 // If the call result is in ST0 / ST1, it needs to be popped off the x87
2815 // stack. Therefore, if it's not used by the call it is not safe to optimize
2816 // this into a sibcall.
2817 bool Unused = false;
2818 for (const auto &In : Ins) {
2819 if (!In.Used) {
2820 Unused = true;
2821 break;
2822 }
2823 }
     // The return locations are only analyzed when at least one result is
     // unused.
2824 if (Unused) {
2826 CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
2827 RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2828 for (const auto &VA : RVLocs) {
2829 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
2830 return false;
2831 }
2832 }
2833
2834 // Check that the call results are passed in the same way.
2835 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2837 return false;
2838 // The callee has to preserve all registers the caller needs to preserve.
2839 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2840 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
     // The preserved-register masks are only compared when the conventions
     // differ.
2841 if (!CCMatch) {
2842 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2843 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2844 return false;
2845 }
2846
2847 unsigned StackArgsSize = CCInfo.getStackSize();
2848
2849 // If the callee takes no arguments then go on to check the results of the
2850 // call.
2851 if (!Outs.empty()) {
2852 if (StackArgsSize > 0) {
2853 // Check if the arguments are already laid out in the right way as
2854 // the caller's fixed stack objects.
2855 MachineFrameInfo &MFI = MF.getFrameInfo();
2856 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2857 const X86InstrInfo *TII = Subtarget.getInstrInfo();
2858 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2859 const CCValAssign &VA = ArgLocs[I];
2860 SDValue Arg = OutVals[I];
2861 ISD::ArgFlagsTy Flags = Outs[I].Flags;
2863 return false;
     // Every argument assigned to memory must already sit in the matching
     // fixed stack slot of the caller.
2864 if (!VA.isRegLoc()) {
2865 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
2866 TII, VA))
2867 return false;
2868 }
2869 }
2870 }
2871
2872 bool PositionIndependent = isPositionIndependent();
2873 // If the tailcall address may be in a register, then make sure it's
2874 // possible to register allocate for it. In 32-bit, the call address can
2875 // only target EAX, EDX, or ECX since the tail call must be scheduled after
2876 // callee-saved registers are restored. These happen to be the same
2877 // registers used to pass 'inreg' arguments so watch out for those.
2878 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
2879 !isa<ExternalSymbolSDNode>(Callee)) ||
2880 PositionIndependent)) {
2881 unsigned NumInRegs = 0;
2882 // In PIC we need an extra register to formulate the address computation
2883 // for the callee.
2884 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
2885
2886 for (const auto &VA : ArgLocs) {
2887 if (!VA.isRegLoc())
2888 continue;
2889 Register Reg = VA.getLocReg();
2890 switch (Reg) {
2891 default: break;
2892 case X86::EAX: case X86::EDX: case X86::ECX:
     // Reject as soon as MaxInRegs of EAX/EDX/ECX are occupied by
     // arguments.
2893 if (++NumInRegs == MaxInRegs)
2894 return false;
2895 break;
2896 }
2897 }
2898 }
2899
2900 const MachineRegisterInfo &MRI = MF.getRegInfo();
2901 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2902 return false;
2903 }
2904
2905 bool CalleeWillPop =
2906 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
2908
2909 if (unsigned BytesToPop =
2911 // If we have bytes to pop, the callee must pop them.
2912 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
2913 if (!CalleePopMatches)
2914 return false;
2915 } else if (CalleeWillPop && StackArgsSize > 0) {
2916 // If we don't have bytes to pop, make sure the callee doesn't pop any.
2917 return false;
2918 }
2919
2920 return true;
2921}
2922
2923/// Determines whether the callee is required to pop its own arguments.
2924/// Callee pop is necessary to support tail calls.
    ///
    /// Non-vararg calls for which shouldGuaranteeTCO(CallingConv, GuaranteeTCO)
    /// holds always pop. Otherwise only the conventions named in the switch
    /// pop, and only when !is64Bit; every other convention returns false.
    /// NOTE(review): listing lines 2925 and 2935-2938 (the function name with
    /// its first parameter, and the case labels) are absent here.
2926 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
2927 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
2928 // can guarantee TCO.
2929 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
2930 return true;
2931
2932 switch (CallingConv) {
2933 default:
2934 return false;
2939 return !is64Bit;
2940 }
2941}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const TargetInstrInfo *TII)
MatchingStackOffset - Return true if the given stack call argument is already available in the same p...
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
uint64_t Addr
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:236
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
static LVOptions Options
Definition: LVOptions.cpp:25
const MCPhysReg ArgGPRs[]
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt)
Return true if the function is being made into a tailcall target by changing its ABI.
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
Module.h This file contains the declarations for the Module class.
This file defines ARC utility functions which are used by various parts of the compiler.
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:167
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
static bool is64Bit(const char *name)
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
Lowers masks values (v*i1) to the local register values.
static void Passv64i1ArgInRegs(const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg, SmallVectorImpl< std::pair< Register, SDValue > > &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, const X86Subtarget &Subtarget)
Breaks v64i1 value into two registers and adds the new node to the DAG.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget, SDValue *InGlue=nullptr)
Reads two 32 bit registers and creates a 64 bit mask value.
static ArrayRef< MCPhysReg > get64BitArgumentXMMs(MachineFunction &MF, CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static bool isSortedByValueNo(ArrayRef< CCValAssign > ArgLocs)
static ArrayRef< MCPhysReg > get64BitArgumentGPRs(CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static std::pair< MVT, unsigned > handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, const X86Subtarget &Subtarget)
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, const char *Msg)
Call this when the user attempts to do something unsupported, like returning a double without SSE2 en...
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, unsigned SlotSize, int FPDiff, const SDLoc &dl)
Emit a store of the return address if tail call optimization is performed and it is required (FPDiff!...
static bool hasCalleePopSRet(const SmallVectorImpl< T > &Args, const X86Subtarget &Subtarget)
Determines whether Args, either a set of outgoing arguments to a call, or a set of incoming args of a...
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static bool hasStackGuardSlotTLS(const Triple &TargetTriple)
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
The function will lower a register of various sizes (8/16/32/64) to a mask value of the expected size...
static Constant * SegmentOffset(IRBuilderBase &IRB, int Offset, unsigned AddressSpace)
static bool isBitAligned(Align Alignment, uint64_t SizeInBits)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:195
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:219
CCState - This class holds information needed while lowering arguments and return values.
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
void convertToReg(unsigned RegNo)
bool isExtInLoc() const
int64_t getLocMemOffset() const
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1523
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2281
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:155
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:486
Diagnostic information for unsupported feature in backend.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:168
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:281
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:903
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1993
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:219
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:743
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:656
void setDSOLocal(bool Local)
Definition: GlobalValue.h:303
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:91
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:171
LLVMContext & getContext() const
Definition: IRBuilder.h:173
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:566
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:381
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:393
Metadata node.
Definition: Metadata.h:1069
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
void setObjectZExt(int ObjectIdx, bool IsZExt)
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setObjectSExt(int ObjectIdx, bool IsSExt)
bool isImmutableObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to an immutable object.
void setHasTailCall(bool V=true)
bool isObjectZExt(int ObjectIdx) const
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isObjectSExt(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
uint32_t * allocateRegMask()
Allocate and initialize a register mask with NumRegister bits.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
const LLVMTargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
Representation of each machine instruction.
Definition: MachineInstr.h:69
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
@ EK_LabelDifference64
EK_LabelDifference64 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOStore
The memory access writes data.
static unsigned getRegMaskSize(unsigned NumRegs)
Returns number of elements needed for a regmask array.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void disableCalleeSavedRegister(MCRegister Reg)
Disables the register from the list of CSRs.
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:662
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
void setCFIType(uint32_t Type)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
get the SDNode which holds the desired result
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:738
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), AAResults *AA=nullptr)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:489
void addHeapAllocSite(const SDNode *Node, MDNode *MD)
Set HeapAllocSite to be associated with Node.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, that starts new call frame, in which InSize bytes are set up inside ...
SDValue getRegister(unsigned Reg, EVT VT)
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:490
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, unsigned Reg, SDValue N)
Definition: SelectionDAG.h:789
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:692
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:484
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, unsigned Reg, EVT VT)
Definition: SelectionDAG.h:815
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
SDValue getRegisterMask(const uint32_t *RegMask)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVMContext * getContext() const
Definition: SelectionDAG.h:502
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:572
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
bool empty() const
Definition: SmallVector.h:95
size_t size() const
Definition: SmallVector.h:92
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:587
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:819
void push_back(const T &Elt)
Definition: SmallVector.h:427
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1210
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:134
Class to represent struct types.
Definition: DerivedTypes.h:216
Information about stack frame layout on the target.
Align getStackAlign() const
getStackAlign - This method returns the number of bytes to which the stack pointer must be aligne...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const
Returns the target-specific address of the unsafe stack pointer.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
bool isPositionIndependent() const
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the commandline.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isAndroidVersionLT(unsigned Major) const
Definition: Triple.h:771
bool isAndroid() const
Tests whether the target is Android.
Definition: Triple.h:769
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:667
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:558
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition: Triple.h:703
bool isOSFuchsia() const
Definition: Triple.h:588
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:634
bool isWindowsItaniumEnvironment() const
Definition: Triple.h:649
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition: Type.h:159
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
LLVM Value Representation.
Definition: Value.h:74
X86MachineFunctionInfo - This class is derived from MachineFunctionInfo and contains private X86 target-s...
void setBytesToPopOnReturn(unsigned bytes)
void setVarArgsGPOffset(unsigned Offset)
void setArgumentStackSize(unsigned size)
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
void setVarArgsFPOffset(unsigned Offset)
unsigned getSlotSize() const
bool hasSSE1() const
Definition: X86Subtarget.h:193
bool useLight256BitInstructions() const
Definition: X86Subtarget.h:258
bool isPICStyleGOT() const
Definition: X86Subtarget.h:328
bool isTargetMCU() const
Definition: X86Subtarget.h:297
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:300
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:173
bool isTargetDarwin() const
Definition: X86Subtarget.h:280
const Triple & getTargetTriple() const
Definition: X86Subtarget.h:278
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:122
bool useAVX512Regs() const
Definition: X86Subtarget.h:253
bool isTargetCOFF() const
Definition: X86Subtarget.h:287
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:337
bool hasAVX512() const
Definition: X86Subtarget.h:201
bool hasSSE41() const
Definition: X86Subtarget.h:197
bool hasSSE2() const
Definition: X86Subtarget.h:194
bool isTargetFuchsia() const
Definition: X86Subtarget.h:298
bool isPICStyleRIPRel() const
Definition: X86Subtarget.h:329
bool isTargetCygMing() const
Definition: X86Subtarget.h:320
const X86RegisterInfo * getRegisterInfo() const override
Definition: X86Subtarget.h:132
bool hasAVX() const
Definition: X86Subtarget.h:199
unsigned getPreferVectorWidth() const
Definition: X86Subtarget.h:225
bool isTargetAndroid() const
Definition: X86Subtarget.h:293
const X86FrameLowering * getFrameLowering() const override
Definition: X86Subtarget.h:124
bool hasAVX2() const
Definition: X86Subtarget.h:200
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMemoryAccessFast(EVT VT, Align Alignment) const
bool useSoftFloat() const override
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool isSafeMemOpType(MVT VT) const override
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Returns true if the target allows unaligned memory accesses of the specified type.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
uint64_t getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
Return the desired alignment for ByVal aggregate function arguments in the caller parameter area.
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void markLibCallAttributes(MachineFunction *MF, unsigned CC, ArgListTy &Args) const override
Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const override
Return true if the target stores SafeStack pointer at a fixed offset in some non-standard address spa...
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
This function returns true if the memory access is aligned or if the target allows this specific unal...
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the value type to use for ISD::SETCC.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override
For types supported by the target, this is an identity function.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:121
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
@ X86_64_SysV
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
Definition: CallingConv.h:151
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition: CallingConv.h:63
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ X86_ThisCall
Similar to X86_StdCall.
Definition: CallingConv.h:122
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
Definition: CallingConv.h:66
@ X86_StdCall
stdcall is mostly used by the Win32 API.
Definition: CallingConv.h:99
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ X86_VectorCall
MSVC calling convention that passes vectors and vector aggregates in SSE registers.
Definition: CallingConv.h:163
@ Intel_OCL_BI
Used for Intel OpenCL built-ins.
Definition: CallingConv.h:147
@ PreserveNone
Used for runtime calls that preserve no general-purpose registers.
Definition: CallingConv.h:90
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:159
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
Definition: CallingConv.h:203
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:813
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:557
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:933
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:804
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:634
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:549
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:810
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:918
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ExternalSymbol
Definition: ISDOpcodes.h:83
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:899
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:816
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ FS
Definition: X86.h:210
@ GS
Definition: X86.h:209
Reg
All possible values of the reg field in the ModR/M byte.
@ RET_GLUE
Return with a glue operand.
@ IRET
Return from interrupt. Operand 0 is the number of bytes to pop.
@ CALL
These operations represent an abstract X86 call instruction, which includes a bunch of information.
@ GlobalBaseReg
On Darwin, this node represents the result of the popl at function entry, used for PIC code.
@ TC_RETURN
Tail call return.
@ NT_CALL
Same as call except it adds the NoTrack prefix.
@ MOVDQ2Q
Copies a 64-bit value from the low word of an XMM vector to an MMX vector.
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget, const MachineFunction &MF)
True if the target supports the extended frame for async Swift functions.
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
std::optional< Function * > getAttachedARCFunction(const CallBase *CB)
This function returns operand bundle clang_arc_attachedcall's argument, which is the address of the A...
Definition: ObjCARCUtil.h:43
bool hasAttachedCallOpBundle(const CallBase *CB)
Definition: ObjCARCUtil.h:29
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
AddressSpace
Definition: NVPTXBaseInfo.h:21
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2073
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:291
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1902
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
uint64_t alignTo(uint64_t Size, Align A)
Returns a multiple of A needed to store Size bytes.
Definition: Alignment.h:155
bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:34
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:93
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:380
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:73
bool bitsLT(EVT VT) const
Return true if this has fewer bits than VT.
Definition: ValueTypes.h:290
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:340
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:358
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:306
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:203
bool is512BitVector() const
Return true if this is a 512-bit vector type.
Definition: ValueTypes.h:213
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:167
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:208
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:318
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:101
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:326
Describes a register that needs to be forwarded from the prologue to a musttail call.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals