X86ISelLoweringCall.cpp
1//===- llvm/lib/Target/X86/X86ISelLoweringCall.cpp - Call lowering --------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// This file implements the lowering of LLVM calls to DAG nodes.
11//
12//===----------------------------------------------------------------------===//
13
14#include "X86.h"
15#include "X86CallingConv.h"
16#include "X86FrameLowering.h"
17#include "X86ISelLowering.h"
18#include "X86InstrBuilder.h"
20#include "X86TargetMachine.h"
21#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/IRBuilder.h"
28#include "llvm/IR/Module.h"
29
30#define DEBUG_TYPE "x86-isel"
31
32using namespace llvm;
33
34STATISTIC(NumTailCalls, "Number of tail calls");
35
36/// Call this when the user attempts to do something unsupported, like
37/// returning a double without SSE2 enabled on x86_64. This is not fatal, unlike
38/// report_fatal_error, so calling code should attempt to recover without
39/// crashing.
40static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl,
41 const char *Msg) {
42 MachineFunction &MF = DAG.getMachineFunction();
43 DAG.getContext()->diagnose(
44 DiagnosticInfoUnsupported(MF.getFunction(), Msg, dl.getDebugLoc()));
45}
46
47/// Returns true if a CC can dynamically exclude a register from the list of
48/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
49/// the return registers.
50static bool shouldDisableRetRegFromCSR(CallingConv::ID CC) {
51 switch (CC) {
52 default:
53 return false;
54 case CallingConv::X86_RegCall:
55 case CallingConv::PreserveMost:
56 case CallingConv::PreserveAll:
57 return true;
58 }
59}
60
61/// Returns true if a CC can dynamically exclude a register from the list of
62/// callee-saved registers (TargetRegisterInfo::getCalleeSavedRegs()) based on
63/// the parameters.
64static bool shouldDisableArgRegFromCSR(CallingConv::ID CC) {
65 return CC == CallingConv::X86_RegCall;
66}
67
68static std::pair<MVT, unsigned>
69handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC,
70 const X86Subtarget &Subtarget) {
71 // v2i1/v4i1/v8i1/v16i1 all pass in xmm registers unless the calling
72 // convention is one that uses k registers.
73 if (NumElts == 2)
74 return {MVT::v2i64, 1};
75 if (NumElts == 4)
76 return {MVT::v4i32, 1};
77 if (NumElts == 8 && CC != CallingConv::X86_RegCall &&
78 CC != CallingConv::Intel_OCL_BI)
79 return {MVT::v8i16, 1};
80 if (NumElts == 16 && CC != CallingConv::X86_RegCall &&
81 CC != CallingConv::Intel_OCL_BI)
82 return {MVT::v16i8, 1};
83 // v32i1 passes in ymm unless we have BWI and the calling convention is
84 // regcall.
85 if (NumElts == 32 && (!Subtarget.hasBWI() || CC != CallingConv::X86_RegCall))
86 return {MVT::v32i8, 1};
87 // Split v64i1 vectors if we don't have v64i8 available.
88 if (NumElts == 64 && Subtarget.hasBWI() && CC != CallingConv::X86_RegCall) {
89 if (Subtarget.useAVX512Regs())
90 return {MVT::v64i8, 1};
91 return {MVT::v32i8, 2};
92 }
93
94 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
95 if (!isPowerOf2_32(NumElts) || (NumElts == 64 && !Subtarget.hasBWI()) ||
96 NumElts > 64)
97 return {MVT::i8, NumElts};
98
99 return {MVT::INVALID_SIMPLE_VALUE_TYPE, 0};
100}
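// For illustration (ST denotes the subtarget; these cases follow directly from
// the logic above), assuming the C calling convention on an AVX512 subtarget
// without BWI:
//   handleMaskRegisterForCallingConv(4,  CallingConv::C, ST) -> {MVT::v4i32, 1}
//   handleMaskRegisterForCallingConv(32, CallingConv::C, ST) -> {MVT::v32i8, 1}
//   handleMaskRegisterForCallingConv(64, CallingConv::C, ST) -> {MVT::i8,   64}
// With BWI and full 512-bit registers enabled, v64i1 instead maps to a single
// MVT::v64i8 register.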
101
102MVT X86TargetLowering::getRegisterTypeForCallingConv(LLVMContext &Context,
103 CallingConv::ID CC,
104 EVT VT) const {
105 if (VT.isVector()) {
106 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
107 unsigned NumElts = VT.getVectorNumElements();
108
109 MVT RegisterVT;
110 unsigned NumRegisters;
111 std::tie(RegisterVT, NumRegisters) =
112 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
113 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
114 return RegisterVT;
115 }
116
117 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
118 return MVT::v8f16;
119 }
120
121 // We will use more GPRs for f64 and f80 on 32 bits when x87 is disabled.
122 if ((VT == MVT::f64 || VT == MVT::f80) && !Subtarget.is64Bit() &&
123 !Subtarget.hasX87())
124 return MVT::i32;
125
126 if (isTypeLegal(MVT::f16)) {
127 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16)
128 return getRegisterTypeForCallingConv(
129 Context, CC, VT.changeVectorElementType(MVT::f16));
130
131 if (VT == MVT::bf16)
132 return MVT::f16;
133 }
134
135 return TargetLowering::getRegisterTypeForCallingConv(Context, CC, VT);
136}
137
138unsigned X86TargetLowering::getNumRegistersForCallingConv(LLVMContext &Context,
139 CallingConv::ID CC,
140 EVT VT) const {
141 if (VT.isVector()) {
142 if (VT.getVectorElementType() == MVT::i1 && Subtarget.hasAVX512()) {
143 unsigned NumElts = VT.getVectorNumElements();
144
145 MVT RegisterVT;
146 unsigned NumRegisters;
147 std::tie(RegisterVT, NumRegisters) =
148 handleMaskRegisterForCallingConv(NumElts, CC, Subtarget);
149 if (RegisterVT != MVT::INVALID_SIMPLE_VALUE_TYPE)
150 return NumRegisters;
151 }
152
153 if (VT.getVectorElementType() == MVT::f16 && VT.getVectorNumElements() < 8)
154 return 1;
155 }
156
157 // We have to split f64 to 2 registers and f80 to 3 registers on 32 bits if
158 // x87 is disabled.
159 if (!Subtarget.is64Bit() && !Subtarget.hasX87()) {
160 if (VT == MVT::f64)
161 return 2;
162 if (VT == MVT::f80)
163 return 3;
164 }
165
166 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
167 isTypeLegal(MVT::f16))
168 return getNumRegistersForCallingConv(Context, CC,
169 VT.changeVectorElementType(MVT::f16));
170
171 return TargetLowering::getNumRegistersForCallingConv(Context, CC, VT);
172}
173
174unsigned X86TargetLowering::getVectorTypeBreakdownForCallingConv(
175 LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
176 unsigned &NumIntermediates, MVT &RegisterVT) const {
177 // Break wide or odd vXi1 vectors into scalars to match avx2 behavior.
178 if (VT.isVector() && VT.getVectorElementType() == MVT::i1 &&
179 Subtarget.hasAVX512() &&
180 (!isPowerOf2_32(VT.getVectorNumElements()) ||
181 (VT.getVectorNumElements() == 64 && !Subtarget.hasBWI()) ||
182 VT.getVectorNumElements() > 64)) {
183 RegisterVT = MVT::i8;
184 IntermediateVT = MVT::i1;
185 NumIntermediates = VT.getVectorNumElements();
186 return NumIntermediates;
187 }
188
189 // Split v64i1 vectors if we don't have v64i8 available.
190 if (VT == MVT::v64i1 && Subtarget.hasBWI() && !Subtarget.useAVX512Regs() &&
191 CC != CallingConv::X86_RegCall) {
192 RegisterVT = MVT::v32i8;
193 IntermediateVT = MVT::v32i1;
194 NumIntermediates = 2;
195 return 2;
196 }
197
198 // Split vNbf16 vectors according to vNf16.
199 if (VT.isVector() && VT.getVectorElementType() == MVT::bf16 &&
200 isTypeLegal(MVT::f16))
201 VT = VT.changeVectorElementType(MVT::f16);
202
203 return TargetLowering::getVectorTypeBreakdownForCallingConv(Context, CC, VT, IntermediateVT,
204 NumIntermediates, RegisterVT);
205}
206
207EVT X86TargetLowering::getSetCCResultType(const DataLayout &DL,
208 LLVMContext& Context,
209 EVT VT) const {
210 if (!VT.isVector())
211 return MVT::i8;
212
213 if (Subtarget.hasAVX512()) {
214 // Figure out what this type will be legalized to.
215 EVT LegalVT = VT;
216 while (getTypeAction(Context, LegalVT) != TypeLegal)
217 LegalVT = getTypeToTransformTo(Context, LegalVT);
218
219 // If we got a 512-bit vector then we'll definitely have a vXi1 compare.
220 if (LegalVT.getSimpleVT().is512BitVector())
221 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
222
223 if (LegalVT.getSimpleVT().isVector() && Subtarget.hasVLX()) {
224 // If we legalized to less than a 512-bit vector, then we will use a vXi1
225 // compare for vXi32/vXi64 for sure. If we have BWI we will also support
226 // vXi16/vXi8.
227 MVT EltVT = LegalVT.getSimpleVT().getVectorElementType();
228 if (Subtarget.hasBWI() || EltVT.getSizeInBits() >= 32)
229 return EVT::getVectorVT(Context, MVT::i1, VT.getVectorElementCount());
230 }
231 }
232
233 return VT.changeVectorElementTypeToInteger();
234}
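// For illustration of the rules above: a setcc on v16i32 legalizes to a
// 512-bit vector under AVX512, so its result type is v16i1; a v8i32 compare
// needs VLX (or BWI for vXi8/vXi16 elements) to produce v8i1, otherwise the
// generic fallback yields the element-for-element integer type, e.g. v8i32.
// Scalar compares always produce MVT::i8.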
235
236bool X86TargetLowering::functionArgumentNeedsConsecutiveRegisters(
237 Type *Ty, CallingConv::ID CallConv, bool isVarArg,
238 const DataLayout &DL) const {
239 // i128 split into i64 needs to be allocated to two consecutive registers,
240 // or spilled to the stack as a whole.
241 return Ty->isIntegerTy(128);
242}
243
244/// Helper for getByValTypeAlignment to determine
245/// the desired ByVal argument alignment.
246static void getMaxByValAlign(Type *Ty, Align &MaxAlign) {
247 if (MaxAlign == 16)
248 return;
249 if (VectorType *VTy = dyn_cast<VectorType>(Ty)) {
250 if (VTy->getPrimitiveSizeInBits().getFixedValue() == 128)
251 MaxAlign = Align(16);
252 } else if (ArrayType *ATy = dyn_cast<ArrayType>(Ty)) {
253 Align EltAlign;
254 getMaxByValAlign(ATy->getElementType(), EltAlign);
255 if (EltAlign > MaxAlign)
256 MaxAlign = EltAlign;
257 } else if (StructType *STy = dyn_cast<StructType>(Ty)) {
258 for (auto *EltTy : STy->elements()) {
259 Align EltAlign;
260 getMaxByValAlign(EltTy, EltAlign);
261 if (EltAlign > MaxAlign)
262 MaxAlign = EltAlign;
263 if (MaxAlign == 16)
264 break;
265 }
266 }
267}
268
269/// Return the desired alignment for ByVal aggregate
270/// function arguments in the caller parameter area. For X86, aggregates
271/// that contain SSE vectors are placed at 16-byte boundaries while the rest
272/// are at 4-byte boundaries.
273Align X86TargetLowering::getByValTypeAlignment(Type *Ty,
274 const DataLayout &DL) const {
275 if (Subtarget.is64Bit())
276 return std::max(DL.getABITypeAlign(Ty), Align::Constant<8>());
277
278 Align Alignment(4);
279 if (Subtarget.hasSSE1())
280 getMaxByValAlign(Ty, Alignment);
281 return Alignment;
282}
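// A rough example of the rule above (assuming a 32-bit target with SSE1): a
// byval aggregate such as
//   struct S { __m128 V; int I; };
// contains a 128-bit vector, so it is placed at a 16-byte boundary, while a
// plain struct of two ints keeps the default 4-byte alignment. On 64-bit
// targets the result is simply max(ABI alignment, 8).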
283
284/// It returns EVT::Other if the type should be determined using generic
285/// target-independent logic.
286/// For vector ops we check that the overall size isn't larger than our
287/// preferred vector width.
288EVT X86TargetLowering::getOptimalMemOpType(
289 const MemOp &Op, const AttributeList &FuncAttributes) const {
290 if (!FuncAttributes.hasFnAttr(Attribute::NoImplicitFloat)) {
291 if (Op.size() >= 16 &&
292 (!Subtarget.isUnalignedMem16Slow() || Op.isAligned(Align(16)))) {
293 // FIXME: Check if unaligned 64-byte accesses are slow.
294 if (Op.size() >= 64 && Subtarget.hasAVX512() && Subtarget.hasEVEX512() &&
295 (Subtarget.getPreferVectorWidth() >= 512)) {
296 return Subtarget.hasBWI() ? MVT::v64i8 : MVT::v16i32;
297 }
298 // FIXME: Check if unaligned 32-byte accesses are slow.
299 if (Op.size() >= 32 && Subtarget.hasAVX() &&
300 Subtarget.useLight256BitInstructions()) {
301 // Although this isn't a well-supported type for AVX1, we'll let
302 // legalization and shuffle lowering produce the optimal codegen. If we
303 // choose an optimal type with a vector element larger than a byte,
304 // getMemsetStores() may create an intermediate splat (using an integer
305 // multiply) before we splat as a vector.
306 return MVT::v32i8;
307 }
308 if (Subtarget.hasSSE2() && (Subtarget.getPreferVectorWidth() >= 128))
309 return MVT::v16i8;
310 // TODO: Can SSE1 handle a byte vector?
311 // If we have SSE1 registers we should be able to use them.
312 if (Subtarget.hasSSE1() && (Subtarget.is64Bit() || Subtarget.hasX87()) &&
313 (Subtarget.getPreferVectorWidth() >= 128))
314 return MVT::v4f32;
315 } else if (((Op.isMemcpy() && !Op.isMemcpyStrSrc()) || Op.isZeroMemset()) &&
316 Op.size() >= 8 && !Subtarget.is64Bit() && Subtarget.hasSSE2()) {
317 // Do not use f64 to lower memcpy if source is string constant. It's
318 // better to use i32 to avoid the loads.
319 // Also, do not use f64 to lower memset unless this is a memset of zeros.
320 // The gymnastics of splatting a byte value into an XMM register and then
321 // only using 8-byte stores (because this is a CPU with slow unaligned
322 // 16-byte accesses) makes that a loser.
323 return MVT::f64;
324 }
325 }
326 // This is a compromise. If we reach here, unaligned accesses may be slow on
327 // this target. However, creating smaller, aligned accesses could be even
328 // slower and would certainly be a lot more code.
329 if (Subtarget.is64Bit() && Op.size() >= 8)
330 return MVT::i64;
331 return MVT::i32;
332}
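// For illustration, assuming the function is not marked NoImplicitFloat: a
// 64-byte memset on an AVX512BW subtarget that prefers 512-bit vectors uses
// MVT::v64i8 stores; a 32-byte copy on an AVX subtarget with cheap 256-bit ops
// uses MVT::v32i8; a plain SSE2 subtarget uses MVT::v16i8; and on a 32-bit
// SSE2 subtarget a zero-memset of 8+ bytes may fall back to MVT::f64. The
// scalar fallback is i64 (64-bit targets, size >= 8) or i32.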
333
334bool X86TargetLowering::isSafeMemOpType(MVT VT) const {
335 if (VT == MVT::f32)
336 return Subtarget.hasSSE1();
337 if (VT == MVT::f64)
338 return Subtarget.hasSSE2();
339 return true;
340}
341
342static bool isBitAligned(Align Alignment, uint64_t SizeInBits) {
343 return (8 * Alignment.value()) % SizeInBits == 0;
344}
345
346bool X86TargetLowering::isMemoryAccessFast(EVT VT, Align Alignment) const {
347 if (isBitAligned(Alignment, VT.getSizeInBits()))
348 return true;
349 switch (VT.getSizeInBits()) {
350 default:
351 // 8-byte and under are always assumed to be fast.
352 return true;
353 case 128:
354 return !Subtarget.isUnalignedMem16Slow();
355 case 256:
356 return !Subtarget.isUnalignedMem32Slow();
357 // TODO: What about AVX-512 (512-bit) accesses?
358 }
359}
360
361bool X86TargetLowering::allowsMisalignedMemoryAccesses(
362 EVT VT, unsigned, Align Alignment, MachineMemOperand::Flags Flags,
363 unsigned *Fast) const {
364 if (Fast)
365 *Fast = isMemoryAccessFast(VT, Alignment);
366 // NonTemporal vector memory ops must be aligned.
367 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
368 // NT loads can only be vector aligned, so if it's less aligned than the
369 // minimum vector size (which we can split the vector down to), we might as
370 // well use a regular unaligned vector load.
371 // We don't have any NT loads pre-SSE41.
372 if (!!(Flags & MachineMemOperand::MOLoad))
373 return (Alignment < 16 || !Subtarget.hasSSE41());
374 return false;
375 }
376 // Misaligned accesses of any size are always allowed.
377 return true;
378}
379
380bool X86TargetLowering::allowsMemoryAccess(LLVMContext &Context,
381 const DataLayout &DL, EVT VT,
382 unsigned AddrSpace, Align Alignment,
383 MachineMemOperand::Flags Flags,
384 unsigned *Fast) const {
385 if (Fast)
386 *Fast = isMemoryAccessFast(VT, Alignment);
387 if (!!(Flags & MachineMemOperand::MONonTemporal) && VT.isVector()) {
388 if (allowsMisalignedMemoryAccesses(VT, AddrSpace, Alignment, Flags,
389 /*Fast=*/nullptr))
390 return true;
391 // NonTemporal vector memory ops are special, and must be aligned.
392 if (!isBitAligned(Alignment, VT.getSizeInBits()))
393 return false;
394 switch (VT.getSizeInBits()) {
395 case 128:
396 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasSSE41())
397 return true;
398 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasSSE2())
399 return true;
400 return false;
401 case 256:
402 if (!!(Flags & MachineMemOperand::MOLoad) && Subtarget.hasAVX2())
403 return true;
404 if (!!(Flags & MachineMemOperand::MOStore) && Subtarget.hasAVX())
405 return true;
406 return false;
407 case 512:
408 if (Subtarget.hasAVX512() && Subtarget.hasEVEX512())
409 return true;
410 return false;
411 default:
412 return false; // Don't have NonTemporal vector memory ops of this size.
413 }
414 }
415 return true;
416}
417
418/// Return the entry encoding for a jump table in the
419/// current function. The returned value is a member of the
420/// MachineJumpTableInfo::JTEntryKind enum.
421unsigned X86TargetLowering::getJumpTableEncoding() const {
422 // In GOT pic mode, each entry in the jump table is emitted as a @GOTOFF
423 // symbol.
424 if (isPositionIndependent() && Subtarget.isPICStyleGOT())
425 return MachineJumpTableInfo::EK_Custom32;
426 if (isPositionIndependent() &&
427 getTargetMachine().getCodeModel() == CodeModel::Large &&
428 !Subtarget.isTargetCOFF())
429 return MachineJumpTableInfo::EK_LabelDifference64;
430
431 // Otherwise, use the normal jump table encoding heuristics.
432 return TargetLowering::getJumpTableEncoding();
433}
434
435bool X86TargetLowering::useSoftFloat() const {
436 return Subtarget.useSoftFloat();
437}
438
439void X86TargetLowering::markLibCallAttributes(MachineFunction *MF, unsigned CC,
440 ArgListTy &Args) const {
441
442 // Only relabel X86-32 for C / Stdcall CCs.
443 if (Subtarget.is64Bit())
444 return;
445 if (CC != CallingConv::C && CC != CallingConv::X86_StdCall)
446 return;
447 unsigned ParamRegs = 0;
448 if (auto *M = MF->getFunction().getParent())
449 ParamRegs = M->getNumberRegisterParameters();
450
451 // Mark the first N integer arguments as being passed in a register.
452 for (auto &Arg : Args) {
453 Type *T = Arg.Ty;
454 if (T->isIntOrPtrTy())
455 if (MF->getDataLayout().getTypeAllocSize(T) <= 8) {
456 unsigned numRegs = 1;
457 if (MF->getDataLayout().getTypeAllocSize(T) > 4)
458 numRegs = 2;
459 if (ParamRegs < numRegs)
460 return;
461 ParamRegs -= numRegs;
462 Arg.IsInReg = true;
463 }
464 }
465}
466
467const MCExpr *
468X86TargetLowering::LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI,
469 const MachineBasicBlock *MBB,
470 unsigned uid, MCContext &Ctx) const {
471 assert(isPositionIndependent() && Subtarget.isPICStyleGOT());
472 // In 32-bit ELF systems, our jump table entries are formed with @GOTOFF
473 // entries.
474 return MCSymbolRefExpr::create(MBB->getSymbol(), MCSymbolRefExpr::VK_GOTOFF,
475 Ctx);
476}
477
478/// Returns relocation base for the given PIC jumptable.
479SDValue X86TargetLowering::getPICJumpTableRelocBase(SDValue Table,
480 SelectionDAG &DAG) const {
481 if (!Subtarget.is64Bit())
482 // This doesn't have SDLoc associated with it, but is not really the
483 // same as a Register.
484 return DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
485 getPointerTy(DAG.getDataLayout()));
486 return Table;
487}
488
489/// This returns the relocation base for the given PIC jumptable,
490/// the same as getPICJumpTableRelocBase, but as an MCExpr.
491const MCExpr *X86TargetLowering::
492getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI,
493 MCContext &Ctx) const {
494 // X86-64 uses RIP relative addressing based on the jump table label.
495 if (Subtarget.isPICStyleRIPRel() ||
496 (Subtarget.is64Bit() &&
497 getTargetMachine().getCodeModel() == CodeModel::Large))
498 return MCSymbolRefExpr::create(MF->getJTISymbol(JTI, Ctx), Ctx);
499
500 // Otherwise, the reference is relative to the PIC base.
501 return MCSymbolRefExpr::create(MF->getPICBaseSymbol(), Ctx);
502}
503
504std::pair<const TargetRegisterClass *, uint8_t>
505X86TargetLowering::findRepresentativeClass(const TargetRegisterInfo *TRI,
506 MVT VT) const {
507 const TargetRegisterClass *RRC = nullptr;
508 uint8_t Cost = 1;
509 switch (VT.SimpleTy) {
510 default:
511 return TargetLoweringBase::findRepresentativeClass(TRI, VT);
512 case MVT::i8: case MVT::i16: case MVT::i32: case MVT::i64:
513 RRC = Subtarget.is64Bit() ? &X86::GR64RegClass : &X86::GR32RegClass;
514 break;
515 case MVT::x86mmx:
516 RRC = &X86::VR64RegClass;
517 break;
518 case MVT::f32: case MVT::f64:
519 case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
520 case MVT::v4f32: case MVT::v2f64:
521 case MVT::v32i8: case MVT::v16i16: case MVT::v8i32: case MVT::v4i64:
522 case MVT::v8f32: case MVT::v4f64:
523 case MVT::v64i8: case MVT::v32i16: case MVT::v16i32: case MVT::v8i64:
524 case MVT::v16f32: case MVT::v8f64:
525 RRC = &X86::VR128XRegClass;
526 break;
527 }
528 return std::make_pair(RRC, Cost);
529}
530
531unsigned X86TargetLowering::getAddressSpace() const {
532 if (Subtarget.is64Bit())
533 return (getTargetMachine().getCodeModel() == CodeModel::Kernel) ? X86AS::GS
534 : X86AS::FS;
535 return X86AS::GS;
536}
537
538static bool hasStackGuardSlotTLS(const Triple &TargetTriple) {
539 return TargetTriple.isOSGlibc() || TargetTriple.isOSFuchsia() ||
540 (TargetTriple.isAndroid() && !TargetTriple.isAndroidVersionLT(17));
541}
542
543static Constant* SegmentOffset(IRBuilderBase &IRB,
544 int Offset, unsigned AddressSpace) {
545 return ConstantExpr::getIntToPtr(
546 ConstantInt::get(Type::getInt32Ty(IRB.getContext()), Offset),
547 IRB.getPtrTy(AddressSpace));
548}
549
550Value *X86TargetLowering::getIRStackGuard(IRBuilderBase &IRB) const {
551 // glibc, bionic, and Fuchsia have a special slot for the stack guard in
552 // tcbhead_t; use it instead of the usual global variable (see
553 // sysdeps/{i386,x86_64}/nptl/tls.h)
554 if (hasStackGuardSlotTLS(Subtarget.getTargetTriple())) {
555 unsigned AddressSpace = getAddressSpace();
556
557 // <zircon/tls.h> defines ZX_TLS_STACK_GUARD_OFFSET with this value.
558 if (Subtarget.isTargetFuchsia())
559 return SegmentOffset(IRB, 0x10, AddressSpace);
560
561 Module *M = IRB.GetInsertBlock()->getParent()->getParent();
562 // Note that some users may customize the base register and offset.
563 int Offset = M->getStackProtectorGuardOffset();
564 // If we don't set -stack-protector-guard-offset value:
565 // %fs:0x28, unless we're using a Kernel code model, in which case
566 // it's %gs:0x28. gs:0x14 on i386.
567 if (Offset == INT_MAX)
568 Offset = (Subtarget.is64Bit()) ? 0x28 : 0x14;
569
570 StringRef GuardReg = M->getStackProtectorGuardReg();
571 if (GuardReg == "fs")
572 AddressSpace = X86AS::FS;
573 else if (GuardReg == "gs")
574 AddressSpace = X86AS::GS;
575
576 // Use the symbol guard if the user specified one.
577 StringRef GuardSymb = M->getStackProtectorGuardSymbol();
578 if (!GuardSymb.empty()) {
579 GlobalVariable *GV = M->getGlobalVariable(GuardSymb);
580 if (!GV) {
581 Type *Ty = Subtarget.is64Bit() ? Type::getInt64Ty(M->getContext())
582 : Type::getInt32Ty(M->getContext());
583 GV = new GlobalVariable(*M, Ty, false, GlobalValue::ExternalLinkage,
584 nullptr, GuardSymb, nullptr,
585 GlobalValue::NotThreadLocal, AddressSpace);
586 if (!Subtarget.isTargetDarwin())
587 GV->setDSOLocal(M->getDirectAccessExternalData());
588 }
589 return GV;
590 }
591
592 return SegmentOffset(IRB, Offset, AddressSpace);
593 }
594 return TargetLowering::getIRStackGuard(IRB);
595}
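// Sketch of what this produces on a glibc x86-64 target with the default
// guard settings (offset 0x28 in the FS segment, address space 257):
//
//   %guard.addr = inttoptr i32 40 to ptr addrspace(257)
//   %guard      = load ptr, ptr addrspace(257) %guard.addr
//
// On i386 the slot is %gs:0x14 (address space 256), and Fuchsia uses the fixed
// ZX_TLS_STACK_GUARD_OFFSET of 0x10 instead.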
596
597void X86TargetLowering::insertSSPDeclarations(Module &M) const {
598 // MSVC CRT provides functionalities for stack protection.
599 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
600 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
601 // MSVC CRT has a global variable holding security cookie.
602 M.getOrInsertGlobal("__security_cookie",
603 PointerType::getUnqual(M.getContext()));
604
605 // MSVC CRT has a function to validate security cookie.
606 FunctionCallee SecurityCheckCookie = M.getOrInsertFunction(
607 "__security_check_cookie", Type::getVoidTy(M.getContext()),
608 PointerType::getUnqual(M.getContext()));
609 if (Function *F = dyn_cast<Function>(SecurityCheckCookie.getCallee())) {
610 F->setCallingConv(CallingConv::X86_FastCall);
611 F->addParamAttr(0, Attribute::AttrKind::InReg);
612 }
613 return;
614 }
615
616 StringRef GuardMode = M.getStackProtectorGuard();
617
618 // glibc, bionic, and Fuchsia have a special slot for the stack guard.
619 if ((GuardMode == "tls" || GuardMode.empty()) &&
620 hasStackGuardSlotTLS(Subtarget.getTargetTriple()))
621 return;
622 TargetLowering::insertSSPDeclarations(M);
623}
624
625Value *X86TargetLowering::getSDagStackGuard(const Module &M) const {
626 // MSVC CRT has a global variable holding security cookie.
627 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
628 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
629 return M.getGlobalVariable("__security_cookie");
630 }
631 return TargetLowering::getSDagStackGuard(M);
632}
633
634Function *X86TargetLowering::getSSPStackGuardCheck(const Module &M) const {
635 // MSVC CRT has a function to validate security cookie.
636 if (Subtarget.getTargetTriple().isWindowsMSVCEnvironment() ||
637 Subtarget.getTargetTriple().isWindowsItaniumEnvironment()) {
638 return M.getFunction("__security_check_cookie");
639 }
640 return TargetLowering::getSSPStackGuardCheck(M);
641}
642
643Value *
644X86TargetLowering::getSafeStackPointerLocation(IRBuilderBase &IRB) const {
645 // Android provides a fixed TLS slot for the SafeStack pointer. See the
646 // definition of TLS_SLOT_SAFESTACK in
647 // https://android.googlesource.com/platform/bionic/+/master/libc/private/bionic_tls.h
648 if (Subtarget.isTargetAndroid()) {
649 // %fs:0x48, unless we're using a Kernel code model, in which case it's %gs;
650 // on i386 it's %gs:0x24.
651 int Offset = (Subtarget.is64Bit()) ? 0x48 : 0x24;
652 return SegmentOffset(IRB, Offset, getAddressSpace());
653 }
654
655 // Fuchsia is similar.
656 if (Subtarget.isTargetFuchsia()) {
657 // <zircon/tls.h> defines ZX_TLS_UNSAFE_SP_OFFSET with this value.
658 return SegmentOffset(IRB, 0x18, getAddressSpace());
659 }
660
661 return TargetLowering::getSafeStackPointerLocation(IRB);
662}
663
664//===----------------------------------------------------------------------===//
665// Return Value Calling Convention Implementation
666//===----------------------------------------------------------------------===//
667
668bool X86TargetLowering::CanLowerReturn(
669 CallingConv::ID CallConv, MachineFunction &MF, bool isVarArg,
670 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
671 const Type *RetTy) const {
672 SmallVector<CCValAssign, 16> RVLocs;
673 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, Context);
674 return CCInfo.CheckReturn(Outs, RetCC_X86);
675}
676
677const MCPhysReg *X86TargetLowering::getScratchRegisters(CallingConv::ID) const {
678 static const MCPhysReg ScratchRegs[] = { X86::R11, 0 };
679 return ScratchRegs;
680}
681
682ArrayRef<MCPhysReg> X86TargetLowering::getRoundingControlRegisters() const {
683 static const MCPhysReg RCRegs[] = {X86::FPCW, X86::MXCSR};
684 return RCRegs;
685}
686
687/// Lowers mask values (v*i1) to the local register values.
688/// \returns the DAG node after lowering to the register type.
689static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc,
690 const SDLoc &DL, SelectionDAG &DAG) {
691 EVT ValVT = ValArg.getValueType();
692
693 if (ValVT == MVT::v1i1)
694 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, ValLoc, ValArg,
695 DAG.getIntPtrConstant(0, DL));
696
697 if ((ValVT == MVT::v8i1 && (ValLoc == MVT::i8 || ValLoc == MVT::i32)) ||
698 (ValVT == MVT::v16i1 && (ValLoc == MVT::i16 || ValLoc == MVT::i32))) {
699 // Two stage lowering might be required
700 // bitcast: v8i1 -> i8 / v16i1 -> i16
701 // anyextend: i8 -> i32 / i16 -> i32
702 EVT TempValLoc = ValVT == MVT::v8i1 ? MVT::i8 : MVT::i16;
703 SDValue ValToCopy = DAG.getBitcast(TempValLoc, ValArg);
704 if (ValLoc == MVT::i32)
705 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValToCopy);
706 return ValToCopy;
707 }
708
709 if ((ValVT == MVT::v32i1 && ValLoc == MVT::i32) ||
710 (ValVT == MVT::v64i1 && ValLoc == MVT::i64)) {
711 // One stage lowering is required
712 // bitcast: v32i1 -> i32 / v64i1 -> i64
713 return DAG.getBitcast(ValLoc, ValArg);
714 }
715
716 return DAG.getNode(ISD::ANY_EXTEND, DL, ValLoc, ValArg);
717}
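// Worked example of the lowering above: returning a v16i1 mask in an i32
// location takes the two-stage path, i.e. roughly
//   t1: i16 = bitcast v16i1 %mask
//   t2: i32 = any_extend t1
// while a v32i1 mask returned in an i32 location is a single bitcast.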
718
719/// Breaks v64i1 value into two registers and adds the new node to the DAG
720static void Passv64i1ArgInRegs(
721 const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg,
722 SmallVectorImpl<std::pair<Register, SDValue>> &RegsToPass, CCValAssign &VA,
723 CCValAssign &NextVA, const X86Subtarget &Subtarget) {
724 assert(Subtarget.hasBWI() && "Expected AVX512BW target!");
725 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
726 assert(Arg.getValueType() == MVT::i64 && "Expecting 64 bit value");
727 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
728 "The value should reside in two registers");
729
730 // Before splitting the value we cast it to i64
731 Arg = DAG.getBitcast(MVT::i64, Arg);
732
733 // Splitting the value into two i32 types
734 SDValue Lo, Hi;
735 std::tie(Lo, Hi) = DAG.SplitScalar(Arg, DL, MVT::i32, MVT::i32);
736
737 // Attach the two i32 types into corresponding registers
738 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Lo));
739 RegsToPass.push_back(std::make_pair(NextVA.getLocReg(), Hi));
740}
741
741
742SDValue
743X86TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
744 bool isVarArg,
745 const SmallVectorImpl<ISD::OutputArg> &Outs,
746 const SmallVectorImpl<SDValue> &OutVals,
747 const SDLoc &dl, SelectionDAG &DAG) const {
748 MachineFunction &MF = DAG.getMachineFunction();
749 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
750
751 // In some cases we need to disable registers from the default CSR list.
752 // For example, when they are used as return registers (preserve_* and X86's
753 // regcall) or for argument passing (X86's regcall).
754 bool ShouldDisableCalleeSavedRegister =
755 shouldDisableRetRegFromCSR(CallConv) ||
756 MF.getFunction().hasFnAttribute("no_caller_saved_registers");
757
758 if (CallConv == CallingConv::X86_INTR && !Outs.empty())
759 report_fatal_error("X86 interrupts may not return any value");
760
761 SmallVector<CCValAssign, 16> RVLocs;
762 CCState CCInfo(CallConv, isVarArg, MF, RVLocs, *DAG.getContext());
763 CCInfo.AnalyzeReturn(Outs, RetCC_X86);
764
765 SmallVector<std::pair<Register, SDValue>, 4> RetVals;
766 for (unsigned I = 0, OutsIndex = 0, E = RVLocs.size(); I != E;
767 ++I, ++OutsIndex) {
768 CCValAssign &VA = RVLocs[I];
769 assert(VA.isRegLoc() && "Can only return in registers!");
770
771 // Add the register to the CalleeSaveDisableRegs list.
772 if (ShouldDisableCalleeSavedRegister)
773 MF.getRegInfo().disableCalleeSavedRegister(VA.getLocReg());
774
775 SDValue ValToCopy = OutVals[OutsIndex];
776 EVT ValVT = ValToCopy.getValueType();
777
778 // Promote values to the appropriate types.
779 if (VA.getLocInfo() == CCValAssign::SExt)
780 ValToCopy = DAG.getNode(ISD::SIGN_EXTEND, dl, VA.getLocVT(), ValToCopy);
781 else if (VA.getLocInfo() == CCValAssign::ZExt)
782 ValToCopy = DAG.getNode(ISD::ZERO_EXTEND, dl, VA.getLocVT(), ValToCopy);
783 else if (VA.getLocInfo() == CCValAssign::AExt) {
784 if (ValVT.isVector() && ValVT.getVectorElementType() == MVT::i1)
785 ValToCopy = lowerMasksToReg(ValToCopy, VA.getLocVT(), dl, DAG);
786 else
787 ValToCopy = DAG.getNode(ISD::ANY_EXTEND, dl, VA.getLocVT(), ValToCopy);
788 }
789 else if (VA.getLocInfo() == CCValAssign::BCvt)
790 ValToCopy = DAG.getBitcast(VA.getLocVT(), ValToCopy);
791
792 assert(VA.getLocInfo() != CCValAssign::FPExt &&
793 "Unexpected FP-extend for return value.");
794
795 // Report an error if we have attempted to return a value via an XMM
796 // register and SSE was disabled.
797 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
798 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
799 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
800 } else if (!Subtarget.hasSSE2() &&
801 X86::FR64XRegClass.contains(VA.getLocReg()) &&
802 ValVT == MVT::f64) {
803 // When returning a double via an XMM register, report an error if SSE2 is
804 // not enabled.
805 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
806 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
807 }
808
809 // Returns in ST0/ST1 are handled specially: these are pushed as operands to
810 // the RET instruction and handled by the FP Stackifier.
811 if (VA.getLocReg() == X86::FP0 ||
812 VA.getLocReg() == X86::FP1) {
813 // If this is a copy from an xmm register to ST(0), use an FPExtend to
814 // change the value to the FP stack register class.
815 if (isScalarFPTypeInSSEReg(VA.getValVT()))
816 ValToCopy = DAG.getNode(ISD::FP_EXTEND, dl, MVT::f80, ValToCopy);
817 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
818 // Don't emit a copytoreg.
819 continue;
820 }
821
822 // 64-bit vector (MMX) values are returned in XMM0 / XMM1 except for v1i64
823 // which is returned in RAX / RDX.
824 if (Subtarget.is64Bit()) {
825 if (ValVT == MVT::x86mmx) {
826 if (VA.getLocReg() == X86::XMM0 || VA.getLocReg() == X86::XMM1) {
827 ValToCopy = DAG.getBitcast(MVT::i64, ValToCopy);
828 ValToCopy = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64,
829 ValToCopy);
830 // If we don't have SSE2 available, convert to v4f32 so the generated
831 // register is legal.
832 if (!Subtarget.hasSSE2())
833 ValToCopy = DAG.getBitcast(MVT::v4f32, ValToCopy);
834 }
835 }
836 }
837
838 if (VA.needsCustom()) {
839 assert(VA.getValVT() == MVT::v64i1 &&
840 "Currently the only custom case is when we split v64i1 to 2 regs");
841
842 Passv64i1ArgInRegs(dl, DAG, ValToCopy, RetVals, VA, RVLocs[++I],
843 Subtarget);
844
845 // Add the second register to the CalleeSaveDisableRegs list.
846 if (ShouldDisableCalleeSavedRegister)
847 MF.getRegInfo().disableCalleeSavedRegister(RVLocs[I].getLocReg());
848 } else {
849 RetVals.push_back(std::make_pair(VA.getLocReg(), ValToCopy));
850 }
851 }
852
853 SDValue Glue;
854 SmallVector<SDValue, 6> RetOps;
855 RetOps.push_back(Chain); // Operand #0 = Chain (updated below)
856 // Operand #1 = Bytes To Pop
857 RetOps.push_back(DAG.getTargetConstant(FuncInfo->getBytesToPopOnReturn(), dl,
858 MVT::i32));
859
860 // Copy the result values into the output registers.
861 for (auto &RetVal : RetVals) {
862 if (RetVal.first == X86::FP0 || RetVal.first == X86::FP1) {
863 RetOps.push_back(RetVal.second);
864 continue; // Don't emit a copytoreg.
865 }
866
867 Chain = DAG.getCopyToReg(Chain, dl, RetVal.first, RetVal.second, Glue);
868 Glue = Chain.getValue(1);
869 RetOps.push_back(
870 DAG.getRegister(RetVal.first, RetVal.second.getValueType()));
871 }
872
873 // Swift calling convention does not require we copy the sret argument
874 // into %rax/%eax for the return, and SRetReturnReg is not set for Swift.
875
876 // All x86 ABIs require that for returning structs by value we copy
877 // the sret argument into %rax/%eax (depending on ABI) for the return.
878 // We saved the argument into a virtual register in the entry block,
879 // so now we copy the value out and into %rax/%eax.
880 //
881 // Checking Function.hasStructRetAttr() here is insufficient because the IR
882 // may not have an explicit sret argument. If FuncInfo.CanLowerReturn is
883 // false, then an sret argument may be implicitly inserted in the SelDAG. In
884 // either case FuncInfo->setSRetReturnReg() will have been called.
885 if (Register SRetReg = FuncInfo->getSRetReturnReg()) {
886 // When we have both sret and another return value, we should use the
887 // original Chain stored in RetOps[0], instead of the current Chain updated
888 // in the above loop. If we only have sret, RetOps[0] equals to Chain.
889
890 // For the case of sret and another return value, we have
891 // Chain_0 at the function entry
892 // Chain_1 = getCopyToReg(Chain_0) in the above loop
893 // If we use Chain_1 in getCopyFromReg, we will have
894 // Val = getCopyFromReg(Chain_1)
895 // Chain_2 = getCopyToReg(Chain_1, Val) from below
896
897 // getCopyToReg(Chain_0) will be glued together with
898 // getCopyToReg(Chain_1, Val) into Unit A, getCopyFromReg(Chain_1) will be
899 // in Unit B, and we will have cyclic dependency between Unit A and Unit B:
900 // Data dependency from Unit B to Unit A due to usage of Val in
901 // getCopyToReg(Chain_1, Val)
902 // Chain dependency from Unit A to Unit B
903
904 // So here, we use RetOps[0] (i.e Chain_0) for getCopyFromReg.
905 SDValue Val = DAG.getCopyFromReg(RetOps[0], dl, SRetReg,
906 getPointerTy(MF.getDataLayout()));
907
908 Register RetValReg
909 = (Subtarget.is64Bit() && !Subtarget.isTarget64BitILP32()) ?
910 X86::RAX : X86::EAX;
911 Chain = DAG.getCopyToReg(Chain, dl, RetValReg, Val, Glue);
912 Glue = Chain.getValue(1);
913
914 // RAX/EAX now acts like a return value.
915 RetOps.push_back(
916 DAG.getRegister(RetValReg, getPointerTy(DAG.getDataLayout())));
917
918 // Add the returned register to the CalleeSaveDisableRegs list. Don't do
919 // this however for preserve_most/preserve_all to minimize the number of
920 // callee-saved registers for these CCs.
921 if (ShouldDisableCalleeSavedRegister &&
922 CallConv != CallingConv::PreserveAll &&
923 CallConv != CallingConv::PreserveMost)
924 MF.getRegInfo().disableCalleeSavedRegister(RetValReg);
925 }
926
927 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
928 const MCPhysReg *I =
929 TRI->getCalleeSavedRegsViaCopy(&DAG.getMachineFunction());
930 if (I) {
931 for (; *I; ++I) {
932 if (X86::GR64RegClass.contains(*I))
933 RetOps.push_back(DAG.getRegister(*I, MVT::i64));
934 else
935 llvm_unreachable("Unexpected register class in CSRsViaCopy!");
936 }
937 }
938
939 RetOps[0] = Chain; // Update chain.
940
941 // Add the glue if we have it.
942 if (Glue.getNode())
943 RetOps.push_back(Glue);
944
945 X86ISD::NodeType opcode = X86ISD::RET_GLUE;
946 if (CallConv == CallingConv::X86_INTR)
947 opcode = X86ISD::IRET;
948 return DAG.getNode(opcode, dl, MVT::Other, RetOps);
949}
950
951bool X86TargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const {
952 if (N->getNumValues() != 1 || !N->hasNUsesOfValue(1, 0))
953 return false;
954
955 SDValue TCChain = Chain;
956 SDNode *Copy = *N->user_begin();
957 if (Copy->getOpcode() == ISD::CopyToReg) {
958 // If the copy has a glue operand, we conservatively assume it isn't safe to
959 // perform a tail call.
960 if (Copy->getOperand(Copy->getNumOperands()-1).getValueType() == MVT::Glue)
961 return false;
962 TCChain = Copy->getOperand(0);
963 } else if (Copy->getOpcode() != ISD::FP_EXTEND)
964 return false;
965
966 bool HasRet = false;
967 for (const SDNode *U : Copy->users()) {
968 if (U->getOpcode() != X86ISD::RET_GLUE)
969 return false;
970 // If we are returning more than one value, we can definitely
971 // not make a tail call see PR19530
972 if (U->getNumOperands() > 4)
973 return false;
974 if (U->getNumOperands() == 4 &&
975 U->getOperand(U->getNumOperands() - 1).getValueType() != MVT::Glue)
976 return false;
977 HasRet = true;
978 }
979
980 if (!HasRet)
981 return false;
982
983 Chain = TCChain;
984 return true;
985}
986
987EVT X86TargetLowering::getTypeForExtReturn(LLVMContext &Context, EVT VT,
988 ISD::NodeType ExtendKind) const {
989 MVT ReturnMVT = MVT::i32;
990
991 bool Darwin = Subtarget.getTargetTriple().isOSDarwin();
992 if (VT == MVT::i1 || (!Darwin && (VT == MVT::i8 || VT == MVT::i16))) {
993 // The ABI does not require i1, i8 or i16 to be extended.
994 //
995 // On Darwin, there is code in the wild relying on Clang's old behaviour of
996 // always extending i8/i16 return values, so keep doing that for now.
997 // (PR26665).
998 ReturnMVT = MVT::i8;
999 }
1000
1001 EVT MinVT = getRegisterType(Context, ReturnMVT);
1002 return VT.bitsLT(MinVT) ? MinVT : VT;
1003}
1004
1005/// Reads two 32 bit registers and creates a 64 bit mask value.
1006/// \param VA The current 32 bit value that needs to be assigned.
1007/// \param NextVA The next 32 bit value that needs to be assigned.
1008/// \param Root The parent DAG node.
1009/// \param [in,out] InGlue Represents the SDValue in the parent DAG node used
1010/// for glue purposes. In case the DAG is already using a
1011/// physical register instead of a virtual one, we should
1012/// glue our new SDValue to the InGlue SDValue.
1013/// \return a new 64 bit SDValue.
1014static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA,
1015 SDValue &Root, SelectionDAG &DAG,
1016 const SDLoc &DL, const X86Subtarget &Subtarget,
1017 SDValue *InGlue = nullptr) {
1018 assert((Subtarget.hasBWI()) && "Expected AVX512BW target!");
1019 assert(Subtarget.is32Bit() && "Expecting 32 bit target");
1020 assert(VA.getValVT() == MVT::v64i1 &&
1021 "Expecting first location of 64 bit width type");
1022 assert(NextVA.getValVT() == VA.getValVT() &&
1023 "The locations should have the same type");
1024 assert(VA.isRegLoc() && NextVA.isRegLoc() &&
1025 "The values should reside in two registers");
1026
1027 SDValue Lo, Hi;
1028 SDValue ArgValueLo, ArgValueHi;
1029
1031 const TargetRegisterClass *RC = &X86::GR32RegClass;
1032
1033 // Read a 32 bit value from the registers.
1034 if (nullptr == InGlue) {
1035 // When no physical register is present,
1036 // create an intermediate virtual register.
1037 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1038 ArgValueLo = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1039 Reg = MF.addLiveIn(NextVA.getLocReg(), RC);
1040 ArgValueHi = DAG.getCopyFromReg(Root, DL, Reg, MVT::i32);
1041 } else {
1042 // When a physical register is available read the value from it and glue
1043 // the reads together.
1044 ArgValueLo =
1045 DAG.getCopyFromReg(Root, DL, VA.getLocReg(), MVT::i32, *InGlue);
1046 *InGlue = ArgValueLo.getValue(2);
1047 ArgValueHi =
1048 DAG.getCopyFromReg(Root, DL, NextVA.getLocReg(), MVT::i32, *InGlue);
1049 *InGlue = ArgValueHi.getValue(2);
1050 }
1051
1052 // Convert the i32 type into v32i1 type.
1053 Lo = DAG.getBitcast(MVT::v32i1, ArgValueLo);
1054
1055 // Convert the i32 type into v32i1 type.
1056 Hi = DAG.getBitcast(MVT::v32i1, ArgValueHi);
1057
1058 // Concatenate the two values together.
1059 return DAG.getNode(ISD::CONCAT_VECTORS, DL, MVT::v64i1, Lo, Hi);
1060}
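// For illustration: on a 32-bit AVX512BW target a v64i1 argument arrives split
// across two 32-bit register locations (VA and NextVA). The helper above emits
// two CopyFromReg nodes of type i32, bitcasts each half to v32i1, and
// concatenates them:
//   v64i1 = concat_vectors (v32i1 bitcast %lo), (v32i1 bitcast %hi)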
1061
1062/// The function will lower a register of various sizes (8/16/32/64)
1063/// to a mask value of the expected size (v8i1/v16i1/v32i1/v64i1).
1064/// \returns a DAG node containing the operand after lowering to the mask type.
1065static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT,
1066 const EVT &ValLoc, const SDLoc &DL,
1067 SelectionDAG &DAG) {
1068 SDValue ValReturned = ValArg;
1069
1070 if (ValVT == MVT::v1i1)
1071 return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, MVT::v1i1, ValReturned);
1072
1073 if (ValVT == MVT::v64i1) {
1074 // On a 32 bit machine, this case is handled by getv64i1Argument.
1075 assert(ValLoc == MVT::i64 && "Expecting only i64 locations");
1076 // On a 64 bit machine, there is no need to truncate the value, only bitcast.
1077 } else {
1078 MVT MaskLenVT;
1079 switch (ValVT.getSimpleVT().SimpleTy) {
1080 case MVT::v8i1:
1081 MaskLenVT = MVT::i8;
1082 break;
1083 case MVT::v16i1:
1084 MaskLenVT = MVT::i16;
1085 break;
1086 case MVT::v32i1:
1087 MaskLenVT = MVT::i32;
1088 break;
1089 default:
1090 llvm_unreachable("Expecting a vector of i1 types");
1091 }
1092
1093 ValReturned = DAG.getNode(ISD::TRUNCATE, DL, MaskLenVT, ValReturned);
1094 }
1095 return DAG.getBitcast(ValVT, ValReturned);
1096}
1097
1098/// Lower the result values of a call into the
1099/// appropriate copies out of appropriate physical registers.
1100///
1101SDValue X86TargetLowering::LowerCallResult(
1102 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool isVarArg,
1103 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1104 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals,
1105 uint32_t *RegMask) const {
1106
1107 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1108 // Assign locations to each value returned by this call.
1109 SmallVector<CCValAssign, 16> RVLocs;
1110 CCState CCInfo(CallConv, isVarArg, DAG.getMachineFunction(), RVLocs,
1111 *DAG.getContext());
1112 CCInfo.AnalyzeCallResult(Ins, RetCC_X86);
1113
1114 // Copy all of the result registers out of their specified physreg.
1115 for (unsigned I = 0, InsIndex = 0, E = RVLocs.size(); I != E;
1116 ++I, ++InsIndex) {
1117 CCValAssign &VA = RVLocs[I];
1118 EVT CopyVT = VA.getLocVT();
1119
1120 // In some calling conventions we need to remove the used registers
1121 // from the register mask.
1122 if (RegMask) {
1123 for (MCPhysReg SubReg : TRI->subregs_inclusive(VA.getLocReg()))
1124 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
1125 }
1126
1127 // Report an error if there was an attempt to return FP values via XMM
1128 // registers.
1129 if (!Subtarget.hasSSE1() && X86::FR32XRegClass.contains(VA.getLocReg())) {
1130 errorUnsupported(DAG, dl, "SSE register return with SSE disabled");
1131 if (VA.getLocReg() == X86::XMM1)
1132 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1133 else
1134 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1135 } else if (!Subtarget.hasSSE2() &&
1136 X86::FR64XRegClass.contains(VA.getLocReg()) &&
1137 CopyVT == MVT::f64) {
1138 errorUnsupported(DAG, dl, "SSE2 register return with SSE2 disabled");
1139 if (VA.getLocReg() == X86::XMM1)
1140 VA.convertToReg(X86::FP1); // Set reg to FP1, avoid hitting asserts.
1141 else
1142 VA.convertToReg(X86::FP0); // Set reg to FP0, avoid hitting asserts.
1143 }
1144
1145 // If we prefer to use the value in xmm registers, copy it out as f80 and
1146 // use a truncate to move it from fp stack reg to xmm reg.
1147 bool RoundAfterCopy = false;
1148 if ((VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1) &&
1149 isScalarFPTypeInSSEReg(VA.getValVT())) {
1150 if (!Subtarget.hasX87())
1151 report_fatal_error("X87 register return with X87 disabled");
1152 CopyVT = MVT::f80;
1153 RoundAfterCopy = (CopyVT != VA.getLocVT());
1154 }
1155
1156 SDValue Val;
1157 if (VA.needsCustom()) {
1158 assert(VA.getValVT() == MVT::v64i1 &&
1159 "Currently the only custom case is when we split v64i1 to 2 regs");
1160 Val =
1161 getv64i1Argument(VA, RVLocs[++I], Chain, DAG, dl, Subtarget, &InGlue);
1162 } else {
1163 Chain = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), CopyVT, InGlue)
1164 .getValue(1);
1165 Val = Chain.getValue(0);
1166 InGlue = Chain.getValue(2);
1167 }
1168
1169 if (RoundAfterCopy)
1170 Val = DAG.getNode(ISD::FP_ROUND, dl, VA.getValVT(), Val,
1171 // This truncation won't change the value.
1172 DAG.getIntPtrConstant(1, dl, /*isTarget=*/true));
1173
1174 if (VA.isExtInLoc()) {
1175 if (VA.getValVT().isVector() &&
1176 VA.getValVT().getScalarType() == MVT::i1 &&
1177 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1178 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1179 // promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1180 Val = lowerRegToMasks(Val, VA.getValVT(), VA.getLocVT(), dl, DAG);
1181 } else
1182 Val = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val);
1183 }
1184
1185 if (VA.getLocInfo() == CCValAssign::BCvt)
1186 Val = DAG.getBitcast(VA.getValVT(), Val);
1187
1188 InVals.push_back(Val);
1189 }
1190
1191 return Chain;
1192}
1193
1194//===----------------------------------------------------------------------===//
1195// C & StdCall & Fast Calling Convention implementation
1196//===----------------------------------------------------------------------===//
1197// The StdCall calling convention is the standard for many Windows API
1198// routines. It differs from the C calling convention only a little: the
1199// callee cleans up the stack, not the caller. Symbols are also decorated in
1200// some fancy way :) It doesn't support any vector arguments.
1201// For info on fast calling convention see Fast Calling Convention (tail call)
1202// implementation LowerX86_32FastCCCallTo.
1203
1204/// Determines whether Args, either a set of outgoing arguments to a call, or a
1205/// set of incoming args of a call, contains an sret pointer that the callee
1206/// pops
1207template <typename T>
1208static bool hasCalleePopSRet(const SmallVectorImpl<T> &Args,
1209 const X86Subtarget &Subtarget) {
1210 // Not C++20 (yet), so no concepts available.
1211 static_assert(std::is_same_v<T, ISD::OutputArg> ||
1212 std::is_same_v<T, ISD::InputArg>,
1213 "requires ISD::OutputArg or ISD::InputArg");
1214
1215 // Only 32-bit pops the sret. It's a 64-bit world these days, so early-out
1216 // for most compilations.
1217 if (!Subtarget.is32Bit())
1218 return false;
1219
1220 if (Args.empty())
1221 return false;
1222
1223 // Most calls do not have an sret argument, check the arg next.
1224 const ISD::ArgFlagsTy &Flags = Args[0].Flags;
1225 if (!Flags.isSRet() || Flags.isInReg())
1226 return false;
1227
1228 // The MSVC ABI does not pop the sret.
1229 if (Subtarget.getTargetTriple().isOSMSVCRT())
1230 return false;
1231
1232 // MCUs don't pop the sret
1233 if (Subtarget.isTargetMCU())
1234 return false;
1235
1236 // Callee pops argument
1237 return true;
1238}
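// Example of the rule above: on a 32-bit Linux target, a C function returning
// a large struct receives a hidden sret pointer that it pops on return
// (e.g. "retl $4"), so this returns true; under the MSVC ABI or on MCU targets
// the caller keeps that slot and this returns false, as it always does on
// 64-bit targets.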
1239
1240/// Make a copy of an aggregate at address specified by "Src" to address
1241/// "Dst" with size and alignment information specified by the specific
1242/// parameter attribute. The copy will be passed as a byval function parameter.
1244 SDValue Chain, ISD::ArgFlagsTy Flags,
1245 SelectionDAG &DAG, const SDLoc &dl) {
1246 SDValue SizeNode = DAG.getIntPtrConstant(Flags.getByValSize(), dl);
1247
1248 return DAG.getMemcpy(
1249 Chain, dl, Dst, Src, SizeNode, Flags.getNonZeroByValAlign(),
1250 /*isVolatile*/ false, /*AlwaysInline=*/true,
1251 /*CI=*/nullptr, std::nullopt, MachinePointerInfo(), MachinePointerInfo());
1252}
1253
1254/// Return true if the calling convention is one that we can guarantee TCO for.
1255static bool canGuaranteeTCO(CallingConv::ID CC) {
1256 return (CC == CallingConv::Fast || CC == CallingConv::GHC ||
1257 CC == CallingConv::X86_RegCall || CC == CallingConv::HiPE ||
1258 CC == CallingConv::Tail || CC == CallingConv::SwiftTail);
1259}
1260
1261/// Return true if we might ever do TCO for calls with this calling convention.
1262static bool mayTailCallThisCC(CallingConv::ID CC) {
1263 switch (CC) {
1264 // C calling conventions:
1265 case CallingConv::C:
1266 case CallingConv::Win64:
1267 case CallingConv::X86_64_SysV:
1268 case CallingConv::PreserveNone:
1269 // Callee pop conventions:
1270 case CallingConv::X86_ThisCall:
1271 case CallingConv::X86_StdCall:
1272 case CallingConv::X86_VectorCall:
1273 case CallingConv::X86_FastCall:
1274 // Swift:
1275 case CallingConv::Swift:
1276 return true;
1277 default:
1278 return canGuaranteeTCO(CC);
1279 }
1280}
1281
1282/// Return true if the function is being made into a tailcall target by
1283/// changing its ABI.
1284static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt) {
1285 return (GuaranteedTailCallOpt && canGuaranteeTCO(CC)) ||
1286 CC == CallingConv::Tail || CC == CallingConv::SwiftTail;
1287}
1288
1289bool X86TargetLowering::mayBeEmittedAsTailCall(const CallInst *CI) const {
1290 if (!CI->isTailCall())
1291 return false;
1292
1293 CallingConv::ID CalleeCC = CI->getCallingConv();
1294 if (!mayTailCallThisCC(CalleeCC))
1295 return false;
1296
1297 return true;
1298}
1299
1300SDValue
1301X86TargetLowering::LowerMemArgument(SDValue Chain, CallingConv::ID CallConv,
1302 const SmallVectorImpl<ISD::InputArg> &Ins,
1303 const SDLoc &dl, SelectionDAG &DAG,
1304 const CCValAssign &VA,
1305 MachineFrameInfo &MFI, unsigned i) const {
1306 // Create the nodes corresponding to a load from this parameter slot.
1307 ISD::ArgFlagsTy Flags = Ins[i].Flags;
1308 bool AlwaysUseMutable = shouldGuaranteeTCO(
1309 CallConv, DAG.getTarget().Options.GuaranteedTailCallOpt);
1310 bool isImmutable = !AlwaysUseMutable && !Flags.isByVal();
1311 EVT ValVT;
1312 MVT PtrVT = getPointerTy(DAG.getDataLayout());
1313
1314 // If value is passed by pointer we have address passed instead of the value
1315 // itself. No need to extend if the mask value and location share the same
1316 // absolute size.
1317 bool ExtendedInMem =
1318 VA.isExtInLoc() && VA.getValVT().getScalarType() == MVT::i1 &&
1319 VA.getValVT().getSizeInBits() != VA.getLocVT().getSizeInBits();
1320
1321 if (VA.getLocInfo() == CCValAssign::Indirect || ExtendedInMem)
1322 ValVT = VA.getLocVT();
1323 else
1324 ValVT = VA.getValVT();
1325
1326 // FIXME: For now, all byval parameter objects are marked mutable. This can be
1327 // changed with more analysis.
1328 // In case of tail call optimization, mark all arguments mutable, since they
1329 // could be overwritten by the lowering of arguments in case of a tail call.
1330 if (Flags.isByVal()) {
1331 unsigned Bytes = Flags.getByValSize();
1332 if (Bytes == 0) Bytes = 1; // Don't create zero-sized stack objects.
1333
1334 // FIXME: For now, all byval parameter objects are marked as aliasing. This
1335 // can be improved with deeper analysis.
1336 int FI = MFI.CreateFixedObject(Bytes, VA.getLocMemOffset(), isImmutable,
1337 /*isAliased=*/true);
1338 return DAG.getFrameIndex(FI, PtrVT);
1339 }
1340
1341 EVT ArgVT = Ins[i].ArgVT;
1342
1343 // If this is a vector that has been split into multiple parts, don't elide
1344 // the copy. The layout on the stack may not match the packed in-memory
1345 // layout.
1346 bool ScalarizedVector = ArgVT.isVector() && !VA.getLocVT().isVector();
1347
1348 // This is an argument in memory. We might be able to perform copy elision.
1349 // If the argument is passed directly in memory without any extension, then we
1350 // can perform copy elision. Large vector types, for example, may be passed
1351 // indirectly by pointer.
1352 if (Flags.isCopyElisionCandidate() &&
1353 VA.getLocInfo() != CCValAssign::Indirect && !ExtendedInMem &&
1354 !ScalarizedVector) {
1355 SDValue PartAddr;
1356 if (Ins[i].PartOffset == 0) {
1357 // If this is a one-part value or the first part of a multi-part value,
1358 // create a stack object for the entire argument value type and return a
1359 // load from our portion of it. This assumes that if the first part of an
1360 // argument is in memory, the rest will also be in memory.
1361 int FI = MFI.CreateFixedObject(ArgVT.getStoreSize(), VA.getLocMemOffset(),
1362 /*IsImmutable=*/false);
1363 PartAddr = DAG.getFrameIndex(FI, PtrVT);
1364 return DAG.getLoad(
1365 ValVT, dl, Chain, PartAddr,
1366 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
1367 }
1368
1369 // This is not the first piece of an argument in memory. See if there is
1370 // already a fixed stack object including this offset. If so, assume it
1371 // was created by the PartOffset == 0 branch above and create a load from
1372 // the appropriate offset into it.
1373 int64_t PartBegin = VA.getLocMemOffset();
1374 int64_t PartEnd = PartBegin + ValVT.getSizeInBits() / 8;
1375 int FI = MFI.getObjectIndexBegin();
1376 for (; MFI.isFixedObjectIndex(FI); ++FI) {
1377 int64_t ObjBegin = MFI.getObjectOffset(FI);
1378 int64_t ObjEnd = ObjBegin + MFI.getObjectSize(FI);
1379 if (ObjBegin <= PartBegin && PartEnd <= ObjEnd)
1380 break;
1381 }
1382 if (MFI.isFixedObjectIndex(FI)) {
1383 SDValue Addr =
1384 DAG.getNode(ISD::ADD, dl, PtrVT, DAG.getFrameIndex(FI, PtrVT),
1385 DAG.getIntPtrConstant(Ins[i].PartOffset, dl));
1386 return DAG.getLoad(ValVT, dl, Chain, Addr,
1387 MachinePointerInfo::getFixedStack(
1388 DAG.getMachineFunction(), FI, Ins[i].PartOffset));
1389 }
1390 }
1391
1392 int FI = MFI.CreateFixedObject(ValVT.getSizeInBits() / 8,
1393 VA.getLocMemOffset(), isImmutable);
1394
1395 // Set SExt or ZExt flag.
1396 if (VA.getLocInfo() == CCValAssign::ZExt) {
1397 MFI.setObjectZExt(FI, true);
1398 } else if (VA.getLocInfo() == CCValAssign::SExt) {
1399 MFI.setObjectSExt(FI, true);
1400 }
1401
1402 MaybeAlign Alignment;
1403 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1404 ValVT != MVT::f80)
1405 Alignment = MaybeAlign(4);
1406 SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
1407 SDValue Val = DAG.getLoad(
1408 ValVT, dl, Chain, FIN,
1409 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI),
1410 Alignment);
1411 return ExtendedInMem
1412 ? (VA.getValVT().isVector()
1413 ? DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VA.getValVT(), Val)
1414 : DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), Val))
1415 : Val;
1416}
1417
1418// FIXME: Get this from tablegen.
1419static ArrayRef<MCPhysReg> get64BitArgumentGPRs(CallingConv::ID CallConv,
1420 const X86Subtarget &Subtarget) {
1421 assert(Subtarget.is64Bit());
1422
1423 if (Subtarget.isCallingConvWin64(CallConv)) {
1424 static const MCPhysReg GPR64ArgRegsWin64[] = {
1425 X86::RCX, X86::RDX, X86::R8, X86::R9
1426 };
1427 return GPR64ArgRegsWin64;
1428 }
1429
1430 static const MCPhysReg GPR64ArgRegs64Bit[] = {
1431 X86::RDI, X86::RSI, X86::RDX, X86::RCX, X86::R8, X86::R9
1432 };
1433 return GPR64ArgRegs64Bit;
1434}
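// For reference, these are the SysV and Win64 integer argument orders: a first
// integer argument is passed in RDI on SysV targets but in RCX on Win64, the
// second in RSI vs. RDX, and so on. The vararg spilling code below slices into
// these arrays using the number of GPRs already consumed by fixed arguments.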
1435
1436// FIXME: Get this from tablegen.
1437static ArrayRef<MCPhysReg> get64BitArgumentXMMs(MachineFunction &MF,
1438 CallingConv::ID CallConv,
1439 const X86Subtarget &Subtarget) {
1440 assert(Subtarget.is64Bit());
1441 if (Subtarget.isCallingConvWin64(CallConv)) {
1442 // The XMM registers which might contain var arg parameters are shadowed
1443 // in their paired GPR. So we only need to save the GPR to their home
1444 // slots.
1445 // TODO: __vectorcall will change this.
1446 return {};
1447 }
1448
1449 bool isSoftFloat = Subtarget.useSoftFloat();
1450 if (isSoftFloat || !Subtarget.hasSSE1())
1451 // Kernel mode asks for SSE to be disabled, so there are no XMM argument
1452 // registers.
1453 return {};
1454
1455 static const MCPhysReg XMMArgRegs64Bit[] = {
1456 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
1457 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
1458 };
1459 return XMMArgRegs64Bit;
1460}
1461
1462#ifndef NDEBUG
1463static bool isSortedByValueNo(ArrayRef<CCValAssign> ArgLocs) {
1464 return llvm::is_sorted(
1465 ArgLocs, [](const CCValAssign &A, const CCValAssign &B) -> bool {
1466 return A.getValNo() < B.getValNo();
1467 });
1468}
1469#endif
1470
1471namespace {
1472/// This is a helper class for lowering variable arguments parameters.
1473class VarArgsLoweringHelper {
1474public:
1475 VarArgsLoweringHelper(X86MachineFunctionInfo *FuncInfo, const SDLoc &Loc,
1476 SelectionDAG &DAG, const X86Subtarget &Subtarget,
1477 CallingConv::ID CallConv, CCState &CCInfo)
1478 : FuncInfo(FuncInfo), DL(Loc), DAG(DAG), Subtarget(Subtarget),
1479 TheMachineFunction(DAG.getMachineFunction()),
1480 TheFunction(TheMachineFunction.getFunction()),
1481 FrameInfo(TheMachineFunction.getFrameInfo()),
1482 FrameLowering(*Subtarget.getFrameLowering()),
1483 TargLowering(DAG.getTargetLoweringInfo()), CallConv(CallConv),
1484 CCInfo(CCInfo) {}
1485
1486 // Lower variable arguments parameters.
1487 void lowerVarArgsParameters(SDValue &Chain, unsigned StackSize);
1488
1489private:
1490 void createVarArgAreaAndStoreRegisters(SDValue &Chain, unsigned StackSize);
1491
1492 void forwardMustTailParameters(SDValue &Chain);
1493
1494 bool is64Bit() const { return Subtarget.is64Bit(); }
1495 bool isWin64() const { return Subtarget.isCallingConvWin64(CallConv); }
1496
1497 X86MachineFunctionInfo *FuncInfo;
1498 const SDLoc &DL;
1499 SelectionDAG &DAG;
1500 const X86Subtarget &Subtarget;
1501 MachineFunction &TheMachineFunction;
1502 const Function &TheFunction;
1503 MachineFrameInfo &FrameInfo;
1504 const TargetFrameLowering &FrameLowering;
1505 const TargetLowering &TargLowering;
1506 CallingConv::ID CallConv;
1507 CCState &CCInfo;
1508};
1509} // namespace
1510
1511void VarArgsLoweringHelper::createVarArgAreaAndStoreRegisters(
1512 SDValue &Chain, unsigned StackSize) {
1513 // If the function takes variable number of arguments, make a frame index for
1514 // the start of the first vararg value... for expansion of llvm.va_start. We
1515 // can skip this if there are no va_start calls.
1516 if (is64Bit() || (CallConv != CallingConv::X86_FastCall &&
1517 CallConv != CallingConv::X86_ThisCall)) {
1518 FuncInfo->setVarArgsFrameIndex(
1519 FrameInfo.CreateFixedObject(1, StackSize, true));
1520 }
1521
1522 // 64-bit calling conventions support varargs and register parameters, so we
1523 // have to do extra work to spill them in the prologue.
1524 if (is64Bit()) {
1525 // Find the first unallocated argument registers.
1526 ArrayRef<MCPhysReg> ArgGPRs = get64BitArgumentGPRs(CallConv, Subtarget);
1527 ArrayRef<MCPhysReg> ArgXMMs =
1528 get64BitArgumentXMMs(TheMachineFunction, CallConv, Subtarget);
1529 unsigned NumIntRegs = CCInfo.getFirstUnallocated(ArgGPRs);
1530 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(ArgXMMs);
1531
1532 assert(!(NumXMMRegs && !Subtarget.hasSSE1()) &&
1533 "SSE register cannot be used when SSE is disabled!");
1534
1535 if (isWin64()) {
1536 // Get to the caller-allocated home save location. Add 8 to account
1537 // for the return address.
1538 int HomeOffset = FrameLowering.getOffsetOfLocalArea() + 8;
1539 FuncInfo->setRegSaveFrameIndex(
1540 FrameInfo.CreateFixedObject(1, NumIntRegs * 8 + HomeOffset, false));
1541 // Fixup to set vararg frame on shadow area (4 x i64).
1542 if (NumIntRegs < 4)
1543 FuncInfo->setVarArgsFrameIndex(FuncInfo->getRegSaveFrameIndex());
1544 } else {
1545 // For X86-64, if there are vararg parameters that are passed via
1546 // registers, then we must store them to their spots on the stack so
1547 // they may be loaded by dereferencing the result of va_next.
1548 FuncInfo->setVarArgsGPOffset(NumIntRegs * 8);
1549 FuncInfo->setVarArgsFPOffset(ArgGPRs.size() * 8 + NumXMMRegs * 16);
1550 FuncInfo->setRegSaveFrameIndex(FrameInfo.CreateStackObject(
1551 ArgGPRs.size() * 8 + ArgXMMs.size() * 16, Align(16), false));
1552 }
1553
1554 SmallVector<SDValue, 6>
1555 LiveGPRs; // list of SDValue for GPR registers keeping live input value
1556 SmallVector<SDValue, 8> LiveXMMRegs; // list of SDValue for XMM registers
1557 // keeping live input value
1558 SDValue ALVal; // if applicable keeps SDValue for %al register
1559
1560 // Gather all the live in physical registers.
1561 for (MCPhysReg Reg : ArgGPRs.slice(NumIntRegs)) {
1562 Register GPR = TheMachineFunction.addLiveIn(Reg, &X86::GR64RegClass);
1563 LiveGPRs.push_back(DAG.getCopyFromReg(Chain, DL, GPR, MVT::i64));
1564 }
1565 const auto &AvailableXmms = ArgXMMs.slice(NumXMMRegs);
1566 if (!AvailableXmms.empty()) {
1567 Register AL = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1568 ALVal = DAG.getCopyFromReg(Chain, DL, AL, MVT::i8);
1569 for (MCPhysReg Reg : AvailableXmms) {
1570 // FastRegisterAllocator spills virtual registers at basic
1571 // block boundaries. That leads to uses of xmm registers
1572 // outside of the check for %al. Pass physical registers to
1573 // VASTART_SAVE_XMM_REGS to avoid unnecessary spilling.
1574 TheMachineFunction.getRegInfo().addLiveIn(Reg);
1575 LiveXMMRegs.push_back(DAG.getRegister(Reg, MVT::v4f32));
1576 }
1577 }
1578
1579 // Store the integer parameter registers.
1580 SmallVector<SDValue, 8> MemOps;
1581 SDValue RSFIN =
1582 DAG.getFrameIndex(FuncInfo->getRegSaveFrameIndex(),
1583 TargLowering.getPointerTy(DAG.getDataLayout()));
1584 unsigned Offset = FuncInfo->getVarArgsGPOffset();
1585 for (SDValue Val : LiveGPRs) {
1586 SDValue FIN = DAG.getNode(ISD::ADD, DL,
1587 TargLowering.getPointerTy(DAG.getDataLayout()),
1588 RSFIN, DAG.getIntPtrConstant(Offset, DL));
1589 SDValue Store =
1590 DAG.getStore(Val.getValue(1), DL, Val, FIN,
1591 MachinePointerInfo::getFixedStack(
1592 DAG.getMachineFunction(),
1593 FuncInfo->getRegSaveFrameIndex(), Offset));
1594 MemOps.push_back(Store);
1595 Offset += 8;
1596 }
1597
1598 // Now store the XMM (fp + vector) parameter registers.
1599 if (!LiveXMMRegs.empty()) {
1600 SmallVector<SDValue, 12> SaveXMMOps;
1601 SaveXMMOps.push_back(Chain);
1602 SaveXMMOps.push_back(ALVal);
1603 SaveXMMOps.push_back(RSFIN);
1604 SaveXMMOps.push_back(
1605 DAG.getTargetConstant(FuncInfo->getVarArgsFPOffset(), DL, MVT::i32));
1606 llvm::append_range(SaveXMMOps, LiveXMMRegs);
1607 MachineMemOperand *StoreMMO =
1608 DAG.getMachineFunction().getMachineMemOperand(
1609 MachinePointerInfo::getFixedStack(
1610 DAG.getMachineFunction(), FuncInfo->getRegSaveFrameIndex(),
1611 Offset),
1612 MachineMemOperand::MOStore, 128, Align(16));
1613 MemOps.push_back(DAG.getMemIntrinsicNode(X86ISD::VASTART_SAVE_XMM_REGS,
1614 DL, DAG.getVTList(MVT::Other),
1615 SaveXMMOps, MVT::i8, StoreMMO));
1616 }
1617
1618 if (!MemOps.empty())
1619 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOps);
1620 }
1621}
1622
1623void VarArgsLoweringHelper::forwardMustTailParameters(SDValue &Chain) {
1624 // Find the largest legal vector type.
1625 MVT VecVT = MVT::Other;
1626 // FIXME: Only some x86_32 calling conventions support AVX512.
1627 if (Subtarget.useAVX512Regs() &&
1628 (is64Bit() || (CallConv == CallingConv::X86_VectorCall ||
1629 CallConv == CallingConv::Intel_OCL_BI)))
1630 VecVT = MVT::v16f32;
1631 else if (Subtarget.hasAVX())
1632 VecVT = MVT::v8f32;
1633 else if (Subtarget.hasSSE2())
1634 VecVT = MVT::v4f32;
1635
1636 // We forward some GPRs and some vector types.
1637 SmallVector<MVT, 2> RegParmTypes;
1638 MVT IntVT = is64Bit() ? MVT::i64 : MVT::i32;
1639 RegParmTypes.push_back(IntVT);
1640 if (VecVT != MVT::Other)
1641 RegParmTypes.push_back(VecVT);
1642
1643 // Compute the set of forwarded registers. The rest are scratch.
1644 SmallVectorImpl<ForwardedRegister> &Forwards =
1645 FuncInfo->getForwardedMustTailRegParms();
1646 CCInfo.analyzeMustTailForwardedRegisters(Forwards, RegParmTypes, CC_X86);
1647
1648 // Forward AL for SysV x86_64 targets, since it is used for varargs.
1649 if (is64Bit() && !isWin64() && !CCInfo.isAllocated(X86::AL)) {
1650 Register ALVReg = TheMachineFunction.addLiveIn(X86::AL, &X86::GR8RegClass);
1651 Forwards.push_back(ForwardedRegister(ALVReg, X86::AL, MVT::i8));
1652 }
1653
1654 // Copy all forwards from physical to virtual registers.
1655 for (ForwardedRegister &FR : Forwards) {
1656 // FIXME: Can we use a less constrained schedule?
1657 SDValue RegVal = DAG.getCopyFromReg(Chain, DL, FR.VReg, FR.VT);
1658 FR.VReg = TheMachineFunction.getRegInfo().createVirtualRegister(
1659 TargLowering.getRegClassFor(FR.VT));
1660 Chain = DAG.getCopyToReg(Chain, DL, FR.VReg, RegVal);
1661 }
1662}
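// Illustrative note (not from the upstream file): for a SysV x86-64
// C-calling-convention musttail forwarder, the registers captured above are
// typically the integer argument registers RDI, RSI, RDX, RCX, R8 and R9,
// the vector argument registers XMM0-XMM7 (or YMM/ZMM when wider vectors are
// legal), plus AL, which carries the variadic SSE-register count.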
1663
1664void VarArgsLoweringHelper::lowerVarArgsParameters(SDValue &Chain,
1665 unsigned StackSize) {
1666 // Set FrameIndex to the 0xAAAAAAA value to mark the unset state.
1667 // If necessary, it will be set to the correct value later.
1668 FuncInfo->setVarArgsFrameIndex(0xAAAAAAA);
1669 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1670
1671 if (FrameInfo.hasVAStart())
1672 createVarArgAreaAndStoreRegisters(Chain, StackSize);
1673
1674 if (FrameInfo.hasMustTailInVarArgFunc())
1675 forwardMustTailParameters(Chain);
1676}
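// Illustrative sketch (not from the upstream file): for a SysV x86-64 variadic
// function such as `int sum(int n, ...)`, the code above builds a 176-byte
// register save area (6 GPRs * 8 bytes + 8 XMM registers * 16 bytes). With the
// one named argument consuming RDI, VarArgsGPOffset starts at 8 and
// VarArgsFPOffset at 48, matching the gp_offset/fp_offset values that va_start
// hands to va_arg. On Win64 no separate save area is built; the remaining
// argument GPRs are instead homed into the caller's 32-byte shadow area.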
1677
1678SDValue X86TargetLowering::LowerFormalArguments(
1679 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
1680 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &dl,
1681 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
1682 MachineFunction &MF = DAG.getMachineFunction();
1683 X86MachineFunctionInfo *FuncInfo = MF.getInfo<X86MachineFunctionInfo>();
1684
1685 const Function &F = MF.getFunction();
1686 if (F.hasExternalLinkage() && Subtarget.isTargetCygMing() &&
1687 F.getName() == "main")
1688 FuncInfo->setForceFramePointer(true);
1689
1690 MachineFrameInfo &MFI = MF.getFrameInfo();
1691 bool Is64Bit = Subtarget.is64Bit();
1692 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
1693
1694 assert(
1695 !(IsVarArg && canGuaranteeTCO(CallConv)) &&
1696 "Var args not supported with calling conv' regcall, fastcc, ghc or hipe");
1697
1698 // Assign locations to all of the incoming arguments.
1699 SmallVector<CCValAssign, 16> ArgLocs;
1700 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
1701
1702 // Allocate shadow area for Win64.
1703 if (IsWin64)
1704 CCInfo.AllocateStack(32, Align(8));
1705
1706 CCInfo.AnalyzeArguments(Ins, CC_X86);
1707
1708 // In vectorcall calling convention a second pass is required for the HVA
1709 // types.
1710 if (CallingConv::X86_VectorCall == CallConv) {
1711 CCInfo.AnalyzeArgumentsSecondPass(Ins, CC_X86);
1712 }
1713
1714 // The next loop assumes that the locations are in the same order as the
1715 // input arguments.
1716 assert(isSortedByValueNo(ArgLocs) &&
1717 "Argument Location list must be sorted before lowering");
1718
1719 SDValue ArgValue;
1720 for (unsigned I = 0, InsIndex = 0, E = ArgLocs.size(); I != E;
1721 ++I, ++InsIndex) {
1722 assert(InsIndex < Ins.size() && "Invalid Ins index");
1723 CCValAssign &VA = ArgLocs[I];
1724
1725 if (VA.isRegLoc()) {
1726 EVT RegVT = VA.getLocVT();
1727 if (VA.needsCustom()) {
1728 assert(
1729 VA.getValVT() == MVT::v64i1 &&
1730 "Currently the only custom case is when we split v64i1 to 2 regs");
1731
1732 // In the regcall calling convention, v64i1 values compiled for a
1733 // 32-bit target are split up into two registers.
1734 ArgValue =
1735 getv64i1Argument(VA, ArgLocs[++I], Chain, DAG, dl, Subtarget);
1736 } else {
1737 const TargetRegisterClass *RC;
1738 if (RegVT == MVT::i8)
1739 RC = &X86::GR8RegClass;
1740 else if (RegVT == MVT::i16)
1741 RC = &X86::GR16RegClass;
1742 else if (RegVT == MVT::i32)
1743 RC = &X86::GR32RegClass;
1744 else if (Is64Bit && RegVT == MVT::i64)
1745 RC = &X86::GR64RegClass;
1746 else if (RegVT == MVT::f16)
1747 RC = Subtarget.hasAVX512() ? &X86::FR16XRegClass : &X86::FR16RegClass;
1748 else if (RegVT == MVT::f32)
1749 RC = Subtarget.hasAVX512() ? &X86::FR32XRegClass : &X86::FR32RegClass;
1750 else if (RegVT == MVT::f64)
1751 RC = Subtarget.hasAVX512() ? &X86::FR64XRegClass : &X86::FR64RegClass;
1752 else if (RegVT == MVT::f80)
1753 RC = &X86::RFP80RegClass;
1754 else if (RegVT == MVT::f128)
1755 RC = &X86::VR128RegClass;
1756 else if (RegVT.is512BitVector())
1757 RC = &X86::VR512RegClass;
1758 else if (RegVT.is256BitVector())
1759 RC = Subtarget.hasVLX() ? &X86::VR256XRegClass : &X86::VR256RegClass;
1760 else if (RegVT.is128BitVector())
1761 RC = Subtarget.hasVLX() ? &X86::VR128XRegClass : &X86::VR128RegClass;
1762 else if (RegVT == MVT::x86mmx)
1763 RC = &X86::VR64RegClass;
1764 else if (RegVT == MVT::v1i1)
1765 RC = &X86::VK1RegClass;
1766 else if (RegVT == MVT::v8i1)
1767 RC = &X86::VK8RegClass;
1768 else if (RegVT == MVT::v16i1)
1769 RC = &X86::VK16RegClass;
1770 else if (RegVT == MVT::v32i1)
1771 RC = &X86::VK32RegClass;
1772 else if (RegVT == MVT::v64i1)
1773 RC = &X86::VK64RegClass;
1774 else
1775 llvm_unreachable("Unknown argument type!");
1776
1777 Register Reg = MF.addLiveIn(VA.getLocReg(), RC);
1778 ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
1779 }
1780
1781 // If this is an 8 or 16-bit value, it is really passed promoted to 32
1782 // bits. Insert an assert[sz]ext to capture this, then truncate to the
1783 // right size.
1784 if (VA.getLocInfo() == CCValAssign::SExt)
1785 ArgValue = DAG.getNode(ISD::AssertSext, dl, RegVT, ArgValue,
1786 DAG.getValueType(VA.getValVT()));
1787 else if (VA.getLocInfo() == CCValAssign::ZExt)
1788 ArgValue = DAG.getNode(ISD::AssertZext, dl, RegVT, ArgValue,
1789 DAG.getValueType(VA.getValVT()));
1790 else if (VA.getLocInfo() == CCValAssign::BCvt)
1791 ArgValue = DAG.getBitcast(VA.getValVT(), ArgValue);
1792
1793 if (VA.isExtInLoc()) {
1794 // Handle MMX values passed in XMM regs.
1795 if (RegVT.isVector() && VA.getValVT().getScalarType() != MVT::i1)
1796 ArgValue = DAG.getNode(X86ISD::MOVDQ2Q, dl, VA.getValVT(), ArgValue);
1797 else if (VA.getValVT().isVector() &&
1798 VA.getValVT().getScalarType() == MVT::i1 &&
1799 ((VA.getLocVT() == MVT::i64) || (VA.getLocVT() == MVT::i32) ||
1800 (VA.getLocVT() == MVT::i16) || (VA.getLocVT() == MVT::i8))) {
1801 // Promoting a mask type (v*i1) into a register of type i64/i32/i16/i8
1802 ArgValue = lowerRegToMasks(ArgValue, VA.getValVT(), RegVT, dl, DAG);
1803 } else
1804 ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
1805 }
1806 } else {
1807 assert(VA.isMemLoc());
1808 ArgValue =
1809 LowerMemArgument(Chain, CallConv, Ins, dl, DAG, VA, MFI, InsIndex);
1810 }
1811
1812 // If value is passed via pointer - do a load.
1813 if (VA.getLocInfo() == CCValAssign::Indirect &&
1814 !(Ins[I].Flags.isByVal() && VA.isRegLoc())) {
1815 ArgValue =
1816 DAG.getLoad(VA.getValVT(), dl, Chain, ArgValue, MachinePointerInfo());
1817 }
1818
1819 InVals.push_back(ArgValue);
1820 }
1821
1822 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1823 if (Ins[I].Flags.isSwiftAsync()) {
1824 auto X86FI = MF.getInfo<X86MachineFunctionInfo>();
1825 if (X86::isExtendedSwiftAsyncFrameSupported(Subtarget, MF))
1826 X86FI->setHasSwiftAsyncContext(true);
1827 else {
1828 int PtrSize = Subtarget.is64Bit() ? 8 : 4;
1829 int FI =
1830 MF.getFrameInfo().CreateStackObject(PtrSize, Align(PtrSize), false);
1831 X86FI->setSwiftAsyncContextFrameIdx(FI);
1832 SDValue St = DAG.getStore(
1833 DAG.getEntryNode(), dl, InVals[I],
1834 DAG.getFrameIndex(FI, PtrSize == 8 ? MVT::i64 : MVT::i32),
1835 MachinePointerInfo::getFixedStack(MF, FI));
1836 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, St, Chain);
1837 }
1838 }
1839
1840 // Swift calling convention does not require we copy the sret argument
1841 // into %rax/%eax for the return. We don't set SRetReturnReg for Swift.
1842 if (CallConv == CallingConv::Swift || CallConv == CallingConv::SwiftTail)
1843 continue;
1844
1845 // All x86 ABIs require that for returning structs by value we copy the
1846 // sret argument into %rax/%eax (depending on ABI) for the return. Save
1847 // the argument into a virtual register so that we can access it from the
1848 // return points.
1849 if (Ins[I].Flags.isSRet()) {
1850 assert(!FuncInfo->getSRetReturnReg() &&
1851 "SRet return has already been set");
1852 MVT PtrTy = getPointerTy(DAG.getDataLayout());
1853 Register Reg =
1854 MF.getRegInfo().createVirtualRegister(getRegClassFor(PtrTy));
1855 FuncInfo->setSRetReturnReg(Reg);
1856 SDValue Copy = DAG.getCopyToReg(DAG.getEntryNode(), dl, Reg, InVals[I]);
1857 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Copy, Chain);
1858 break;
1859 }
1860 }
1861
1862 unsigned StackSize = CCInfo.getStackSize();
1863 // Align stack specially for tail calls.
1864 if (shouldGuaranteeTCO(CallConv,
1865 MF.getTarget().Options.GuaranteedTailCallOpt))
1866 StackSize = GetAlignedArgumentStackSize(StackSize, DAG);
1867
1868 if (IsVarArg)
1869 VarArgsLoweringHelper(FuncInfo, dl, DAG, Subtarget, CallConv, CCInfo)
1870 .lowerVarArgsParameters(Chain, StackSize);
1871
1872 // Some CCs need callee pop.
1873 if (X86::isCalleePop(CallConv, Is64Bit, IsVarArg,
1874 MF.getTarget().Options.GuaranteedTailCallOpt)) {
1875 FuncInfo->setBytesToPopOnReturn(StackSize); // Callee pops everything.
1876 } else if (CallConv == CallingConv::X86_INTR && Ins.size() == 2) {
1877 // X86 interrupts must pop the error code (and the alignment padding) if
1878 // present.
1879 FuncInfo->setBytesToPopOnReturn(Is64Bit ? 16 : 4);
1880 } else {
1881 FuncInfo->setBytesToPopOnReturn(0); // Callee pops nothing.
1882 // If this is an sret function, the return should pop the hidden pointer.
1883 if (!canGuaranteeTCO(CallConv) && hasCalleePopSRet(Ins, Subtarget))
1884 FuncInfo->setBytesToPopOnReturn(4);
1885 }
1886
1887 if (!Is64Bit) {
1888 // RegSaveFrameIndex is X86-64 only.
1889 FuncInfo->setRegSaveFrameIndex(0xAAAAAAA);
1890 }
1891
1892 FuncInfo->setArgumentStackSize(StackSize);
1893
1894 if (WinEHFuncInfo *EHInfo = MF.getWinEHFuncInfo()) {
1895 EHPersonality Personality = classifyEHPersonality(F.getPersonalityFn());
1896 if (Personality == EHPersonality::CoreCLR) {
1897 assert(Is64Bit);
1898 // TODO: Add a mechanism to frame lowering that will allow us to indicate
1899 // that we'd prefer this slot be allocated towards the bottom of the frame
1900 // (i.e. near the stack pointer after allocating the frame). Every
1901 // funclet needs a copy of this slot in its (mostly empty) frame, and the
1902 // offset from the bottom of this and each funclet's frame must be the
1903 // same, so the size of funclets' (mostly empty) frames is dictated by
1904 // how far this slot is from the bottom (since they allocate just enough
1905 // space to accommodate holding this slot at the correct offset).
1906 int PSPSymFI = MFI.CreateStackObject(8, Align(8), /*isSpillSlot=*/false);
1907 EHInfo->PSPSymFrameIdx = PSPSymFI;
1908 }
1909 }
1910
1911 if (shouldDisableArgRegFromCSR(CallConv) ||
1912 F.hasFnAttribute("no_caller_saved_registers")) {
1913 MachineRegisterInfo &MRI = MF.getRegInfo();
1914 for (std::pair<MCRegister, Register> Pair : MRI.liveins())
1915 MRI.disableCalleeSavedRegister(Pair.first);
1916 }
1917
1918 if (CallingConv::PreserveNone == CallConv)
1919 for (unsigned I = 0, E = Ins.size(); I != E; ++I) {
1920 if (Ins[I].Flags.isSwiftSelf() || Ins[I].Flags.isSwiftAsync() ||
1921 Ins[I].Flags.isSwiftError()) {
1922 errorUnsupported(DAG, dl,
1923 "Swift attributes can't be used with preserve_none");
1924 break;
1925 }
1926 }
1927
1928 return Chain;
1929}
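// Illustrative sketch (not from the upstream file): for a 32-bit function
// returning `struct S { int v[4]; }` by value, the hidden sret pointer arrives
// as the first argument, is saved via setSRetReturnReg above so the return can
// copy it into EAX, and BytesToPopOnReturn is set to 4 so the function ends in
// `ret 4` on ABIs where the callee pops the hidden pointer.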
1930
1931SDValue X86TargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr,
1932 SDValue Arg, const SDLoc &dl,
1933 SelectionDAG &DAG,
1934 const CCValAssign &VA,
1935 ISD::ArgFlagsTy Flags,
1936 bool isByVal) const {
1937 unsigned LocMemOffset = VA.getLocMemOffset();
1938 SDValue PtrOff = DAG.getIntPtrConstant(LocMemOffset, dl);
1939 PtrOff = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
1940 StackPtr, PtrOff);
1941 if (isByVal)
1942 return CreateCopyOfByValArgument(Arg, PtrOff, Chain, Flags, DAG, dl);
1943
1944 MaybeAlign Alignment;
1945 if (Subtarget.isTargetWindowsMSVC() && !Subtarget.is64Bit() &&
1946 Arg.getSimpleValueType() != MVT::f80)
1947 Alignment = MaybeAlign(4);
1948 return DAG.getStore(
1949 Chain, dl, Arg, PtrOff,
1950 MachinePointerInfo::getStack(DAG.getMachineFunction(), LocMemOffset),
1951 Alignment);
1952}
1953
1954/// Emit a load of return address if tail call
1955/// optimization is performed and it is required.
1956SDValue X86TargetLowering::EmitTailCallLoadRetAddr(
1957 SelectionDAG &DAG, SDValue &OutRetAddr, SDValue Chain, bool IsTailCall,
1958 bool Is64Bit, int FPDiff, const SDLoc &dl) const {
1959 // Adjust the Return address stack slot.
1960 EVT VT = getPointerTy(DAG.getDataLayout());
1961 OutRetAddr = getReturnAddressFrameIndex(DAG);
1962
1963 // Load the "old" Return address.
1964 OutRetAddr = DAG.getLoad(VT, dl, Chain, OutRetAddr, MachinePointerInfo());
1965 return SDValue(OutRetAddr.getNode(), 1);
1966}
1967
1968/// Emit a store of the return address if tail call
1969/// optimization is performed and it is required (FPDiff!=0).
1970 static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF,
1971 SDValue Chain, SDValue RetAddrFrIdx,
1972 EVT PtrVT, unsigned SlotSize,
1973 int FPDiff, const SDLoc &dl) {
1974 // Store the return address to the appropriate stack slot.
1975 if (!FPDiff) return Chain;
1976 // Calculate the new stack slot for the return address.
1977 int NewReturnAddrFI =
1978 MF.getFrameInfo().CreateFixedObject(SlotSize, (int64_t)FPDiff - SlotSize,
1979 false);
1980 SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, PtrVT);
1981 Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
1982 MachinePointerInfo::getFixedStack(
1983 DAG.getMachineFunction(), NewReturnAddrFI));
1984 return Chain;
1985}
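// Illustrative arithmetic (assumed example, not from the upstream file): with
// GuaranteedTailCallOpt and 8-byte slots, a caller that pops 24 bytes of stack
// arguments tail-calling a callee that needs 56 bytes gives
// FPDiff = 24 - 56 = -32, so the return address loaded earlier is re-stored
// 32 bytes lower (at offset FPDiff - SlotSize), inside the move area the
// caller reserved, leaving room for the callee's larger argument block.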
1986
1987 /// Returns a vector_shuffle mask for a movs{s|d} or movd
1988/// operation of specified width.
1989SDValue X86TargetLowering::getMOVL(SelectionDAG &DAG, const SDLoc &dl, MVT VT,
1990 SDValue V1, SDValue V2) const {
1991 unsigned NumElems = VT.getVectorNumElements();
1992 SmallVector<int, 8> Mask;
1993 Mask.push_back(NumElems);
1994 for (unsigned i = 1; i != NumElems; ++i)
1995 Mask.push_back(i);
1996 return DAG.getVectorShuffle(VT, dl, V1, V2, Mask);
1997}
1998
1999SDValue
2000X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
2001 SmallVectorImpl<SDValue> &InVals) const {
2002 SelectionDAG &DAG = CLI.DAG;
2003 SDLoc &dl = CLI.DL;
2004 SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2005 SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2006 SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2007 SDValue Chain = CLI.Chain;
2008 SDValue Callee = CLI.Callee;
2009 CallingConv::ID CallConv = CLI.CallConv;
2010 bool &isTailCall = CLI.IsTailCall;
2011 bool isVarArg = CLI.IsVarArg;
2012 const auto *CB = CLI.CB;
2013
2014 MachineFunction &MF = DAG.getMachineFunction();
2015 bool Is64Bit = Subtarget.is64Bit();
2016 bool IsWin64 = Subtarget.isCallingConvWin64(CallConv);
2017 bool IsSibcall = false;
2018 bool IsGuaranteeTCO = MF.getTarget().Options.GuaranteedTailCallOpt ||
2019 CallConv == CallingConv::Tail || CallConv == CallingConv::SwiftTail;
2020 bool IsCalleePopSRet = !IsGuaranteeTCO && hasCalleePopSRet(Outs, Subtarget);
2021 X86MachineFunctionInfo *X86Info = MF.getInfo<X86MachineFunctionInfo>();
2022 bool HasNCSR = (CB && isa<CallInst>(CB) &&
2023 CB->hasFnAttr("no_caller_saved_registers"));
2024 bool HasNoCfCheck = (CB && CB->doesNoCfCheck());
2025 bool IsIndirectCall = (CB && isa<CallInst>(CB) && CB->isIndirectCall());
2026 bool IsCFICall = IsIndirectCall && CLI.CFIType;
2027 const Module *M = MF.getFunction().getParent();
2028 Metadata *IsCFProtectionSupported = M->getModuleFlag("cf-protection-branch");
2029
2030 MachineFunction::CallSiteInfo CSInfo;
2031 if (CallConv == CallingConv::X86_INTR)
2032 report_fatal_error("X86 interrupts may not be called directly");
2033
2034 // Analyze operands of the call, assigning locations to each operand.
2035 SmallVector<CCValAssign, 16> ArgLocs;
2036 CCState CCInfo(CallConv, isVarArg, MF, ArgLocs, *DAG.getContext());
2037
2038 // Allocate shadow area for Win64.
2039 if (IsWin64)
2040 CCInfo.AllocateStack(32, Align(8));
2041
2042 CCInfo.AnalyzeArguments(Outs, CC_X86);
2043
2044 // In vectorcall calling convention a second pass is required for the HVA
2045 // types.
2046 if (CallingConv::X86_VectorCall == CallConv) {
2047 CCInfo.AnalyzeArgumentsSecondPass(Outs, CC_X86);
2048 }
2049
2050 bool IsMustTail = CLI.CB && CLI.CB->isMustTailCall();
2051 if (Subtarget.isPICStyleGOT() && !IsGuaranteeTCO && !IsMustTail) {
2052 // If we are using a GOT, disable tail calls to external symbols with
2053 // default visibility. Tail calling such a symbol requires using a GOT
2054 // relocation, which forces early binding of the symbol. This breaks code
2055 // that requires lazy function symbol resolution. Using musttail or
2056 // GuaranteedTailCallOpt will override this.
2057 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2058 if (!G || (!G->getGlobal()->hasLocalLinkage() &&
2059 G->getGlobal()->hasDefaultVisibility()))
2060 isTailCall = false;
2061 }
2062
2063 if (isTailCall && !IsMustTail) {
2064 // Check if it's really possible to do a tail call.
2065 isTailCall = IsEligibleForTailCallOptimization(CLI, CCInfo, ArgLocs,
2066 IsCalleePopSRet);
2067
2068 // Sibcalls are automatically detected tailcalls which do not require
2069 // ABI changes.
2070 if (!IsGuaranteeTCO && isTailCall)
2071 IsSibcall = true;
2072
2073 if (isTailCall)
2074 ++NumTailCalls;
2075 }
2076
2077 if (IsMustTail && !isTailCall)
2078 report_fatal_error("failed to perform tail call elimination on a call "
2079 "site marked musttail");
2080
2081 assert(!(isVarArg && canGuaranteeTCO(CallConv)) &&
2082 "Var args not supported with calling convention fastcc, ghc or hipe");
2083
2084 // Get a count of how many bytes are to be pushed on the stack.
2085 unsigned NumBytes = CCInfo.getAlignedCallFrameSize();
2086 if (IsSibcall)
2087 // This is a sibcall. The memory operands are already available in the
2088 // caller's own incoming argument stack.
2089 NumBytes = 0;
2090 else if (IsGuaranteeTCO && canGuaranteeTCO(CallConv))
2091 NumBytes = GetAlignedArgumentStackSize(NumBytes, DAG);
2092
2093 int FPDiff = 0;
2094 if (isTailCall &&
2095 shouldGuaranteeTCO(CallConv,
2096 MF.getTarget().Options.GuaranteedTailCallOpt)) {
2097 // Lower arguments at fp - stackoffset + fpdiff.
2098 unsigned NumBytesCallerPushed = X86Info->getBytesToPopOnReturn();
2099
2100 FPDiff = NumBytesCallerPushed - NumBytes;
2101
2102 // Record the delta by which the return address stack slot moves,
2103 // but only if this call moves it further than any previously recorded delta.
2104 if (FPDiff < X86Info->getTCReturnAddrDelta())
2105 X86Info->setTCReturnAddrDelta(FPDiff);
2106 }
2107
2108 unsigned NumBytesToPush = NumBytes;
2109 unsigned NumBytesToPop = NumBytes;
2110
2111 // If we have an inalloca argument, all stack space has already been allocated
2112 // for us and sits right at the top of the stack. We don't support multiple
2113 // arguments passed in memory when using inalloca.
2114 if (!Outs.empty() && Outs.back().Flags.isInAlloca()) {
2115 NumBytesToPush = 0;
2116 if (!ArgLocs.back().isMemLoc())
2117 report_fatal_error("cannot use inalloca attribute on a register "
2118 "parameter");
2119 if (ArgLocs.back().getLocMemOffset() != 0)
2120 report_fatal_error("any parameter with the inalloca attribute must be "
2121 "the only memory argument");
2122 } else if (CLI.IsPreallocated) {
2123 assert(ArgLocs.back().isMemLoc() &&
2124 "cannot use preallocated attribute on a register "
2125 "parameter");
2126 SmallVector<size_t, 4> PreallocatedOffsets;
2127 for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
2128 if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
2129 PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
2130 }
2131 }
2132 auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
2133 size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
2134 MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
2135 MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
2136 NumBytesToPush = 0;
2137 }
2138
2139 if (!IsSibcall && !IsMustTail)
2140 Chain = DAG.getCALLSEQ_START(Chain, NumBytesToPush,
2141 NumBytes - NumBytesToPush, dl);
2142
2143 SDValue RetAddrFrIdx;
2144 // Load return address for tail calls.
2145 if (isTailCall && FPDiff)
2146 Chain = EmitTailCallLoadRetAddr(DAG, RetAddrFrIdx, Chain, isTailCall,
2147 Is64Bit, FPDiff, dl);
2148
2149 SmallVector<std::pair<Register, SDValue>, 8> RegsToPass;
2150 SmallVector<SDValue, 8> MemOpChains;
2151 SDValue StackPtr;
2152
2153 // The next loop assumes that the locations are in the same order as the
2154 // input arguments.
2155 assert(isSortedByValueNo(ArgLocs) &&
2156 "Argument Location list must be sorted before lowering");
2157
2158 // Walk the register/memloc assignments, inserting copies/loads. In the case
2159 // of tail call optimization, arguments are handled later.
2160 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2161 for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
2162 ++I, ++OutIndex) {
2163 assert(OutIndex < Outs.size() && "Invalid Out index");
2164 // Skip inalloca/preallocated arguments, they have already been written.
2165 ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
2166 if (Flags.isInAlloca() || Flags.isPreallocated())
2167 continue;
2168
2169 CCValAssign &VA = ArgLocs[I];
2170 EVT RegVT = VA.getLocVT();
2171 SDValue Arg = OutVals[OutIndex];
2172 bool isByVal = Flags.isByVal();
2173
2174 // Promote the value if needed.
2175 switch (VA.getLocInfo()) {
2176 default: llvm_unreachable("Unknown loc info!");
2177 case CCValAssign::Full: break;
2178 case CCValAssign::SExt:
2179 Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
2180 break;
2181 case CCValAssign::ZExt:
2182 Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
2183 break;
2184 case CCValAssign::AExt:
2185 if (Arg.getValueType().isVector() &&
2186 Arg.getValueType().getVectorElementType() == MVT::i1)
2187 Arg = lowerMasksToReg(Arg, RegVT, dl, DAG);
2188 else if (RegVT.is128BitVector()) {
2189 // Special case: passing MMX values in XMM registers.
2190 Arg = DAG.getBitcast(MVT::i64, Arg);
2191 Arg = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, MVT::v2i64, Arg);
2192 Arg = getMOVL(DAG, dl, MVT::v2i64, DAG.getUNDEF(MVT::v2i64), Arg);
2193 } else
2194 Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
2195 break;
2196 case CCValAssign::BCvt:
2197 Arg = DAG.getBitcast(RegVT, Arg);
2198 break;
2199 case CCValAssign::Indirect: {
2200 if (isByVal) {
2201 // Memcpy the argument to a temporary stack slot to prevent
2202 // the caller from seeing any modifications the callee may make
2203 // as guaranteed by the `byval` attribute.
2204 int FrameIdx = MF.getFrameInfo().CreateStackObject(
2205 Flags.getByValSize(),
2206 std::max(Align(16), Flags.getNonZeroByValAlign()), false);
2207 SDValue StackSlot =
2208 DAG.getFrameIndex(FrameIdx, getPointerTy(DAG.getDataLayout()));
2209 Chain =
2210 CreateCopyOfByValArgument(Arg, StackSlot, Chain, Flags, DAG, dl);
2211 // From now on treat this as a regular pointer
2212 Arg = StackSlot;
2213 isByVal = false;
2214 } else {
2215 // Store the argument.
2216 SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
2217 int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
2218 Chain = DAG.getStore(
2219 Chain, dl, Arg, SpillSlot,
2220 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI));
2221 Arg = SpillSlot;
2222 }
2223 break;
2224 }
2225 }
2226
2227 if (VA.needsCustom()) {
2228 assert(VA.getValVT() == MVT::v64i1 &&
2229 "Currently the only custom case is when we split v64i1 to 2 regs");
2230 // Split v64i1 value into two registers
2231 Passv64i1ArgInRegs(dl, DAG, Arg, RegsToPass, VA, ArgLocs[++I], Subtarget);
2232 } else if (VA.isRegLoc()) {
2233 RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
2234 const TargetOptions &Options = DAG.getTarget().Options;
2235 if (Options.EmitCallSiteInfo)
2236 CSInfo.ArgRegPairs.emplace_back(VA.getLocReg(), I);
2237 if (isVarArg && IsWin64) {
2238 // Win64 ABI requires argument XMM reg to be copied to the corresponding
2239 // shadow reg if callee is a varargs function.
2240 Register ShadowReg;
2241 switch (VA.getLocReg()) {
2242 case X86::XMM0: ShadowReg = X86::RCX; break;
2243 case X86::XMM1: ShadowReg = X86::RDX; break;
2244 case X86::XMM2: ShadowReg = X86::R8; break;
2245 case X86::XMM3: ShadowReg = X86::R9; break;
2246 }
2247 if (ShadowReg)
2248 RegsToPass.push_back(std::make_pair(ShadowReg, Arg));
2249 }
2250 } else if (!IsSibcall && (!isTailCall || isByVal)) {
2251 assert(VA.isMemLoc());
2252 if (!StackPtr.getNode())
2253 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2254 getPointerTy(DAG.getDataLayout()));
2255 MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
2256 dl, DAG, VA, Flags, isByVal));
2257 }
2258 }
2259
2260 if (!MemOpChains.empty())
2261 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains);
2262
2263 if (Subtarget.isPICStyleGOT()) {
2264 // ELF / PIC requires the GOT pointer to be in the EBX register before
2265 // function calls made via the PLT (except for regcall).
2266 if (!isTailCall) {
2267 // An indirect call with the RegCall calling convention may use up all the
2268 // general registers, so it is not suitable to bind the EBX register to the
2269 // GOT address; just let the register allocator handle it.
2270 if (CallConv != CallingConv::X86_RegCall)
2271 RegsToPass.push_back(std::make_pair(
2272 Register(X86::EBX), DAG.getNode(X86ISD::GlobalBaseReg, SDLoc(),
2273 getPointerTy(DAG.getDataLayout()))));
2274 } else {
2275 // If we are tail calling and generating PIC/GOT style code load the
2276 // address of the callee into ECX. The value in ecx is used as target of
2277 // the tail jump. This is done to circumvent the ebx/callee-saved problem
2278 // for tail calls on PIC/GOT architectures. Normally we would just put the
2279 // address of GOT into ebx and then call target@PLT. But for tail calls
2280 // ebx would be restored (since ebx is callee saved) before jumping to the
2281 // target@PLT.
2282
2283 // Note: The actual moving to ECX is done further down.
2284 GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee);
2285 if (G && !G->getGlobal()->hasLocalLinkage() &&
2286 G->getGlobal()->hasDefaultVisibility())
2287 Callee = LowerGlobalAddress(Callee, DAG);
2288 else if (isa<ExternalSymbolSDNode>(Callee))
2289 Callee = LowerExternalSymbol(Callee, DAG);
2290 }
2291 }
2292
2293 if (Is64Bit && isVarArg && !IsWin64 && !IsMustTail &&
2294 (Subtarget.hasSSE1() || !M->getModuleFlag("SkipRaxSetup"))) {
2295 // From AMD64 ABI document:
2296 // For calls that may call functions that use varargs or stdargs
2297 // (prototype-less calls or calls to functions containing ellipsis (...) in
2298 // the declaration) %al is used as a hidden argument to specify the number
2299 // of SSE registers used. The contents of %al do not need to match exactly
2300 // the number of registers, but must be an upper bound on the number of SSE
2301 // registers used and must be in the range 0 - 8 inclusive.
2302
2303 // Count the number of XMM registers allocated.
2304 static const MCPhysReg XMMArgRegs[] = {
2305 X86::XMM0, X86::XMM1, X86::XMM2, X86::XMM3,
2306 X86::XMM4, X86::XMM5, X86::XMM6, X86::XMM7
2307 };
2308 unsigned NumXMMRegs = CCInfo.getFirstUnallocated(XMMArgRegs);
2309 assert((Subtarget.hasSSE1() || !NumXMMRegs)
2310 && "SSE registers cannot be used when SSE is disabled");
2311 RegsToPass.push_back(std::make_pair(Register(X86::AL),
2312 DAG.getConstant(NumXMMRegs, dl,
2313 MVT::i8)));
2314 }
2315
2316 if (isVarArg && IsMustTail) {
2317 const auto &Forwards = X86Info->getForwardedMustTailRegParms();
2318 for (const auto &F : Forwards) {
2319 SDValue Val = DAG.getCopyFromReg(Chain, dl, F.VReg, F.VT);
2320 RegsToPass.push_back(std::make_pair(F.PReg, Val));
2321 }
2322 }
2323
2324 // For tail calls lower the arguments to the 'real' stack slots. Sibcalls
2325 // don't need this because the eligibility check rejects calls that require
2326 // shuffling arguments passed in memory.
2327 if (!IsSibcall && isTailCall) {
2328 // Force all the incoming stack arguments to be loaded from the stack
2329 // before any new outgoing arguments are stored to the stack, because the
2330 // outgoing stack slots may alias the incoming argument stack slots, and
2331 // the alias isn't otherwise explicit. This is slightly more conservative
2332 // than necessary, because it means that each store effectively depends
2333 // on every argument instead of just those arguments it would clobber.
2334 SDValue ArgChain = DAG.getStackArgumentTokenFactor(Chain);
2335
2336 SmallVector<SDValue, 8> MemOpChains2;
2337 SDValue FIN;
2338 int FI = 0;
2339 for (unsigned I = 0, OutsIndex = 0, E = ArgLocs.size(); I != E;
2340 ++I, ++OutsIndex) {
2341 CCValAssign &VA = ArgLocs[I];
2342
2343 if (VA.isRegLoc()) {
2344 if (VA.needsCustom()) {
2345 assert((CallConv == CallingConv::X86_RegCall) &&
2346 "Expecting custom case only in regcall calling convention");
2347 // This means that we are in the special case where one argument was
2348 // passed through two register locations, so skip the next location.
2349 ++I;
2350 }
2351
2352 continue;
2353 }
2354
2355 assert(VA.isMemLoc());
2356 SDValue Arg = OutVals[OutsIndex];
2357 ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
2358 // Skip inalloca/preallocated arguments. They don't require any work.
2359 if (Flags.isInAlloca() || Flags.isPreallocated())
2360 continue;
2361 // Create frame index.
2362 int32_t Offset = VA.getLocMemOffset()+FPDiff;
2363 uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
2364 FI = MF.getFrameInfo().CreateFixedObject(OpSize, Offset, true);
2365 FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
2366
2367 if (Flags.isByVal()) {
2368 // Copy relative to framepointer.
2369 SDValue Source = DAG.getIntPtrConstant(VA.getLocMemOffset(), dl);
2370 if (!StackPtr.getNode())
2371 StackPtr = DAG.getCopyFromReg(Chain, dl, RegInfo->getStackRegister(),
2372 getPointerTy(DAG.getDataLayout()));
2373 Source = DAG.getNode(ISD::ADD, dl, getPointerTy(DAG.getDataLayout()),
2374 StackPtr, Source);
2375
2376 MemOpChains2.push_back(CreateCopyOfByValArgument(Source, FIN,
2377 ArgChain,
2378 Flags, DAG, dl));
2379 } else {
2380 // Store relative to framepointer.
2381 MemOpChains2.push_back(DAG.getStore(
2382 ArgChain, dl, Arg, FIN,
2383 MachinePointerInfo::getFixedStack(DAG.getMachineFunction(), FI)));
2384 }
2385 }
2386
2387 if (!MemOpChains2.empty())
2388 Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, MemOpChains2);
2389
2390 // Store the return address to the appropriate stack slot.
2391 Chain = EmitTailCallStoreRetAddr(DAG, MF, Chain, RetAddrFrIdx,
2392 getPointerTy(DAG.getDataLayout()),
2393 RegInfo->getSlotSize(), FPDiff, dl);
2394 }
2395
2396 // Build a sequence of copy-to-reg nodes chained together with token chain
2397 // and glue operands which copy the outgoing args into registers.
2398 SDValue InGlue;
2399 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
2400 Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
2401 RegsToPass[i].second, InGlue);
2402 InGlue = Chain.getValue(1);
2403 }
2404
2405 if (DAG.getTarget().getCodeModel() == CodeModel::Large) {
2406 assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
2407 // In the 64-bit large code model, we have to make all calls
2408 // through a register, since the call instruction's 32-bit
2409 // pc-relative offset may not be large enough to hold the whole
2410 // address.
2411 } else if (Callee->getOpcode() == ISD::GlobalAddress ||
2412 Callee->getOpcode() == ISD::ExternalSymbol) {
2413 // Lower direct calls to global addresses and external symbols. Setting
2414 // ForCall to true here has the effect of removing WrapperRIP when possible
2415 // to allow direct calls to be selected without first materializing the
2416 // address into a register.
2417 Callee = LowerGlobalOrExternal(Callee, DAG, /*ForCall=*/true);
2418 } else if (Subtarget.isTarget64BitILP32() &&
2419 Callee.getValueType() == MVT::i32) {
2420 // Zero-extend the 32-bit Callee address into a 64-bit according to x32 ABI
2421 Callee = DAG.getNode(ISD::ZERO_EXTEND, dl, MVT::i64, Callee);
2422 }
2423
2424 SmallVector<SDValue, 8> Ops;
2425
2426 if (!IsSibcall && isTailCall && !IsMustTail) {
2427 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, 0, InGlue, dl);
2428 InGlue = Chain.getValue(1);
2429 }
2430
2431 Ops.push_back(Chain);
2432 Ops.push_back(Callee);
2433
2434 if (isTailCall)
2435 Ops.push_back(DAG.getSignedTargetConstant(FPDiff, dl, MVT::i32));
2436
2437 // Add argument registers to the end of the list so that they are known live
2438 // into the call.
2439 for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
2440 Ops.push_back(DAG.getRegister(RegsToPass[i].first,
2441 RegsToPass[i].second.getValueType()));
2442
2443 // Add a register mask operand representing the call-preserved registers.
2444 const uint32_t *Mask = [&]() {
2445 auto AdaptedCC = CallConv;
2446 // If HasNCSR is asserted (attribute NoCallerSavedRegisters exists),
2447 // use X86_INTR calling convention because it has the same CSR mask
2448 // (same preserved registers).
2449 if (HasNCSR)
2450 AdaptedCC = (CallingConv::ID)CallingConv::X86_INTR;
2451 // If NoCalleeSavedRegisters is requested, then use GHC since it happens
2452 // to use the CSR_NoRegs_RegMask.
2453 if (CB && CB->hasFnAttr("no_callee_saved_registers"))
2454 AdaptedCC = (CallingConv::ID)CallingConv::GHC;
2455 return RegInfo->getCallPreservedMask(MF, AdaptedCC);
2456 }();
2457 assert(Mask && "Missing call preserved mask for calling convention");
2458
2459 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getFramePtr())) {
2460 X86Info->setFPClobberedByCall(true);
2461 if (CLI.CB && isa<InvokeInst>(CLI.CB))
2462 X86Info->setFPClobberedByInvoke(true);
2463 }
2464 if (MachineOperand::clobbersPhysReg(Mask, RegInfo->getBaseRegister())) {
2465 X86Info->setBPClobberedByCall(true);
2466 if (CLI.CB && isa<InvokeInst>(CLI.CB))
2467 X86Info->setBPClobberedByInvoke(true);
2468 }
2469
2470 // If this is an invoke in a 32-bit function using a funclet-based
2471 // personality, assume the function clobbers all registers. If an exception
2472 // is thrown, the runtime will not restore CSRs.
2473 // FIXME: Model this more precisely so that we can register allocate across
2474 // the normal edge and spill and fill across the exceptional edge.
2475 if (!Is64Bit && CLI.CB && isa<InvokeInst>(CLI.CB)) {
2476 const Function &CallerFn = MF.getFunction();
2477 EHPersonality Pers =
2478 CallerFn.hasPersonalityFn()
2479 ? classifyEHPersonality(CallerFn.getPersonalityFn())
2480 : EHPersonality::Unknown;
2481 if (isFuncletEHPersonality(Pers))
2482 Mask = RegInfo->getNoPreservedMask();
2483 }
2484
2485 // Define a new register mask from the existing mask.
2486 uint32_t *RegMask = nullptr;
2487
2488 // In some calling conventions we need to remove the used physical registers
2489 // from the reg mask. Create a new RegMask for such calling conventions.
2490 // RegMask for calling conventions that disable only return registers (e.g.
2491 // preserve_most) will be modified later in LowerCallResult.
2492 bool ShouldDisableArgRegs = shouldDisableArgRegFromCSR(CallConv) || HasNCSR;
2493 if (ShouldDisableArgRegs || shouldDisableRetRegFromCSR(CallConv)) {
2494 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
2495
2496 // Allocate a new Reg Mask and copy Mask.
2497 RegMask = MF.allocateRegMask();
2498 unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
2499 memcpy(RegMask, Mask, sizeof(RegMask[0]) * RegMaskSize);
2500
2501 // Make sure all sub registers of the argument registers are reset
2502 // in the RegMask.
2503 if (ShouldDisableArgRegs) {
2504 for (auto const &RegPair : RegsToPass)
2505 for (MCPhysReg SubReg : TRI->subregs_inclusive(RegPair.first))
2506 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
2507 }
2508
2509 // Create the RegMask Operand according to our updated mask.
2510 Ops.push_back(DAG.getRegisterMask(RegMask));
2511 } else {
2512 // Create the RegMask Operand according to the static mask.
2513 Ops.push_back(DAG.getRegisterMask(Mask));
2514 }
2515
2516 if (InGlue.getNode())
2517 Ops.push_back(InGlue);
2518
2519 if (isTailCall) {
2520 // We used to do:
2521 //// If this is the first return lowered for this function, add the regs
2522 //// to the liveout set for the function.
2523 // This isn't right, although it's probably harmless on x86; liveouts
2524 // should be computed from returns not tail calls. Consider a void
2525 // function making a tail call to a function returning int.
2526 MF.getFrameInfo().setHasTailCall();
2527 SDValue Ret = DAG.getNode(X86ISD::TC_RETURN, dl, MVT::Other, Ops);
2528
2529 if (IsCFICall)
2530 Ret.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2531
2532 DAG.addNoMergeSiteInfo(Ret.getNode(), CLI.NoMerge);
2533 DAG.addCallSiteInfo(Ret.getNode(), std::move(CSInfo));
2534 return Ret;
2535 }
2536
2537 // Returns a chain & a glue for retval copy to use.
2538 SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
2539 if (HasNoCfCheck && IsCFProtectionSupported && IsIndirectCall) {
2540 Chain = DAG.getNode(X86ISD::NT_CALL, dl, NodeTys, Ops);
2541 } else if (CLI.CB && objcarc::hasAttachedCallOpBundle(CLI.CB)) {
2542 // Calls with a "clang.arc.attachedcall" bundle are special. They should be
2543 // expanded to the call, directly followed by a special marker sequence and
2544 // a call to an ObjC library function. Use the CALL_RVMARKER to do that.
2545 assert(!isTailCall &&
2546 "tail calls cannot be marked with clang.arc.attachedcall");
2547 assert(Is64Bit && "clang.arc.attachedcall is only supported in 64bit mode");
2548
2549 // Add a target global address for the retainRV/claimRV runtime function
2550 // just before the call target.
2551 Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB);
2552 auto PtrVT = getPointerTy(DAG.getDataLayout());
2553 auto GA = DAG.getTargetGlobalAddress(ARCFn, dl, PtrVT);
2554 Ops.insert(Ops.begin() + 1, GA);
2555 Chain = DAG.getNode(X86ISD::CALL_RVMARKER, dl, NodeTys, Ops);
2556 } else {
2557 Chain = DAG.getNode(X86ISD::CALL, dl, NodeTys, Ops);
2558 }
2559
2560 if (IsCFICall)
2561 Chain.getNode()->setCFIType(CLI.CFIType->getZExtValue());
2562
2563 InGlue = Chain.getValue(1);
2564 DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);
2565 DAG.addCallSiteInfo(Chain.getNode(), std::move(CSInfo));
2566
2567 // Save heapallocsite metadata.
2568 if (CLI.CB)
2569 if (MDNode *HeapAlloc = CLI.CB->getMetadata("heapallocsite"))
2570 DAG.addHeapAllocSite(Chain.getNode(), HeapAlloc);
2571
2572 // Create the CALLSEQ_END node.
2573 unsigned NumBytesForCalleeToPop = 0; // Callee pops nothing.
2574 if (X86::isCalleePop(CallConv, Is64Bit, isVarArg,
2575 DAG.getTarget().Options.GuaranteedTailCallOpt))
2576 NumBytesForCalleeToPop = NumBytes; // Callee pops everything
2577 else if (!canGuaranteeTCO(CallConv) && IsCalleePopSRet)
2578 // If this call passes a struct-return pointer, the callee
2579 // pops that struct pointer.
2580 NumBytesForCalleeToPop = 4;
2581
2582 // Returns a glue for retval copy to use.
2583 if (!IsSibcall) {
2584 Chain = DAG.getCALLSEQ_END(Chain, NumBytesToPop, NumBytesForCalleeToPop,
2585 InGlue, dl);
2586 InGlue = Chain.getValue(1);
2587 }
2588
2589 if (CallingConv::PreserveNone == CallConv)
2590 for (unsigned I = 0, E = Outs.size(); I != E; ++I) {
2591 if (Outs[I].Flags.isSwiftSelf() || Outs[I].Flags.isSwiftAsync() ||
2592 Outs[I].Flags.isSwiftError()) {
2593 errorUnsupported(DAG, dl,
2594 "Swift attributes can't be used with preserve_none");
2595 break;
2596 }
2597 }
2598
2599 // Handle result values, copying them out of physregs into vregs that we
2600 // return.
2601 return LowerCallResult(Chain, InGlue, CallConv, isVarArg, Ins, dl, DAG,
2602 InVals, RegMask);
2603}
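// Illustrative shape of the DAG built above (a rough sketch, not from the
// upstream file): an ordinary call becomes roughly
//   CALLSEQ_START -> glued CopyToReg of argument registers -> X86ISD::CALL
//   -> CALLSEQ_END -> CopyFromReg of result registers (in LowerCallResult),
// while an eligible tail call ends in an X86ISD::TC_RETURN node instead, and a
// sibcall additionally omits the CALLSEQ_START/CALLSEQ_END markers.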
2604
2605//===----------------------------------------------------------------------===//
2606// Fast Calling Convention (tail call) implementation
2607//===----------------------------------------------------------------------===//
2608
2609 // Like stdcall, the callee cleans up the arguments, except that ECX is
2610 // reserved for storing the tail-called function address. Only 2 registers are
2611 // free for argument passing (inreg). Tail call optimization is performed
2612 // provided:
2613 // * tailcallopt is enabled
2614 // * caller/callee are fastcc
2615 // On the X86_64 architecture with GOT-style position-independent code, only
2616 // local (within-module) calls are supported at the moment.
2617 // To keep the stack aligned according to the platform ABI, the function
2618 // GetAlignedArgumentStackSize ensures that the argument delta is always a
2619 // multiple of the stack alignment. (Dynamic linkers such as Darwin's dyld need this.)
2620 // If a tail-called function (callee) has more arguments than the caller, the
2621 // caller needs to make sure that there is room to move the RETADDR to. This is
2622 // achieved by reserving an area the size of the argument delta right after the
2623 // original RETADDR, but before the saved frame pointer or the spilled registers,
2624 // e.g. caller(arg1, arg2) calls callee(arg1, arg2, arg3, arg4)
2625// stack layout:
2626// arg1
2627// arg2
2628// RETADDR
2629// [ new RETADDR
2630// move area ]
2631// (possible EBP)
2632// ESI
2633// EDI
2634// local1 ..
2635
2636 /// Align the stack size (e.g. to the form 16n + 12 on 32-bit) so that, once
2637 /// the return-address slot is added, a 16-byte alignment requirement is met.
2638unsigned
2639X86TargetLowering::GetAlignedArgumentStackSize(const unsigned StackSize,
2640 SelectionDAG &DAG) const {
2641 const Align StackAlignment = Subtarget.getFrameLowering()->getStackAlign();
2642 const uint64_t SlotSize = Subtarget.getRegisterInfo()->getSlotSize();
2643 assert(StackSize % SlotSize == 0 &&
2644 "StackSize must be a multiple of SlotSize");
2645 return alignTo(StackSize + SlotSize, StackAlignment) - SlotSize;
2646}
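// Worked example (illustrative, not from the upstream file): on 32-bit x86
// with SlotSize = 4 and 16-byte stack alignment, StackSize = 20 becomes
// alignTo(20 + 4, 16) - 4 = 32 - 4 = 28, i.e. of the form 16n + 12, so the
// stack is 16-byte aligned again once the 4-byte return address is pushed.
// On x86-64 (SlotSize = 8) the result has the form 16n + 8 for the same
// reason; the difference between the caller's and callee's aligned sizes is
// the argument delta that sizes the RETADDR move area described above.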
2647
2648/// Return true if the given stack call argument is already available in the
2649/// same position (relatively) of the caller's incoming argument stack.
2650static
2651 bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags,
2652 MachineFrameInfo &MFI, const MachineRegisterInfo *MRI,
2653 const X86InstrInfo *TII, const CCValAssign &VA) {
2654 unsigned Bytes = Arg.getValueSizeInBits() / 8;
2655
2656 for (;;) {
2657 // Look through nodes that don't alter the bits of the incoming value.
2658 unsigned Op = Arg.getOpcode();
2659 if (Op == ISD::ZERO_EXTEND || Op == ISD::ANY_EXTEND || Op == ISD::BITCAST ||
2660 Op == ISD::AssertZext) {
2661 Arg = Arg.getOperand(0);
2662 continue;
2663 }
2664 if (Op == ISD::TRUNCATE) {
2665 const SDValue &TruncInput = Arg.getOperand(0);
2666 if (TruncInput.getOpcode() == ISD::AssertZext &&
2667 cast<VTSDNode>(TruncInput.getOperand(1))->getVT() ==
2668 Arg.getValueType()) {
2669 Arg = TruncInput.getOperand(0);
2670 continue;
2671 }
2672 }
2673 break;
2674 }
2675
2676 int FI = INT_MAX;
2677 if (Arg.getOpcode() == ISD::CopyFromReg) {
2678 Register VR = cast<RegisterSDNode>(Arg.getOperand(1))->getReg();
2679 if (!VR.isVirtual())
2680 return false;
2681 MachineInstr *Def = MRI->getVRegDef(VR);
2682 if (!Def)
2683 return false;
2684 if (!Flags.isByVal()) {
2685 if (!TII->isLoadFromStackSlot(*Def, FI))
2686 return false;
2687 } else {
2688 unsigned Opcode = Def->getOpcode();
2689 if ((Opcode == X86::LEA32r || Opcode == X86::LEA64r ||
2690 Opcode == X86::LEA64_32r) &&
2691 Def->getOperand(1).isFI()) {
2692 FI = Def->getOperand(1).getIndex();
2693 Bytes = Flags.getByValSize();
2694 } else
2695 return false;
2696 }
2697 } else if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Arg)) {
2698 if (Flags.isByVal())
2699 // ByVal argument is passed in as a pointer but it's now being
2700 // dereferenced. e.g.
2701 // define @foo(%struct.X* %A) {
2702 // tail call @bar(%struct.X* byval %A)
2703 // }
2704 return false;
2705 SDValue Ptr = Ld->getBasePtr();
2706 FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr);
2707 if (!FINode)
2708 return false;
2709 FI = FINode->getIndex();
2710 } else if (Arg.getOpcode() == ISD::FrameIndex && Flags.isByVal()) {
2711 FrameIndexSDNode *FINode = cast<FrameIndexSDNode>(Arg);
2712 FI = FINode->getIndex();
2713 Bytes = Flags.getByValSize();
2714 } else
2715 return false;
2716
2717 assert(FI != INT_MAX);
2718 if (!MFI.isFixedObjectIndex(FI))
2719 return false;
2720
2721 if (Offset != MFI.getObjectOffset(FI))
2722 return false;
2723
2724 // If this is not byval, check that the argument stack object is immutable.
2725 // inalloca and argument copy elision can create mutable argument stack
2726 // objects. Byval objects can be mutated, but a byval call intends to pass the
2727 // mutated memory.
2728 if (!Flags.isByVal() && !MFI.isImmutableObjectIndex(FI))
2729 return false;
2730
2731 if (VA.getLocVT().getFixedSizeInBits() >
2732 Arg.getValueSizeInBits().getFixedValue()) {
2733 // If the argument location is wider than the argument type, check that any
2734 // extension flags match.
2735 if (Flags.isZExt() != MFI.isObjectZExt(FI) ||
2736 Flags.isSExt() != MFI.isObjectSExt(FI)) {
2737 return false;
2738 }
2739 }
2740
2741 return Bytes == MFI.getObjectSize(FI);
2742}
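// Illustrative case (not from the upstream file): in
//   int caller(int a, int b) { return callee(a, b); }
// built for 32-bit x86 with the default calling convention, the outgoing
// arguments already sit at the same fixed, immutable offsets in the caller's
// incoming argument area, so MatchingStackOffset returns true for both and the
// call can be emitted as a sibcall without re-storing them.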
2743
2744/// Check whether the call is eligible for tail call optimization. Targets
2745/// that want to do tail call optimization should implement this function.
2746/// Note that the x86 backend does not check musttail calls for eligibility! The
2747/// rest of x86 tail call lowering must be prepared to forward arguments of any
2748/// type.
2749bool X86TargetLowering::IsEligibleForTailCallOptimization(
2750 TargetLowering::CallLoweringInfo &CLI, CCState &CCInfo,
2751 SmallVectorImpl<CCValAssign> &ArgLocs, bool IsCalleePopSRet) const {
2752 SelectionDAG &DAG = CLI.DAG;
2753 const SmallVectorImpl<ISD::OutputArg> &Outs = CLI.Outs;
2754 const SmallVectorImpl<SDValue> &OutVals = CLI.OutVals;
2755 const SmallVectorImpl<ISD::InputArg> &Ins = CLI.Ins;
2756 SDValue Callee = CLI.Callee;
2757 CallingConv::ID CalleeCC = CLI.CallConv;
2758 bool isVarArg = CLI.IsVarArg;
2759
2760 if (!mayTailCallThisCC(CalleeCC))
2761 return false;
2762
2763 // If -tailcallopt is specified, make fastcc functions tail-callable.
2764 MachineFunction &MF = DAG.getMachineFunction();
2765 const Function &CallerF = MF.getFunction();
2766
2767 // If the function return type is x86_fp80 and the callee return type is not,
2768 // then the FP_EXTEND of the call result is not a nop. It's not safe to
2769 // perform a tailcall optimization here.
2770 if (CallerF.getReturnType()->isX86_FP80Ty() && !CLI.RetTy->isX86_FP80Ty())
2771 return false;
2772
2773 CallingConv::ID CallerCC = CallerF.getCallingConv();
2774 bool CCMatch = CallerCC == CalleeCC;
2775 bool IsCalleeWin64 = Subtarget.isCallingConvWin64(CalleeCC);
2776 bool IsCallerWin64 = Subtarget.isCallingConvWin64(CallerCC);
2777 bool IsGuaranteeTCO = DAG.getTarget().Options.GuaranteedTailCallOpt ||
2778 CalleeCC == CallingConv::Tail || CalleeCC == CallingConv::SwiftTail;
2779
2780 // Win64 functions have extra shadow space for argument homing. Don't do the
2781 // sibcall if the caller and callee have mismatched expectations for this
2782 // space.
2783 if (IsCalleeWin64 != IsCallerWin64)
2784 return false;
2785
2786 if (IsGuaranteeTCO) {
2787 if (canGuaranteeTCO(CalleeCC) && CCMatch)
2788 return true;
2789 return false;
2790 }
2791
2792 // Look for obvious safe cases to perform tail call optimization that do not
2793 // require ABI changes. This is what gcc calls sibcall.
2794
2795 // Can't do sibcall if stack needs to be dynamically re-aligned. PEI needs to
2796 // emit a special epilogue.
2797 const X86RegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2798 if (RegInfo->hasStackRealignment(MF))
2799 return false;
2800
2801 // Also avoid sibcall optimization if we're an sret return fn and the callee
2802 // is incompatible. See comment in LowerReturn about why hasStructRetAttr is
2803 // insufficient.
2804 if (MF.getInfo<X86MachineFunctionInfo>()->getSRetReturnReg()) {
2805 // For a compatible tail call the callee must return our sret pointer. So it
2806 // needs to be (a) an sret function itself and (b) we pass our sret as its
2807 // sret. Condition #b is harder to determine.
2808 return false;
2809 } else if (IsCalleePopSRet)
2810 // The callee pops an sret, so we cannot tail-call, as our caller doesn't
2811 // expect that.
2812 return false;
2813
2814 // Do not sibcall optimize vararg calls unless all arguments are passed via
2815 // registers.
2816 LLVMContext &C = *DAG.getContext();
2817 if (isVarArg && !Outs.empty()) {
2818 // Optimizing for varargs on Win64 is unlikely to be safe without
2819 // additional testing.
2820 if (IsCalleeWin64 || IsCallerWin64)
2821 return false;
2822
2823 for (const auto &VA : ArgLocs)
2824 if (!VA.isRegLoc())
2825 return false;
2826 }
2827
2828 // If the call result is in ST0 / ST1, it needs to be popped off the x87
2829 // stack. Therefore, if it's not used by the call it is not safe to optimize
2830 // this into a sibcall.
2831 bool Unused = false;
2832 for (const auto &In : Ins) {
2833 if (!In.Used) {
2834 Unused = true;
2835 break;
2836 }
2837 }
2838 if (Unused) {
2839 SmallVector<CCValAssign, 16> RVLocs;
2840 CCState RVCCInfo(CalleeCC, false, MF, RVLocs, C);
2841 RVCCInfo.AnalyzeCallResult(Ins, RetCC_X86);
2842 for (const auto &VA : RVLocs) {
2843 if (VA.getLocReg() == X86::FP0 || VA.getLocReg() == X86::FP1)
2844 return false;
2845 }
2846 }
2847
2848 // Check that the call results are passed in the same way.
2849 if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins,
2850 RetCC_X86, RetCC_X86))
2851 return false;
2852 // The callee has to preserve all registers the caller needs to preserve.
2853 const X86RegisterInfo *TRI = Subtarget.getRegisterInfo();
2854 const uint32_t *CallerPreserved = TRI->getCallPreservedMask(MF, CallerCC);
2855 if (!CCMatch) {
2856 const uint32_t *CalleePreserved = TRI->getCallPreservedMask(MF, CalleeCC);
2857 if (!TRI->regmaskSubsetEqual(CallerPreserved, CalleePreserved))
2858 return false;
2859 }
2860
2861 // The stack frame of the caller cannot be replaced by the tail-callee one's
2862 // if the function is required to preserve all the registers. Conservatively
2863 // prevent tail optimization even if hypothetically all the registers are used
2864 // for passing formal parameters or returning values.
2865 if (CallerF.hasFnAttribute("no_caller_saved_registers"))
2866 return false;
2867
2868 unsigned StackArgsSize = CCInfo.getStackSize();
2869
2870 // If the callee takes no arguments then go on to check the results of the
2871 // call.
2872 if (!Outs.empty()) {
2873 if (StackArgsSize > 0) {
2874 // Check if the arguments are already laid out in the right way as
2875 // the caller's fixed stack objects.
2876 MachineFrameInfo &MFI = MF.getFrameInfo();
2877 const MachineRegisterInfo *MRI = &MF.getRegInfo();
2878 const X86InstrInfo *TII = Subtarget.getInstrInfo();
2879 for (unsigned I = 0, E = ArgLocs.size(); I != E; ++I) {
2880 const CCValAssign &VA = ArgLocs[I];
2881 SDValue Arg = OutVals[I];
2882 ISD::ArgFlagsTy Flags = Outs[I].Flags;
2883 if (VA.getLocInfo() == CCValAssign::Indirect)
2884 return false;
2885 if (!VA.isRegLoc()) {
2886 if (!MatchingStackOffset(Arg, VA.getLocMemOffset(), Flags, MFI, MRI,
2887 TII, VA))
2888 return false;
2889 }
2890 }
2891 }
2892
2893 bool PositionIndependent = isPositionIndependent();
2894 // If the tailcall address may be in a register, then make sure it's
2895 // possible to register allocate for it. In 32-bit, the call address can
2896 // only target EAX, EDX, or ECX since the tail call must be scheduled after
2897 // callee-saved registers are restored. These happen to be the same
2898 // registers used to pass 'inreg' arguments so watch out for those.
2899 if (!Subtarget.is64Bit() && ((!isa<GlobalAddressSDNode>(Callee) &&
2900 !isa<ExternalSymbolSDNode>(Callee)) ||
2901 PositionIndependent)) {
2902 unsigned NumInRegs = 0;
2903 // In PIC we need an extra register to formulate the address computation
2904 // for the callee.
2905 unsigned MaxInRegs = PositionIndependent ? 2 : 3;
2906
2907 for (const auto &VA : ArgLocs) {
2908 if (!VA.isRegLoc())
2909 continue;
2910 Register Reg = VA.getLocReg();
2911 switch (Reg) {
2912 default: break;
2913 case X86::EAX: case X86::EDX: case X86::ECX:
2914 if (++NumInRegs == MaxInRegs)
2915 return false;
2916 break;
2917 }
2918 }
2919 }
2920
2921 const MachineRegisterInfo &MRI = MF.getRegInfo();
2922 if (!parametersInCSRMatch(MRI, CallerPreserved, ArgLocs, OutVals))
2923 return false;
2924 }
2925
2926 bool CalleeWillPop =
2927 X86::isCalleePop(CalleeCC, Subtarget.is64Bit(), isVarArg,
2928 MF.getTarget().Options.GuaranteedTailCallOpt);
2929
2930 if (unsigned BytesToPop =
2931 MF.getInfo<X86MachineFunctionInfo>()->getBytesToPopOnReturn()) {
2932 // If we have bytes to pop, the callee must pop them.
2933 bool CalleePopMatches = CalleeWillPop && BytesToPop == StackArgsSize;
2934 if (!CalleePopMatches)
2935 return false;
2936 } else if (CalleeWillPop && StackArgsSize > 0) {
2937 // If we don't have bytes to pop, make sure the callee doesn't pop any.
2938 return false;
2939 }
2940
2941 return true;
2942}
2943
2944/// Determines whether the callee is required to pop its own arguments.
2945/// Callee pop is necessary to support tail calls.
2946 bool X86::isCalleePop(CallingConv::ID CallingConv,
2947 bool is64Bit, bool IsVarArg, bool GuaranteeTCO) {
2948 // If GuaranteeTCO is true, we force some calls to be callee pop so that we
2949 // can guarantee TCO.
2950 if (!IsVarArg && shouldGuaranteeTCO(CallingConv, GuaranteeTCO))
2951 return true;
2952
2953 switch (CallingConv) {
2954 default:
2955 return false;
2956 case CallingConv::X86_StdCall:
2957 case CallingConv::X86_FastCall:
2958 case CallingConv::X86_ThisCall:
2959 case CallingConv::X86_VectorCall:
2960 return !is64Bit;
2961 }
2962}
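// Illustrative summary (not from the upstream file): on 32-bit targets,
// stdcall/fastcall/thiscall/vectorcall callees pop their own stack arguments
// (e.g. a stdcall function taking 8 bytes of arguments returns with `ret 8`),
// while the default C convention leaves cleanup to the caller. In 64-bit mode
// none of these conventions is callee-pop, and under GuaranteedTailCallOpt a
// non-variadic function using a convention that supports guaranteed TCO is
// forced to be callee-pop so guaranteed tail calls can adjust the stack.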
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
static bool canGuaranteeTCO(CallingConv::ID CC, bool GuaranteeTailCalls)
Return true if the calling convention is one that we can guarantee TCO for.
static bool mayTailCallThisCC(CallingConv::ID CC)
Return true if we might ever do TCO for calls with this calling convention.
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
return RetTy
uint64_t Addr
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:235
const HexagonInstrInfo * TII
static SDValue CreateCopyOfByValArgument(SDValue Src, SDValue Dst, SDValue Chain, ISD::ArgFlagsTy Flags, SelectionDAG &DAG, const SDLoc &dl)
CreateCopyOfByValArgument - Make a copy of an aggregate at address specified by "Src" to address "Dst...
Module.h This file contains the declarations for the Module class.
static LVOptions Options
Definition: LVOptions.cpp:25
const MCPhysReg ArgGPRs[]
static bool shouldGuaranteeTCO(CallingConv::ID CC, bool GuaranteedTailCallOpt)
Return true if the function is being made into a tailcall target by changing its ABI.
static bool MatchingStackOffset(SDValue Arg, unsigned Offset, ISD::ArgFlagsTy Flags, MachineFrameInfo &MFI, const MachineRegisterInfo *MRI, const M68kInstrInfo *TII, const CCValAssign &VA)
Return true if the given stack call argument is already available in the same position (relatively) o...
#define F(x, y, z)
Definition: MD5.cpp:55
#define I(x, y, z)
Definition: MD5.cpp:58
#define G(x, y, z)
Definition: MD5.cpp:56
unsigned const TargetRegisterInfo * TRI
This file defines ARC utility functions which are used by various parts of the compiler.
static CodeModel::Model getCodeModel(const PPCSubtarget &S, const TargetMachine &TM, const MachineOperand &MO)
static void getMaxByValAlign(Type *Ty, Align &MaxAlign, Align MaxMaxAlign)
getMaxByValAlign - Helper for getByValTypeAlignment to determine the desired ByVal argument alignment...
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:469
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition: Statistic.h:166
static bool is64Bit(const char *name)
static SDValue lowerMasksToReg(const SDValue &ValArg, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
Lowers masks values (v*i1) to the local register values.
static void Passv64i1ArgInRegs(const SDLoc &DL, SelectionDAG &DAG, SDValue &Arg, SmallVectorImpl< std::pair< Register, SDValue > > &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, const X86Subtarget &Subtarget)
Breaks v64i1 value into two registers and adds the new node to the DAG.
static SDValue getv64i1Argument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, const SDLoc &DL, const X86Subtarget &Subtarget, SDValue *InGlue=nullptr)
Reads two 32 bit registers and creates a 64 bit mask value.
static ArrayRef< MCPhysReg > get64BitArgumentXMMs(MachineFunction &MF, CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static bool isSortedByValueNo(ArrayRef< CCValAssign > ArgLocs)
static ArrayRef< MCPhysReg > get64BitArgumentGPRs(CallingConv::ID CallConv, const X86Subtarget &Subtarget)
static std::pair< MVT, unsigned > handleMaskRegisterForCallingConv(unsigned NumElts, CallingConv::ID CC, const X86Subtarget &Subtarget)
static bool shouldDisableRetRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static void errorUnsupported(SelectionDAG &DAG, const SDLoc &dl, const char *Msg)
Call this when the user attempts to do something unsupported, like returning a double without SSE2 en...
static SDValue EmitTailCallStoreRetAddr(SelectionDAG &DAG, MachineFunction &MF, SDValue Chain, SDValue RetAddrFrIdx, EVT PtrVT, unsigned SlotSize, int FPDiff, const SDLoc &dl)
Emit a store of the return address if tail call optimization is performed and it is required (FPDiff!...
static bool hasCalleePopSRet(const SmallVectorImpl< T > &Args, const X86Subtarget &Subtarget)
Determines whether Args, either a set of outgoing arguments to a call, or a set of incoming args of a...
static bool shouldDisableArgRegFromCSR(CallingConv::ID CC)
Returns true if a CC can dynamically exclude a register from the list of callee-saved-registers (Targ...
static bool hasStackGuardSlotTLS(const Triple &TargetTriple)
static SDValue lowerRegToMasks(const SDValue &ValArg, const EVT &ValVT, const EVT &ValLoc, const SDLoc &DL, SelectionDAG &DAG)
The function will lower a register of various sizes (8/16/32/64) to a mask value of the expected size...
static Constant * SegmentOffset(IRBuilderBase &IRB, int Offset, unsigned AddressSpace)
static bool isBitAligned(Align Alignment, uint64_t SizeInBits)
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:168
ArrayRef< T > slice(size_t N, size_t M) const
slice(n, m) - Chop off the first N elements of the array, and keep M elements in the array.
Definition: ArrayRef.h:198
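A hedged usage sketch of slice (the helper name and values below are illustrative, not part of the listing):
  // slice(N, M) drops the first N elements and keeps the next M, without copying.
  #include "llvm/ADT/ArrayRef.h"
  static llvm::ArrayRef<int> middleThree(llvm::ArrayRef<int> Full) {
    return Full.slice(/*N=*/1, /*M=*/3); // for {10, 20, 30, 40, 50}, views {20, 30, 40}
  }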
bool hasFnAttr(Attribute::AttrKind Kind) const
Return true if the attribute exists for the function.
const Function * getParent() const
Return the enclosing method, or null if none.
Definition: BasicBlock.h:220
CCState - This class holds information needed while lowering arguments and return values.
static bool resultsCompatible(CallingConv::ID CalleeCC, CallingConv::ID CallerCC, MachineFunction &MF, LLVMContext &C, const SmallVectorImpl< ISD::InputArg > &Ins, CCAssignFn CalleeFn, CCAssignFn CallerFn)
Returns true if the results of the two calling conventions are compatible.
uint64_t getStackSize() const
Returns the size of the currently allocated portion of the stack.
CCValAssign - Represent assignment of one arg/retval to a location.
bool isRegLoc() const
void convertToReg(MCRegister Reg)
Register getLocReg() const
LocInfo getLocInfo() const
bool needsCustom() const
bool isMemLoc() const
bool isExtInLoc() const
int64_t getLocMemOffset() const
CallingConv::ID getCallingConv() const
Definition: InstrTypes.h:1399
bool paramHasAttr(unsigned ArgNo, Attribute::AttrKind Kind) const
Determine whether the argument or parameter has the given attribute.
bool isMustTailCall() const
Tests if this call site must be tail call optimized.
This class represents a function call, abstracting a target machine's calling convention.
bool isTailCall() const
static Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
Definition: Constants.cpp:2307
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition: Constants.h:157
This is an important base class in LLVM.
Definition: Constant.h:42
This class represents an Operation in the Expression.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:63
TypeSize getTypeAllocSize(Type *Ty) const
Returns the offset in bytes between successive objects of the specified type, including alignment pad...
Definition: DataLayout.h:457
Diagnostic information for unsupported feature in backend.
A handy container for a FunctionType+Callee-pointer pair, which can be passed around as a single enti...
Definition: DerivedTypes.h:170
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These methods get and set the calling convention of this functio...
Definition: Function.h:277
bool hasPersonalityFn() const
Check whether this function has a personality function.
Definition: Function.h:905
Constant * getPersonalityFn() const
Get the personality function associated with this function.
Definition: Function.cpp:1048
Type * getReturnType() const
Returns the type of the ret val.
Definition: Function.h:221
bool hasFnAttribute(Attribute::AttrKind Kind) const
Return true if the function has the attribute.
Definition: Function.cpp:731
Module * getParent()
Get the module that this global value is contained inside of...
Definition: GlobalValue.h:657
void setDSOLocal(bool Local)
Definition: GlobalValue.h:304
@ ExternalLinkage
Externally visible function.
Definition: GlobalValue.h:52
Register isLoadFromStackSlot(const MachineInstr &MI, int &FrameIndex) const override
TargetInstrInfo overrides.
Common base class shared among various IRBuilders.
Definition: IRBuilder.h:113
BasicBlock * GetInsertBlock() const
Definition: IRBuilder.h:193
LLVMContext & getContext() const
Definition: IRBuilder.h:195
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition: IRBuilder.h:588
MDNode * getMetadata(unsigned KindID) const
Get the metadata of given kind attached to this Instruction.
Definition: Instruction.h:407
This is an important class for using LLVM in a threaded context.
Definition: LLVMContext.h:67
void diagnose(const DiagnosticInfo &DI)
Report a message to the currently installed diagnostic handler.
This class is used to represent ISD::LOAD nodes.
Context object for machine code objects.
Definition: MCContext.h:83
Base class for the full range of assembler expressions which are needed for parsing.
Definition: MCExpr.h:34
static const MCSymbolRefExpr * create(const MCSymbol *Symbol, MCContext &Ctx)
Definition: MCExpr.h:398
Metadata node.
Definition: Metadata.h:1073
Machine Value Type.
@ INVALID_SIMPLE_VALUE_TYPE
SimpleValueType SimpleTy
unsigned getVectorNumElements() const
bool isVector() const
Return true if this is a vector value type.
bool is512BitVector() const
Return true if this is a 512-bit vector type.
TypeSize getSizeInBits() const
Returns the size of the specified MVT in bits.
uint64_t getFixedSizeInBits() const
Return the size of the specified fixed width value type in bits.
MVT getVectorElementType() const
MVT getScalarType() const
If this is a vector, return the element type, otherwise return this.
MCSymbol * getSymbol() const
Return the MCSymbol for this basic block.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
int CreateFixedObject(uint64_t Size, int64_t SPOffset, bool IsImmutable, bool isAliased=false)
Create a new object at a fixed location on the stack.
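A hedged usage sketch (MFI is assumed to be the current function's MachineFrameInfo; the size and offset are illustrative):
  // Reserve an 8-byte fixed slot at stack-pointer offset 0, e.g. for an incoming argument.
  int FI = MFI.CreateFixedObject(/*Size=*/8, /*SPOffset=*/0, /*IsImmutable=*/true);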
void setObjectZExt(int ObjectIdx, bool IsZExt)
int CreateStackObject(uint64_t Size, Align Alignment, bool isSpillSlot, const AllocaInst *Alloca=nullptr, uint8_t ID=0)
Create a new statically sized stack object, returning a nonnegative identifier to represent it.
void setObjectSExt(int ObjectIdx, bool IsSExt)
bool isImmutableObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to an immutable object.
void setHasTailCall(bool V=true)
bool isObjectZExt(int ObjectIdx) const
int64_t getObjectSize(int ObjectIdx) const
Return the size of the specified object.
bool isObjectSExt(int ObjectIdx) const
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
int getObjectIndexBegin() const
Return the minimum frame object index.
const WinEHFuncInfo * getWinEHFuncInfo() const
getWinEHFuncInfo - Return information about how the current function uses Windows exception handling.
MCSymbol * getPICBaseSymbol() const
getPICBaseSymbol - Return a function-local symbol to represent the PIC base.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
uint32_t * allocateRegMask()
Allocate and initialize a register mask with NumRegister bits.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register addLiveIn(MCRegister PReg, const TargetRegisterClass *RC)
addLiveIn - Add the specified physical register as a live-in value and create a corresponding virtual...
const TargetMachine & getTarget() const
getTarget - Return the target machine that this machine code is compiled with.
Representation of each machine instruction.
Definition: MachineInstr.h:71
@ EK_Custom32
EK_Custom32 - Each entry is a 32-bit value that is custom lowered by the TargetLowering::LowerCustomJ...
@ EK_LabelDifference64
EK_LabelDifference64 - Each entry is the address of the block minus the address of the jump table.
A description of a memory reference used in the backend.
Flags
Flags values. These may be or'd together.
@ MOLoad
The memory access reads data.
@ MONonTemporal
The memory access is non-temporal.
@ MOStore
The memory access writes data.
static unsigned getRegMaskSize(unsigned NumRegs)
Returns the number of elements needed for a regmask array.
static bool clobbersPhysReg(const uint32_t *RegMask, MCRegister PhysReg)
clobbersPhysReg - Returns true if this RegMask clobbers PhysReg.
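A minimal standalone sketch of the register-mask encoding these helpers assume: one bit per physical register, with a set bit meaning the register is preserved across the call (helper names are illustrative):
  #include <cstdint>
  static unsigned regMaskSizeSketch(unsigned NumRegs) {
    return (NumRegs + 31) / 32;                               // uint32_t words needed
  }
  static bool clobbersPhysRegSketch(const uint32_t *RegMask, unsigned PhysReg) {
    return !(RegMask[PhysReg / 32] & (1u << (PhysReg % 32))); // clear bit => clobbered
  }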
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
Register createVirtualRegister(const TargetRegisterClass *RegClass, StringRef Name="")
createVirtualRegister - Create and return a new virtual register in the function with the specified r...
void disableCalleeSavedRegister(MCRegister Reg)
Disables the register from the list of CSRs.
Root of the metadata hierarchy.
Definition: Metadata.h:62
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
static PointerType * getUnqual(Type *ElementType)
This constructs a pointer to an object of the specified type in the default address space (address sp...
Definition: DerivedTypes.h:686
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition: Register.h:91
Wrapper class for IR location info (IR ordering and DebugLoc) to be passed into SDNode creation funct...
const DebugLoc & getDebugLoc() const
Represents one node in the SelectionDAG.
void setCFIType(uint32_t Type)
Unlike LLVM values, Selection DAG nodes may return multiple values as the result of a computation.
SDNode * getNode() const
Get the SDNode which holds the desired result.
SDValue getValue(unsigned R) const
EVT getValueType() const
Return the ValueType of the referenced return value.
TypeSize getValueSizeInBits() const
Returns the size of the value in bits.
const SDValue & getOperand(unsigned i) const
MVT getSimpleValueType() const
Return the simple ValueType of the referenced return value.
unsigned getOpcode() const
This is used to represent a portion of an LLVM function in a low-level Data Dependence DAG representa...
Definition: SelectionDAG.h:228
SDValue getTargetGlobalAddress(const GlobalValue *GV, const SDLoc &DL, EVT VT, int64_t offset=0, unsigned TargetFlags=0)
Definition: SelectionDAG.h:751
SDValue getStackArgumentTokenFactor(SDValue Chain)
Compute a TokenFactor to force all the incoming stack arguments to be loaded from the stack.
SDValue getCopyToReg(SDValue Chain, const SDLoc &dl, Register Reg, SDValue N)
Definition: SelectionDAG.h:802
SDVTList getVTList(EVT VT)
Return an SDVTList that represents the list of values specified.
SDValue getRegister(Register Reg, EVT VT)
SDValue getLoad(EVT VT, const SDLoc &dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, MaybeAlign Alignment=MaybeAlign(), MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr)
Loads are not normal binary operators: their result type is not determined by their operands,...
SDValue getMemcpy(SDValue Chain, const SDLoc &dl, SDValue Dst, SDValue Src, SDValue Size, Align Alignment, bool isVol, bool AlwaysInline, const CallInst *CI, std::optional< bool > OverrideTailCall, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo, const AAMDNodes &AAInfo=AAMDNodes(), BatchAAResults *BatchAA=nullptr)
void addNoMergeSiteInfo(const SDNode *Node, bool NoMerge)
Set NoMergeSiteInfo to be associated with Node if NoMerge is true.
SDValue getUNDEF(EVT VT)
Return an UNDEF node. UNDEF does not have a useful SDLoc.
SDValue getCALLSEQ_END(SDValue Chain, SDValue Op1, SDValue Op2, SDValue InGlue, const SDLoc &DL)
Return a new CALLSEQ_END node, which always must have a glue result (to ensure it's not CSE'd).
SDValue getBitcast(EVT VT, SDValue V)
Return a bitcast using the SDLoc of the value operand, and casting to the provided type.
SDValue getCopyFromReg(SDValue Chain, const SDLoc &dl, Register Reg, EVT VT)
Definition: SelectionDAG.h:828
const DataLayout & getDataLayout() const
Definition: SelectionDAG.h:497
void addHeapAllocSite(const SDNode *Node, MDNode *MD)
Set HeapAllocSite to be associated with Node.
SDValue getConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isTarget=false, bool isOpaque=false)
Create a ConstantSDNode wrapping a constant value.
SDValue getSignedTargetConstant(int64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:713
SDValue getStore(SDValue Chain, const SDLoc &dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags MMOFlags=MachineMemOperand::MONone, const AAMDNodes &AAInfo=AAMDNodes())
Helper function to build ISD::STORE nodes.
SDValue getCALLSEQ_START(SDValue Chain, uint64_t InSize, uint64_t OutSize, const SDLoc &DL)
Return a new CALLSEQ_START node, which starts a new call frame in which InSize bytes are set up inside ...
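A hedged sketch of how call lowering typically brackets the outgoing-argument area (Chain, NumBytes, InGlue, and dl are assumed to be in scope):
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, /*OutSize=*/0, dl);
  // ... emit argument copies/stores and the call node here ...
  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, dl, /*isTarget=*/true),
                             DAG.getIntPtrConstant(0, dl, /*isTarget=*/true), InGlue, dl);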
const TargetMachine & getTarget() const
Definition: SelectionDAG.h:498
SDValue getIntPtrConstant(uint64_t Val, const SDLoc &DL, bool isTarget=false)
SDValue getValueType(EVT)
SDValue getNode(unsigned Opcode, const SDLoc &DL, EVT VT, ArrayRef< SDUse > Ops)
Gets or creates the specified node.
SDValue getTargetConstant(uint64_t Val, const SDLoc &DL, EVT VT, bool isOpaque=false)
Definition: SelectionDAG.h:701
MachineFunction & getMachineFunction() const
Definition: SelectionDAG.h:492
SDValue getFrameIndex(int FI, EVT VT, bool isTarget=false)
SDValue getRegisterMask(const uint32_t *RegMask)
void addCallSiteInfo(const SDNode *Node, CallSiteInfo &&CallInfo)
Set CallSiteInfo to be associated with Node.
LLVMContext * getContext() const
Definition: SelectionDAG.h:510
SDValue getMemIntrinsicNode(unsigned Opcode, const SDLoc &dl, SDVTList VTList, ArrayRef< SDValue > Ops, EVT MemVT, MachinePointerInfo PtrInfo, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MOLoad|MachineMemOperand::MOStore, LocationSize Size=0, const AAMDNodes &AAInfo=AAMDNodes())
Creates a MemIntrinsicNode that may produce a result and takes a list of operands.
SDValue CreateStackTemporary(TypeSize Bytes, Align Alignment)
Create a stack temporary based on the size in bytes and the alignment.
SDValue getEntryNode() const
Return the token chain corresponding to the entry of the function.
Definition: SelectionDAG.h:580
std::pair< SDValue, SDValue > SplitScalar(const SDValue &N, const SDLoc &DL, const EVT &LoVT, const EVT &HiVT)
Split the scalar node with EXTRACT_ELEMENT using the provided VTs and return the low/high part.
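A hedged sketch (DAG, N, and DL assumed in scope): splitting an i64 value into its low and high i32 halves.
  auto [Lo, Hi] = DAG.SplitScalar(N, DL, MVT::i32, MVT::i32); // Lo = bits 0-31, Hi = bits 32-63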
SDValue getVectorShuffle(EVT VT, const SDLoc &dl, SDValue N1, SDValue N2, ArrayRef< int > Mask)
Return an ISD::VECTOR_SHUFFLE node.
bool empty() const
Definition: SmallVector.h:81
size_t size() const
Definition: SmallVector.h:78
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:573
iterator insert(iterator I, T &&Elt)
Definition: SmallVector.h:805
void push_back(const T &Elt)
Definition: SmallVector.h:413
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1196
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:51
constexpr bool empty() const
empty - Check if the string is empty.
Definition: StringRef.h:147
Class to represent struct types.
Definition: DerivedTypes.h:218
Information about stack frame layout on the target.
Align getStackAlign() const
getStackAlignment - This method returns the number of bytes to which the stack pointer must be aligne...
virtual const TargetRegisterClass * getRegClassFor(MVT VT, bool isDivergent=false) const
Return the register class that should be used for the specified value type.
virtual Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const
Returns the target-specific address of the unsafe stack pointer.
const TargetMachine & getTargetMachine() const
virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain targets require unusual breakdowns of certain types.
virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
virtual Value * getSDagStackGuard(const Module &M) const
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
virtual unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
virtual Function * getSSPStackGuardCheck(const Module &M) const
If the target has a standard stack protection check function that performs validation and error handl...
virtual Value * getIRStackGuard(IRBuilderBase &IRB) const
If the target has a standard location for the stack protector guard, returns the address of that loca...
bool isTypeLegal(EVT VT) const
Return true if the target has native support for the specified value type.
virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS=0) const
Return the pointer type for the given address space, defaults to the pointer type from the data layou...
virtual std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const
Return the largest legal super-reg register class of the register class for the specified type and it...
LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const
Return how we should legalize values of this type, either it is already legal (return 'Legal') or we ...
std::vector< ArgListEntry > ArgListTy
MVT getRegisterType(MVT VT) const
Return the type of registers that this ValueType will eventually require.
virtual void insertSSPDeclarations(Module &M) const
Inserts necessary declarations for SSP (stack protection) purpose.
This class defines information used to lower LLVM code to legal SelectionDAG operators that the targe...
virtual const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool parametersInCSRMatch(const MachineRegisterInfo &MRI, const uint32_t *CallerPreservedMask, const SmallVectorImpl< CCValAssign > &ArgLocs, const SmallVectorImpl< SDValue > &OutVals) const
Check whether parameters to a call that are passed in callee saved registers are the same as from the...
bool isPositionIndependent() const
virtual unsigned getJumpTableEncoding() const
Return the entry encoding for a jump table in the current function.
TargetOptions Options
CodeModel::Model getCodeModel() const
Returns the code model.
unsigned GuaranteedTailCallOpt
GuaranteedTailCallOpt - This flag is enabled when -tailcallopt is specified on the command line.
TargetRegisterInfo base class - We assume that the target defines a static array of TargetRegisterDes...
Triple - Helper class for working with autoconf configuration names.
Definition: Triple.h:44
bool isAndroidVersionLT(unsigned Major) const
Definition: Triple.h:799
bool isAndroid() const
Tests whether the target is Android.
Definition: Triple.h:797
bool isOSMSVCRT() const
Is this a "Windows" OS targeting a "MSVCRT.dll" environment.
Definition: Triple.h:695
bool isOSDarwin() const
Is this a "Darwin" OS (macOS, iOS, tvOS, watchOS, XROS, or DriverKit).
Definition: Triple.h:585
bool isOSGlibc() const
Tests whether the OS uses glibc.
Definition: Triple.h:731
bool isOSFuchsia() const
Definition: Triple.h:615
bool isWindowsMSVCEnvironment() const
Checks if the environment could be MSVC.
Definition: Triple.h:662
bool isWindowsItaniumEnvironment() const
Definition: Triple.h:677
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
bool isX86_FP80Ty() const
Return true if this is x86 long double.
Definition: Type.h:159
static Type * getVoidTy(LLVMContext &C)
static IntegerType * getInt32Ty(LLVMContext &C)
static IntegerType * getInt64Ty(LLVMContext &C)
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:237
LLVM Value Representation.
Definition: Value.h:74
X86MachineFunctionInfo - This class is derived from MachineFunction and contains private X86 target-s...
void setBytesToPopOnReturn(unsigned bytes)
void setVarArgsGPOffset(unsigned Offset)
void setArgumentStackSize(unsigned size)
SmallVectorImpl< ForwardedRegister > & getForwardedMustTailRegParms()
void setVarArgsFPOffset(unsigned Offset)
unsigned getSlotSize() const
bool hasSSE1() const
Definition: X86Subtarget.h:193
bool useLight256BitInstructions() const
Definition: X86Subtarget.h:258
bool isPICStyleGOT() const
Definition: X86Subtarget.h:333
bool isTargetMCU() const
Definition: X86Subtarget.h:302
bool isTargetWindowsMSVC() const
Definition: X86Subtarget.h:305
bool isTarget64BitILP32() const
Is this x86_64 with the ILP32 programming model (x32 ABI)?
Definition: X86Subtarget.h:173
bool isTargetDarwin() const
Definition: X86Subtarget.h:285
const Triple & getTargetTriple() const
Definition: X86Subtarget.h:283
const X86InstrInfo * getInstrInfo() const override
Definition: X86Subtarget.h:122
bool useAVX512Regs() const
Definition: X86Subtarget.h:253
bool isTargetCOFF() const
Definition: X86Subtarget.h:292
bool isCallingConvWin64(CallingConv::ID CC) const
Definition: X86Subtarget.h:342
bool hasAVX512() const
Definition: X86Subtarget.h:201
bool hasSSE41() const
Definition: X86Subtarget.h:197
bool hasSSE2() const
Definition: X86Subtarget.h:194
bool isTargetFuchsia() const
Definition: X86Subtarget.h:303
bool isPICStyleRIPRel() const
Definition: X86Subtarget.h:334
bool isTargetCygMing() const
Definition: X86Subtarget.h:325
const X86RegisterInfo * getRegisterInfo() const override
Definition: X86Subtarget.h:132
bool hasAVX() const
Definition: X86Subtarget.h:199
unsigned getPreferVectorWidth() const
Definition: X86Subtarget.h:225
bool isTargetAndroid() const
Definition: X86Subtarget.h:298
const X86FrameLowering * getFrameLowering() const override
Definition: X86Subtarget.h:124
bool hasAVX2() const
Definition: X86Subtarget.h:200
std::pair< const TargetRegisterClass *, uint8_t > findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const override
Return the largest legal super-reg register class of the register class for the specified type and it...
SDValue getPICJumpTableRelocBase(SDValue Table, SelectionDAG &DAG) const override
Returns relocation base for the given PIC jumptable.
unsigned getJumpTableEncoding() const override
Return the entry encoding for a jump table in the current function.
bool isMemoryAccessFast(EVT VT, Align Alignment) const
bool useSoftFloat() const override
const MCExpr * getPICJumpTableRelocBaseExpr(const MachineFunction *MF, unsigned JTI, MCContext &Ctx) const override
This returns the relocation base for the given PIC jumptable, the same as getPICJumpTableRelocBase,...
bool isSafeMemOpType(MVT VT) const override
Returns true if it's safe to use load / store of the specified type to expand memcpy / memset inline.
bool functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, bool isVarArg, const DataLayout &DL) const override
For some targets, an LLVM struct type must be broken down into multiple simple types,...
Value * getIRStackGuard(IRBuilderBase &IRB) const override
If the target has a standard location for the stack protector cookie, returns the address of that loc...
Align getByValTypeAlignment(Type *Ty, const DataLayout &DL) const override
Return the desired alignment for ByVal aggregate function arguments in the caller parameter area.
EVT getOptimalMemOpType(const MemOp &Op, const AttributeList &FuncAttributes) const override
It returns EVT::Other if the type should be determined using generic target-independent logic.
Function * getSSPStackGuardCheck(const Module &M) const override
If the target has a standard stack protection check function that performs validation and error handl...
MVT getRegisterTypeForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain combinations of ABIs, Targets and features require that types are legal for some operations a...
bool allowsMisalignedMemoryAccesses(EVT VT, unsigned AS, Align Alignment, MachineMemOperand::Flags Flags, unsigned *Fast) const override
Returns true if the target allows unaligned memory accesses of the specified type.
Value * getSDagStackGuard(const Module &M) const override
Return the variable that's previously inserted by insertSSPDeclarations, if any, otherwise return nul...
unsigned getVectorTypeBreakdownForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT, unsigned &NumIntermediates, MVT &RegisterVT) const override
Certain targets such as MIPS require that some types such as vectors are always broken down into scal...
void markLibCallAttributes(MachineFunction *MF, unsigned CC, ArgListTy &Args) const override
Value * getSafeStackPointerLocation(IRBuilderBase &IRB) const override
Return true if the target stores SafeStack pointer at a fixed offset in some non-standard address spa...
bool isScalarFPTypeInSSEReg(EVT VT) const
Return true if the specified scalar FP type is computed in an SSE register, not on the X87 floating p...
unsigned getNumRegistersForCallingConv(LLVMContext &Context, CallingConv::ID CC, EVT VT) const override
Certain targets require unusual breakdowns of certain types.
bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, unsigned AddrSpace, Align Alignment, MachineMemOperand::Flags Flags=MachineMemOperand::MONone, unsigned *Fast=nullptr) const override
This function returns true if the memory access is aligned or if the target allows this specific unal...
SDValue getReturnAddressFrameIndex(SelectionDAG &DAG) const
EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context, EVT VT) const override
Return the value type to use for ISD::SETCC.
EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const override
For types supported by the target, this is an identity function.
void insertSSPDeclarations(Module &M) const override
Inserts necessary declarations for SSP (stack protection) purpose.
const MCExpr * LowerCustomJumpTableEntry(const MachineJumpTableInfo *MJTI, const MachineBasicBlock *MBB, unsigned uid, MCContext &Ctx) const override
constexpr ScalarTy getFixedValue() const
Definition: TypeSize.h:202
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:125
@ X86_64_SysV
The C convention as specified in the x86-64 supplement to the System V ABI, used on most non-Windows ...
Definition: CallingConv.h:151
@ HiPE
Used by the High-Performance Erlang Compiler (HiPE).
Definition: CallingConv.h:53
@ Swift
Calling convention for Swift.
Definition: CallingConv.h:69
@ PreserveMost
Used for runtime calls that preserve most registers.
Definition: CallingConv.h:63
@ X86_INTR
x86 hardware interrupt context.
Definition: CallingConv.h:173
@ GHC
Used by the Glasgow Haskell Compiler (GHC).
Definition: CallingConv.h:50
@ X86_ThisCall
Similar to X86_StdCall.
Definition: CallingConv.h:122
@ PreserveAll
Used for runtime calls that preserve (almost) all registers.
Definition: CallingConv.h:66
@ X86_StdCall
stdcall is mostly used by the Win32 API.
Definition: CallingConv.h:99
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ X86_VectorCall
MSVC calling convention that passes vectors and vector aggregates in SSE registers.
Definition: CallingConv.h:163
@ Intel_OCL_BI
Used for Intel OpenCL built-ins.
Definition: CallingConv.h:147
@ PreserveNone
Used for runtime calls that preserve no general-purpose registers.
Definition: CallingConv.h:90
@ Tail
Attempts to make calls as fast as possible while guaranteeing that tail call optimization can always b...
Definition: CallingConv.h:76
@ Win64
The C convention as implemented on Windows/x86-64 and AArch64.
Definition: CallingConv.h:159
@ SwiftTail
This follows the Swift calling convention in how arguments are passed but guarantees tail calls will ...
Definition: CallingConv.h:87
@ X86_RegCall
Register calling convention used for parameters transfer optimization.
Definition: CallingConv.h:203
@ C
The default LLVM calling convention, compatible with C.
Definition: CallingConv.h:34
@ X86_FastCall
'fast' analog of X86_StdCall.
Definition: CallingConv.h:103
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition: CallingConv.h:24
NodeType
ISD::NodeType enum - This enum defines the target-independent operators for a SelectionDAG.
Definition: ISDOpcodes.h:40
@ ADD
Simple integer binary arithmetic operators.
Definition: ISDOpcodes.h:246
@ ANY_EXTEND
ANY_EXTEND - Used for integer types. The high bits are undefined.
Definition: ISDOpcodes.h:814
@ GlobalAddress
Definition: ISDOpcodes.h:78
@ CONCAT_VECTORS
CONCAT_VECTORS(VECTOR0, VECTOR1, ...) - Given a number of values of vector type with the same length ...
Definition: ISDOpcodes.h:558
@ BITCAST
BITCAST - This operator converts between integer, vector and FP values, as if the value was stored to...
Definition: ISDOpcodes.h:954
@ FrameIndex
Definition: ISDOpcodes.h:80
@ SIGN_EXTEND
Conversion operators.
Definition: ISDOpcodes.h:805
@ SCALAR_TO_VECTOR
SCALAR_TO_VECTOR(VAL) - This represents the operation of loading a scalar value into element 0 of the...
Definition: ISDOpcodes.h:635
@ CopyFromReg
CopyFromReg - This node indicates that the input value is a virtual or physical register that is defi...
Definition: ISDOpcodes.h:215
@ EXTRACT_VECTOR_ELT
EXTRACT_VECTOR_ELT(VECTOR, IDX) - Returns a single element from VECTOR identified by the (potentially...
Definition: ISDOpcodes.h:550
@ CopyToReg
CopyToReg - This node has three operands: a chain, a register number to set to this value,...
Definition: ISDOpcodes.h:209
@ ZERO_EXTEND
ZERO_EXTEND - Used for integer types, zeroing the new bits.
Definition: ISDOpcodes.h:811
@ FP_EXTEND
X = FP_EXTEND(Y) - Extend a smaller FP type into a larger FP type.
Definition: ISDOpcodes.h:939
@ TokenFactor
TokenFactor - This node takes multiple tokens as input and produces a single token result.
Definition: ISDOpcodes.h:52
@ ExternalSymbol
Definition: ISDOpcodes.h:83
@ FP_ROUND
X = FP_ROUND(Y, TRUNC) - Rounding 'Y' from a larger floating point type down to the precision of the ...
Definition: ISDOpcodes.h:920
@ TRUNCATE
TRUNCATE - Completely drop the high bits.
Definition: ISDOpcodes.h:817
@ AssertSext
AssertSext, AssertZext - These nodes record if a register contains a value that has already been zero...
Definition: ISDOpcodes.h:61
@ AssertZext
Definition: ISDOpcodes.h:62
@ FS
Definition: X86.h:211
@ GS
Definition: X86.h:210
Reg
All possible values of the reg field in the ModR/M byte.
@ RET_GLUE
Return with a glue operand.
@ IRET
Return from interrupt. Operand 0 is the number of bytes to pop.
@ CALL
These operations represent an abstract X86 call instruction, which includes a bunch of information.
@ GlobalBaseReg
On Darwin, this node represents the result of the popl at function entry, used for PIC code.
@ TC_RETURN
Tail call return.
@ NT_CALL
Same as call except it adds the NoTrack prefix.
@ MOVDQ2Q
Copies a 64-bit value from the low word of an XMM vector to an MMX vector.
bool isExtendedSwiftAsyncFrameSupported(const X86Subtarget &Subtarget, const MachineFunction &MF)
True if the target supports the extended frame for async Swift functions.
bool isCalleePop(CallingConv::ID CallingConv, bool is64Bit, bool IsVarArg, bool GuaranteeTCO)
Determines whether the callee is required to pop its own arguments.
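A hedged usage sketch: on 32-bit x86, a non-vararg stdcall callee pops its own arguments, so this is expected to return true.
  bool CalleePops = llvm::X86::isCalleePop(llvm::CallingConv::X86_StdCall,
                                           /*is64Bit=*/false, /*IsVarArg=*/false,
                                           /*GuaranteeTCO=*/false);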
std::optional< Function * > getAttachedARCFunction(const CallBase *CB)
This function returns operand bundle clang_arc_attachedcall's argument, which is the address of the A...
Definition: ObjCARCUtil.h:43
bool hasAttachedCallOpBundle(const CallBase *CB)
Definition: ObjCARCUtil.h:29
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:480
bool RetCC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition: STLExtras.h:2115
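A hedged usage sketch (the helper name is illustrative):
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/STLExtras.h"
  #include "llvm/ADT/SmallVector.h"
  static void appendAll(llvm::SmallVectorImpl<int> &Dst, llvm::ArrayRef<int> Src) {
    llvm::append_range(Dst, Src); // same effect as Dst.append(Src.begin(), Src.end())
  }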
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:293
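An equivalent standalone check, for illustration: a power of two greater than zero has exactly one bit set.
  #include <cstdint>
  static constexpr bool isPow2Sketch(uint32_t Value) {
    return Value != 0 && (Value & (Value - 1)) == 0;
  }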
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
bool is_sorted(R &&Range, Compare C)
Wrapper function around std::is_sorted to check if elements in a range R are sorted with respect to a...
Definition: STLExtras.h:1926
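A hedged usage sketch (names are illustrative):
  #include <functional>
  #include "llvm/ADT/ArrayRef.h"
  #include "llvm/ADT/STLExtras.h"
  static bool isNonDecreasing(llvm::ArrayRef<int> Vals) {
    return llvm::is_sorted(Vals);                      // default comparison
  }
  static bool isNonIncreasing(llvm::ArrayRef<int> Vals) {
    return llvm::is_sorted(Vals, std::greater<int>()); // custom comparator
  }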
EHPersonality classifyEHPersonality(const Value *Pers)
See if the given exception handling personality function is one that we understand.
bool isFuncletEHPersonality(EHPersonality Pers)
Returns true if this is a personality function that invokes handler funclets (which must return to it...
uint64_t alignTo(uint64_t Size, Align A)
Returns the smallest multiple of A that is large enough to store Size bytes.
Definition: Alignment.h:155
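An equivalent computation, for illustration (AlignValue stands for A.value(), which is a power of two): round Size up to the next multiple of the alignment, e.g. 10 bytes at 8-byte alignment rounds to 16.
  #include <cstdint>
  static uint64_t alignToSketch(uint64_t Size, uint64_t AlignValue) {
    return (Size + AlignValue - 1) / AlignValue * AlignValue;
  }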
bool CC_X86(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State)
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
uint64_t value() const
This is a hole in the type system and should not be abused.
Definition: Alignment.h:85
Extended Value Type.
Definition: ValueTypes.h:35
EVT changeVectorElementTypeToInteger() const
Return a vector with the same number of elements as this vector, but with the element type converted ...
Definition: ValueTypes.h:94
TypeSize getStoreSize() const
Return the number of bytes overwritten by a store of the specified value type.
Definition: ValueTypes.h:390
static EVT getVectorVT(LLVMContext &Context, EVT VT, unsigned NumElements, bool IsScalable=false)
Returns the EVT that represents a vector NumElements in length, where each element is of type VT.
Definition: ValueTypes.h:74
bool bitsLT(EVT VT) const
Return true if this has less bits than VT.
Definition: ValueTypes.h:295
ElementCount getVectorElementCount() const
Definition: ValueTypes.h:345
TypeSize getSizeInBits() const
Return the size of the specified value type in bits.
Definition: ValueTypes.h:368
MVT getSimpleVT() const
Return the SimpleValueType held in the specified simple EVT.
Definition: ValueTypes.h:311
bool is128BitVector() const
Return true if this is a 128-bit vector type.
Definition: ValueTypes.h:207
bool is512BitVector() const
Return true if this is a 512-bit vector type.
Definition: ValueTypes.h:217
bool isVector() const
Return true if this is a vector value type.
Definition: ValueTypes.h:168
bool is256BitVector() const
Return true if this is a 256-bit vector type.
Definition: ValueTypes.h:212
EVT getVectorElementType() const
Given a vector type, return the type of each element.
Definition: ValueTypes.h:323
EVT changeVectorElementType(EVT EltVT) const
Return a VT for a vector type whose attributes match ourselves with the exception of the element type...
Definition: ValueTypes.h:102
unsigned getVectorNumElements() const
Given a vector type, return the number of elements it contains.
Definition: ValueTypes.h:331
Describes a register that needs to be forwarded from the prologue to a musttail call.
SmallVector< ArgRegPair, 1 > ArgRegPairs
Vector of call argument and its forwarding register.
This class contains a discriminated union of information about pointers in memory operands,...
static MachinePointerInfo getStack(MachineFunction &MF, int64_t Offset, uint8_t ID=0)
Stack pointer relative access.
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
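A hedged sketch (MF and FI assumed in scope): pointer info for a store into the outgoing-argument area versus a load from a fixed incoming-argument slot.
  MachinePointerInfo OutArgPI = MachinePointerInfo::getStack(MF, /*Offset=*/16);
  MachinePointerInfo InArgPI  = MachinePointerInfo::getFixedStack(MF, FI);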
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition: Alignment.h:117
This represents a list of ValueType's that has been intern'd by a SelectionDAG.
This structure contains all information that is necessary for lowering calls.
SmallVector< ISD::InputArg, 32 > Ins
SmallVector< ISD::OutputArg, 32 > Outs
SmallVector< SDValue, 32 > OutVals