LLVM 23.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/IR/AutoUpgrade.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/StringRef.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/CallingConv.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DebugInfo.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/GlobalValue.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/InstVisitor.h"
32#include "llvm/IR/Instruction.h"
34#include "llvm/IR/Intrinsics.h"
35#include "llvm/IR/IntrinsicsAArch64.h"
36#include "llvm/IR/IntrinsicsAMDGPU.h"
37#include "llvm/IR/IntrinsicsARM.h"
38#include "llvm/IR/IntrinsicsNVPTX.h"
39#include "llvm/IR/IntrinsicsRISCV.h"
40#include "llvm/IR/IntrinsicsWebAssembly.h"
41#include "llvm/IR/IntrinsicsX86.h"
42#include "llvm/IR/LLVMContext.h"
43#include "llvm/IR/MDBuilder.h"
44#include "llvm/IR/Metadata.h"
45#include "llvm/IR/Module.h"
46#include "llvm/IR/Value.h"
47#include "llvm/IR/Verifier.h"
53#include "llvm/Support/Regex.h"
56#include <cstdint>
57#include <cstring>
58#include <numeric>
59
60using namespace llvm;
61
// Boolean command-line option registered under the name
// "disable-auto-upgrade-debug-info"; its help text is
// "Disable autoupgrade of debug info".
62static cl::opt<bool>
63 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
64 cl::desc("Disable autoupgrade of debug info"));
65
66static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
67
68// Report a fatal error along with the
69// call instruction which caused the error.
// The visible body prints CI to llvm::errs() followed by a newline.
// NOTE(review): the embedded listing numbers jump from 73 to 75, so one line
// of this body appears to be missing from this copy. As shown, `reason` is
// never used and control reaches the end of a [[noreturn]] function.
// Presumably the dropped line reports `reason` as the fatal error -- TODO
// confirm against upstream and restore it.
70[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
71 CallBase *CI) {
72 CI->print(llvm::errs());
73 llvm::errs() << "\n";
75}
76
77// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
78// changed their type from v4f32 to v2i64.
// Returns false without touching F or NewFn when F's first parameter is not a
// fixed vector of four floats. Otherwise F gets an ".old" suffix via rename(),
// NewFn is set to the result of Intrinsic::getOrInsertDeclaration for IID, and
// true is returned.
// NOTE(review): the embedded listing numbers jump from 78 to 80, so the first
// line of this signature appears to be missing from this copy -- TODO restore.
80 Function *&NewFn) {
81 // Check whether this is an old version of the function, which received
82 // v4f32 arguments.
83 Type *Arg0Type = F->getFunctionType()->getParamType(0);
84 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
85 return false;
86
87 // Yes, it's old, replace it with new version.
88 rename(F);
89 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
90 return true;
91}
92
93// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
94// arguments have changed their type from i32 to i8.
// Returns false without touching F or NewFn unless F's last parameter is an
// i32. Otherwise F gets an ".old" suffix via rename(), NewFn is set to the
// result of Intrinsic::getOrInsertDeclaration for IID, and true is returned.
// NOTE(review): the embedded listing numbers jump from 94 to 96, so the first
// line of this signature appears to be missing from this copy -- TODO restore.
96 Function *&NewFn) {
97 // Check that the last argument is an i32.
98 Type *LastArgType = F->getFunctionType()->getParamType(
99 F->getFunctionType()->getNumParams() - 1);
100 if (!LastArgType->isIntegerTy(32))
101 return false;
102
103 // Move this function aside and map down.
104 rename(F);
105 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
106 return true;
107}
108
109// Upgrade the declaration of fp compare intrinsics that change return type
110// from scalar to vXi1 mask.
// Returns false without touching F or NewFn when F already returns a vector.
// Otherwise F gets an ".old" suffix via rename(), NewFn is set to the result
// of Intrinsic::getOrInsertDeclaration for IID, and true is returned.
// NOTE(review): the embedded listing numbers jump from 110 to 112, so the
// first line of this signature appears to be missing from this copy -- TODO
// restore.
112 Function *&NewFn) {
113 // A vector return type means there is nothing to upgrade.
114 if (F->getReturnType()->isVectorTy())
115 return false;
116
117 rename(F);
118 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
119 return true;
120}
121
122// Upgrade the declaration of multiply and add bytes intrinsics whose input
123// arguments' types have changed from vectors of i32 to vectors of i8.
// Returns false without touching F or NewFn when parameters 1 and 2 are both
// vectors with i8 elements. Otherwise F gets an ".old" suffix via rename(),
// NewFn is set to the result of Intrinsic::getOrInsertDeclaration for IID, and
// true is returned.
// NOTE(review): the embedded listing numbers jump from 123 to 125, so the
// first line of this signature appears to be missing from this copy -- TODO
// restore.
125 Function *&NewFn) {
126 // Check if the input argument types are vectors of i8.
127 Type *Arg1Type = F->getFunctionType()->getParamType(1);
128 Type *Arg2Type = F->getFunctionType()->getParamType(2);
129 if (Arg1Type->isVectorTy() &&
130 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
131 Arg2Type->isVectorTy() &&
132 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
133 return false;
134
135 rename(F);
136 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
137 return true;
138}
139
140// Upgrade the declaration of multiply and add words intrinsics whose input
141// arguments' types have changed from vectors of i32 to vectors of i16.
// Returns false without touching F or NewFn when parameters 1 and 2 are both
// vectors with i16 elements. Otherwise F gets an ".old" suffix via rename(),
// NewFn is set to the result of Intrinsic::getOrInsertDeclaration for IID, and
// true is returned.
// NOTE(review): the embedded listing numbers jump from 141 to 143, so the
// first line of this signature appears to be missing from this copy -- TODO
// restore.
143 Function *&NewFn) {
144 // Check if the input argument types are vectors of i16.
145 Type *Arg1Type = F->getFunctionType()->getParamType(1);
146 Type *Arg2Type = F->getFunctionType()->getParamType(2);
147 if (Arg1Type->isVectorTy() &&
148 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
149 Arg2Type->isVectorTy() &&
150 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
151 return false;
152
153 rename(F);
154 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
155 return true;
156}
157
159 Function *&NewFn) {
 // NOTE(review): the embedded listing numbers jump from 157 to 159, so the
 // first line of this signature appears to be missing from this copy -- TODO
 // restore.
 // Returns false (F and NewFn untouched) when the scalar type of F's return
 // type is bfloat.
160 if (F->getReturnType()->getScalarType()->isBFloatTy())
161 return false;
162
 // Otherwise give F an ".old" suffix and hand back the declaration obtained
 // for IID through NewFn.
163 rename(F);
164 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
165 return true;
166}
167
169 Function *&NewFn) {
 // NOTE(review): the embedded listing numbers jump from 167 to 169, so the
 // first line of this signature appears to be missing from this copy -- TODO
 // restore.
 // Returns false (F and NewFn untouched) when the scalar type of F's
 // parameter 1 is bfloat.
170 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
171 return false;
172
 // Otherwise give F an ".old" suffix and hand back the declaration obtained
 // for IID through NewFn.
173 rename(F);
174 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
175 return true;
176}
177
 // Returns true when Name matches one of the intrinsic names or name prefixes
 // listed below. Each consume_front() branch strips its prefix from Name and
 // returns its own verdict directly, so at most one group is consulted; names
 // that match no prefix fall through to the final list.
 // NOTE(review): the embedded listing numbers jump from 177 to 179, so the
 // signature line of this function appears to be missing from this copy --
 // TODO restore.
179 // All of the intrinsic matches below should be marked with which llvm
180 // version started autoupgrading them. At some point in the future we would
181 // like to use this information to remove upgrade code for some older
182 // intrinsics. It is currently undecided how we will determine that future
183 // point.
 // 'avx.*'
184 if (Name.consume_front("avx."))
185 return (Name.starts_with("blend.p") || // Added in 3.7
186 Name == "cvt.ps2.pd.256" || // Added in 3.9
187 Name == "cvtdq2.pd.256" || // Added in 3.9
188 Name == "cvtdq2.ps.256" || // Added in 7.0
189 Name.starts_with("movnt.") || // Added in 3.2
190 Name.starts_with("sqrt.p") || // Added in 7.0
191 Name.starts_with("storeu.") || // Added in 3.9
192 Name.starts_with("vbroadcast.s") || // Added in 3.5
193 Name.starts_with("vbroadcastf128") || // Added in 4.0
194 Name.starts_with("vextractf128.") || // Added in 3.7
195 Name.starts_with("vinsertf128.") || // Added in 3.7
196 Name.starts_with("vperm2f128.") || // Added in 6.0
197 Name.starts_with("vpermil.")); // Added in 3.1
198
 // 'avx2.*'
199 if (Name.consume_front("avx2."))
200 return (Name == "movntdqa" || // Added in 5.0
201 Name.starts_with("pabs.") || // Added in 6.0
202 Name.starts_with("padds.") || // Added in 8.0
203 Name.starts_with("paddus.") || // Added in 8.0
204 Name.starts_with("pblendd.") || // Added in 3.7
205 Name == "pblendw" || // Added in 3.7
206 Name.starts_with("pbroadcast") || // Added in 3.8
207 Name.starts_with("pcmpeq.") || // Added in 3.1
208 Name.starts_with("pcmpgt.") || // Added in 3.1
209 Name.starts_with("pmax") || // Added in 3.9
210 Name.starts_with("pmin") || // Added in 3.9
211 Name.starts_with("pmovsx") || // Added in 3.9
212 Name.starts_with("pmovzx") || // Added in 3.9
213 Name == "pmul.dq" || // Added in 7.0
214 Name == "pmulu.dq" || // Added in 7.0
215 Name.starts_with("psll.dq") || // Added in 3.7
216 Name.starts_with("psrl.dq") || // Added in 3.7
217 Name.starts_with("psubs.") || // Added in 8.0
218 Name.starts_with("psubus.") || // Added in 8.0
219 Name.starts_with("vbroadcast") || // Added in 3.8
220 Name == "vbroadcasti128" || // Added in 3.7
221 Name == "vextracti128" || // Added in 3.7
222 Name == "vinserti128" || // Added in 3.7
223 Name == "vperm2i128"); // Added in 6.0
224
 // 'avx512.*', split further into the mask./mask3./maskz. sub-groups.
225 if (Name.consume_front("avx512.")) {
226 if (Name.consume_front("mask."))
227 // 'avx512.mask.*'
228 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
229 Name.starts_with("and.") || // Added in 3.9
230 Name.starts_with("andn.") || // Added in 3.9
231 Name.starts_with("broadcast.s") || // Added in 3.9
232 Name.starts_with("broadcastf32x4.") || // Added in 6.0
233 Name.starts_with("broadcastf32x8.") || // Added in 6.0
234 Name.starts_with("broadcastf64x2.") || // Added in 6.0
235 Name.starts_with("broadcastf64x4.") || // Added in 6.0
236 Name.starts_with("broadcasti32x4.") || // Added in 6.0
237 Name.starts_with("broadcasti32x8.") || // Added in 6.0
238 Name.starts_with("broadcasti64x2.") || // Added in 6.0
239 Name.starts_with("broadcasti64x4.") || // Added in 6.0
240 Name.starts_with("cmp.b") || // Added in 5.0
241 Name.starts_with("cmp.d") || // Added in 5.0
242 Name.starts_with("cmp.q") || // Added in 5.0
243 Name.starts_with("cmp.w") || // Added in 5.0
244 Name.starts_with("compress.b") || // Added in 9.0
245 Name.starts_with("compress.d") || // Added in 9.0
246 Name.starts_with("compress.p") || // Added in 9.0
247 Name.starts_with("compress.q") || // Added in 9.0
248 Name.starts_with("compress.store.") || // Added in 7.0
249 Name.starts_with("compress.w") || // Added in 9.0
250 Name.starts_with("conflict.") || // Added in 9.0
251 Name.starts_with("cvtdq2pd.") || // Added in 4.0
252 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
253 Name == "cvtpd2dq.256" || // Added in 7.0
254 Name == "cvtpd2ps.256" || // Added in 7.0
255 Name == "cvtps2pd.128" || // Added in 7.0
256 Name == "cvtps2pd.256" || // Added in 7.0
257 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
258 Name == "cvtqq2ps.256" || // Added in 9.0
259 Name == "cvtqq2ps.512" || // Added in 9.0
260 Name == "cvttpd2dq.256" || // Added in 7.0
261 Name == "cvttps2dq.128" || // Added in 7.0
262 Name == "cvttps2dq.256" || // Added in 7.0
263 Name.starts_with("cvtudq2pd.") || // Added in 4.0
264 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
265 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
266 Name == "cvtuqq2ps.256" || // Added in 9.0
267 Name == "cvtuqq2ps.512" || // Added in 9.0
268 Name.starts_with("dbpsadbw.") || // Added in 7.0
269 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
270 Name.starts_with("expand.b") || // Added in 9.0
271 Name.starts_with("expand.d") || // Added in 9.0
272 Name.starts_with("expand.load.") || // Added in 7.0
273 Name.starts_with("expand.p") || // Added in 9.0
274 Name.starts_with("expand.q") || // Added in 9.0
275 Name.starts_with("expand.w") || // Added in 9.0
276 Name.starts_with("fpclass.p") || // Added in 7.0
277 Name.starts_with("insert") || // Added in 4.0
278 Name.starts_with("load.") || // Added in 3.9
279 Name.starts_with("loadu.") || // Added in 3.9
280 Name.starts_with("lzcnt.") || // Added in 5.0
281 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
282 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
283 Name.starts_with("movddup") || // Added in 3.9
284 Name.starts_with("move.s") || // Added in 4.0
285 Name.starts_with("movshdup") || // Added in 3.9
286 Name.starts_with("movsldup") || // Added in 3.9
287 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
288 Name.starts_with("or.") || // Added in 3.9
289 Name.starts_with("pabs.") || // Added in 6.0
290 Name.starts_with("packssdw.") || // Added in 5.0
291 Name.starts_with("packsswb.") || // Added in 5.0
292 Name.starts_with("packusdw.") || // Added in 5.0
293 Name.starts_with("packuswb.") || // Added in 5.0
294 Name.starts_with("padd.") || // Added in 4.0
295 Name.starts_with("padds.") || // Added in 8.0
296 Name.starts_with("paddus.") || // Added in 8.0
297 Name.starts_with("palignr.") || // Added in 3.9
298 Name.starts_with("pand.") || // Added in 3.9
299 Name.starts_with("pandn.") || // Added in 3.9
300 Name.starts_with("pavg") || // Added in 6.0
301 Name.starts_with("pbroadcast") || // Added in 6.0
302 Name.starts_with("pcmpeq.") || // Added in 3.9
303 Name.starts_with("pcmpgt.") || // Added in 3.9
304 Name.starts_with("perm.df.") || // Added in 3.9
305 Name.starts_with("perm.di.") || // Added in 3.9
306 Name.starts_with("permvar.") || // Added in 7.0
307 Name.starts_with("pmaddubs.w.") || // Added in 7.0
308 Name.starts_with("pmaddw.d.") || // Added in 7.0
309 Name.starts_with("pmax") || // Added in 4.0
310 Name.starts_with("pmin") || // Added in 4.0
311 Name == "pmov.qd.256" || // Added in 9.0
312 Name == "pmov.qd.512" || // Added in 9.0
313 Name == "pmov.wb.256" || // Added in 9.0
314 Name == "pmov.wb.512" || // Added in 9.0
315 Name.starts_with("pmovsx") || // Added in 4.0
316 Name.starts_with("pmovzx") || // Added in 4.0
317 Name.starts_with("pmul.dq.") || // Added in 4.0
318 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
319 Name.starts_with("pmulh.w.") || // Added in 7.0
320 Name.starts_with("pmulhu.w.") || // Added in 7.0
321 Name.starts_with("pmull.") || // Added in 4.0
322 Name.starts_with("pmultishift.qb.") || // Added in 8.0
323 Name.starts_with("pmulu.dq.") || // Added in 4.0
324 Name.starts_with("por.") || // Added in 3.9
325 Name.starts_with("prol.") || // Added in 8.0
326 Name.starts_with("prolv.") || // Added in 8.0
327 Name.starts_with("pror.") || // Added in 8.0
328 Name.starts_with("prorv.") || // Added in 8.0
329 Name.starts_with("pshuf.b.") || // Added in 4.0
330 Name.starts_with("pshuf.d.") || // Added in 3.9
331 Name.starts_with("pshufh.w.") || // Added in 3.9
332 Name.starts_with("pshufl.w.") || // Added in 3.9
333 Name.starts_with("psll.d") || // Added in 4.0
334 Name.starts_with("psll.q") || // Added in 4.0
335 Name.starts_with("psll.w") || // Added in 4.0
336 Name.starts_with("pslli") || // Added in 4.0
337 Name.starts_with("psllv") || // Added in 4.0
338 Name.starts_with("psra.d") || // Added in 4.0
339 Name.starts_with("psra.q") || // Added in 4.0
340 Name.starts_with("psra.w") || // Added in 4.0
341 Name.starts_with("psrai") || // Added in 4.0
342 Name.starts_with("psrav") || // Added in 4.0
343 Name.starts_with("psrl.d") || // Added in 4.0
344 Name.starts_with("psrl.q") || // Added in 4.0
345 Name.starts_with("psrl.w") || // Added in 4.0
346 Name.starts_with("psrli") || // Added in 4.0
347 Name.starts_with("psrlv") || // Added in 4.0
348 Name.starts_with("psub.") || // Added in 4.0
349 Name.starts_with("psubs.") || // Added in 8.0
350 Name.starts_with("psubus.") || // Added in 8.0
351 Name.starts_with("pternlog.") || // Added in 7.0
352 Name.starts_with("punpckh") || // Added in 3.9
353 Name.starts_with("punpckl") || // Added in 3.9
354 Name.starts_with("pxor.") || // Added in 3.9
355 Name.starts_with("shuf.f") || // Added in 6.0
356 Name.starts_with("shuf.i") || // Added in 6.0
357 Name.starts_with("shuf.p") || // Added in 4.0
358 Name.starts_with("sqrt.p") || // Added in 7.0
359 Name.starts_with("store.b.") || // Added in 3.9
360 Name.starts_with("store.d.") || // Added in 3.9
361 Name.starts_with("store.p") || // Added in 3.9
362 Name.starts_with("store.q.") || // Added in 3.9
363 Name.starts_with("store.w.") || // Added in 3.9
364 Name == "store.ss" || // Added in 7.0
365 Name.starts_with("storeu.") || // Added in 3.9
366 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
367 Name.starts_with("ucmp.") || // Added in 5.0
368 Name.starts_with("unpckh.") || // Added in 3.9
369 Name.starts_with("unpckl.") || // Added in 3.9
370 Name.starts_with("valign.") || // Added in 4.0
371 Name == "vcvtph2ps.128" || // Added in 11.0
372 Name == "vcvtph2ps.256" || // Added in 11.0
373 Name.starts_with("vextract") || // Added in 4.0
374 Name.starts_with("vfmadd.") || // Added in 7.0
375 Name.starts_with("vfmaddsub.") || // Added in 7.0
376 Name.starts_with("vfnmadd.") || // Added in 7.0
377 Name.starts_with("vfnmsub.") || // Added in 7.0
378 Name.starts_with("vpdpbusd.") || // Added in 7.0
379 Name.starts_with("vpdpbusds.") || // Added in 7.0
380 Name.starts_with("vpdpwssd.") || // Added in 7.0
381 Name.starts_with("vpdpwssds.") || // Added in 7.0
382 Name.starts_with("vpermi2var.") || // Added in 7.0
383 Name.starts_with("vpermil.p") || // Added in 3.9
384 Name.starts_with("vpermilvar.") || // Added in 4.0
385 Name.starts_with("vpermt2var.") || // Added in 7.0
386 Name.starts_with("vpmadd52") || // Added in 7.0
387 Name.starts_with("vpshld.") || // Added in 7.0
388 Name.starts_with("vpshldv.") || // Added in 8.0
389 Name.starts_with("vpshrd.") || // Added in 7.0
390 Name.starts_with("vpshrdv.") || // Added in 8.0
391 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
392 Name.starts_with("xor.")); // Added in 3.9
393
394 if (Name.consume_front("mask3."))
395 // 'avx512.mask3.*'
396 return (Name.starts_with("vfmadd.") || // Added in 7.0
397 Name.starts_with("vfmaddsub.") || // Added in 7.0
398 Name.starts_with("vfmsub.") || // Added in 7.0
399 Name.starts_with("vfmsubadd.") || // Added in 7.0
400 Name.starts_with("vfnmsub.")); // Added in 7.0
401
402 if (Name.consume_front("maskz."))
403 // 'avx512.maskz.*'
404 return (Name.starts_with("pternlog.") || // Added in 7.0
405 Name.starts_with("vfmadd.") || // Added in 7.0
406 Name.starts_with("vfmaddsub.") || // Added in 7.0
407 Name.starts_with("vpdpbusd.") || // Added in 7.0
408 Name.starts_with("vpdpbusds.") || // Added in 7.0
409 Name.starts_with("vpdpwssd.") || // Added in 7.0
410 Name.starts_with("vpdpwssds.") || // Added in 7.0
411 Name.starts_with("vpermt2var.") || // Added in 7.0
412 Name.starts_with("vpmadd52") || // Added in 7.0
413 Name.starts_with("vpshldv.") || // Added in 8.0
414 Name.starts_with("vpshrdv.")); // Added in 8.0
415
416 // 'avx512.*'
417 return (Name == "movntdqa" || // Added in 5.0
418 Name == "pmul.dq.512" || // Added in 7.0
419 Name == "pmulu.dq.512" || // Added in 7.0
420 Name.starts_with("broadcastm") || // Added in 6.0
421 Name.starts_with("cmp.p") || // Added in 12.0
422 Name.starts_with("cvtb2mask.") || // Added in 7.0
423 Name.starts_with("cvtd2mask.") || // Added in 7.0
424 Name.starts_with("cvtmask2") || // Added in 5.0
425 Name.starts_with("cvtq2mask.") || // Added in 7.0
426 Name == "cvtusi2sd" || // Added in 7.0
427 Name.starts_with("cvtw2mask.") || // Added in 7.0
428 Name == "kand.w" || // Added in 7.0
429 Name == "kandn.w" || // Added in 7.0
430 Name == "knot.w" || // Added in 7.0
431 Name == "kor.w" || // Added in 7.0
432 Name == "kortestc.w" || // Added in 7.0
433 Name == "kortestz.w" || // Added in 7.0
434 Name.starts_with("kunpck") || // Added in 6.0
435 Name == "kxnor.w" || // Added in 7.0
436 Name == "kxor.w" || // Added in 7.0
437 Name.starts_with("padds.") || // Added in 8.0
438 Name.starts_with("pbroadcast") || // Added in 3.9
439 Name.starts_with("prol") || // Added in 8.0
440 Name.starts_with("pror") || // Added in 8.0
441 Name.starts_with("psll.dq") || // Added in 3.9
442 Name.starts_with("psrl.dq") || // Added in 3.9
443 Name.starts_with("psubs.") || // Added in 8.0
444 Name.starts_with("ptestm") || // Added in 6.0
445 Name.starts_with("ptestnm") || // Added in 6.0
446 Name.starts_with("storent.") || // Added in 3.9
447 Name.starts_with("vbroadcast.s") || // Added in 7.0
448 Name.starts_with("vpshld.") || // Added in 8.0
449 Name.starts_with("vpshrd.")); // Added in 8.0
450 }
451
 // 'fma.*'
452 if (Name.consume_front("fma."))
453 return (Name.starts_with("vfmadd.") || // Added in 7.0
454 Name.starts_with("vfmsub.") || // Added in 7.0
455 Name.starts_with("vfmsubadd.") || // Added in 7.0
456 Name.starts_with("vfnmadd.") || // Added in 7.0
457 Name.starts_with("vfnmsub.")); // Added in 7.0
458
 // 'fma4.*'
459 if (Name.consume_front("fma4."))
460 return Name.starts_with("vfmadd.s"); // Added in 7.0
461
 // 'sse.*'
462 if (Name.consume_front("sse."))
463 return (Name == "add.ss" || // Added in 4.0
464 Name == "cvtsi2ss" || // Added in 7.0
465 Name == "cvtsi642ss" || // Added in 7.0
466 Name == "div.ss" || // Added in 4.0
467 Name == "mul.ss" || // Added in 4.0
468 Name.starts_with("sqrt.p") || // Added in 7.0
469 Name == "sqrt.ss" || // Added in 7.0
470 Name.starts_with("storeu.") || // Added in 3.9
471 Name == "sub.ss"); // Added in 4.0
472
 // 'sse2.*'
473 if (Name.consume_front("sse2."))
474 return (Name == "add.sd" || // Added in 4.0
475 Name == "cvtdq2pd" || // Added in 3.9
476 Name == "cvtdq2ps" || // Added in 7.0
477 Name == "cvtps2pd" || // Added in 3.9
478 Name == "cvtsi2sd" || // Added in 7.0
479 Name == "cvtsi642sd" || // Added in 7.0
480 Name == "cvtss2sd" || // Added in 7.0
481 Name == "div.sd" || // Added in 4.0
482 Name == "mul.sd" || // Added in 4.0
483 Name.starts_with("padds.") || // Added in 8.0
484 Name.starts_with("paddus.") || // Added in 8.0
485 Name.starts_with("pcmpeq.") || // Added in 3.1
486 Name.starts_with("pcmpgt.") || // Added in 3.1
487 Name == "pmaxs.w" || // Added in 3.9
488 Name == "pmaxu.b" || // Added in 3.9
489 Name == "pmins.w" || // Added in 3.9
490 Name == "pminu.b" || // Added in 3.9
491 Name == "pmulu.dq" || // Added in 7.0
492 Name.starts_with("pshuf") || // Added in 3.9
493 Name.starts_with("psll.dq") || // Added in 3.7
494 Name.starts_with("psrl.dq") || // Added in 3.7
495 Name.starts_with("psubs.") || // Added in 8.0
496 Name.starts_with("psubus.") || // Added in 8.0
497 Name.starts_with("sqrt.p") || // Added in 7.0
498 Name == "sqrt.sd" || // Added in 7.0
499 Name == "storel.dq" || // Added in 3.9
500 Name.starts_with("storeu.") || // Added in 3.9
501 Name == "sub.sd"); // Added in 4.0
502
 // 'sse41.*'
503 if (Name.consume_front("sse41."))
504 return (Name.starts_with("blendp") || // Added in 3.7
505 Name == "movntdqa" || // Added in 5.0
506 Name == "pblendw" || // Added in 3.7
507 Name == "pmaxsb" || // Added in 3.9
508 Name == "pmaxsd" || // Added in 3.9
509 Name == "pmaxud" || // Added in 3.9
510 Name == "pmaxuw" || // Added in 3.9
511 Name == "pminsb" || // Added in 3.9
512 Name == "pminsd" || // Added in 3.9
513 Name == "pminud" || // Added in 3.9
514 Name == "pminuw" || // Added in 3.9
515 Name.starts_with("pmovsx") || // Added in 3.8
516 Name.starts_with("pmovzx") || // Added in 3.9
517 Name == "pmuldq"); // Added in 7.0
518
 // 'sse42.*'
519 if (Name.consume_front("sse42."))
520 return Name == "crc32.64.8"; // Added in 3.4
521
 // 'sse4a.*'
522 if (Name.consume_front("sse4a."))
523 return Name.starts_with("movnt."); // Added in 3.9
524
 // 'ssse3.*'
525 if (Name.consume_front("ssse3."))
526 return (Name == "pabs.b.128" || // Added in 6.0
527 Name == "pabs.d.128" || // Added in 6.0
528 Name == "pabs.w.128"); // Added in 6.0
529
 // 'xop.*'
530 if (Name.consume_front("xop."))
531 return (Name == "vpcmov" || // Added in 3.8
532 Name == "vpcmov.256" || // Added in 5.0
533 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
534 Name.starts_with("vprot")); // Added in 8.0
535
 // 'bmi.*'
536 if (Name.consume_front("bmi."))
537 return (Name.starts_with("pdep.") || // Added in 23.0
538 Name.starts_with("pext.")); // Added in 23.0
539
 // Names that matched none of the prefixes above.
540 return (Name == "addcarry.u32" || // Added in 8.0
541 Name == "addcarry.u64" || // Added in 8.0
542 Name == "addcarryx.u32" || // Added in 8.0
543 Name == "addcarryx.u64" || // Added in 8.0
544 Name == "subborrow.u32" || // Added in 8.0
545 Name == "subborrow.u64" || // Added in 8.0
546 Name.starts_with("vcvtph2ps.")); // Added in 11.0
547}
548
550 Function *&NewFn) {
 // NOTE(review): this copy of the function is incomplete. The embedded
 // listing numbers skip repeatedly (e.g. 548->550, 570->572, 574->576,
 // 578->581), so the signature head, the declaration of `ID`, the heads of
 // the `.Case(...)` chains and the conditions guarding several `return
 // upgrade...` statements are not visible here -- TODO restore from upstream
 // before relying on the control flow as shown.
551 // Only handle intrinsics that start with "x86.".
552 if (!Name.consume_front("x86."))
553 return false;
554
 // Names accepted by shouldUpgradeX86Intrinsic are reported as upgradable
 // (return true) with NewFn explicitly set to null.
555 if (shouldUpgradeX86Intrinsic(F, Name)) {
556 NewFn = nullptr;
557 return true;
558 }
559
560 if (Name == "rdtscp") { // Added in 8.0
561 // If this intrinsic has 0 operands, it's the new version.
562 if (F->getFunctionType()->getNumParams() == 0)
563 return false;
564
565 rename(F);
566 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
567 Intrinsic::x86_rdtscp);
568 return true;
569 }
570
572
573 // SSE4.1 ptest functions may have an old signature.
574 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
576 .Case("c", Intrinsic::x86_sse41_ptestc)
577 .Case("z", Intrinsic::x86_sse41_ptestz)
578 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
581 return upgradePTESTIntrinsic(F, ID, NewFn);
582
583 return false;
584 }
585
586 // Several blend and other instructions with masks used the wrong number of
587 // bits.
588
589 // Added in 3.6
591 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
592 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
593 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
594 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
595 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
596 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
599 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
600
 // 'x86.avx512.*': masked fp compares and the vpdpbusd[s]/vpdpwssd[s]
 // multiply-add families.
601 if (Name.consume_front("avx512.")) {
602 if (Name.consume_front("mask.cmp.")) {
603 // Added in 7.0
605 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
606 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
607 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
608 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
609 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
610 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
613 return upgradeX86MaskedFPCompare(F, ID, NewFn);
614 } else if (Name.starts_with("vpdpbusd.") ||
615 Name.starts_with("vpdpbusds.")) {
616 // Added in 21.1
618 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
619 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
620 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
621 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
622 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
623 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
626 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
627 } else if (Name.starts_with("vpdpwssd.") ||
628 Name.starts_with("vpdpwssds.")) {
629 // Added in 21.1
631 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
632 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
633 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
634 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
635 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
636 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
639 return upgradeX86MultiplyAddWords(F, ID, NewFn);
640 }
641 return false; // No other 'x86.avx512.*'.
642 }
643
 // 'x86.avx2.*': the vpdpb* and vpdpw* multiply-add families.
644 if (Name.consume_front("avx2.")) {
645 if (Name.consume_front("vpdpb")) {
646 // Added in 21.1
648 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
649 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
650 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
651 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
652 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
653 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
654 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
655 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
656 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
657 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
658 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
659 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
662 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
663 } else if (Name.consume_front("vpdpw")) {
664 // Added in 21.1
666 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
667 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
668 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
669 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
670 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
671 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
672 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
673 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
674 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
675 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
676 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
677 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
680 return upgradeX86MultiplyAddWords(F, ID, NewFn);
681 }
682 return false; // No other 'x86.avx2.*'
683 }
684
 // 'x86.avx10.*': the 512-bit vpdpb* and vpdpw* multiply-add families.
685 if (Name.consume_front("avx10.")) {
686 if (Name.consume_front("vpdpb")) {
687 // Added in 21.1
689 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
690 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
691 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
692 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
693 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
694 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
697 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
698 } else if (Name.consume_front("vpdpw")) {
700 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
701 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
702 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
703 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
704 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
705 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
708 return upgradeX86MultiplyAddWords(F, ID, NewFn);
709 }
710 return false; // No other 'x86.avx10.*'
711 }
712
 // 'x86.avx512bf16.*': conversions first, then the dpbf16ps dot products.
713 if (Name.consume_front("avx512bf16.")) {
714 // Added in 9.0
716 .Case("cvtne2ps2bf16.128",
717 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
718 .Case("cvtne2ps2bf16.256",
719 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
720 .Case("cvtne2ps2bf16.512",
721 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
722 .Case("mask.cvtneps2bf16.128",
723 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
724 .Case("cvtneps2bf16.256",
725 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
726 .Case("cvtneps2bf16.512",
727 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
730 return upgradeX86BF16Intrinsic(F, ID, NewFn);
731
732 // Added in 9.0
734 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
735 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
736 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
739 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
740 return false; // No other 'x86.avx512bf16.*'.
741 }
742
743 if (Name.consume_front("xop.")) {
745 if (Name.starts_with("vpermil2")) { // Added in 3.9
746 // Upgrade any XOP PERMIL2 index operand still using a float/double
747 // vector.
748 auto Idx = F->getFunctionType()->getParamType(2);
749 if (Idx->isFPOrFPVectorTy()) {
 // Pick the replacement from the index operand's element width and total
 // width; any combination not listed explicitly falls into the final else.
750 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
751 unsigned EltSize = Idx->getScalarSizeInBits();
752 if (EltSize == 64 && IdxSize == 128)
753 ID = Intrinsic::x86_xop_vpermil2pd;
754 else if (EltSize == 32 && IdxSize == 128)
755 ID = Intrinsic::x86_xop_vpermil2ps;
756 else if (EltSize == 64 && IdxSize == 256)
757 ID = Intrinsic::x86_xop_vpermil2pd_256;
758 else
759 ID = Intrinsic::x86_xop_vpermil2ps_256;
760 }
761 } else if (F->arg_size() == 2)
762 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
764 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
765 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
767
769 rename(F);
770 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
771 return true;
772 }
773 return false; // No other 'x86.xop.*'
774 }
775
 // 'x86.seh.recoverfp' is pointed at the eh_recoverfp intrinsic; unlike the
 // paths above, F is not renamed here.
776 if (Name == "seh.recoverfp") {
777 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
778 Intrinsic::eh_recoverfp);
779 return true;
780 }
781
782 return false;
783}
784
785// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
786// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
788 StringRef Name,
789 Function *&NewFn) {
790 if (Name.starts_with("rbit")) {
791 // '(arm|aarch64).rbit'.
793 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
794 return true;
795 }
796
797 if (Name == "thread.pointer") {
798 // '(arm|aarch64).thread.pointer'.
800 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
801 return true;
802 }
803
804 bool Neon = Name.consume_front("neon.");
805 if (Neon) {
806 // '(arm|aarch64).neon.*'.
807 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
808 // v16i8 respectively.
809 if (Name.consume_front("bfdot.")) {
810 // (arm|aarch64).neon.bfdot.*'.
813 .Cases({"v2f32.v8i8", "v4f32.v16i8"},
814 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
815 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
818 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
819 assert((OperandWidth == 64 || OperandWidth == 128) &&
820 "Unexpected operand width");
821 LLVMContext &Ctx = F->getParent()->getContext();
822 std::array<Type *, 2> Tys{
823 {F->getReturnType(),
824 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
825 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
826 return true;
827 }
828 return false; // No other '(arm|aarch64).neon.bfdot.*'.
829 }
830
831 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
832 // anymore and accept v8bf16 instead of v16i8.
833 if (Name.consume_front("bfm")) {
834 // (arm|aarch64).neon.bfm*'.
835 if (Name.consume_back(".v4f32.v16i8")) {
836 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
839 .Case("mla",
840 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
841 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
842 .Case("lalb",
843 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
844 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
845 .Case("lalt",
846 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
847 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
850 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
851 return true;
852 }
853 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
854 }
855 return false; // No other '(arm|aarch64).neon.bfm*.
856 }
857 // Continue on to Aarch64 Neon or Arm Neon.
858 }
859 // Continue on to Arm or Aarch64.
860
861 if (IsArm) {
862 // 'arm.*'.
863 if (Neon) {
864 // 'arm.neon.*'.
866 .StartsWith("vclz.", Intrinsic::ctlz)
867 .StartsWith("vcnt.", Intrinsic::ctpop)
868 .StartsWith("vqadds.", Intrinsic::sadd_sat)
869 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
870 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
871 .StartsWith("vqsubu.", Intrinsic::usub_sat)
872 .StartsWith("vrinta.", Intrinsic::round)
873 .StartsWith("vrintn.", Intrinsic::roundeven)
874 .StartsWith("vrintm.", Intrinsic::floor)
875 .StartsWith("vrintp.", Intrinsic::ceil)
876 .StartsWith("vrintx.", Intrinsic::rint)
877 .StartsWith("vrintz.", Intrinsic::trunc)
880 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
881 F->arg_begin()->getType());
882 return true;
883 }
884
885 if (Name.consume_front("vst")) {
886 // 'arm.neon.vst*'.
887 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
889 if (vstRegex.match(Name, &Groups)) {
890 static const Intrinsic::ID StoreInts[] = {
891 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
892 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
893
894 static const Intrinsic::ID StoreLaneInts[] = {
895 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
896 Intrinsic::arm_neon_vst4lane};
897
898 auto fArgs = F->getFunctionType()->params();
899 Type *Tys[] = {fArgs[0], fArgs[1]};
900 if (Groups[1].size() == 1)
902 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
903 else
905 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
906 return true;
907 }
908 return false; // No other 'arm.neon.vst*'.
909 }
910
911 return false; // No other 'arm.neon.*'.
912 }
913
914 if (Name.consume_front("mve.")) {
915 // 'arm.mve.*'.
916 if (Name == "vctp64") {
917 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
918 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
919 // the function and deal with it below in UpgradeIntrinsicCall.
920 rename(F);
921 return true;
922 }
923 return false; // Not 'arm.mve.vctp64'.
924 }
925
926 if (Name.starts_with("vrintn.v")) {
928 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
929 return true;
930 }
931
932 // These too are changed to accept a v2i1 instead of the old v4i1.
933 if (Name.consume_back(".v4i1")) {
934 // 'arm.mve.*.v4i1'.
935 if (Name.consume_back(".predicated.v2i64.v4i32"))
936 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
937 return Name == "mull.int" || Name == "vqdmull";
938
939 if (Name.consume_back(".v2i64")) {
940 // 'arm.mve.*.v2i64.v4i1'
941 bool IsGather = Name.consume_front("vldr.gather.");
942 if (IsGather || Name.consume_front("vstr.scatter.")) {
943 if (Name.consume_front("base.")) {
944 // Optional 'wb.' prefix.
945 Name.consume_front("wb.");
946 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
947 // predicated.v2i64.v2i64.v4i1'.
948 return Name == "predicated.v2i64";
949 }
950
951 if (Name.consume_front("offset.predicated."))
952 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
953 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
954
955 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
956 return false;
957 }
958
959 return false; // No other 'arm.mve.*.v2i64.v4i1'.
960 }
961 return false; // No other 'arm.mve.*.v4i1'.
962 }
963 return false; // No other 'arm.mve.*'.
964 }
965
966 if (Name.consume_front("cde.vcx")) {
967 // 'arm.cde.vcx*'.
968 if (Name.consume_back(".predicated.v2i64.v4i1"))
969 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
970 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
971 Name == "3q" || Name == "3qa";
972
973 return false; // No other 'arm.cde.vcx*'.
974 }
975 } else {
976 // 'aarch64.*'.
977 if (Neon) {
978 // 'aarch64.neon.*'.
980 .StartsWith("frintn", Intrinsic::roundeven)
981 .StartsWith("rbit", Intrinsic::bitreverse)
984 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
985 F->arg_begin()->getType());
986 return true;
987 }
988
989 if (Name.starts_with("addp")) {
990 // 'aarch64.neon.addp*'.
991 if (F->arg_size() != 2)
992 return false; // Invalid IR.
993 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
994 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
996 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
997 return true;
998 }
999 }
1000
1001 // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
1002 if (Name.starts_with("bfcvt")) {
1003 NewFn = nullptr;
1004 return true;
1005 }
1006
1007 // vcvtfp2hf and vcvthf2fp -> fpext and fptrunc
1008 if (Name == "vcvtfp2hf" || Name == "vcvthf2fp") {
1009 NewFn = nullptr;
1010 return true;
1011 }
1012
1013 return false; // No other 'aarch64.neon.*'.
1014 }
1015 if (Name.consume_front("sve.")) {
1016 // 'aarch64.sve.*'.
1017 if (Name.consume_front("bf")) {
1018 if (Name == "mmla") {
1019 Type *Tys[] = {F->getReturnType(),
1020 std::next(F->arg_begin())->getType()};
1022 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1023 return true;
1024 }
1025 if (Name.consume_back(".lane")) {
1026 // 'aarch64.sve.bf*.lane'.
1029 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1030 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1031 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1034 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1035 return true;
1036 }
1037 return false; // No other 'aarch64.sve.bf*.lane'.
1038 }
1039 return false; // No other 'aarch64.sve.bf*'.
1040 }
1041
1042 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
1043 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1044 NewFn = nullptr;
1045 return true;
1046 }
1047
1048 if (Name.consume_front("addqv")) {
1049 // 'aarch64.sve.addqv'.
1050 if (!F->getReturnType()->isFPOrFPVectorTy())
1051 return false;
1052
1053 auto Args = F->getFunctionType()->params();
1054 Type *Tys[] = {F->getReturnType(), Args[1]};
1056 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1057 return true;
1058 }
1059
1060 if (Name.consume_front("ld")) {
1061 // 'aarch64.sve.ld*'.
1062 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1063 if (LdRegex.match(Name)) {
1064 Type *ScalarTy =
1065 cast<VectorType>(F->getReturnType())->getElementType();
1066 ElementCount EC =
1067 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
1068 assert(F->arg_size() == 2 &&
1069 "Expected 2 arguments for ld* intrinsic.");
1070 Type *PtrTy = F->getArg(1)->getType();
1071 Type *Ty = VectorType::get(ScalarTy, EC);
1072 static const Intrinsic::ID LoadIDs[] = {
1073 Intrinsic::aarch64_sve_ld2_sret,
1074 Intrinsic::aarch64_sve_ld3_sret,
1075 Intrinsic::aarch64_sve_ld4_sret,
1076 };
1078 F->getParent(), LoadIDs[Name[0] - '2'], {Ty, PtrTy});
1079 return true;
1080 }
1081 return false; // No other 'aarch64.sve.ld*'.
1082 }
1083
1084 if (Name.consume_front("tuple.")) {
1085 // 'aarch64.sve.tuple.*'.
1086 if (Name.starts_with("get")) {
1087 // 'aarch64.sve.tuple.get*'.
1088 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1090 F->getParent(), Intrinsic::vector_extract, Tys);
1091 return true;
1092 }
1093
1094 if (Name.starts_with("set")) {
1095 // 'aarch64.sve.tuple.set*'.
1096 auto Args = F->getFunctionType()->params();
1097 Type *Tys[] = {Args[0], Args[2], Args[1]};
1099 F->getParent(), Intrinsic::vector_insert, Tys);
1100 return true;
1101 }
1102
1103 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1104 if (CreateTupleRegex.match(Name)) {
1105 // 'aarch64.sve.tuple.create*'.
1106 auto Args = F->getFunctionType()->params();
1107 Type *Tys[] = {F->getReturnType(), Args[1]};
1109 F->getParent(), Intrinsic::vector_insert, Tys);
1110 return true;
1111 }
1112 return false; // No other 'aarch64.sve.tuple.*'.
1113 }
1114
1115 if (Name.starts_with("rev.nxv")) {
1116 // 'aarch64.sve.rev.<Ty>'
1118 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
1119 return true;
1120 }
1121
1122 return false; // No other 'aarch64.sve.*'.
1123 }
1124 if (Name.consume_front("sme.")) {
1125 // 'aarch64.sme.*'.
1126 if (Name.consume_front("ftmopa.")) {
1127 // The FP8 FTMOPA intrinsics were split out from the non-FP8 FTMOPA
1128 // intrinsics to model their FPMR dependency.
1131 .Case("za16.nxv16i8", Intrinsic::aarch64_sme_fp8_ftmopa_za16)
1132 .Case("za32.nxv16i8", Intrinsic::aarch64_sme_fp8_ftmopa_za32)
1135 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1136 return true;
1137 }
1138 return false; // No other 'aarch64.sme.ftmopa.*'.
1139 }
1140
1141 return false; // No other 'aarch64.sme.*'.
1142 }
1143 }
1144 return false; // No other 'arm.*', 'aarch64.*'.
1145}
1146
1148 StringRef Name) {
1149 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
1152 .Case("im2col.3d",
1153 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1154 .Case("im2col.4d",
1155 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1156 .Case("im2col.5d",
1157 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1158 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1159 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1160 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1161 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1162 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1164
1166 return ID;
1167
1168 // These intrinsics may need upgrade for two reasons:
1169 // (1) When the address-space of the first argument is shared[AS=3]
1170 // (and we upgrade it to use shared_cluster address-space[AS=7])
1171 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1173 return ID;
1174
1175 // (2) When there are only two boolean flag arguments at the end:
1176 //
1177 // The last three parameters of the older version of these
1178 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1179 //
1180 // The newer version reads as:
1181 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1182 //
1183 // So, when the type of the [N-3]rd argument is "not i1", then
1184 // it is the older version and we need to upgrade.
1185 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1186 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
1187 if (!ArgType->isIntegerTy(1))
1188 return ID;
1189 }
1190
1192}
1193
1195 StringRef Name) {
1196 if (Name.consume_front("mapa.shared.cluster"))
1197 if (F->getReturnType()->getPointerAddressSpace() ==
1199 return Intrinsic::nvvm_mapa_shared_cluster;
1200
1201 if (Name.consume_front("cp.async.bulk.")) {
1204 .Case("global.to.shared.cluster",
1205 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1206 .Case("shared.cta.to.cluster",
1207 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1209
1211 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1213 return ID;
1214 }
1215
1217}
1218
1220 if (Name.consume_front("fma.rn."))
1221 return StringSwitch<Intrinsic::ID>(Name)
1222 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1223 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1224 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1225 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1227
1228 if (Name.consume_front("fmax."))
1229 return StringSwitch<Intrinsic::ID>(Name)
1230 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1231 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1232 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1233 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1234 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1235 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1236 .Case("ftz.nan.xorsign.abs.bf16",
1237 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1238 .Case("ftz.nan.xorsign.abs.bf16x2",
1239 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1240 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1241 .Case("ftz.xorsign.abs.bf16x2",
1242 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1243 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1244 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1245 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1246 .Case("nan.xorsign.abs.bf16x2",
1247 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1248 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1249 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1251
1252 if (Name.consume_front("fmin."))
1253 return StringSwitch<Intrinsic::ID>(Name)
1254 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1255 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1256 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1257 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1258 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1259 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1260 .Case("ftz.nan.xorsign.abs.bf16",
1261 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1262 .Case("ftz.nan.xorsign.abs.bf16x2",
1263 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1264 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1265 .Case("ftz.xorsign.abs.bf16x2",
1266 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1267 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1268 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1269 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1270 .Case("nan.xorsign.abs.bf16x2",
1271 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1272 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1273 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1275
1276 if (Name.consume_front("neg."))
1277 return StringSwitch<Intrinsic::ID>(Name)
1278 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1279 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1281
1283}
1284
1286 return Name.consume_front("local") || Name.consume_front("shared") ||
1287 Name.consume_front("global") || Name.consume_front("constant") ||
1288 Name.consume_front("param");
1289}
1290
1292 const FunctionType *FuncTy) {
1293 Type *HalfTy = Type::getHalfTy(FuncTy->getContext());
1294 if (Name.starts_with("to.fp16")) {
1295 return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0),
1296 HalfTy) &&
1297 CastInst::castIsValid(Instruction::BitCast, HalfTy,
1298 FuncTy->getReturnType());
1299 }
1300
1301 if (Name.starts_with("from.fp16")) {
1302 return CastInst::castIsValid(Instruction::BitCast, FuncTy->getParamType(0),
1303 HalfTy) &&
1304 CastInst::castIsValid(Instruction::FPExt, HalfTy,
1305 FuncTy->getReturnType());
1306 }
1307
1308 return false;
1309}
1310
1312 bool CanUpgradeDebugIntrinsicsToRecords) {
1313 assert(F && "Illegal to upgrade a non-existent Function.");
1314
1315 StringRef Name = F->getName();
1316
1317 // Quickly eliminate it, if it's not a candidate.
1318 if (!Name.consume_front("llvm.") || Name.empty())
1319 return false;
1320
1321 switch (Name[0]) {
1322 default: break;
1323 case 'a': {
1324 bool IsArm = Name.consume_front("arm.");
1325 if (IsArm || Name.consume_front("aarch64.")) {
1326 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1327 return true;
1328 break;
1329 }
1330
1331 if (Name.consume_front("amdgcn.")) {
1332 if (Name == "alignbit") {
1333 // Target specific intrinsic became redundant
1335 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1336 return true;
1337 }
1338
1339 if (Name.consume_front("atomic.")) {
1340 if (Name.starts_with("inc") || Name.starts_with("dec") ||
1341 Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1342 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1343 // and usub_sat so there's no new declaration.
1344 NewFn = nullptr;
1345 return true;
1346 }
1347 break; // No other 'amdgcn.atomic.*'
1348 }
1349
1350 switch (F->getIntrinsicID()) {
1351 default:
1352 break;
1353 // Legacy wmma iu intrinsics without the optional clamp operand.
1354 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1355 if (F->arg_size() == 7) {
1356 NewFn = nullptr;
1357 return true;
1358 }
1359 break;
1360 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1361 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1362 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1363 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1364 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1365 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1366 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1367 if (F->arg_size() == 8) {
1368 NewFn = nullptr;
1369 return true;
1370 }
1371 break;
1372 }
1373
1374 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1375 Name.consume_front("flat.atomic.")) {
1376 if (Name.starts_with("fadd") ||
1377 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1378 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1379 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1380 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1381 // declaration.
1382 NewFn = nullptr;
1383 return true;
1384 }
1385 }
1386
1387 if (Name.starts_with("ldexp.")) {
1388 // Target specific intrinsic became redundant
1390 F->getParent(), Intrinsic::ldexp,
1391 {F->getReturnType(), F->getArg(1)->getType()});
1392 return true;
1393 }
1394 break; // No other 'amdgcn.*'
1395 }
1396
1397 break;
1398 }
1399 case 'c': {
1400 if (F->arg_size() == 1) {
1401 if (Name.consume_front("convert.")) {
1402 if (convertIntrinsicValidType(Name, F->getFunctionType())) {
1403 NewFn = nullptr;
1404 return true;
1405 }
1406 }
1407
1409 .StartsWith("ctlz.", Intrinsic::ctlz)
1410 .StartsWith("cttz.", Intrinsic::cttz)
1413 rename(F);
1414 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1415 F->arg_begin()->getType());
1416 return true;
1417 }
1418 }
1419
1420 if (F->arg_size() == 2 && Name == "coro.end") {
1421 rename(F);
1422 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1423 Intrinsic::coro_end);
1424 return true;
1425 }
1426
1427 break;
1428 }
1429 case 'd':
1430 if (Name.consume_front("dbg.")) {
1431 // Mark debug intrinsics for upgrade to new debug format.
1432 if (CanUpgradeDebugIntrinsicsToRecords) {
1433 if (Name == "addr" || Name == "value" || Name == "assign" ||
1434 Name == "declare" || Name == "label") {
1435 // There's no function to replace these with.
1436 NewFn = nullptr;
1437 // But we do want these to get upgraded.
1438 return true;
1439 }
1440 }
1441 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1442 // converted to DbgVariableRecords later.
1443 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1444 rename(F);
1445 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1446 Intrinsic::dbg_value);
1447 return true;
1448 }
1449 break; // No other 'dbg.*'.
1450 }
1451 break;
1452 case 'e':
1453 if (Name.consume_front("experimental.vector.")) {
1456 // Skip over extract.last.active, otherwise it will be 'upgraded'
1457 // to a regular vector extract which is a different operation.
1458 .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1459 .StartsWith("extract.", Intrinsic::vector_extract)
1460 .StartsWith("insert.", Intrinsic::vector_insert)
1461 .StartsWith("reverse.", Intrinsic::vector_reverse)
1462 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1463 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1464 .StartsWith("partial.reduce.add",
1465 Intrinsic::vector_partial_reduce_add)
1468 const auto *FT = F->getFunctionType();
1470 if (ID == Intrinsic::vector_extract ||
1471 ID == Intrinsic::vector_interleave2)
1472 // Extracting overloads the return type.
1473 Tys.push_back(FT->getReturnType());
1474 if (ID != Intrinsic::vector_interleave2)
1475 Tys.push_back(FT->getParamType(0));
1476 if (ID == Intrinsic::vector_insert ||
1477 ID == Intrinsic::vector_partial_reduce_add)
1478 // Inserting overloads the inserted type.
1479 Tys.push_back(FT->getParamType(1));
1480 rename(F);
1481 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1482 return true;
1483 }
1484
1485 if (Name.consume_front("reduce.")) {
1487 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1488 if (R.match(Name, &Groups))
1490 .Case("add", Intrinsic::vector_reduce_add)
1491 .Case("mul", Intrinsic::vector_reduce_mul)
1492 .Case("and", Intrinsic::vector_reduce_and)
1493 .Case("or", Intrinsic::vector_reduce_or)
1494 .Case("xor", Intrinsic::vector_reduce_xor)
1495 .Case("smax", Intrinsic::vector_reduce_smax)
1496 .Case("smin", Intrinsic::vector_reduce_smin)
1497 .Case("umax", Intrinsic::vector_reduce_umax)
1498 .Case("umin", Intrinsic::vector_reduce_umin)
1499 .Case("fmax", Intrinsic::vector_reduce_fmax)
1500 .Case("fmin", Intrinsic::vector_reduce_fmin)
1502
1503 bool V2 = false;
1505 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1506 Groups.clear();
1507 V2 = true;
1508 if (R2.match(Name, &Groups))
1510 .Case("fadd", Intrinsic::vector_reduce_fadd)
1511 .Case("fmul", Intrinsic::vector_reduce_fmul)
1513 }
1515 rename(F);
1516 auto Args = F->getFunctionType()->params();
1517 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1518 {Args[V2 ? 1 : 0]});
1519 return true;
1520 }
1521 break; // No other 'expermental.vector.reduce.*'.
1522 }
1523
1524 if (Name.consume_front("splice"))
1525 return true;
1526 break; // No other 'experimental.vector.*'.
1527 }
1528 if (Name.consume_front("experimental.stepvector.")) {
1529 Intrinsic::ID ID = Intrinsic::stepvector;
1530 rename(F);
1532 F->getParent(), ID, F->getFunctionType()->getReturnType());
1533 return true;
1534 }
1535 break; // No other 'e*'.
1536 case 'f':
1537 if (Name.starts_with("flt.rounds")) {
1538 rename(F);
1539 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1540 Intrinsic::get_rounding);
1541 return true;
1542 }
1543 break;
1544 case 'i':
1545 if (Name.starts_with("invariant.group.barrier")) {
1546 // Rename invariant.group.barrier to launder.invariant.group
1547 auto Args = F->getFunctionType()->params();
1548 Type* ObjectPtr[1] = {Args[0]};
1549 rename(F);
1551 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1552 return true;
1553 }
1554 break;
1555 case 'l': {
1556 bool IsLifetimeStart = Name.consume_front("lifetime.start");
1557 bool IsLifetimeEnd = !IsLifetimeStart && Name.consume_front("lifetime.end");
1558 if (IsLifetimeStart || IsLifetimeEnd) {
1559 if (F->arg_size() == 2) {
1560 Intrinsic::ID IID = IsLifetimeStart ? Intrinsic::lifetime_start
1561 : Intrinsic::lifetime_end;
1562 rename(F);
1563 // Old 2 argument form of these intrinsics have [Size, Ptr] as
1564 // arguments. Use the Ptr argument to create new declaration.
1565 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1566 F->getArg(1)->getType());
1567 return true;
1568 } else if (F->arg_size() == 1 && Name == ".i64") {
1569 // Matches @llvm.lifetime.{start/end}.i64 which used to be created by
1570 // Autoupgrade prior to
1571 // https://github.com/llvm/llvm-project/pull/204601. This is an invalid
1572 // intrinsic with no expected calls. To allow auto-upgrade process to
1573 // delete such invalid intrinsic declaration, set NewFn = nullptr
1574 // and return true here. If there are actual calls to this intrinsic
1575 // (which is not expected), they will be deleted in
1576 // UpgradeIntrinsicCall.
1577 NewFn = nullptr;
1578 return true;
1579 }
1580 }
1581 break;
1582 }
1583 case 'm': {
1584 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1585 // alignment parameter to embedding the alignment as an attribute of
1586 // the pointer args.
1587 if (unsigned ID = StringSwitch<unsigned>(Name)
1588 .StartsWith("memcpy.", Intrinsic::memcpy)
1589 .StartsWith("memmove.", Intrinsic::memmove)
1590 .Default(0)) {
1591 if (F->arg_size() == 5) {
1592 rename(F);
1593 // Get the types of dest, src, and len
1594 ArrayRef<Type *> ParamTypes =
1595 F->getFunctionType()->params().slice(0, 3);
1596 NewFn =
1597 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1598 return true;
1599 }
1600 }
1601 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1602 rename(F);
1603 // Get the types of dest, and len
1604 const auto *FT = F->getFunctionType();
1605 Type *ParamTypes[2] = {
1606 FT->getParamType(0), // Dest
1607 FT->getParamType(2) // len
1608 };
1609 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1610 Intrinsic::memset, ParamTypes);
1611 return true;
1612 }
1613
1614 unsigned MaskedID =
1616 .StartsWith("masked.load", Intrinsic::masked_load)
1617 .StartsWith("masked.gather", Intrinsic::masked_gather)
1618 .StartsWith("masked.store", Intrinsic::masked_store)
1619 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1620 .Default(0);
1621 if (MaskedID && F->arg_size() == 4) {
1622 rename(F);
1623 if (MaskedID == Intrinsic::masked_load ||
1624 MaskedID == Intrinsic::masked_gather) {
1626 F->getParent(), MaskedID,
1627 {F->getReturnType(), F->getArg(0)->getType()});
1628 return true;
1629 }
1631 F->getParent(), MaskedID,
1632 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1633 return true;
1634 }
1635 break;
1636 }
1637 case 'n': {
1638 if (Name.consume_front("nvvm.")) {
1639 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1640 if (F->arg_size() == 1) {
1641 Intrinsic::ID IID =
1643 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1644 .Case("clz.i", Intrinsic::ctlz)
1645 .Case("popc.i", Intrinsic::ctpop)
1647 if (IID != Intrinsic::not_intrinsic) {
1648 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1649 {F->getReturnType()});
1650 return true;
1651 }
1652 } else if (F->arg_size() == 2) {
1653 Intrinsic::ID IID =
1655 .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
1656 .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
1657 .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
1658 .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
1660 if (IID != Intrinsic::not_intrinsic) {
1661 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1662 {F->getReturnType()});
1663 return true;
1664 }
1665 }
1666
1667 // Check for nvvm intrinsics that need a return type adjustment.
1668 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1670 if (IID != Intrinsic::not_intrinsic) {
1671 NewFn = nullptr;
1672 return true;
1673 }
1674 }
1675
1676 // Upgrade Distributed Shared Memory Intrinsics
1678 if (IID != Intrinsic::not_intrinsic) {
1679 rename(F);
1680 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1681 return true;
1682 }
1683
1684 // Upgrade TMA copy G2S Intrinsics
1686 if (IID != Intrinsic::not_intrinsic) {
1687 rename(F);
1688 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1689 return true;
1690 }
1691
1692 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1693 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1694 //
1695 // TODO: We could add lohi.i2d.
1696 bool Expand = false;
1697 if (Name.consume_front("abs."))
1698 // nvvm.abs.{i,ii}
1699 Expand =
1700 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1701 else if (Name.consume_front("fabs."))
1702 // nvvm.fabs.{f,ftz.f,d}
1703 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1704 else if (Name.consume_front("ex2.approx."))
1705 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1706 Expand =
1707 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1708 else if (Name.consume_front("atomic.load."))
1709 // nvvm.atomic.load.add.{f32,f64}.p
1710 // nvvm.atomic.load.{inc,dec}.32.p
1711 Expand = StringSwitch<bool>(Name)
1712 .StartsWith("add.f32.p", true)
1713 .StartsWith("add.f64.p", true)
1714 .StartsWith("inc.32.p", true)
1715 .StartsWith("dec.32.p", true)
1716 .Default(false);
1717 else if (Name.consume_front("atomic."))
1718 // nvvm.atomic.{add,exch,max,min,inc,dec,and,or,xor}.gen.{i,f}.{cta,sys}
1719 // nvvm.atomic.cas.gen.i.{cta,sys}
1720 Expand = StringSwitch<bool>(Name)
1721 .StartsWith("add.gen.", true)
1722 .StartsWith("exch.gen.", true)
1723 .StartsWith("max.gen.", true)
1724 .StartsWith("min.gen.", true)
1725 .StartsWith("inc.gen.", true)
1726 .StartsWith("dec.gen.", true)
1727 .StartsWith("and.gen.", true)
1728 .StartsWith("or.gen.", true)
1729 .StartsWith("xor.gen.", true)
1730 .StartsWith("cas.gen.", true)
1731 .Default(false);
1732 else if (Name.consume_front("bitcast."))
1733 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1734 Expand =
1735 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1736 else if (Name.consume_front("rotate."))
1737 // nvvm.rotate.{b32,b64,right.b64}
1738 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1739 else if (Name.consume_front("ptr.gen.to."))
1740 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1741 Expand = consumeNVVMPtrAddrSpace(Name);
1742 else if (Name.consume_front("ptr."))
1743 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1744 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1745 else if (Name.consume_front("ldg.global."))
1746 // nvvm.ldg.global.{i,p,f}
1747 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1748 Name.starts_with("p."));
1749 else
1750 Expand = StringSwitch<bool>(Name)
1751 .Case("barrier0", true)
1752 .Case("barrier.n", true)
1753 .Case("barrier.sync.cnt", true)
1754 .Case("barrier.sync", true)
1755 .Case("barrier", true)
1756 .Case("bar.sync", true)
1757 .Case("barrier0.popc", true)
1758 .Case("barrier0.and", true)
1759 .Case("barrier0.or", true)
1760 .Case("clz.ll", true)
1761 .Case("popc.ll", true)
1762 .Case("h2f", true)
1763 .Case("swap.lo.hi.b64", true)
1764 .Case("tanh.approx.f32", true)
1765 .Default(false);
1766
1767 if (Expand) {
1768 NewFn = nullptr;
1769 return true;
1770 }
1771 break; // No other 'nvvm.*'.
1772 }
1773 break;
1774 }
1775 case 'o':
1776 if (Name.starts_with("objectsize.")) {
1777 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1778 if (F->arg_size() == 2 || F->arg_size() == 3) {
1779 rename(F);
1780 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1781 Intrinsic::objectsize, Tys);
1782 return true;
1783 }
1784 }
1785 break;
1786
1787 case 'p':
1788 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1789 rename(F);
1791 F->getParent(), Intrinsic::ptr_annotation,
1792 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1793 return true;
1794 }
1795 break;
1796
1797 case 'r': {
1798 if (Name.consume_front("riscv.")) {
1801 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1802 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1803 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1804 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1807 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1808 rename(F);
1809 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1810 return true;
1811 }
1812 break; // No other applicable upgrades.
1813 }
1814
1816 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1817 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1820 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1821 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1822 rename(F);
1823 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1824 return true;
1825 }
1826 break; // No other applicable upgrades.
1827 }
1828
1830 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1831 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1832 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1833 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1834 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1835 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1838 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1839 rename(F);
1840 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1841 return true;
1842 }
1843 break; // No other applicable upgrades.
1844 }
1845
1846 // Replace llvm.riscv.clmul with llvm.clmul.
1847 if (Name == "clmul.i32" || Name == "clmul.i64") {
1849 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1850 return true;
1851 }
1852
1853 break; // No other 'riscv.*' intrinsics
1854 }
1855 } break;
1856
1857 case 's':
1858 if (Name == "stackprotectorcheck") {
1859 NewFn = nullptr;
1860 return true;
1861 }
1862 break;
1863
1864 case 't':
1865 if (Name == "thread.pointer") {
1867 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1868 return true;
1869 }
1870 break;
1871
1872 case 'v': {
1873 if (Name == "var.annotation" && F->arg_size() == 4) {
1874 rename(F);
1876 F->getParent(), Intrinsic::var_annotation,
1877 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1878 return true;
1879 }
1880 if (Name.consume_front("vector.splice")) {
1881 if (Name.starts_with(".left") || Name.starts_with(".right"))
1882 break;
1883 return true;
1884 }
1885 break;
1886 }
1887
1888 case 'w':
1889 if (Name.consume_front("wasm.")) {
1892 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1893 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1894 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1897 rename(F);
1898 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1899 F->getReturnType());
1900 return true;
1901 }
1902
1903 if (Name.consume_front("dot.i8x16.i7x16.")) {
1905 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1906 .Case("add.signed",
1907 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1910 rename(F);
1911 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1912 return true;
1913 }
1914 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1915 }
1916 break; // No other 'wasm.*'.
1917 }
1918 break;
1919
1920 case 'x':
1921 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1922 return true;
1923 }
1924
1925 auto *ST = dyn_cast<StructType>(F->getReturnType());
1926 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1927 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1928 // Replace return type with literal non-packed struct. Only do this for
1929 // intrinsics declared to return a struct, not for intrinsics with
1930 // overloaded return type, in which case the exact struct type will be
1931 // mangled into the name.
1932 if (Intrinsic::hasStructReturnType(F->getIntrinsicID())) {
1933 FunctionType *FT = F->getFunctionType();
1934 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1935 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1936 std::string Name = F->getName().str();
1937 rename(F);
1938 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1939 Name, F->getParent());
1940
1941 // The new function may also need remangling.
1942 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1943 NewFn = *Result;
1944 return true;
1945 }
1946 }
1947
1948 // Remangle our intrinsic since we upgrade the mangling
1950 if (Result != std::nullopt) {
1951 NewFn = *Result;
1952 return true;
1953 }
1954
1955 // This may not belong here. This function is effectively being overloaded
1956 // to both detect an intrinsic which needs upgrading, and to provide the
1957 // upgraded form of the intrinsic. We should perhaps have two separate
1958 // functions for this.
1959 return false;
1960}
1961
1963 bool CanUpgradeDebugIntrinsicsToRecords) {
// NOTE(review): the line carrying this function's return type, name and
// leading parameters is absent from this listing.
//
// Resets NewFn, asks upgradeIntrinsicFunction1 whether F needs upgrading (it
// may hand back a replacement declaration through NewFn), then refreshes the
// attributes of whichever declaration is current. Returns the helper's
// result unchanged.
1964 NewFn = nullptr;
1965 bool Upgraded =
1966 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1967
1968 // Upgrade intrinsic attributes. This does not change the function.
// When a replacement declaration was produced, the attribute refresh below
// is applied to it rather than to the original F.
1969 if (NewFn)
1970 F = NewFn;
1971 if (Intrinsic::ID id = F->getIntrinsicID()) {
1972 // Only do this if the intrinsic signature is valid.
// OverloadTys is handed to isSignatureValid; its contents are not read
// afterwards in this function.
1973 SmallVector<Type *> OverloadTys;
1974 if (Intrinsic::isSignatureValid(id, F->getFunctionType(), OverloadTys))
1975 F->setAttributes(
1976 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1977 }
1978 return Upgraded;
1979}
1980
// NOTE(review): this listing is missing the function's signature line, the
// lines that define ATy and STy, and the final operand of the
// ConstantStruct::get call below.
//
// Only a global named "llvm.global_ctors" or "llvm.global_dtors" that has an
// initializer is considered; anything else yields nullptr.
1982 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1983 GV->getName() == "llvm.global_dtors")) ||
1984 !GV->hasInitializer())
1985 return nullptr;
1987 if (!ATy)
1988 return nullptr;
// Only the two-field element form is rewritten.
1990 if (!STy || STy->getNumElements() != 2)
1991 return nullptr;
1992
1993 LLVMContext &C = GV->getContext();
1994 IRBuilder<> IRB(C);
// New element type: the two existing field types followed by a pointer
// field.
1995 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1996 IRB.getPtrTy());
1997 Constant *Init = GV->getInitializer();
1998 unsigned N = Init->getNumOperands();
1999 std::vector<Constant *> NewCtors(N);
// Rebuild every entry of the initializer with the widened element type,
// carrying over its first two fields.
2000 for (unsigned i = 0; i != N; ++i) {
2001 auto Ctor = cast<Constant>(Init->getOperand(i));
2002 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
2003 Ctor->getAggregateElement(1),
2005 }
2006 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
2007
// The replacement global reuses GV's linkage and name.
// NOTE(review): the literal `false` is presumably the isConstant flag --
// confirm against the GlobalVariable constructor.
2008 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
2009 NewInit, GV->getName());
2010}
2011
2012// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
2013// to byte shuffles.
2015 unsigned Shift) {
// NOTE(review): the first line of this definition (return type, name and
// leading parameters) is absent from this listing.
//
// Op is reinterpreted as a byte vector, shuffled against a zero vector, and
// the result is cast back to Op's original type.
2016 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// Eight bytes per input element, so NumElts is the byte count.
2017 unsigned NumElts = ResultTy->getNumElements() * 8;
2018
2019 // Bitcast from a 64-bit element type to a byte element type.
2020 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
2021 Op = Builder.CreateBitCast(Op, VecTy, "cast");
2022
2023 // We'll be shuffling in zeroes.
2024 Value *Res = Constant::getNullValue(VecTy);
2025
2026 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
2027 // we'll just return the zero vector.
2028 if (Shift < 16) {
// NOTE(review): 64 slots presumably cover the widest (512-bit) form.
2029 int Idxs[64];
2030 // 256/512-bit version is split into 2/4 16-byte lanes.
2031 for (unsigned l = 0; l != NumElts; l += 16)
2032 for (unsigned i = 0; i != 16; ++i) {
// Start from an index into the second shuffle operand (Op, offset by
// NumElts). For i < Shift the value falls below NumElts and is remapped
// to 16 + i - Shift, an index into the first operand (the zero vector).
2033 unsigned Idx = NumElts + i - Shift;
2034 if (Idx < NumElts)
2035 Idx -= NumElts - 16; // end of lane, switch operand.
2036 Idxs[l + i] = Idx + l;
2037 }
2038
// Operand order: zero vector first, data second.
2039 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
2040 }
2041
2042 // Bitcast back to a 64-bit element type.
2043 return Builder.CreateBitCast(Res, ResultTy, "cast");
2044}
2045
2046// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
2047// to byte shuffles.
2049 unsigned Shift) {
// NOTE(review): the first line of this definition (return type, name and
// leading parameters) is absent from this listing.
//
// Op is reinterpreted as a byte vector, shuffled against a zero vector, and
// the result is cast back to Op's original type.
2050 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// Eight bytes per input element, so NumElts is the byte count.
2051 unsigned NumElts = ResultTy->getNumElements() * 8;
2052
2053 // Bitcast from a 64-bit element type to a byte element type.
2054 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
2055 Op = Builder.CreateBitCast(Op, VecTy, "cast");
2056
2057 // We'll be shuffling in zeroes.
2058 Value *Res = Constant::getNullValue(VecTy);
2059
2060 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
2061 // we'll just return the zero vector.
2062 if (Shift < 16) {
// NOTE(review): 64 slots presumably cover the widest (512-bit) form.
2063 int Idxs[64];
2064 // 256/512-bit version is split into 2/4 16-byte lanes.
2065 for (unsigned l = 0; l != NumElts; l += 16)
2066 for (unsigned i = 0; i != 16; ++i) {
// Indices below 16 read from the first shuffle operand (Op). Once
// i + Shift runs past the lane, the index is bumped by NumElts - 16 so
// that it lands in the second operand (the zero vector).
2067 unsigned Idx = i + Shift;
2068 if (Idx >= 16)
2069 Idx += NumElts - 16; // end of lane, switch operand.
2070 Idxs[l + i] = Idx + l;
2071 }
2072
// Operand order: data first, zero vector second.
2073 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
2074 }
2075
2076 // Bitcast back to a 64-bit element type.
2077 return Builder.CreateBitCast(Res, ResultTy, "cast");
2078}
2079
// Converts an integer bitmask into a vector of i1 with NumElts elements.
// The mask is first bitcast to a vector type built from i1 and the mask's
// integer bit width; for NumElts <= 4 the leading NumElts elements are then
// extracted with a shuffle.
// NOTE(review): the line that begins the MaskTy declaration is absent from
// this listing.
2080static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2081 unsigned NumElts) {
2082 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
2084 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
2085 Mask = Builder.CreateBitCast(Mask, MaskTy);
2086
2087 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2088 // i8 and we need to extract down to the right number of elements.
2089 if (NumElts <= 4) {
2090 int Indices[4];
// Identity indices 0..NumElts-1: keep only the low NumElts mask bits.
2091 for (unsigned i = 0; i != NumElts; ++i)
2092 Indices[i] = i;
2093 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
2094 "extract");
2095 }
2096
2097 return Mask;
2098}
2099
2100static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2101 Value *Op1) {
2102 // If the mask is all ones just emit the first operation.
2103 if (const auto *C = dyn_cast<Constant>(Mask))
2104 if (C->isAllOnesValue())
2105 return Op0;
2106
2107 Mask = getX86MaskVec(Builder, Mask,
2108 cast<FixedVectorType>(Op0->getType())->getNumElements());
2109 return Builder.CreateSelect(Mask, Op0, Op1);
2110}
2111
2112static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2113 Value *Op1) {
2114 // If the mask is all ones just emit the first operation.
2115 if (const auto *C = dyn_cast<Constant>(Mask))
2116 if (C->isAllOnesValue())
2117 return Op0;
2118
2119 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2120 Mask->getType()->getIntegerBitWidth());
2121 Mask = Builder.CreateBitCast(Mask, MaskTy);
2122 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2123 return Builder.CreateSelect(Mask, Op0, Op1);
2124}
2125
2126// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2127// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2128// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// NOTE(review): this listing is missing the first line of the signature, the
// statement guarded by `if (ShiftVal >= 32)`, and one statement inside the
// `ShiftVal > 16` block.
2130 Value *Op1, Value *Shift,
2131 Value *Passthru, Value *Mask,
2132 bool IsVALIGN) {
// Shift must be a ConstantInt; cast<> asserts otherwise.
2133 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2134
2135 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2136 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2137 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2138 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2139
2140 // Mask the immediate for VALIGN.
2141 if (IsVALIGN)
2142 ShiftVal &= (NumElts - 1);
2143
2144 // If palignr is shifting the pair of vectors more than the size of two
2145 // lanes, emit zero.
2146 if (ShiftVal >= 32)
2148
2149 // If palignr is shifting the pair of input vectors more than one lane,
2150 // but less than two lanes, convert to shifting in zeroes.
2151 if (ShiftVal > 16) {
2152 ShiftVal -= 16;
2153 Op1 = Op0;
2155 }
2156
2157 int Indices[64];
2158 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2159 for (unsigned l = 0; l < NumElts; l += 16) {
2160 for (unsigned i = 0; i != 16; ++i) {
2161 unsigned Idx = ShiftVal + i;
2162 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2163 Idx += NumElts - 16; // End of lane, switch operand.
2164 Indices[l + i] = Idx + l;
2165 }
2166 }
2167
// Note the operand order: Op1 is the first shuffle input, Op0 the second.
// Only the first NumElts entries of Indices are consumed.
2168 Value *Align = Builder.CreateShuffleVector(
2169 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2170
// Apply the write mask, falling back to Passthru for masked-off lanes.
2171 return emitX86Select(Builder, Mask, Align, Passthru);
2172}
2173
2175 bool ZeroMask, bool IndexForm) {
// NOTE(review): the first line of this definition (return type, name and
// leading parameters) is absent from this listing.
//
// Picks the vpermi2var intrinsic that matches the call's result type (total
// vector width, element width, and FP vs. integer for 32/64-bit elements),
// calls it on the first three call operands, and blends the result with a
// passthru value under the mask found in operand 3.
2176 Type *Ty = CI.getType();
2177 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2178 unsigned EltWidth = Ty->getScalarSizeInBits();
2179 bool IsFloat = Ty->isFPOrFPVectorTy();
2180 Intrinsic::ID IID;
2181 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2182 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2183 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2184 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2185 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2186 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2187 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2188 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2189 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2190 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2191 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2192 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2193 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2194 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2195 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2196 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2197 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2198 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2199 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2200 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2201 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2202 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2203 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2204 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
// 16- and 8-bit element forms are selected without consulting IsFloat.
2205 else if (VecWidth == 128 && EltWidth == 16)
2206 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2207 else if (VecWidth == 256 && EltWidth == 16)
2208 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2209 else if (VecWidth == 512 && EltWidth == 16)
2210 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2211 else if (VecWidth == 128 && EltWidth == 8)
2212 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2213 else if (VecWidth == 256 && EltWidth == 8)
2214 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2215 else if (VecWidth == 512 && EltWidth == 8)
2216 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2217 else
2218 llvm_unreachable("Unexpected intrinsic");
2219
2220 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2221 CI.getArgOperand(2) };
2222
2223 // If this isn't index form we need to swap operand 0 and 1.
2224 if (!IndexForm)
2225 std::swap(Args[0], Args[1]);
2226
2227 Value *V = Builder.CreateIntrinsic(IID, Args);
// Passthru is either zero or the call's own operand 1 (taken from CI, so it
// is unaffected by the swap above), bitcast to the result type.
2228 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2229 : Builder.CreateBitCast(CI.getArgOperand(1),
2230 Ty);
2231 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2232}
2233
2235 Intrinsic::ID IID) {
// NOTE(review): the first line of this definition (return type, name and
// leading parameters) is absent from this listing.
//
// Emits intrinsic IID, overloaded on the call's result type, over the call's
// first two operands. A four-argument call is treated as the masked form:
// operand 2 is the passthru and operand 3 the mask.
2236 Type *Ty = CI.getType();
2237 Value *Op0 = CI.getOperand(0);
2238 Value *Op1 = CI.getOperand(1);
2239 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2240
2241 if (CI.arg_size() == 4) { // For masked intrinsics.
2242 Value *VecSrc = CI.getOperand(2);
2243 Value *Mask = CI.getOperand(3);
2244 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2245 }
2246 return Res;
2247}
2248
2250 bool IsRotateRight) {
// NOTE(review): the first line of this definition (return type, name and
// leading parameters) is absent from this listing.
//
// Emits a funnel shift (fshr when IsRotateRight, otherwise fshl) whose two
// data inputs are both operand 0 and whose amount is operand 1. A
// four-argument call is treated as the masked form: operand 2 is the
// passthru and operand 3 the mask.
2251 Type *Ty = CI.getType();
2252 Value *Src = CI.getArgOperand(0);
2253 Value *Amt = CI.getArgOperand(1);
2254
2255 // Amount may be scalar immediate, in which case create a splat vector.
2256 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2257 // we only care about the lowest log2 bits anyway.
2258 if (Amt->getType() != Ty) {
2259 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// isSigned=false: the amount is zero-extended or truncated to the element
// type before being splatted.
2260 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2261 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2262 }
2263
2264 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2265 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2266
2267 if (CI.arg_size() == 4) { // For masked intrinsics.
2268 Value *VecSrc = CI.getOperand(2);
2269 Value *Mask = CI.getOperand(3);
2270 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2271 }
2272 return Res;
2273}
2274
2275static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2276 bool IsSigned) {
2277 Type *Ty = CI.getType();
2278 Value *LHS = CI.getArgOperand(0);
2279 Value *RHS = CI.getArgOperand(1);
2280
2281 CmpInst::Predicate Pred;
2282 switch (Imm) {
2283 case 0x0:
2284 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2285 break;
2286 case 0x1:
2287 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2288 break;
2289 case 0x2:
2290 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2291 break;
2292 case 0x3:
2293 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2294 break;
2295 case 0x4:
2296 Pred = ICmpInst::ICMP_EQ;
2297 break;
2298 case 0x5:
2299 Pred = ICmpInst::ICMP_NE;
2300 break;
2301 case 0x6:
2302 return Constant::getNullValue(Ty); // FALSE
2303 case 0x7:
2304 return Constant::getAllOnesValue(Ty); // TRUE
2305 default:
2306 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2307 }
2308
2309 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2310 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2311 return Ext;
2312}
2313
2315 bool IsShiftRight, bool ZeroMask) {
// NOTE(review): the first line of this definition (return type, name and
// leading parameters) is absent from this listing.
//
// Emits a funnel shift (fshr when IsShiftRight, otherwise fshl) over the
// call's first two operands with operand 2 as the amount, then applies the
// mask when the call has four or more arguments.
2316 Type *Ty = CI.getType();
2317 Value *Op0 = CI.getArgOperand(0);
2318 Value *Op1 = CI.getArgOperand(1);
2319 Value *Amt = CI.getArgOperand(2);
2320
// The right-shift form feeds the two data operands in the opposite order.
2321 if (IsShiftRight)
2322 std::swap(Op0, Op1);
2323
2324 // Amount may be scalar immediate, in which case create a splat vector.
2325 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2326 // we only care about the lowest log2 bits anyway.
2327 if (Amt->getType() != Ty) {
2328 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2329 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2330 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2331 }
2332
2333 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2334 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2335
2336 unsigned NumArgs = CI.arg_size();
2337 if (NumArgs >= 4) { // For masked intrinsics.
// Passthru: with five arguments it is operand 3; otherwise it is zero when
// ZeroMask is set, else the call's own operand 0 (read from CI, so the swap
// above does not affect it). The mask is always the last argument.
2338 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2339 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2340 CI.getArgOperand(0);
2341 Value *Mask = CI.getOperand(NumArgs - 1);
2342 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2343 }
2344 return Res;
2345}
2346
2348 Value *Mask, bool Aligned) {
// NOTE(review): the first line of this definition (return type, name and
// leading parameters) is absent from this listing.
//
// Stores Data through Ptr: an ordinary aligned store when Mask is a constant
// all-ones value, otherwise a masked store driven by the i1-vector form of
// Mask.
// Aligned requests alignment equal to Data's size in bytes; otherwise the
// store is treated as byte-aligned.
2349 const Align Alignment =
2350 Aligned
2351 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2352 : Align(1);
2353
2354 // If the mask is all ones just emit a regular store.
2355 if (const auto *C = dyn_cast<Constant>(Mask))
2356 if (C->isAllOnesValue())
2357 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2358
2359 // Convert the mask from an integer type to a vector of i1.
2360 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2361 Mask = getX86MaskVec(Builder, Mask, NumElts);
2362 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2363}
2364
2366 Value *Passthru, Value *Mask, bool Aligned) {
// NOTE(review): the first line of this definition (return type, name and
// leading parameters) and one line inside the Align(...) expression below
// are absent from this listing.
//
// Loads a value of Passthru's type through Ptr: an ordinary aligned load
// when Mask is a constant all-ones value, otherwise a masked load that
// passes Passthru along as the pass-through operand.
2367 Type *ValTy = Passthru->getType();
2368 const Align Alignment =
2369 Aligned
2370 ? Align(
2372 8)
2373 : Align(1);
2374
2375 // If the mask is all ones just emit a regular load.
2376 if (const auto *C = dyn_cast<Constant>(Mask))
2377 if (C->isAllOnesValue())
2378 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2379
2380 // Convert the mask from an integer type to a vector of i1.
2381 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2382 Mask = getX86MaskVec(Builder, Mask, NumElts);
2383 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2384}
2385
2386static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2387 Type *Ty = CI.getType();
2388 Value *Op0 = CI.getArgOperand(0);
2389 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2390 {Op0, Builder.getInt1(false)});
2391 if (CI.arg_size() == 3)
2392 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2393 return Res;
2394}
2395
2396static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2397 Type *Ty = CI.getType();
2398
2399 // Arguments have a vXi32 type so cast to vXi64.
2400 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2401 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2402
2403 if (IsSigned) {
2404 // Shift left then arithmetic shift right.
2405 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2406 LHS = Builder.CreateShl(LHS, ShiftAmt);
2407 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2408 RHS = Builder.CreateShl(RHS, ShiftAmt);
2409 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2410 } else {
2411 // Clear the upper bits.
2412 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2413 LHS = Builder.CreateAnd(LHS, Mask);
2414 RHS = Builder.CreateAnd(RHS, Mask);
2415 }
2416
2417 Value *Res = Builder.CreateMul(LHS, RHS);
2418
2419 if (CI.arg_size() == 4)
2420 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2421
2422 return Res;
2423}
2424
2425// Apply the mask to a vector of i1's and make sure the result is at least 8 bits wide.
// NOTE(review): the first line of the signature and the second operand of
// the CreateShuffleVector call below are absent from this listing.
2427 Value *Mask) {
2428 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
// A null Mask, or a constant all-ones Mask, leaves Vec untouched.
2429 if (Mask) {
2430 const auto *C = dyn_cast<Constant>(Mask);
2431 if (!C || !C->isAllOnesValue())
2432 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2433 }
2434
// Widen vectors shorter than 8 lanes to exactly 8 so that the final bitcast
// targets at least an i8.
2435 if (NumElts < 8) {
2436 int Indices[8];
// Slots 0..NumElts-1 keep Vec's own lanes.
2437 for (unsigned i = 0; i != NumElts; ++i)
2438 Indices[i] = i;
// The remaining slots use indices in [NumElts, 2*NumElts), i.e. lanes of
// the shuffle's second operand.
2439 for (unsigned i = NumElts; i != 8; ++i)
2440 Indices[i] = NumElts + i % NumElts;
2441 Vec = Builder.CreateShuffleVector(Vec,
2443 Indices);
2444 }
2445 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2446}
2447
2449 unsigned CC, bool Signed) {
// NOTE(review): this listing is missing the first line of the signature, the
// opening lines of the two expressions in the CC == 3 and CC == 7 branches,
// and the declaration of Pred.
//
// Builds an i1-vector comparison of the call's first two operands according
// to condition code CC, then applies the call's last argument as a mask via
// applyX86MaskOn1BitsVec.
2450 Value *Op0 = CI.getArgOperand(0);
2451 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2452
2453 Value *Cmp;
// CC values 3 and 7 do not emit a compare; each takes a value built from an
// i1 vector type of NumElts lanes.
2454 if (CC == 3) {
2456 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2457 } else if (CC == 7) {
2459 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2460 } else {
2462 switch (CC) {
2463 default: llvm_unreachable("Unknown condition code");
2464 case 0: Pred = ICmpInst::ICMP_EQ; break;
2465 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2466 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2467 case 4: Pred = ICmpInst::ICMP_NE; break;
2468 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2469 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2470 }
2471 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2472 }
2473
// The mask is whatever the call's final argument is.
2474 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2475
2476 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2477}
2478
2479// Replace a masked intrinsic with an older unmasked intrinsic.
// NOTE(review): the first line of the signature is absent from this listing.
// The unmasked intrinsic IID is called on operands 0 and 1; operand 2 is the
// passthru and operand 3 the mask for the final select.
2481 Intrinsic::ID IID) {
2482 Value *Rep =
2483 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2484 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2485}
2486
// NOTE(review): the signature line of this function is absent from this
// listing; only the body is visible.
//
// Produces A with its element 0 replaced: when bit 0 of Mask is set the new
// element is B[0], otherwise Src[0].
2488 Value* A = CI.getArgOperand(0);
2489 Value* B = CI.getArgOperand(1);
2490 Value* Src = CI.getArgOperand(2);
2491 Value* Mask = CI.getArgOperand(3);
2492
// Isolate bit 0 of the mask (the constant is an 8-bit 1) and test it.
2493 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2494 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2495 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2496 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2497 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2498 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2499}
2500
// NOTE(review): the signature line of this function is absent from this
// listing; only the body is visible.
//
// Converts the integer mask in operand 0 to an i1 vector with one lane per
// element of the call's result type, then sign-extends it to that type.
2502 Value* Op = CI.getArgOperand(0);
2503 Type* ReturnOp = CI.getType();
2504 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2505 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2506 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2507}
2508
2509// Replace intrinsic with unmasked version and a select.
// NOTE(review): the first line of the signature is absent from this listing.
//
// Maps the masked intrinsic name (after the "avx512.mask." prefix is
// dropped) plus the call's result type to an unmasked intrinsic ID. When a
// mapping exists, the unmasked intrinsic is called with all arguments except
// the last two, the result is blended with the second-to-last argument
// (passthru) under the last argument (mask), the outcome is stored in Rep and
// true is returned. Names not listed here return false, and the code below
// leaves Rep unassigned in that case.
2511 CallBase &CI, Value *&Rep) {
2512 Name = Name.substr(12); // Remove avx512.mask.
2513
2514 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2515 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2516 Intrinsic::ID IID;
2517 if (Name.starts_with("max.p")) {
2518 if (VecWidth == 128 && EltWidth == 32)
2519 IID = Intrinsic::x86_sse_max_ps;
2520 else if (VecWidth == 128 && EltWidth == 64)
2521 IID = Intrinsic::x86_sse2_max_pd;
2522 else if (VecWidth == 256 && EltWidth == 32)
2523 IID = Intrinsic::x86_avx_max_ps_256;
2524 else if (VecWidth == 256 && EltWidth == 64)
2525 IID = Intrinsic::x86_avx_max_pd_256;
2526 else
2527 llvm_unreachable("Unexpected intrinsic");
2528 } else if (Name.starts_with("min.p")) {
2529 if (VecWidth == 128 && EltWidth == 32)
2530 IID = Intrinsic::x86_sse_min_ps;
2531 else if (VecWidth == 128 && EltWidth == 64)
2532 IID = Intrinsic::x86_sse2_min_pd;
2533 else if (VecWidth == 256 && EltWidth == 32)
2534 IID = Intrinsic::x86_avx_min_ps_256;
2535 else if (VecWidth == 256 && EltWidth == 64)
2536 IID = Intrinsic::x86_avx_min_pd_256;
2537 else
2538 llvm_unreachable("Unexpected intrinsic");
2539 } else if (Name.starts_with("pshuf.b.")) {
2540 if (VecWidth == 128)
2541 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2542 else if (VecWidth == 256)
2543 IID = Intrinsic::x86_avx2_pshuf_b;
2544 else if (VecWidth == 512)
2545 IID = Intrinsic::x86_avx512_pshuf_b_512;
2546 else
2547 llvm_unreachable("Unexpected intrinsic");
2548 } else if (Name.starts_with("pmul.hr.sw.")) {
2549 if (VecWidth == 128)
2550 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2551 else if (VecWidth == 256)
2552 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2553 else if (VecWidth == 512)
2554 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2555 else
2556 llvm_unreachable("Unexpected intrinsic");
2557 } else if (Name.starts_with("pmulh.w.")) {
2558 if (VecWidth == 128)
2559 IID = Intrinsic::x86_sse2_pmulh_w;
2560 else if (VecWidth == 256)
2561 IID = Intrinsic::x86_avx2_pmulh_w;
2562 else if (VecWidth == 512)
2563 IID = Intrinsic::x86_avx512_pmulh_w_512;
2564 else
2565 llvm_unreachable("Unexpected intrinsic");
2566 } else if (Name.starts_with("pmulhu.w.")) {
2567 if (VecWidth == 128)
2568 IID = Intrinsic::x86_sse2_pmulhu_w;
2569 else if (VecWidth == 256)
2570 IID = Intrinsic::x86_avx2_pmulhu_w;
2571 else if (VecWidth == 512)
2572 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2573 else
2574 llvm_unreachable("Unexpected intrinsic");
2575 } else if (Name.starts_with("pmaddw.d.")) {
2576 if (VecWidth == 128)
2577 IID = Intrinsic::x86_sse2_pmadd_wd;
2578 else if (VecWidth == 256)
2579 IID = Intrinsic::x86_avx2_pmadd_wd;
2580 else if (VecWidth == 512)
2581 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2582 else
2583 llvm_unreachable("Unexpected intrinsic");
2584 } else if (Name.starts_with("pmaddubs.w.")) {
2585 if (VecWidth == 128)
2586 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2587 else if (VecWidth == 256)
2588 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2589 else if (VecWidth == 512)
2590 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2591 else
2592 llvm_unreachable("Unexpected intrinsic");
2593 } else if (Name.starts_with("packsswb.")) {
2594 if (VecWidth == 128)
2595 IID = Intrinsic::x86_sse2_packsswb_128;
2596 else if (VecWidth == 256)
2597 IID = Intrinsic::x86_avx2_packsswb;
2598 else if (VecWidth == 512)
2599 IID = Intrinsic::x86_avx512_packsswb_512;
2600 else
2601 llvm_unreachable("Unexpected intrinsic");
2602 } else if (Name.starts_with("packssdw.")) {
2603 if (VecWidth == 128)
2604 IID = Intrinsic::x86_sse2_packssdw_128;
2605 else if (VecWidth == 256)
2606 IID = Intrinsic::x86_avx2_packssdw;
2607 else if (VecWidth == 512)
2608 IID = Intrinsic::x86_avx512_packssdw_512;
2609 else
2610 llvm_unreachable("Unexpected intrinsic");
2611 } else if (Name.starts_with("packuswb.")) {
2612 if (VecWidth == 128)
2613 IID = Intrinsic::x86_sse2_packuswb_128;
2614 else if (VecWidth == 256)
2615 IID = Intrinsic::x86_avx2_packuswb;
2616 else if (VecWidth == 512)
2617 IID = Intrinsic::x86_avx512_packuswb_512;
2618 else
2619 llvm_unreachable("Unexpected intrinsic");
2620 } else if (Name.starts_with("packusdw.")) {
2621 if (VecWidth == 128)
2622 IID = Intrinsic::x86_sse41_packusdw;
2623 else if (VecWidth == 256)
2624 IID = Intrinsic::x86_avx2_packusdw;
2625 else if (VecWidth == 512)
2626 IID = Intrinsic::x86_avx512_packusdw_512;
2627 else
2628 llvm_unreachable("Unexpected intrinsic");
2629 } else if (Name.starts_with("vpermilvar.")) {
2630 if (VecWidth == 128 && EltWidth == 32)
2631 IID = Intrinsic::x86_avx_vpermilvar_ps;
2632 else if (VecWidth == 128 && EltWidth == 64)
2633 IID = Intrinsic::x86_avx_vpermilvar_pd;
2634 else if (VecWidth == 256 && EltWidth == 32)
2635 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2636 else if (VecWidth == 256 && EltWidth == 64)
2637 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2638 else if (VecWidth == 512 && EltWidth == 32)
2639 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2640 else if (VecWidth == 512 && EltWidth == 64)
2641 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2642 else
2643 llvm_unreachable("Unexpected intrinsic");
// The conversion entries below are matched by exact name, not by prefix.
2644 } else if (Name == "cvtpd2dq.256") {
2645 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2646 } else if (Name == "cvtpd2ps.256") {
2647 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2648 } else if (Name == "cvttpd2dq.256") {
2649 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2650 } else if (Name == "cvttps2dq.128") {
2651 IID = Intrinsic::x86_sse2_cvttps2dq;
2652 } else if (Name == "cvttps2dq.256") {
2653 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2654 } else if (Name.starts_with("permvar.")) {
2655 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2656 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2657 IID = Intrinsic::x86_avx2_permps;
2658 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2659 IID = Intrinsic::x86_avx2_permd;
2660 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2661 IID = Intrinsic::x86_avx512_permvar_df_256;
2662 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2663 IID = Intrinsic::x86_avx512_permvar_di_256;
2664 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2665 IID = Intrinsic::x86_avx512_permvar_sf_512;
2666 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2667 IID = Intrinsic::x86_avx512_permvar_si_512;
2668 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2669 IID = Intrinsic::x86_avx512_permvar_df_512;
2670 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2671 IID = Intrinsic::x86_avx512_permvar_di_512;
2672 else if (VecWidth == 128 && EltWidth == 16)
2673 IID = Intrinsic::x86_avx512_permvar_hi_128;
2674 else if (VecWidth == 256 && EltWidth == 16)
2675 IID = Intrinsic::x86_avx512_permvar_hi_256;
2676 else if (VecWidth == 512 && EltWidth == 16)
2677 IID = Intrinsic::x86_avx512_permvar_hi_512;
2678 else if (VecWidth == 128 && EltWidth == 8)
2679 IID = Intrinsic::x86_avx512_permvar_qi_128;
2680 else if (VecWidth == 256 && EltWidth == 8)
2681 IID = Intrinsic::x86_avx512_permvar_qi_256;
2682 else if (VecWidth == 512 && EltWidth == 8)
2683 IID = Intrinsic::x86_avx512_permvar_qi_512;
2684 else
2685 llvm_unreachable("Unexpected intrinsic");
2686 } else if (Name.starts_with("dbpsadbw.")) {
2687 if (VecWidth == 128)
2688 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2689 else if (VecWidth == 256)
2690 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2691 else if (VecWidth == 512)
2692 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2693 else
2694 llvm_unreachable("Unexpected intrinsic");
2695 } else if (Name.starts_with("pmultishift.qb.")) {
2696 if (VecWidth == 128)
2697 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2698 else if (VecWidth == 256)
2699 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2700 else if (VecWidth == 512)
2701 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2702 else
2703 llvm_unreachable("Unexpected intrinsic");
2704 } else if (Name.starts_with("conflict.")) {
// Name[9] is the character right after the 9-character "conflict." prefix.
// NOTE(review): this indexes without a length check -- confirm callers never
// pass a name that ends at the prefix.
2705 if (Name[9] == 'd' && VecWidth == 128)
2706 IID = Intrinsic::x86_avx512_conflict_d_128;
2707 else if (Name[9] == 'd' && VecWidth == 256)
2708 IID = Intrinsic::x86_avx512_conflict_d_256;
2709 else if (Name[9] == 'd' && VecWidth == 512)
2710 IID = Intrinsic::x86_avx512_conflict_d_512;
2711 else if (Name[9] == 'q' && VecWidth == 128)
2712 IID = Intrinsic::x86_avx512_conflict_q_128;
2713 else if (Name[9] == 'q' && VecWidth == 256)
2714 IID = Intrinsic::x86_avx512_conflict_q_256;
2715 else if (Name[9] == 'q' && VecWidth == 512)
2716 IID = Intrinsic::x86_avx512_conflict_q_512;
2717 else
2718 llvm_unreachable("Unexpected intrinsic");
2719 } else if (Name.starts_with("pavg.")) {
// Name[5] is the character right after the 5-character "pavg." prefix.
2720 if (Name[5] == 'b' && VecWidth == 128)
2721 IID = Intrinsic::x86_sse2_pavg_b;
2722 else if (Name[5] == 'b' && VecWidth == 256)
2723 IID = Intrinsic::x86_avx2_pavg_b;
2724 else if (Name[5] == 'b' && VecWidth == 512)
2725 IID = Intrinsic::x86_avx512_pavg_b_512;
2726 else if (Name[5] == 'w' && VecWidth == 128)
2727 IID = Intrinsic::x86_sse2_pavg_w;
2728 else if (Name[5] == 'w' && VecWidth == 256)
2729 IID = Intrinsic::x86_avx2_pavg_w;
2730 else if (Name[5] == 'w' && VecWidth == 512)
2731 IID = Intrinsic::x86_avx512_pavg_w_512;
2732 else
2733 llvm_unreachable("Unexpected intrinsic");
2734 } else
2735 return false;
2736
// Drop the trailing passthru and mask arguments; what remains is passed to
// the unmasked intrinsic.
2737 SmallVector<Value *, 4> Args(CI.args());
2738 Args.pop_back();
2739 Args.pop_back();
2740 Rep = Builder.CreateIntrinsic(IID, Args);
2741 unsigned NumArgs = CI.arg_size();
// Last argument is the mask, second-to-last the passthru.
2742 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2743 CI.getArgOperand(NumArgs - 2));
2744 return true;
2745}
2746
/// Upgrade the comment in a call to inline asm that represents an objc
/// retain/release marker.
///
/// The string is rewritten in place only when all of the following hold:
///   - it begins with "mov\tfp",
///   - it contains "objc_retainAutoreleaseReturnValue", and
///   - it contains "# marker".
/// In that case the single '#' character that starts the first "# marker"
/// occurrence is replaced with ';'. Any other string is left untouched.
///
/// \param AsmStr the inline asm string to upgrade; modified in place.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // rfind(Needle, 0) can only match at position 0, so a string that does not
  // start with the prefix is rejected without scanning the rest of it (unlike
  // find(Needle) == 0, which searches the whole string before failing).
  if (AsmStr->rfind("mov\tfp", 0) != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t Pos = AsmStr->find("# marker");
  if (Pos != std::string::npos)
    AsmStr->replace(Pos, 1, ";");
}
2757
2759 Function *F, IRBuilder<> &Builder) {
2760 Value *Rep = nullptr;
2761
2762 if (Name == "abs.i" || Name == "abs.ll") {
2763 Value *Arg = CI->getArgOperand(0);
2764 Rep = Builder.CreateIntrinsic(Intrinsic::abs, {Arg->getType()},
2765 {Arg, Builder.getTrue()},
2766 /*FMFSource=*/nullptr, "abs");
2767 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2768 Type *Ty = (Name == "abs.bf16")
2769 ? Builder.getBFloatTy()
2770 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2771 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2772 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2773 Rep = Builder.CreateBitCast(Abs, CI->getType());
2774 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2775 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2776 : Intrinsic::nvvm_fabs;
2777 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2778 } else if (Name.consume_front("ex2.approx.")) {
2779 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2780 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2781 : Intrinsic::nvvm_ex2_approx;
2782 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2783 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2784 Name.starts_with("atomic.load.add.f64.p")) {
2785 Value *Ptr = CI->getArgOperand(0);
2786 Value *Val = CI->getArgOperand(1);
2787 Rep = Builder.CreateAtomicRMW(
2789 CI->getContext().getOrInsertSyncScopeID("device"));
2790 // The default scope for atomic.load.* intrinsics is device
2791 // (= gpu scope in ptx), but the default LLVM atomic scope is
2792 // "system"
2793 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2794 Name.starts_with("atomic.load.dec.32.p")) {
2795 Value *Ptr = CI->getArgOperand(0);
2796 Value *Val = CI->getArgOperand(1);
2797 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2799 Rep = Builder.CreateAtomicRMW(
2801 CI->getContext().getOrInsertSyncScopeID("device"));
2802 // See comment above.
2803 } else if (Name.starts_with("atomic.") && Name.contains(".gen.")) {
2804 // nvvm.atomic.{op}.gen.{i,f}.{cta,sys} -> atomicrmw / cmpxchg.
2805 StringRef Op = Name.substr(StringRef("atomic.").size());
2806 Value *Ptr = CI->getArgOperand(0);
2807 Value *Val = CI->getArgOperand(1);
2809 Op.contains(".cta.") ? "block" : "");
2810 if (Op.starts_with("cas.")) {
2811 Value *New = CI->getArgOperand(2);
2812 Value *Pair = Builder.CreateAtomicCmpXchg(
2813 Ptr, Val, New, MaybeAlign(), AtomicOrdering::Monotonic,
2815 Rep = Builder.CreateExtractValue(Pair, 0);
2816 } else {
2817 // Note we don't upgrade anything to AtomicRMWInst::UMin/UMax. This is
2818 // because we were actually missing those intrinsics!
2819 AtomicRMWInst::BinOp BinOp =
2821 .StartsWith("add.gen.f", AtomicRMWInst::FAdd)
2822 .StartsWith("add.gen.i", AtomicRMWInst::Add)
2833 "unexpected nvvm scoped atomic intrinsic");
2834 Rep = Builder.CreateAtomicRMW(BinOp, Ptr, Val, MaybeAlign(),
2836 }
2837 } else if (Name == "clz.ll") {
2838 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2839 Value *Arg = CI->getArgOperand(0);
2840 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2841 {Arg, Builder.getFalse()},
2842 /*FMFSource=*/nullptr, "ctlz");
2843 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2844 } else if (Name == "popc.ll") {
2845 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2846 // i64.
2847 Value *Arg = CI->getArgOperand(0);
2848 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2849 Arg, /*FMFSource=*/nullptr, "ctpop");
2850 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2851 } else if (Name == "h2f") {
2852 Value *Cast =
2853 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
2854 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2855 } else if (Name.consume_front("bitcast.") &&
2856 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2857 Name == "d2ll")) {
2858 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2859 } else if (Name == "rotate.b32") {
2860 Value *Arg = CI->getOperand(0);
2861 Value *ShiftAmt = CI->getOperand(1);
2862 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2863 {Arg, Arg, ShiftAmt});
2864 } else if (Name == "rotate.b64") {
2865 Type *Int64Ty = Builder.getInt64Ty();
2866 Value *Arg = CI->getOperand(0);
2867 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2868 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2869 {Arg, Arg, ZExtShiftAmt});
2870 } else if (Name == "rotate.right.b64") {
2871 Type *Int64Ty = Builder.getInt64Ty();
2872 Value *Arg = CI->getOperand(0);
2873 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2874 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2875 {Arg, Arg, ZExtShiftAmt});
2876 } else if (Name == "swap.lo.hi.b64") {
2877 Type *Int64Ty = Builder.getInt64Ty();
2878 Value *Arg = CI->getOperand(0);
2879 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2880 {Arg, Arg, Builder.getInt64(32)});
2881 } else if ((Name.consume_front("ptr.gen.to.") &&
2882 consumeNVVMPtrAddrSpace(Name)) ||
2883 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2884 Name.starts_with(".to.gen"))) {
2885 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2886 } else if (Name.consume_front("ldg.global")) {
2887 Value *Ptr = CI->getArgOperand(0);
2888 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2889 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2890 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2891 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2892 MDNode *MD = MDNode::get(Builder.getContext(), {});
2893 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2894 return LD;
2895 } else if (Name == "tanh.approx.f32") {
2896 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2897 FastMathFlags FMF;
2898 FMF.setApproxFunc();
2899 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2900 FMF);
2901 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2902 Value *Arg =
2903 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2904 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2905 {}, {Arg});
2906 } else if (Name == "barrier") {
2907 Rep = Builder.CreateIntrinsic(
2908 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2909 {CI->getArgOperand(0), CI->getArgOperand(1)});
2910 } else if (Name == "barrier.sync") {
2911 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2912 {CI->getArgOperand(0)});
2913 } else if (Name == "barrier.sync.cnt") {
2914 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2915 {CI->getArgOperand(0), CI->getArgOperand(1)});
2916 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2917 Name == "barrier0.or") {
2918 Value *C = CI->getArgOperand(0);
2919 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2920
2921 Intrinsic::ID IID =
2923 .Case("barrier0.popc",
2924 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2925 .Case("barrier0.and",
2926 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2927 .Case("barrier0.or",
2928 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2929 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2930 Rep = Builder.CreateZExt(Bar, CI->getType());
2931 } else {
2933 if (IID != Intrinsic::not_intrinsic &&
2934 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2935 rename(F);
2936 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2938 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2939 Value *Arg = CI->getArgOperand(I);
2940 Type *OldType = Arg->getType();
2941 Type *NewType = NewFn->getArg(I)->getType();
2942 Args.push_back(
2943 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2944 ? Builder.CreateBitCast(Arg, NewType)
2945 : Arg);
2946 }
2947 Rep = Builder.CreateCall(NewFn, Args);
2948 if (F->getReturnType()->isIntegerTy())
2949 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2950 }
2951 }
2952
2953 return Rep;
2954}
2955
2957 IRBuilder<> &Builder) {
2958 LLVMContext &C = F->getContext();
2959 Value *Rep = nullptr;
2960
2961 if (Name.starts_with("sse4a.movnt.")) {
2963 Elts.push_back(
2964 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2965 MDNode *Node = MDNode::get(C, Elts);
2966
2967 Value *Arg0 = CI->getArgOperand(0);
2968 Value *Arg1 = CI->getArgOperand(1);
2969
2970 // Nontemporal (unaligned) store of the 0'th element of the float/double
2971 // vector.
2972 Value *Extract =
2973 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2974
2975 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2976 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2977 } else if (Name.starts_with("avx.movnt.") ||
2978 Name.starts_with("avx512.storent.")) {
2980 Elts.push_back(
2981 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2982 MDNode *Node = MDNode::get(C, Elts);
2983
2984 Value *Arg0 = CI->getArgOperand(0);
2985 Value *Arg1 = CI->getArgOperand(1);
2986
2987 StoreInst *SI = Builder.CreateAlignedStore(
2988 Arg1, Arg0,
2990 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2991 } else if (Name == "sse2.storel.dq") {
2992 Value *Arg0 = CI->getArgOperand(0);
2993 Value *Arg1 = CI->getArgOperand(1);
2994
2995 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2996 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2997 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2998 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2999 } else if (Name.starts_with("sse.storeu.") ||
3000 Name.starts_with("sse2.storeu.") ||
3001 Name.starts_with("avx.storeu.")) {
3002 Value *Arg0 = CI->getArgOperand(0);
3003 Value *Arg1 = CI->getArgOperand(1);
3004 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
3005 } else if (Name == "avx512.mask.store.ss") {
3006 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
3007 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3008 Mask, false);
3009 } else if (Name.starts_with("avx512.mask.store")) {
3010 // "avx512.mask.storeu." or "avx512.mask.store."
3011 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
3012 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3013 CI->getArgOperand(2), Aligned);
3014 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
3015 // Upgrade packed integer vector compare intrinsics to compare instructions.
3016 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
3017 bool CmpEq = Name[9] == 'e';
3018 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
3019 CI->getArgOperand(0), CI->getArgOperand(1));
3020 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
3021 } else if (Name.starts_with("avx512.broadcastm")) {
3022 Type *ExtTy = Type::getInt32Ty(C);
3023 if (CI->getOperand(0)->getType()->isIntegerTy(8))
3024 ExtTy = Type::getInt64Ty(C);
3025 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
3026 ExtTy->getPrimitiveSizeInBits();
3027 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
3028 Rep = Builder.CreateVectorSplat(NumElts, Rep);
3029 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
3030 Value *Vec = CI->getArgOperand(0);
3031 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
3032 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
3033 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
3034 } else if (Name.starts_with("avx.sqrt.p") ||
3035 Name.starts_with("sse2.sqrt.p") ||
3036 Name.starts_with("sse.sqrt.p")) {
3037 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
3038 {CI->getArgOperand(0)});
3039 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
3040 if (CI->arg_size() == 4 &&
3041 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3042 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3043 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
3044 : Intrinsic::x86_avx512_sqrt_pd_512;
3045
3046 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
3047 Rep = Builder.CreateIntrinsic(IID, Args);
3048 } else {
3049 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
3050 {CI->getArgOperand(0)});
3051 }
3052 Rep =
3053 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3054 } else if (Name.starts_with("avx512.ptestm") ||
3055 Name.starts_with("avx512.ptestnm")) {
3056 Value *Op0 = CI->getArgOperand(0);
3057 Value *Op1 = CI->getArgOperand(1);
3058 Value *Mask = CI->getArgOperand(2);
3059 Rep = Builder.CreateAnd(Op0, Op1);
3060 llvm::Type *Ty = Op0->getType();
3062 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
3065 Rep = Builder.CreateICmp(Pred, Rep, Zero);
3066 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
3067 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
3068 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
3069 ->getNumElements();
3070 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
3071 Rep =
3072 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3073 } else if (Name.starts_with("avx512.kunpck")) {
3074 unsigned NumElts = CI->getType()->getScalarSizeInBits();
3075 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
3076 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
3077 int Indices[64];
3078 for (unsigned i = 0; i != NumElts; ++i)
3079 Indices[i] = i;
3080
3081 // First extract half of each vector. This gives better codegen than
3082 // doing it in a single shuffle.
3083 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
3084 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
3085 // Concat the vectors.
3086 // NOTE: Operands have to be swapped to match intrinsic definition.
3087 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
3088 Rep = Builder.CreateBitCast(Rep, CI->getType());
3089 } else if (Name == "avx512.kand.w") {
3090 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3091 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3092 Rep = Builder.CreateAnd(LHS, RHS);
3093 Rep = Builder.CreateBitCast(Rep, CI->getType());
3094 } else if (Name == "avx512.kandn.w") {
3095 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3096 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3097 LHS = Builder.CreateNot(LHS);
3098 Rep = Builder.CreateAnd(LHS, RHS);
3099 Rep = Builder.CreateBitCast(Rep, CI->getType());
3100 } else if (Name == "avx512.kor.w") {
3101 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3102 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3103 Rep = Builder.CreateOr(LHS, RHS);
3104 Rep = Builder.CreateBitCast(Rep, CI->getType());
3105 } else if (Name == "avx512.kxor.w") {
3106 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3107 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3108 Rep = Builder.CreateXor(LHS, RHS);
3109 Rep = Builder.CreateBitCast(Rep, CI->getType());
3110 } else if (Name == "avx512.kxnor.w") {
3111 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3112 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3113 LHS = Builder.CreateNot(LHS);
3114 Rep = Builder.CreateXor(LHS, RHS);
3115 Rep = Builder.CreateBitCast(Rep, CI->getType());
3116 } else if (Name == "avx512.knot.w") {
3117 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3118 Rep = Builder.CreateNot(Rep);
3119 Rep = Builder.CreateBitCast(Rep, CI->getType());
3120 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3121 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3122 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3123 Rep = Builder.CreateOr(LHS, RHS);
3124 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3125 Value *C;
3126 if (Name[14] == 'c')
3127 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
3128 else
3129 C = ConstantInt::getNullValue(Builder.getInt16Ty());
3130 Rep = Builder.CreateICmpEQ(Rep, C);
3131 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3132 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3133 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3134 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3135 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3136 Type *I32Ty = Type::getInt32Ty(C);
3137 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
3138 ConstantInt::get(I32Ty, 0));
3139 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
3140 ConstantInt::get(I32Ty, 0));
3141 Value *EltOp;
3142 if (Name.contains(".add."))
3143 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3144 else if (Name.contains(".sub."))
3145 EltOp = Builder.CreateFSub(Elt0, Elt1);
3146 else if (Name.contains(".mul."))
3147 EltOp = Builder.CreateFMul(Elt0, Elt1);
3148 else
3149 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3150 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3151 ConstantInt::get(I32Ty, 0));
3152 } else if (Name.starts_with("avx512.mask.pcmp")) {
3153 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3154 bool CmpEq = Name[16] == 'e';
3155 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3156 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3157 Type *OpTy = CI->getArgOperand(0)->getType();
3158 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3159 Intrinsic::ID IID;
3160 switch (VecWidth) {
3161 default:
3162 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3163 break;
3164 case 128:
3165 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3166 break;
3167 case 256:
3168 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3169 break;
3170 case 512:
3171 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3172 break;
3173 }
3174
3175 Rep =
3176 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3177 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3178 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3179 Type *OpTy = CI->getArgOperand(0)->getType();
3180 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3181 unsigned EltWidth = OpTy->getScalarSizeInBits();
3182 Intrinsic::ID IID;
3183 if (VecWidth == 128 && EltWidth == 32)
3184 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3185 else if (VecWidth == 256 && EltWidth == 32)
3186 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3187 else if (VecWidth == 512 && EltWidth == 32)
3188 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3189 else if (VecWidth == 128 && EltWidth == 64)
3190 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3191 else if (VecWidth == 256 && EltWidth == 64)
3192 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3193 else if (VecWidth == 512 && EltWidth == 64)
3194 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3195 else
3196 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3197
3198 Rep =
3199 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3200 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3201 } else if (Name.starts_with("avx512.cmp.p")) {
3202 SmallVector<Value *, 4> Args(CI->args());
3203 Type *OpTy = Args[0]->getType();
3204 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3205 unsigned EltWidth = OpTy->getScalarSizeInBits();
3206 Intrinsic::ID IID;
3207 if (VecWidth == 128 && EltWidth == 32)
3208 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3209 else if (VecWidth == 256 && EltWidth == 32)
3210 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3211 else if (VecWidth == 512 && EltWidth == 32)
3212 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3213 else if (VecWidth == 128 && EltWidth == 64)
3214 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3215 else if (VecWidth == 256 && EltWidth == 64)
3216 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3217 else if (VecWidth == 512 && EltWidth == 64)
3218 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3219 else
3220 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3221
3223 if (VecWidth == 512)
3224 std::swap(Mask, Args.back());
3225 Args.push_back(Mask);
3226
3227 Rep = Builder.CreateIntrinsic(IID, Args);
3228 } else if (Name.starts_with("avx512.mask.cmp.")) {
3229 // Integer compare intrinsics.
3230 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3231 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3232 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3233 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3234 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3235 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3236 Name.starts_with("avx512.cvtw2mask.") ||
3237 Name.starts_with("avx512.cvtd2mask.") ||
3238 Name.starts_with("avx512.cvtq2mask.")) {
3239 Value *Op = CI->getArgOperand(0);
3240 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3241 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3242 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3243 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3244 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3245 Name.starts_with("avx512.mask.pabs")) {
3246 Rep = upgradeAbs(Builder, *CI);
3247 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3248 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3249 Name.starts_with("avx512.mask.pmaxs")) {
3250 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3251 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3252 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3253 Name.starts_with("avx512.mask.pmaxu")) {
3254 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3255 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3256 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3257 Name.starts_with("avx512.mask.pmins")) {
3258 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3259 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3260 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3261 Name.starts_with("avx512.mask.pminu")) {
3262 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3263 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3264 Name == "avx512.pmulu.dq.512" ||
3265 Name.starts_with("avx512.mask.pmulu.dq.")) {
3266 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3267 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3268 Name == "avx512.pmul.dq.512" ||
3269 Name.starts_with("avx512.mask.pmul.dq.")) {
3270 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3271 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3272 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3273 Rep =
3274 Builder.CreateSIToFP(CI->getArgOperand(1),
3275 cast<VectorType>(CI->getType())->getElementType());
3276 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3277 } else if (Name == "avx512.cvtusi2sd") {
3278 Rep =
3279 Builder.CreateUIToFP(CI->getArgOperand(1),
3280 cast<VectorType>(CI->getType())->getElementType());
3281 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3282 } else if (Name == "sse2.cvtss2sd") {
3283 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3284 Rep = Builder.CreateFPExt(
3285 Rep, cast<VectorType>(CI->getType())->getElementType());
3286 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3287 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3288 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3289 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3290 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3291 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3292 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3293 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3294 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3295 Name == "avx512.mask.cvtqq2ps.256" ||
3296 Name == "avx512.mask.cvtqq2ps.512" ||
3297 Name == "avx512.mask.cvtuqq2ps.256" ||
3298 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3299 Name == "avx.cvt.ps2.pd.256" ||
3300 Name == "avx512.mask.cvtps2pd.128" ||
3301 Name == "avx512.mask.cvtps2pd.256") {
3302 auto *DstTy = cast<FixedVectorType>(CI->getType());
3303 Rep = CI->getArgOperand(0);
3304 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3305
3306 unsigned NumDstElts = DstTy->getNumElements();
3307 if (NumDstElts < SrcTy->getNumElements()) {
3308 assert(NumDstElts == 2 && "Unexpected vector size");
3309 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3310 }
3311
3312 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3313 bool IsUnsigned = Name.contains("cvtu");
3314 if (IsPS2PD)
3315 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3316 else if (CI->arg_size() == 4 &&
3317 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3318 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3319 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3320 : Intrinsic::x86_avx512_sitofp_round;
3321 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3322 {Rep, CI->getArgOperand(3)});
3323 } else {
3324 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3325 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3326 }
3327
3328 if (CI->arg_size() >= 3)
3329 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3330 CI->getArgOperand(1));
3331 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3332 Name.starts_with("vcvtph2ps.")) {
3333 auto *DstTy = cast<FixedVectorType>(CI->getType());
3334 Rep = CI->getArgOperand(0);
3335 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3336 unsigned NumDstElts = DstTy->getNumElements();
3337 if (NumDstElts != SrcTy->getNumElements()) {
3338 assert(NumDstElts == 4 && "Unexpected vector size");
3339 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3340 }
3341 Rep = Builder.CreateBitCast(
3342 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3343 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3344 if (CI->arg_size() >= 3)
3345 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3346 CI->getArgOperand(1));
3347 } else if (Name.starts_with("avx512.mask.load")) {
3348 // "avx512.mask.loadu." or "avx512.mask.load."
3349 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3350 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3351 CI->getArgOperand(2), Aligned);
3352 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3353 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3354 auto *PtrTy = CI->getOperand(0)->getType();
3355 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3356 ResultTy->getNumElements());
3357 Rep = Builder.CreateIntrinsic(
3358 Intrinsic::masked_expandload, {ResultTy, PtrTy},
3359 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3360 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3361 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3362 auto *PtrTy = CI->getArgOperand(0)->getType();
3363 Value *MaskVec =
3364 getX86MaskVec(Builder, CI->getArgOperand(2),
3365 cast<FixedVectorType>(ResultTy)->getNumElements());
3366 Rep = Builder.CreateIntrinsic(
3367 Intrinsic::masked_compressstore, {ResultTy, PtrTy},
3368 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3369 } else if (Name.starts_with("avx512.mask.compress.") ||
3370 Name.starts_with("avx512.mask.expand.")) {
3371 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3372
3373 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3374 ResultTy->getNumElements());
3375
3376 bool IsCompress = Name[12] == 'c';
3377 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3378 : Intrinsic::x86_avx512_mask_expand;
3379 Rep = Builder.CreateIntrinsic(
3380 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3381 } else if (Name.starts_with("xop.vpcom")) {
3382 bool IsSigned;
3383 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3384 Name.ends_with("uq"))
3385 IsSigned = false;
3386 else if (Name.ends_with("b") || Name.ends_with("w") ||
3387 Name.ends_with("d") || Name.ends_with("q"))
3388 IsSigned = true;
3389 else
3390 reportFatalUsageErrorWithCI("Intrinsic has unknown suffix", CI);
3391
3392 unsigned Imm;
3393 if (CI->arg_size() == 3) {
3394 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3395 } else {
3396 Name = Name.substr(9); // strip off "xop.vpcom"
3397 if (Name.starts_with("lt"))
3398 Imm = 0;
3399 else if (Name.starts_with("le"))
3400 Imm = 1;
3401 else if (Name.starts_with("gt"))
3402 Imm = 2;
3403 else if (Name.starts_with("ge"))
3404 Imm = 3;
3405 else if (Name.starts_with("eq"))
3406 Imm = 4;
3407 else if (Name.starts_with("ne"))
3408 Imm = 5;
3409 else if (Name.starts_with("false"))
3410 Imm = 6;
3411 else if (Name.starts_with("true"))
3412 Imm = 7;
3413 else
3414 llvm_unreachable("Unknown condition");
3415 }
3416
3417 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3418 } else if (Name.starts_with("xop.vpcmov")) {
3419 Value *Sel = CI->getArgOperand(2);
3420 Value *NotSel = Builder.CreateNot(Sel);
3421 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3422 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3423 Rep = Builder.CreateOr(Sel0, Sel1);
3424 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3425 Name.starts_with("avx512.mask.prol")) {
3426 Rep = upgradeX86Rotate(Builder, *CI, false);
3427 } else if (Name.starts_with("avx512.pror") ||
3428 Name.starts_with("avx512.mask.pror")) {
3429 Rep = upgradeX86Rotate(Builder, *CI, true);
3430 } else if (Name.starts_with("avx512.vpshld.") ||
3431 Name.starts_with("avx512.mask.vpshld") ||
3432 Name.starts_with("avx512.maskz.vpshld")) {
3433 bool ZeroMask = Name[11] == 'z';
3434 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3435 } else if (Name.starts_with("avx512.vpshrd.") ||
3436 Name.starts_with("avx512.mask.vpshrd") ||
3437 Name.starts_with("avx512.maskz.vpshrd")) {
3438 bool ZeroMask = Name[11] == 'z';
3439 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3440 } else if (Name == "sse42.crc32.64.8") {
3441 Value *Trunc0 =
3442 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3443 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3444 {Trunc0, CI->getArgOperand(1)});
3445 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3446 } else if (Name.starts_with("avx.vbroadcast.s") ||
3447 Name.starts_with("avx512.vbroadcast.s")) {
3448 // Replace broadcasts with a series of insertelements.
3449 auto *VecTy = cast<FixedVectorType>(CI->getType());
3450 Type *EltTy = VecTy->getElementType();
3451 unsigned EltNum = VecTy->getNumElements();
3452 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3453 Type *I32Ty = Type::getInt32Ty(C);
3454 Rep = PoisonValue::get(VecTy);
3455 for (unsigned I = 0; I < EltNum; ++I)
3456 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3457 } else if (Name.starts_with("sse41.pmovsx") ||
3458 Name.starts_with("sse41.pmovzx") ||
3459 Name.starts_with("avx2.pmovsx") ||
3460 Name.starts_with("avx2.pmovzx") ||
3461 Name.starts_with("avx512.mask.pmovsx") ||
3462 Name.starts_with("avx512.mask.pmovzx")) {
3463 auto *DstTy = cast<FixedVectorType>(CI->getType());
3464 unsigned NumDstElts = DstTy->getNumElements();
3465
3466 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3467 SmallVector<int, 8> ShuffleMask(NumDstElts);
3468 for (unsigned i = 0; i != NumDstElts; ++i)
3469 ShuffleMask[i] = i;
3470
3471 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3472
3473 bool DoSext = Name.contains("pmovsx");
3474 Rep =
3475 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3476 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3477 if (CI->arg_size() == 3)
3478 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3479 CI->getArgOperand(1));
3480 } else if (Name == "avx512.mask.pmov.qd.256" ||
3481 Name == "avx512.mask.pmov.qd.512" ||
3482 Name == "avx512.mask.pmov.wb.256" ||
3483 Name == "avx512.mask.pmov.wb.512") {
3484 Type *Ty = CI->getArgOperand(1)->getType();
3485 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3486 Rep =
3487 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3488 } else if (Name.starts_with("avx.vbroadcastf128") ||
3489 Name == "avx2.vbroadcasti128") {
3490 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3491 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3492 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3493 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3494 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3495 if (NumSrcElts == 2)
3496 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3497 else
3498 Rep = Builder.CreateShuffleVector(Load,
3499 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3500 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3501 Name.starts_with("avx512.mask.shuf.f")) {
3502 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3503 Type *VT = CI->getType();
3504 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3505 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3506 unsigned ControlBitsMask = NumLanes - 1;
3507 unsigned NumControlBits = NumLanes / 2;
3508 SmallVector<int, 8> ShuffleMask(0);
3509
3510 for (unsigned l = 0; l != NumLanes; ++l) {
3511 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3512 // We actually need the other source.
3513 if (l >= NumLanes / 2)
3514 LaneMask += NumLanes;
3515 for (unsigned i = 0; i != NumElementsInLane; ++i)
3516 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3517 }
3518 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3519 CI->getArgOperand(1), ShuffleMask);
3520 Rep =
3521 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3522 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3523 Name.starts_with("avx512.mask.broadcasti")) {
3524 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3525 ->getNumElements();
3526 unsigned NumDstElts =
3527 cast<FixedVectorType>(CI->getType())->getNumElements();
3528
3529 SmallVector<int, 8> ShuffleMask(NumDstElts);
3530 for (unsigned i = 0; i != NumDstElts; ++i)
3531 ShuffleMask[i] = i % NumSrcElts;
3532
3533 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3534 CI->getArgOperand(0), ShuffleMask);
3535 Rep =
3536 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3537 } else if (Name.starts_with("avx2.pbroadcast") ||
3538 Name.starts_with("avx2.vbroadcast") ||
3539 Name.starts_with("avx512.pbroadcast") ||
3540 Name.starts_with("avx512.mask.broadcast.s")) {
3541 // Replace vp?broadcasts with a vector shuffle.
3542 Value *Op = CI->getArgOperand(0);
3543 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3544 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3547 Rep = Builder.CreateShuffleVector(Op, M);
3548
3549 if (CI->arg_size() == 3)
3550 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3551 CI->getArgOperand(1));
3552 } else if (Name.starts_with("sse2.padds.") ||
3553 Name.starts_with("avx2.padds.") ||
3554 Name.starts_with("avx512.padds.") ||
3555 Name.starts_with("avx512.mask.padds.")) {
3556 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3557 } else if (Name.starts_with("sse2.psubs.") ||
3558 Name.starts_with("avx2.psubs.") ||
3559 Name.starts_with("avx512.psubs.") ||
3560 Name.starts_with("avx512.mask.psubs.")) {
3561 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3562 } else if (Name.starts_with("sse2.paddus.") ||
3563 Name.starts_with("avx2.paddus.") ||
3564 Name.starts_with("avx512.mask.paddus.")) {
3565 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3566 } else if (Name.starts_with("sse2.psubus.") ||
3567 Name.starts_with("avx2.psubus.") ||
3568 Name.starts_with("avx512.mask.psubus.")) {
3569 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3570 } else if (Name.starts_with("avx512.mask.palignr.")) {
3571 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3572 CI->getArgOperand(1), CI->getArgOperand(2),
3573 CI->getArgOperand(3), CI->getArgOperand(4),
3574 false);
3575 } else if (Name.starts_with("avx512.mask.valign.")) {
3577 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3578 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3579 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3580 // 128/256-bit shift left specified in bits.
3581 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3582 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3583 Shift / 8); // Shift is in bits.
3584 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3585 // 128/256-bit shift right specified in bits.
3586 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3587 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3588 Shift / 8); // Shift is in bits.
3589 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3590 Name == "avx512.psll.dq.512") {
3591 // 128/256/512-bit shift left specified in bytes.
3592 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3593 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3594 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3595 Name == "avx512.psrl.dq.512") {
3596 // 128/256/512-bit shift right specified in bytes.
3597 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3598 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3599 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3600 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3601 Name.starts_with("avx2.pblendd.")) {
3602 Value *Op0 = CI->getArgOperand(0);
3603 Value *Op1 = CI->getArgOperand(1);
3604 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3605 auto *VecTy = cast<FixedVectorType>(CI->getType());
3606 unsigned NumElts = VecTy->getNumElements();
3607
3608 SmallVector<int, 16> Idxs(NumElts);
3609 for (unsigned i = 0; i != NumElts; ++i)
3610 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3611
3612 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3613 } else if (Name.starts_with("avx.vinsertf128.") ||
3614 Name == "avx2.vinserti128" ||
3615 Name.starts_with("avx512.mask.insert")) {
3616 Value *Op0 = CI->getArgOperand(0);
3617 Value *Op1 = CI->getArgOperand(1);
3618 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3619 unsigned DstNumElts =
3620 cast<FixedVectorType>(CI->getType())->getNumElements();
3621 unsigned SrcNumElts =
3622 cast<FixedVectorType>(Op1->getType())->getNumElements();
3623 unsigned Scale = DstNumElts / SrcNumElts;
3624
3625 // Mask off the high bits of the immediate value; hardware ignores those.
3626 Imm = Imm % Scale;
3627
3628 // Extend the second operand into a vector the size of the destination.
3629 SmallVector<int, 8> Idxs(DstNumElts);
3630 for (unsigned i = 0; i != SrcNumElts; ++i)
3631 Idxs[i] = i;
3632 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3633 Idxs[i] = SrcNumElts;
3634 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3635
3636 // Insert the second operand into the first operand.
3637
3638 // Note that there is no guarantee that instruction lowering will actually
3639 // produce a vinsertf128 instruction for the created shuffles. In
3640 // particular, the 0 immediate case involves no lane changes, so it can
3641 // be handled as a blend.
3642
3643 // Example of shuffle mask for 32-bit elements:
3644 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3645 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3646
3647 // First fill with identity mask.
3648 for (unsigned i = 0; i != DstNumElts; ++i)
3649 Idxs[i] = i;
3650 // Then replace the elements where we need to insert.
3651 for (unsigned i = 0; i != SrcNumElts; ++i)
3652 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3653 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3654
3655 // If the intrinsic has a mask operand, handle that.
3656 if (CI->arg_size() == 5)
3657 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3658 CI->getArgOperand(3));
3659 } else if (Name.starts_with("avx.vextractf128.") ||
3660 Name == "avx2.vextracti128" ||
3661 Name.starts_with("avx512.mask.vextract")) {
3662 Value *Op0 = CI->getArgOperand(0);
3663 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3664 unsigned DstNumElts =
3665 cast<FixedVectorType>(CI->getType())->getNumElements();
3666 unsigned SrcNumElts =
3667 cast<FixedVectorType>(Op0->getType())->getNumElements();
3668 unsigned Scale = SrcNumElts / DstNumElts;
3669
3670 // Mask off the high bits of the immediate value; hardware ignores those.
3671 Imm = Imm % Scale;
3672
3673 // Get indexes for the subvector of the input vector.
3674 SmallVector<int, 8> Idxs(DstNumElts);
3675 for (unsigned i = 0; i != DstNumElts; ++i) {
3676 Idxs[i] = i + (Imm * DstNumElts);
3677 }
3678 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3679
3680 // If the intrinsic has a mask operand, handle that.
3681 if (CI->arg_size() == 4)
3682 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3683 CI->getArgOperand(2));
3684 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3685 Name.starts_with("avx512.mask.perm.di.")) {
3686 Value *Op0 = CI->getArgOperand(0);
3687 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3688 auto *VecTy = cast<FixedVectorType>(CI->getType());
3689 unsigned NumElts = VecTy->getNumElements();
3690
3691 SmallVector<int, 8> Idxs(NumElts);
3692 for (unsigned i = 0; i != NumElts; ++i)
3693 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3694
3695 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3696
3697 if (CI->arg_size() == 4)
3698 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3699 CI->getArgOperand(2));
3700 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3701 // The immediate permute control byte looks like this:
3702 // [1:0] - select 128 bits from sources for low half of destination
3703 // [2] - ignore
3704 // [3] - zero low half of destination
3705 // [5:4] - select 128 bits from sources for high half of destination
3706 // [6] - ignore
3707 // [7] - zero high half of destination
3708
3709 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3710
3711 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3712 unsigned HalfSize = NumElts / 2;
3713 SmallVector<int, 8> ShuffleMask(NumElts);
3714
3715 // Determine which operand(s) are actually in use for this instruction.
3716 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3717 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3718
3719 // If needed, replace operands based on zero mask.
3720 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3721 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3722
3723 // Permute low half of result.
3724 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3725 for (unsigned i = 0; i < HalfSize; ++i)
3726 ShuffleMask[i] = StartIndex + i;
3727
3728 // Permute high half of result.
3729 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3730 for (unsigned i = 0; i < HalfSize; ++i)
3731 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3732
3733 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3734
3735 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3736 Name.starts_with("avx512.mask.vpermil.p") ||
3737 Name.starts_with("avx512.mask.pshuf.d.")) {
3738 Value *Op0 = CI->getArgOperand(0);
3739 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3740 auto *VecTy = cast<FixedVectorType>(CI->getType());
3741 unsigned NumElts = VecTy->getNumElements();
3742 // Calculate the size of each index in the immediate.
3743 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3744 unsigned IdxMask = ((1 << IdxSize) - 1);
3745
3746 SmallVector<int, 8> Idxs(NumElts);
3747 // Lookup the bits for this element, wrapping around the immediate every
3748 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3749 // to offset by the first index of each group.
3750 for (unsigned i = 0; i != NumElts; ++i)
3751 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3752
3753 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3754
3755 if (CI->arg_size() == 4)
3756 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3757 CI->getArgOperand(2));
3758 } else if (Name == "sse2.pshufl.w" ||
3759 Name.starts_with("avx512.mask.pshufl.w.")) {
3760 Value *Op0 = CI->getArgOperand(0);
3761 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3762 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3763
3764 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3765 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3766
3767 SmallVector<int, 16> Idxs(NumElts);
3768 for (unsigned l = 0; l != NumElts; l += 8) {
3769 for (unsigned i = 0; i != 4; ++i)
3770 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3771 for (unsigned i = 4; i != 8; ++i)
3772 Idxs[i + l] = i + l;
3773 }
3774
3775 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3776
3777 if (CI->arg_size() == 4)
3778 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3779 CI->getArgOperand(2));
3780 } else if (Name == "sse2.pshufh.w" ||
3781 Name.starts_with("avx512.mask.pshufh.w.")) {
3782 Value *Op0 = CI->getArgOperand(0);
3783 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3784 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3785
3786 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3787 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3788
3789 SmallVector<int, 16> Idxs(NumElts);
3790 for (unsigned l = 0; l != NumElts; l += 8) {
3791 for (unsigned i = 0; i != 4; ++i)
3792 Idxs[i + l] = i + l;
3793 for (unsigned i = 0; i != 4; ++i)
3794 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3795 }
3796
3797 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3798
3799 if (CI->arg_size() == 4)
3800 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3801 CI->getArgOperand(2));
3802 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3803 Value *Op0 = CI->getArgOperand(0);
3804 Value *Op1 = CI->getArgOperand(1);
3805 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3806 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3807
3808 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3809 unsigned HalfLaneElts = NumLaneElts / 2;
3810
3811 SmallVector<int, 16> Idxs(NumElts);
3812 for (unsigned i = 0; i != NumElts; ++i) {
3813 // Base index is the starting element of the lane.
3814 Idxs[i] = i - (i % NumLaneElts);
3815 // If we are half way through the lane switch to the other source.
3816 if ((i % NumLaneElts) >= HalfLaneElts)
3817 Idxs[i] += NumElts;
3818 // Now select the specific element by adding HalfLaneElts bits from
3819 // the immediate, wrapping around the immediate every 8 bits.
3820 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3821 }
3822
3823 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3824
3825 Rep =
3826 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3827 } else if (Name.starts_with("avx512.mask.movddup") ||
3828 Name.starts_with("avx512.mask.movshdup") ||
3829 Name.starts_with("avx512.mask.movsldup")) {
3830 Value *Op0 = CI->getArgOperand(0);
3831 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3832 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3833
3834 unsigned Offset = 0;
3835 if (Name.starts_with("avx512.mask.movshdup."))
3836 Offset = 1;
3837
3838 SmallVector<int, 16> Idxs(NumElts);
3839 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3840 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3841 Idxs[i + l + 0] = i + l + Offset;
3842 Idxs[i + l + 1] = i + l + Offset;
3843 }
3844
3845 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3846
3847 Rep =
3848 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3849 } else if (Name.starts_with("avx512.mask.punpckl") ||
3850 Name.starts_with("avx512.mask.unpckl.")) {
3851 Value *Op0 = CI->getArgOperand(0);
3852 Value *Op1 = CI->getArgOperand(1);
3853 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3854 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3855
3856 SmallVector<int, 64> Idxs(NumElts);
3857 for (int l = 0; l != NumElts; l += NumLaneElts)
3858 for (int i = 0; i != NumLaneElts; ++i)
3859 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3860
3861 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3862
3863 Rep =
3864 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3865 } else if (Name.starts_with("avx512.mask.punpckh") ||
3866 Name.starts_with("avx512.mask.unpckh.")) {
3867 Value *Op0 = CI->getArgOperand(0);
3868 Value *Op1 = CI->getArgOperand(1);
3869 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3870 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3871
3872 SmallVector<int, 64> Idxs(NumElts);
3873 for (int l = 0; l != NumElts; l += NumLaneElts)
3874 for (int i = 0; i != NumLaneElts; ++i)
3875 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3876
3877 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3878
3879 Rep =
3880 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3881 } else if (Name.starts_with("avx512.mask.and.") ||
3882 Name.starts_with("avx512.mask.pand.")) {
3883 VectorType *FTy = cast<VectorType>(CI->getType());
3885 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3886 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3887 Rep = Builder.CreateBitCast(Rep, FTy);
3888 Rep =
3889 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3890 } else if (Name.starts_with("avx512.mask.andn.") ||
3891 Name.starts_with("avx512.mask.pandn.")) {
3892 VectorType *FTy = cast<VectorType>(CI->getType());
3894 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3895 Rep = Builder.CreateAnd(Rep,
3896 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3897 Rep = Builder.CreateBitCast(Rep, FTy);
3898 Rep =
3899 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3900 } else if (Name.starts_with("avx512.mask.or.") ||
3901 Name.starts_with("avx512.mask.por.")) {
3902 VectorType *FTy = cast<VectorType>(CI->getType());
3904 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3905 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3906 Rep = Builder.CreateBitCast(Rep, FTy);
3907 Rep =
3908 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3909 } else if (Name.starts_with("avx512.mask.xor.") ||
3910 Name.starts_with("avx512.mask.pxor.")) {
3911 VectorType *FTy = cast<VectorType>(CI->getType());
3913 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3914 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3915 Rep = Builder.CreateBitCast(Rep, FTy);
3916 Rep =
3917 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3918 } else if (Name.starts_with("avx512.mask.padd.")) {
3919 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3920 Rep =
3921 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3922 } else if (Name.starts_with("avx512.mask.psub.")) {
3923 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3924 Rep =
3925 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3926 } else if (Name.starts_with("avx512.mask.pmull.")) {
3927 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3928 Rep =
3929 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3930 } else if (Name.starts_with("avx512.mask.add.p")) {
3931 if (Name.ends_with(".512")) {
3932 Intrinsic::ID IID;
3933 if (Name[17] == 's')
3934 IID = Intrinsic::x86_avx512_add_ps_512;
3935 else
3936 IID = Intrinsic::x86_avx512_add_pd_512;
3937
3938 Rep = Builder.CreateIntrinsic(
3939 IID,
3940 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3941 } else {
3942 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3943 }
3944 Rep =
3945 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3946 } else if (Name.starts_with("avx512.mask.div.p")) {
3947 if (Name.ends_with(".512")) {
3948 Intrinsic::ID IID;
3949 if (Name[17] == 's')
3950 IID = Intrinsic::x86_avx512_div_ps_512;
3951 else
3952 IID = Intrinsic::x86_avx512_div_pd_512;
3953
3954 Rep = Builder.CreateIntrinsic(
3955 IID,
3956 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3957 } else {
3958 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3959 }
3960 Rep =
3961 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3962 } else if (Name.starts_with("avx512.mask.mul.p")) {
3963 if (Name.ends_with(".512")) {
3964 Intrinsic::ID IID;
3965 if (Name[17] == 's')
3966 IID = Intrinsic::x86_avx512_mul_ps_512;
3967 else
3968 IID = Intrinsic::x86_avx512_mul_pd_512;
3969
3970 Rep = Builder.CreateIntrinsic(
3971 IID,
3972 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3973 } else {
3974 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3975 }
3976 Rep =
3977 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3978 } else if (Name.starts_with("avx512.mask.sub.p")) {
3979 if (Name.ends_with(".512")) {
3980 Intrinsic::ID IID;
3981 if (Name[17] == 's')
3982 IID = Intrinsic::x86_avx512_sub_ps_512;
3983 else
3984 IID = Intrinsic::x86_avx512_sub_pd_512;
3985
3986 Rep = Builder.CreateIntrinsic(
3987 IID,
3988 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3989 } else {
3990 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3991 }
3992 Rep =
3993 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3994 } else if ((Name.starts_with("avx512.mask.max.p") ||
3995 Name.starts_with("avx512.mask.min.p")) &&
3996 Name.drop_front(18) == ".512") {
3997 bool IsDouble = Name[17] == 'd';
3998 bool IsMin = Name[13] == 'i';
3999 static const Intrinsic::ID MinMaxTbl[2][2] = {
4000 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
4001 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
4002 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
4003
4004 Rep = Builder.CreateIntrinsic(
4005 IID,
4006 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
4007 Rep =
4008 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
4009 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
4010 Rep =
4011 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
4012 {CI->getArgOperand(0), Builder.getInt1(false)});
4013 Rep =
4014 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
4015 } else if (Name.starts_with("avx512.mask.psll")) {
4016 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4017 bool IsVariable = Name[16] == 'v';
4018 char Size = Name[16] == '.' ? Name[17]
4019 : Name[17] == '.' ? Name[18]
4020 : Name[18] == '.' ? Name[19]
4021 : Name[20];
4022
4023 Intrinsic::ID IID;
4024 if (IsVariable && Name[17] != '.') {
4025 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
4026 IID = Intrinsic::x86_avx2_psllv_q;
4027 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
4028 IID = Intrinsic::x86_avx2_psllv_q_256;
4029 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
4030 IID = Intrinsic::x86_avx2_psllv_d;
4031 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
4032 IID = Intrinsic::x86_avx2_psllv_d_256;
4033 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
4034 IID = Intrinsic::x86_avx512_psllv_w_128;
4035 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
4036 IID = Intrinsic::x86_avx512_psllv_w_256;
4037 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
4038 IID = Intrinsic::x86_avx512_psllv_w_512;
4039 else
4040 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4041 } else if (Name.ends_with(".128")) {
4042 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
4043 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
4044 : Intrinsic::x86_sse2_psll_d;
4045 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
4046 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
4047 : Intrinsic::x86_sse2_psll_q;
4048 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
4049 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
4050 : Intrinsic::x86_sse2_psll_w;
4051 else
4052 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4053 } else if (Name.ends_with(".256")) {
4054 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
4055 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
4056 : Intrinsic::x86_avx2_psll_d;
4057 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
4058 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
4059 : Intrinsic::x86_avx2_psll_q;
4060 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
4061 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
4062 : Intrinsic::x86_avx2_psll_w;
4063 else
4064 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4065 } else {
4066 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
4067 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
4068 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
4069 : Intrinsic::x86_avx512_psll_d_512;
4070 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
4071 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
4072 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
4073 : Intrinsic::x86_avx512_psll_q_512;
4074 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
4075 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
4076 : Intrinsic::x86_avx512_psll_w_512;
4077 else
4078 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4079 }
4080
4081 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4082 } else if (Name.starts_with("avx512.mask.psrl")) {
4083 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4084 bool IsVariable = Name[16] == 'v';
4085 char Size = Name[16] == '.' ? Name[17]
4086 : Name[17] == '.' ? Name[18]
4087 : Name[18] == '.' ? Name[19]
4088 : Name[20];
4089
4090 Intrinsic::ID IID;
4091 if (IsVariable && Name[17] != '.') {
4092 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
4093 IID = Intrinsic::x86_avx2_psrlv_q;
4094 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
4095 IID = Intrinsic::x86_avx2_psrlv_q_256;
4096 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
4097 IID = Intrinsic::x86_avx2_psrlv_d;
4098 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
4099 IID = Intrinsic::x86_avx2_psrlv_d_256;
4100 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
4101 IID = Intrinsic::x86_avx512_psrlv_w_128;
4102 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
4103 IID = Intrinsic::x86_avx512_psrlv_w_256;
4104 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
4105 IID = Intrinsic::x86_avx512_psrlv_w_512;
4106 else
4107 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4108 } else if (Name.ends_with(".128")) {
4109 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
4110 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4111 : Intrinsic::x86_sse2_psrl_d;
4112 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
4113 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4114 : Intrinsic::x86_sse2_psrl_q;
4115 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
4116 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4117 : Intrinsic::x86_sse2_psrl_w;
4118 else
4119 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4120 } else if (Name.ends_with(".256")) {
4121 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4122 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4123 : Intrinsic::x86_avx2_psrl_d;
4124 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4125 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4126 : Intrinsic::x86_avx2_psrl_q;
4127 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4128 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4129 : Intrinsic::x86_avx2_psrl_w;
4130 else
4131 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4132 } else {
4133 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4134 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4135 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4136 : Intrinsic::x86_avx512_psrl_d_512;
4137 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4138 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4139 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4140 : Intrinsic::x86_avx512_psrl_q_512;
4141 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
4142 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4143 : Intrinsic::x86_avx512_psrl_w_512;
4144 else
4145 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4146 }
4147
4148 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4149 } else if (Name.starts_with("avx512.mask.psra")) {
4150 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4151 bool IsVariable = Name[16] == 'v';
4152 char Size = Name[16] == '.' ? Name[17]
4153 : Name[17] == '.' ? Name[18]
4154 : Name[18] == '.' ? Name[19]
4155 : Name[20];
4156
4157 Intrinsic::ID IID;
4158 if (IsVariable && Name[17] != '.') {
4159 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4160 IID = Intrinsic::x86_avx2_psrav_d;
4161 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4162 IID = Intrinsic::x86_avx2_psrav_d_256;
4163 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4164 IID = Intrinsic::x86_avx512_psrav_w_128;
4165 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4166 IID = Intrinsic::x86_avx512_psrav_w_256;
4167 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4168 IID = Intrinsic::x86_avx512_psrav_w_512;
4169 else
4170 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4171 } else if (Name.ends_with(".128")) {
4172 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4173 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4174 : Intrinsic::x86_sse2_psra_d;
4175 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4176 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4177 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4178 : Intrinsic::x86_avx512_psra_q_128;
4179 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4180 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4181 : Intrinsic::x86_sse2_psra_w;
4182 else
4183 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4184 } else if (Name.ends_with(".256")) {
4185 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4186 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4187 : Intrinsic::x86_avx2_psra_d;
4188 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4189 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4190 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4191 : Intrinsic::x86_avx512_psra_q_256;
4192 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4193 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4194 : Intrinsic::x86_avx2_psra_w;
4195 else
4196 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4197 } else {
4198 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4199 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4200 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4201 : Intrinsic::x86_avx512_psra_d_512;
4202 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4203 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4204 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4205 : Intrinsic::x86_avx512_psra_q_512;
4206 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4207 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4208 : Intrinsic::x86_avx512_psra_w_512;
4209 else
4210 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4211 }
4212
4213 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4214 } else if (Name.starts_with("avx512.mask.move.s")) {
4215 Rep = upgradeMaskedMove(Builder, *CI);
4216 } else if (Name.starts_with("avx512.cvtmask2")) {
4217 Rep = upgradeMaskToInt(Builder, *CI);
4218 } else if (Name.ends_with(".movntdqa")) {
4220 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4221
4222 LoadInst *LI = Builder.CreateAlignedLoad(
4223 CI->getType(), CI->getArgOperand(0),
4225 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4226 Rep = LI;
4227 } else if (Name.starts_with("fma.vfmadd.") ||
4228 Name.starts_with("fma.vfmsub.") ||
4229 Name.starts_with("fma.vfnmadd.") ||
4230 Name.starts_with("fma.vfnmsub.")) {
4231 bool NegMul = Name[6] == 'n';
4232 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4233 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4234
4235 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4236 CI->getArgOperand(2)};
4237
4238 if (IsScalar) {
4239 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4240 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4241 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4242 }
4243
4244 if (NegMul && !IsScalar)
4245 Ops[0] = Builder.CreateFNeg(Ops[0]);
4246 if (NegMul && IsScalar)
4247 Ops[1] = Builder.CreateFNeg(Ops[1]);
4248 if (NegAcc)
4249 Ops[2] = Builder.CreateFNeg(Ops[2]);
4250
4251 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4252
4253 if (IsScalar)
4254 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4255 } else if (Name.starts_with("fma4.vfmadd.s")) {
4256 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4257 CI->getArgOperand(2)};
4258
4259 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4260 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4261 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4262
4263 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4264
4265 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4266 Rep, (uint64_t)0);
4267 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4268 Name.starts_with("avx512.maskz.vfmadd.s") ||
4269 Name.starts_with("avx512.mask3.vfmadd.s") ||
4270 Name.starts_with("avx512.mask3.vfmsub.s") ||
4271 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4272 bool IsMask3 = Name[11] == '3';
4273 bool IsMaskZ = Name[11] == 'z';
4274 // Drop the "avx512.mask." to make it easier.
4275 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4276 bool NegMul = Name[2] == 'n';
4277 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4278
4279 Value *A = CI->getArgOperand(0);
4280 Value *B = CI->getArgOperand(1);
4281 Value *C = CI->getArgOperand(2);
4282
4283 if (NegMul && (IsMask3 || IsMaskZ))
4284 A = Builder.CreateFNeg(A);
4285 if (NegMul && !(IsMask3 || IsMaskZ))
4286 B = Builder.CreateFNeg(B);
4287 if (NegAcc)
4288 C = Builder.CreateFNeg(C);
4289
4290 A = Builder.CreateExtractElement(A, (uint64_t)0);
4291 B = Builder.CreateExtractElement(B, (uint64_t)0);
4292 C = Builder.CreateExtractElement(C, (uint64_t)0);
4293
4294 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4295 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4296 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4297
4298 Intrinsic::ID IID;
4299 if (Name.back() == 'd')
4300 IID = Intrinsic::x86_avx512_vfmadd_f64;
4301 else
4302 IID = Intrinsic::x86_avx512_vfmadd_f32;
4303 Rep = Builder.CreateIntrinsic(IID, Ops);
4304 } else {
4305 Rep = Builder.CreateFMA(A, B, C);
4306 }
4307
4308 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4309 : IsMask3 ? C
4310 : A;
4311
4312 // For Mask3 with NegAcc, we need to create a new extractelement that
4313 // avoids the negation above.
4314 if (NegAcc && IsMask3)
4315 PassThru =
4316 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4317
4318 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4319 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4320 (uint64_t)0);
4321 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4322 Name.starts_with("avx512.mask.vfnmadd.p") ||
4323 Name.starts_with("avx512.mask.vfnmsub.p") ||
4324 Name.starts_with("avx512.mask3.vfmadd.p") ||
4325 Name.starts_with("avx512.mask3.vfmsub.p") ||
4326 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4327 Name.starts_with("avx512.maskz.vfmadd.p")) {
4328 bool IsMask3 = Name[11] == '3';
4329 bool IsMaskZ = Name[11] == 'z';
4330 // Drop the "avx512.mask." to make it easier.
4331 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4332 bool NegMul = Name[2] == 'n';
4333 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4334
4335 Value *A = CI->getArgOperand(0);
4336 Value *B = CI->getArgOperand(1);
4337 Value *C = CI->getArgOperand(2);
4338
4339 if (NegMul && (IsMask3 || IsMaskZ))
4340 A = Builder.CreateFNeg(A);
4341 if (NegMul && !(IsMask3 || IsMaskZ))
4342 B = Builder.CreateFNeg(B);
4343 if (NegAcc)
4344 C = Builder.CreateFNeg(C);
4345
4346 if (CI->arg_size() == 5 &&
4347 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4348 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4349 Intrinsic::ID IID;
4350 // Check the character before ".512" in string.
4351 if (Name[Name.size() - 5] == 's')
4352 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4353 else
4354 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4355
4356 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4357 } else {
4358 Rep = Builder.CreateFMA(A, B, C);
4359 }
4360
4361 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4362 : IsMask3 ? CI->getArgOperand(2)
4363 : CI->getArgOperand(0);
4364
4365 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4366 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4367 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4368 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4369 Intrinsic::ID IID;
4370 if (VecWidth == 128 && EltWidth == 32)
4371 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4372 else if (VecWidth == 256 && EltWidth == 32)
4373 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4374 else if (VecWidth == 128 && EltWidth == 64)
4375 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4376 else if (VecWidth == 256 && EltWidth == 64)
4377 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4378 else
4379 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4380
4381 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4382 CI->getArgOperand(2)};
4383 Ops[2] = Builder.CreateFNeg(Ops[2]);
4384 Rep = Builder.CreateIntrinsic(IID, Ops);
4385 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4386 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4387 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4388 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4389 bool IsMask3 = Name[11] == '3';
4390 bool IsMaskZ = Name[11] == 'z';
4391 // Drop the "avx512.mask." to make it easier.
4392 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4393 bool IsSubAdd = Name[3] == 's';
4394 if (CI->arg_size() == 5) {
4395 Intrinsic::ID IID;
4396 // Check the character before ".512" in string.
4397 if (Name[Name.size() - 5] == 's')
4398 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4399 else
4400 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4401
4402 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4403 CI->getArgOperand(2), CI->getArgOperand(4)};
4404 if (IsSubAdd)
4405 Ops[2] = Builder.CreateFNeg(Ops[2]);
4406
4407 Rep = Builder.CreateIntrinsic(IID, Ops);
4408 } else {
4409 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4410
4411 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4412 CI->getArgOperand(2)};
4413
4415 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4416 Value *Odd = Builder.CreateCall(FMA, Ops);
4417 Ops[2] = Builder.CreateFNeg(Ops[2]);
4418 Value *Even = Builder.CreateCall(FMA, Ops);
4419
4420 if (IsSubAdd)
4421 std::swap(Even, Odd);
4422
4423 SmallVector<int, 32> Idxs(NumElts);
4424 for (int i = 0; i != NumElts; ++i)
4425 Idxs[i] = i + (i % 2) * NumElts;
4426
4427 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4428 }
4429
4430 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4431 : IsMask3 ? CI->getArgOperand(2)
4432 : CI->getArgOperand(0);
4433
4434 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4435 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4436 Name.starts_with("avx512.maskz.pternlog.")) {
4437 bool ZeroMask = Name[11] == 'z';
4438 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4439 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4440 Intrinsic::ID IID;
4441 if (VecWidth == 128 && EltWidth == 32)
4442 IID = Intrinsic::x86_avx512_pternlog_d_128;
4443 else if (VecWidth == 256 && EltWidth == 32)
4444 IID = Intrinsic::x86_avx512_pternlog_d_256;
4445 else if (VecWidth == 512 && EltWidth == 32)
4446 IID = Intrinsic::x86_avx512_pternlog_d_512;
4447 else if (VecWidth == 128 && EltWidth == 64)
4448 IID = Intrinsic::x86_avx512_pternlog_q_128;
4449 else if (VecWidth == 256 && EltWidth == 64)
4450 IID = Intrinsic::x86_avx512_pternlog_q_256;
4451 else if (VecWidth == 512 && EltWidth == 64)
4452 IID = Intrinsic::x86_avx512_pternlog_q_512;
4453 else
4454 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4455
4456 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4457 CI->getArgOperand(2), CI->getArgOperand(3)};
4458 Rep = Builder.CreateIntrinsic(IID, Args);
4459 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4460 : CI->getArgOperand(0);
4461 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4462 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4463 Name.starts_with("avx512.maskz.vpmadd52")) {
4464 bool ZeroMask = Name[11] == 'z';
4465 bool High = Name[20] == 'h' || Name[21] == 'h';
4466 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4467 Intrinsic::ID IID;
4468 if (VecWidth == 128 && !High)
4469 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4470 else if (VecWidth == 256 && !High)
4471 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4472 else if (VecWidth == 512 && !High)
4473 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4474 else if (VecWidth == 128 && High)
4475 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4476 else if (VecWidth == 256 && High)
4477 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4478 else if (VecWidth == 512 && High)
4479 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4480 else
4481 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4482
4483 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4484 CI->getArgOperand(2)};
4485 Rep = Builder.CreateIntrinsic(IID, Args);
4486 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4487 : CI->getArgOperand(0);
4488 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4489 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4490 Name.starts_with("avx512.mask.vpermt2var.") ||
4491 Name.starts_with("avx512.maskz.vpermt2var.")) {
4492 bool ZeroMask = Name[11] == 'z';
4493 bool IndexForm = Name[17] == 'i';
4494 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4495 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4496 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4497 Name.starts_with("avx512.mask.vpdpbusds.") ||
4498 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4499 bool ZeroMask = Name[11] == 'z';
4500 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4501 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4502 Intrinsic::ID IID;
4503 if (VecWidth == 128 && !IsSaturating)
4504 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4505 else if (VecWidth == 256 && !IsSaturating)
4506 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4507 else if (VecWidth == 512 && !IsSaturating)
4508 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4509 else if (VecWidth == 128 && IsSaturating)
4510 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4511 else if (VecWidth == 256 && IsSaturating)
4512 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4513 else if (VecWidth == 512 && IsSaturating)
4514 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4515 else
4516 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4517
4518 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4519 CI->getArgOperand(2)};
4520
4521 // Input arguments types were incorrectly set to vectors of i32 before but
4522 // they should be vectors of i8. Insert bit cast when encountering the old
4523 // types
4524 if (Args[1]->getType()->isVectorTy() &&
4525 cast<VectorType>(Args[1]->getType())
4526 ->getElementType()
4527 ->isIntegerTy(32) &&
4528 Args[2]->getType()->isVectorTy() &&
4529 cast<VectorType>(Args[2]->getType())
4530 ->getElementType()
4531 ->isIntegerTy(32)) {
4532 Type *NewArgType = nullptr;
4533 if (VecWidth == 128)
4534 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4535 else if (VecWidth == 256)
4536 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4537 else if (VecWidth == 512)
4538 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4539 else
4540 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4541 CI);
4542
4543 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4544 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4545 }
4546
4547 Rep = Builder.CreateIntrinsic(IID, Args);
4548 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4549 : CI->getArgOperand(0);
4550 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4551 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4552 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4553 Name.starts_with("avx512.mask.vpdpwssds.") ||
4554 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4555 bool ZeroMask = Name[11] == 'z';
4556 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4557 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4558 Intrinsic::ID IID;
4559 if (VecWidth == 128 && !IsSaturating)
4560 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4561 else if (VecWidth == 256 && !IsSaturating)
4562 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4563 else if (VecWidth == 512 && !IsSaturating)
4564 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4565 else if (VecWidth == 128 && IsSaturating)
4566 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4567 else if (VecWidth == 256 && IsSaturating)
4568 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4569 else if (VecWidth == 512 && IsSaturating)
4570 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4571 else
4572 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4573
4574 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4575 CI->getArgOperand(2)};
4576
4577 // Input arguments types were incorrectly set to vectors of i32 before but
4578 // they should be vectors of i16. Insert bit cast when encountering the old
4579 // types
4580 if (Args[1]->getType()->isVectorTy() &&
4581 cast<VectorType>(Args[1]->getType())
4582 ->getElementType()
4583 ->isIntegerTy(32) &&
4584 Args[2]->getType()->isVectorTy() &&
4585 cast<VectorType>(Args[2]->getType())
4586 ->getElementType()
4587 ->isIntegerTy(32)) {
4588 Type *NewArgType = nullptr;
4589 if (VecWidth == 128)
4590 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4591 else if (VecWidth == 256)
4592 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4593 else if (VecWidth == 512)
4594 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4595 else
4596 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4597 CI);
4598
4599 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4600 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4601 }
4602
4603 Rep = Builder.CreateIntrinsic(IID, Args);
4604 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4605 : CI->getArgOperand(0);
4606 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4607 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4608 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4609 Name == "subborrow.u32" || Name == "subborrow.u64") {
4610 Intrinsic::ID IID;
4611 if (Name[0] == 'a' && Name.back() == '2')
4612 IID = Intrinsic::x86_addcarry_32;
4613 else if (Name[0] == 'a' && Name.back() == '4')
4614 IID = Intrinsic::x86_addcarry_64;
4615 else if (Name[0] == 's' && Name.back() == '2')
4616 IID = Intrinsic::x86_subborrow_32;
4617 else if (Name[0] == 's' && Name.back() == '4')
4618 IID = Intrinsic::x86_subborrow_64;
4619 else
4620 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4621
4622 // Make a call with 3 operands.
4623 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4624 CI->getArgOperand(2)};
4625 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4626
4627 // Extract the second result and store it.
4628 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4629 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4630 // Replace the original call result with the first result of the new call.
4631 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4632
4633 CI->replaceAllUsesWith(CF);
4634 Rep = nullptr;
4635 } else if (Name.starts_with("avx512.mask.") &&
4636 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4637 // Rep will be updated by the call in the condition.
4638 } else if (Name.starts_with("bmi.pdep.")) {
4639 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::pdep);
4640 } else if (Name.starts_with("bmi.pext.")) {
4641 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::pext);
4642 } else
4643 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4644
4645 return Rep;
4646}
4647
4649 Function *F, IRBuilder<> &Builder) {
4650 if (Name.starts_with("neon.bfcvt")) {
4651 if (Name.starts_with("neon.bfcvtn2")) {
4652 SmallVector<int, 32> LoMask(4);
4653 std::iota(LoMask.begin(), LoMask.end(), 0);
4654 SmallVector<int, 32> ConcatMask(8);
4655 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4656 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4657 Value *Trunc =
4658 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4659 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4660 } else if (Name.starts_with("neon.bfcvtn")) {
4661 SmallVector<int, 32> ConcatMask(8);
4662 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4663 Type *V4BF16 =
4664 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4665 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4666 dbgs() << "Trunc: " << *Trunc << "\n";
4667 return Builder.CreateShuffleVector(
4668 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4669 } else {
4670 return Builder.CreateFPTrunc(CI->getOperand(0),
4671 Type::getBFloatTy(F->getContext()));
4672 }
4673 } else if (Name.starts_with("sve.fcvt")) {
4674 Intrinsic::ID NewID =
4676 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4677 .Case("sve.fcvtnt.bf16f32",
4678 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4680 if (NewID == Intrinsic::not_intrinsic)
4681 llvm_unreachable("Unhandled Intrinsic!");
4682
4683 SmallVector<Value *, 3> Args(CI->args());
4684
4685 // The original intrinsics incorrectly used a predicate based on the
4686 // smallest element type rather than the largest.
4687 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4688 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4689
4690 if (Args[1]->getType() != BadPredTy)
4691 llvm_unreachable("Unexpected predicate type!");
4692
4693 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4694 BadPredTy, Args[1]);
4695 Args[1] = Builder.CreateIntrinsic(
4696 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4697
4698 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4699 CI->getName());
4700 }
4701
4702 if (Name == "neon.vcvtfp2hf")
4703 return Builder.CreateBitCast(
4704 Builder.CreateFPTrunc(
4705 CI->getOperand(0),
4706 FixedVectorType::get(Type::getHalfTy(F->getContext()), 4)),
4707 FixedVectorType::get(Type::getInt16Ty(F->getContext()), 4));
4708 if (Name == "neon.vcvthf2fp")
4709 return Builder.CreateFPExt(
4710 Builder.CreateBitCast(
4711 CI->getOperand(0),
4712 FixedVectorType::get(Type::getHalfTy(F->getContext()), 4)),
4713 FixedVectorType::get(Type::getFloatTy(F->getContext()), 4));
4714
4715 llvm_unreachable("Unhandled Intrinsic!");
4716}
4717
4719 IRBuilder<> &Builder) {
4720 if (Name == "mve.vctp64.old") {
4721 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4722 // correct type.
4723 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4724 CI->getArgOperand(0),
4725 /*FMFSource=*/nullptr, CI->getName());
4726 Value *C1 = Builder.CreateIntrinsic(
4727 Intrinsic::arm_mve_pred_v2i,
4728 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4729 return Builder.CreateIntrinsic(
4730 Intrinsic::arm_mve_pred_i2v,
4731 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4732 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4733 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4734 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4735 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4736 Name ==
4737 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4738 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4739 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4740 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4741 Name ==
4742 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4743 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4744 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4745 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4746 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4747 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4748 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4749 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4750 std::vector<Type *> Tys;
4751 unsigned ID = CI->getIntrinsicID();
4752 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4753 switch (ID) {
4754 case Intrinsic::arm_mve_mull_int_predicated:
4755 case Intrinsic::arm_mve_vqdmull_predicated:
4756 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4757 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4758 break;
4759 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4760 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4761 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4762 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4763 V2I1Ty};
4764 break;
4765 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4766 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4767 CI->getOperand(1)->getType(), V2I1Ty};
4768 break;
4769 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4770 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4771 CI->getOperand(2)->getType(), V2I1Ty};
4772 break;
4773 case Intrinsic::arm_cde_vcx1q_predicated:
4774 case Intrinsic::arm_cde_vcx1qa_predicated:
4775 case Intrinsic::arm_cde_vcx2q_predicated:
4776 case Intrinsic::arm_cde_vcx2qa_predicated:
4777 case Intrinsic::arm_cde_vcx3q_predicated:
4778 case Intrinsic::arm_cde_vcx3qa_predicated:
4779 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4780 break;
4781 default:
4782 llvm_unreachable("Unhandled Intrinsic!");
4783 }
4784
4785 std::vector<Value *> Ops;
4786 for (Value *Op : CI->args()) {
4787 Type *Ty = Op->getType();
4788 if (Ty->getScalarSizeInBits() == 1) {
4789 Value *C1 = Builder.CreateIntrinsic(
4790 Intrinsic::arm_mve_pred_v2i,
4791 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4792 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4793 }
4794 Ops.push_back(Op);
4795 }
4796
4797 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4798 CI->getName());
4799 }
4800 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4801}
4802
4803// These are expected to have the arguments:
4804// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4805//
4806// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4807//
4809 Function *F, IRBuilder<> &Builder) {
4810 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4811 // for compatibility.
4812 auto UpgradeLegacyWMMAIUIntrinsicCall =
4813 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4814 ArrayRef<Type *> OverloadTys) -> Value * {
4815 // Prepare arguments, append clamp=0 for compatibility
4816 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4817 Args.push_back(Builder.getFalse());
4818
4819 // Insert the declaration for the right overload types
4821 F->getParent(), F->getIntrinsicID(), OverloadTys);
4822
4823 // Copy operand bundles if any
4825 CI->getOperandBundlesAsDefs(Bundles);
4826
4827 // Create the new call and copy calling properties
4828 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4829 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4830 NewCall->setCallingConv(CI->getCallingConv());
4831 NewCall->setAttributes(CI->getAttributes());
4832 NewCall->setDebugLoc(CI->getDebugLoc());
4833 NewCall->copyMetadata(*CI);
4834 return NewCall;
4835 };
4836
4837 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4838 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4839 "intrinsic should have 7 arguments");
4840 Type *T1 = CI->getArgOperand(4)->getType();
4841 Type *T2 = CI->getArgOperand(1)->getType();
4842 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4843 }
4844 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4845 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4846 "intrinsic should have 8 arguments");
4847 Type *T1 = CI->getArgOperand(4)->getType();
4848 Type *T2 = CI->getArgOperand(1)->getType();
4849 Type *T3 = CI->getArgOperand(3)->getType();
4850 Type *T4 = CI->getArgOperand(5)->getType();
4851 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4852 }
4853
4854 switch (F->getIntrinsicID()) {
4855 default:
4856 break;
4857 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4858 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4859 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4860 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4861 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4862 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4863 // Drop src0 and src1 modifiers.
4864 const Value *Op0 = CI->getArgOperand(0);
4865 const Value *Op2 = CI->getArgOperand(2);
4866 assert(Op0->getType()->isIntegerTy() && Op2->getType()->isIntegerTy());
4867 const ConstantInt *ModA = dyn_cast<ConstantInt>(Op0);
4868 const ConstantInt *ModB = dyn_cast<ConstantInt>(Op2);
4869 if (!ModA->isZero() || !ModB->isZero())
4870 reportFatalUsageError(Name + " matrix A and B modifiers shall be zero");
4871
4873 for (int I = 4, E = CI->arg_size(); I < E; ++I)
4874 Args.push_back(CI->getArgOperand(I));
4875
4876 SmallVector<Type *, 3> Overloads{F->getReturnType(), Args[0]->getType()};
4877 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4878 Overloads.push_back(Args[3]->getType());
4880 F->getParent(), F->getIntrinsicID(), Overloads);
4881
4883 CI->getOperandBundlesAsDefs(Bundles);
4884
4885 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4886 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4887 NewCall->setCallingConv(CI->getCallingConv());
4888 NewCall->setAttributes(CI->getAttributes());
4889 NewCall->setDebugLoc(CI->getDebugLoc());
4890 NewCall->copyMetadata(*CI);
4891 NewCall->takeName(CI);
4892 return NewCall;
4893 }
4894 }
4895
4896 AtomicRMWInst::BinOp RMWOp =
4898 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4899 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4900 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4901 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4902 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4903 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4904 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4905 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4906 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4907 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4908 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4909 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4910 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4911
4912 unsigned NumOperands = CI->getNumOperands();
4913 if (NumOperands < 3) // Malformed bitcode.
4914 return nullptr;
4915
4916 Value *Ptr = CI->getArgOperand(0);
4917 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4918 if (!PtrTy) // Malformed.
4919 return nullptr;
4920
4921 Value *Val = CI->getArgOperand(1);
4922 if (Val->getType() != CI->getType()) // Malformed.
4923 return nullptr;
4924
4925 ConstantInt *OrderArg = nullptr;
4926 bool IsVolatile = false;
4927
4928 // These should have 5 arguments (plus the callee). A separate version of the
4929 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4930 if (NumOperands > 3)
4931 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4932
4933 // Ignore scope argument at 3
4934
4935 if (NumOperands > 5) {
4936 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4937 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4938 }
4939
4941 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4942 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4945
4946 LLVMContext &Ctx = F->getContext();
4947
4948 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4949 Type *RetTy = CI->getType();
4950 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4951 if (VT->getElementType()->isIntegerTy(16)) {
4952 VectorType *AsBF16 =
4953 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4954 Val = Builder.CreateBitCast(Val, AsBF16);
4955 }
4956 }
4957
4958 // The scope argument never really worked correctly. Use agent as the most
4959 // conservative option which should still always produce the instruction.
4960 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4961 AtomicRMWInst *RMW =
4962 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4963
4964 unsigned AddrSpace = PtrTy->getAddressSpace();
4965 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4966 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4967 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4968 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4969 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4970 }
4971
4972 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4973 MDBuilder MDB(F->getContext());
4974 MDNode *RangeNotPrivate =
4977 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4978 }
4979
4980 if (IsVolatile)
4981 RMW->setVolatile(true);
4982
4983 return Builder.CreateBitCast(RMW, RetTy);
4984}
4985
4986/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4987/// plain MDNode, as it's the verifier's job to check these are the correct
4988/// types later.
4989static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4990 if (Op < CI->arg_size()) {
4991 if (MetadataAsValue *MAV =
4993 Metadata *MD = MAV->getMetadata();
4994 return dyn_cast_if_present<MDNode>(MD);
4995 }
4996 }
4997 return nullptr;
4998}
4999
5000/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
5001static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
5002 if (Op < CI->arg_size())
5004 return MAV->getMetadata();
5005 return nullptr;
5006}
5007
5008/// Convert debug intrinsic calls to non-instruction debug records.
5009/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
5010/// \p CI - The debug intrinsic call.
5012 DbgRecord *DR = nullptr;
5013 if (Name == "label") {
5015 } else if (Name == "assign") {
5018 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
5019 unwrapMAVMetadataOp(CI, 4),
5020 /*The address is a Value ref, it will be stored as a Metadata */
5021 unwrapMAVOp(CI, 5));
5022 } else if (Name == "declare") {
5025 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr);
5026 } else if (Name == "addr") {
5027 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
5028 MDNode *ExprNode = unwrapMAVOp(CI, 2);
5029 // Don't try to add something to the expression if it's not an expression.
5030 // Instead, allow the verifier to fail later.
5031 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
5032 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
5033 }
5036 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr);
5037 } else if (Name == "value") {
5038 // An old version of dbg.value had an extra offset argument.
5039 unsigned VarOp = 1;
5040 unsigned ExprOp = 2;
5041 if (CI->arg_size() == 4) {
5043 // Nonzero offset dbg.values get dropped without a replacement.
5044 if (!Offset || !Offset->isNullValue())
5045 return;
5046 VarOp = 2;
5047 ExprOp = 3;
5048 }
5051 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
5052 nullptr);
5053 }
5054 DR->setDebugLoc(CI->getDebugLoc());
5055 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
5056 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
5057}
5058
5061 if (!Offset)
5062 reportFatalUsageError("Invalid llvm.vector.splice offset argument");
5063 int64_t OffsetVal = Offset->getSExtValue();
5064 return Builder.CreateIntrinsic(OffsetVal >= 0
5065 ? Intrinsic::vector_splice_left
5066 : Intrinsic::vector_splice_right,
5067 CI->getType(),
5068 {CI->getArgOperand(0), CI->getArgOperand(1),
5069 Builder.getInt32(std::abs(OffsetVal))});
5070}
5071
5073 Function *F, IRBuilder<> &Builder) {
5074 if (Name.starts_with("to.fp16")) {
5075 Value *Cast =
5076 Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
5077 return Builder.CreateBitCast(Cast, CI->getType());
5078 }
5079
5080 if (Name.starts_with("from.fp16")) {
5081 Value *Cast =
5082 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
5083 return Builder.CreateFPExt(Cast, CI->getType());
5084 }
5085
5086 return nullptr;
5087}
5088
5089/// Upgrade a call to an old intrinsic. All argument and return casting must be
5090/// provided to seamlessly integrate with existing context.
5092 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
5093 // checks the callee's function type matches. It's likely we need to handle
5094 // type changes here.
5096 if (!F)
5097 return;
5098
5099 LLVMContext &C = CI->getContext();
5100 IRBuilder<> Builder(C);
5101 if (isa<FPMathOperator>(CI))
5102 Builder.setFastMathFlags(CI->getFastMathFlags());
5103 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
5104
5105 if (!NewFn) {
5106 // Get the Function's name.
5107 StringRef Name = F->getName();
5108 if (!Name.consume_front("llvm."))
5109 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
5110
5111 bool IsX86 = Name.consume_front("x86.");
5112 bool IsNVVM = Name.consume_front("nvvm.");
5113 bool IsAArch64 = Name.consume_front("aarch64.");
5114 bool IsARM = Name.consume_front("arm.");
5115 bool IsAMDGCN = Name.consume_front("amdgcn.");
5116 bool IsDbg = Name.consume_front("dbg.");
5117 bool IsOldSplice =
5118 (Name.consume_front("experimental.vector.splice") ||
5119 Name.consume_front("vector.splice")) &&
5120 !(Name.starts_with(".left") || Name.starts_with(".right"));
5121 Value *Rep = nullptr;
5122
5123 if (!IsX86 && Name == "stackprotectorcheck") {
5124 Rep = nullptr;
5125 } else if (IsNVVM) {
5126 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
5127 } else if (IsX86) {
5128 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
5129 } else if (IsAArch64) {
5130 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
5131 } else if (IsARM) {
5132 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
5133 } else if (IsAMDGCN) {
5134 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
5135 } else if (IsDbg) {
5137 } else if (IsOldSplice) {
5138 Rep = upgradeVectorSplice(CI, Builder);
5139 } else if (Name.consume_front("convert.")) {
5140 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
5141 } else if (Name == "lifetime.start.i64" || Name == "lifetime.end.i64") {
5142 // Delete calls to invalid @llvm.lifetime.{start,end}.i64 intrinsics.
5143 Rep = nullptr;
5144 } else {
5145 llvm_unreachable("Unknown function for CallBase upgrade.");
5146 }
5147
5148 if (Rep)
5149 CI->replaceAllUsesWith(Rep);
5150 CI->eraseFromParent();
5151 return;
5152 }
5153
5154 const auto &DefaultCase = [&]() -> void {
5155 if (F == NewFn)
5156 return;
5157
5158 if (CI->getFunctionType() == NewFn->getFunctionType()) {
5159 // Handle generic mangling change.
5160 assert(
5161 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
5162 "Unknown function for CallBase upgrade and isn't just a name change");
5163 CI->setCalledFunction(NewFn);
5164 return;
5165 }
5166
5167 // This must be an upgrade from a named to a literal struct.
5168 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
5169 assert(OldST != NewFn->getReturnType() &&
5170 "Return type must have changed");
5171 assert(OldST->getNumElements() ==
5172 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
5173 "Must have same number of elements");
5174
5175 SmallVector<Value *> Args(CI->args());
5176 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5177 NewCI->setAttributes(CI->getAttributes());
5178 Value *Res = PoisonValue::get(OldST);
5179 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5180 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5181 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5182 }
5183 CI->replaceAllUsesWith(Res);
5184 CI->eraseFromParent();
5185 return;
5186 }
5187
5188 // We're probably about to produce something invalid. Let the verifier catch
5189 // it instead of dying here.
5190 CI->setCalledOperand(
5192 return;
5193 };
5194 CallInst *NewCall = nullptr;
5195 switch (NewFn->getIntrinsicID()) {
5196 default: {
5197 DefaultCase();
5198 return;
5199 }
5200 case Intrinsic::arm_neon_vst1:
5201 case Intrinsic::arm_neon_vst2:
5202 case Intrinsic::arm_neon_vst3:
5203 case Intrinsic::arm_neon_vst4:
5204 case Intrinsic::arm_neon_vst2lane:
5205 case Intrinsic::arm_neon_vst3lane:
5206 case Intrinsic::arm_neon_vst4lane: {
5207 SmallVector<Value *, 4> Args(CI->args());
5208 NewCall = Builder.CreateCall(NewFn, Args);
5209 break;
5210 }
5211 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5212 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5213 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5214 LLVMContext &Ctx = F->getParent()->getContext();
5215 SmallVector<Value *, 4> Args(CI->args());
5216 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
5217 cast<ConstantInt>(Args[3])->getZExtValue());
5218 NewCall = Builder.CreateCall(NewFn, Args);
5219 break;
5220 }
5221 case Intrinsic::aarch64_sve_ld3_sret:
5222 case Intrinsic::aarch64_sve_ld4_sret:
5223 case Intrinsic::aarch64_sve_ld2_sret: {
5224 // Is this a trivial remangle of the name to support ptr address spaces?
5225 if (isa<StructType>(F->getReturnType())) {
5226 DefaultCase();
5227 return;
5228 }
5229
5230 StringRef Name = F->getName();
5231 Name = Name.substr(5);
5232 unsigned N = StringSwitch<unsigned>(Name)
5233 .StartsWith("aarch64.sve.ld2", 2)
5234 .StartsWith("aarch64.sve.ld3", 3)
5235 .StartsWith("aarch64.sve.ld4", 4)
5236 .Default(0);
5237 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5238 unsigned MinElts = RetTy->getMinNumElements() / N;
5239 SmallVector<Value *, 2> Args(CI->args());
5240 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5241 Value *Ret = llvm::PoisonValue::get(RetTy);
5242 for (unsigned I = 0; I < N; I++) {
5243 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5244 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5245 }
5246 NewCall = dyn_cast<CallInst>(Ret);
5247 break;
5248 }
5249
5250 case Intrinsic::coro_end: {
5251 SmallVector<Value *, 3> Args(CI->args());
5252 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5253 NewCall = Builder.CreateCall(NewFn, Args);
5254 break;
5255 }
5256
5257 case Intrinsic::vector_extract: {
5258 StringRef Name = F->getName();
5259 Name = Name.substr(5); // Strip llvm
5260 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5261 DefaultCase();
5262 return;
5263 }
5264 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5265 unsigned MinElts = RetTy->getMinNumElements();
5266 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5267 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5268 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5269 break;
5270 }
5271
5272 case Intrinsic::vector_insert: {
5273 StringRef Name = F->getName();
5274 Name = Name.substr(5);
5275 if (!Name.starts_with("aarch64.sve.tuple")) {
5276 DefaultCase();
5277 return;
5278 }
5279 if (Name.starts_with("aarch64.sve.tuple.set")) {
5280 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5281 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5282 Value *NewIdx =
5283 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5284 NewCall = Builder.CreateCall(
5285 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5286 break;
5287 }
5288 if (Name.starts_with("aarch64.sve.tuple.create")) {
5289 unsigned N = StringSwitch<unsigned>(Name)
5290 .StartsWith("aarch64.sve.tuple.create2", 2)
5291 .StartsWith("aarch64.sve.tuple.create3", 3)
5292 .StartsWith("aarch64.sve.tuple.create4", 4)
5293 .Default(0);
5294 assert(N > 1 && "Create is expected to be between 2-4");
5295 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5296 Value *Ret = llvm::PoisonValue::get(RetTy);
5297 unsigned MinElts = RetTy->getMinNumElements() / N;
5298 for (unsigned I = 0; I < N; I++) {
5299 Value *V = CI->getArgOperand(I);
5300 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5301 }
5302 NewCall = dyn_cast<CallInst>(Ret);
5303 }
5304 break;
5305 }
5306
5307 case Intrinsic::arm_neon_bfdot:
5308 case Intrinsic::arm_neon_bfmmla:
5309 case Intrinsic::arm_neon_bfmlalb:
5310 case Intrinsic::arm_neon_bfmlalt:
5311 case Intrinsic::aarch64_neon_bfdot:
5312 case Intrinsic::aarch64_neon_bfmmla:
5313 case Intrinsic::aarch64_neon_bfmlalb:
5314 case Intrinsic::aarch64_neon_bfmlalt: {
5316 assert(CI->arg_size() == 3 &&
5317 "Mismatch between function args and call args");
5318 size_t OperandWidth =
5320 assert((OperandWidth == 64 || OperandWidth == 128) &&
5321 "Unexpected operand width");
5322 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5323 auto Iter = CI->args().begin();
5324 Args.push_back(*Iter++);
5325 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5326 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5327 NewCall = Builder.CreateCall(NewFn, Args);
5328 break;
5329 }
5330
5331 case Intrinsic::bitreverse:
5332 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5333 break;
5334
5335 case Intrinsic::ctlz:
5336 case Intrinsic::cttz: {
5337 if (CI->arg_size() != 1) {
5338 DefaultCase();
5339 return;
5340 }
5341
5342 NewCall =
5343 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5344 break;
5345 }
5346
5347 case Intrinsic::objectsize: {
5348 Value *NullIsUnknownSize =
5349 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5350 Value *Dynamic =
5351 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5352 NewCall = Builder.CreateCall(
5353 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5354 break;
5355 }
5356
5357 case Intrinsic::ctpop:
5358 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5359 break;
5360 case Intrinsic::dbg_value: {
5361 StringRef Name = F->getName();
5362 Name = Name.substr(5); // Strip llvm.
5363 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5364 if (Name.starts_with("dbg.addr")) {
5366 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5367 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5368 NewCall =
5369 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5370 MetadataAsValue::get(C, Expr)});
5371 break;
5372 }
5373
5374 // Upgrade from the old version that had an extra offset argument.
5375 assert(CI->arg_size() == 4);
5376 // Drop nonzero offsets instead of attempting to upgrade them.
5378 if (Offset->isNullValue()) {
5379 NewCall = Builder.CreateCall(
5380 NewFn,
5381 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5382 break;
5383 }
5384 CI->eraseFromParent();
5385 return;
5386 }
5387
5388 case Intrinsic::ptr_annotation:
5389 // Upgrade from versions that lacked the annotation attribute argument.
5390 if (CI->arg_size() != 4) {
5391 DefaultCase();
5392 return;
5393 }
5394
5395 // Create a new call with an added null annotation attribute argument.
5396 NewCall = Builder.CreateCall(
5397 NewFn,
5398 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5399 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5400 NewCall->takeName(CI);
5401 CI->replaceAllUsesWith(NewCall);
5402 CI->eraseFromParent();
5403 return;
5404
5405 case Intrinsic::var_annotation:
5406 // Upgrade from versions that lacked the annotation attribute argument.
5407 if (CI->arg_size() != 4) {
5408 DefaultCase();
5409 return;
5410 }
5411 // Create a new call with an added null annotation attribute argument.
5412 NewCall = Builder.CreateCall(
5413 NewFn,
5414 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5415 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5416 NewCall->takeName(CI);
5417 CI->replaceAllUsesWith(NewCall);
5418 CI->eraseFromParent();
5419 return;
5420
5421 case Intrinsic::riscv_aes32dsi:
5422 case Intrinsic::riscv_aes32dsmi:
5423 case Intrinsic::riscv_aes32esi:
5424 case Intrinsic::riscv_aes32esmi:
5425 case Intrinsic::riscv_sm4ks:
5426 case Intrinsic::riscv_sm4ed: {
5427 // The last argument to these intrinsics used to be i8 and changed to i32.
5428 // The type overload for sm4ks and sm4ed was removed.
5429 Value *Arg2 = CI->getArgOperand(2);
5430 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5431 return;
5432
5433 Value *Arg0 = CI->getArgOperand(0);
5434 Value *Arg1 = CI->getArgOperand(1);
5435 if (CI->getType()->isIntegerTy(64)) {
5436 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5437 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5438 }
5439
5440 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5441 cast<ConstantInt>(Arg2)->getZExtValue());
5442
5443 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5444 Value *Res = NewCall;
5445 if (Res->getType() != CI->getType())
5446 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5447 NewCall->takeName(CI);
5448 CI->replaceAllUsesWith(Res);
5449 CI->eraseFromParent();
5450 return;
5451 }
5452 case Intrinsic::nvvm_mapa_shared_cluster: {
5453 // Create a new call with the correct address space.
5454 NewCall =
5455 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5456 Value *Res = NewCall;
5457 Res = Builder.CreateAddrSpaceCast(
5458 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5459 NewCall->takeName(CI);
5460 CI->replaceAllUsesWith(Res);
5461 CI->eraseFromParent();
5462 return;
5463 }
5464 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5465 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5466 // Create a new call with the correct address space.
5467 SmallVector<Value *, 4> Args(CI->args());
5468 Args[0] = Builder.CreateAddrSpaceCast(
5469 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5470
5471 NewCall = Builder.CreateCall(NewFn, Args);
5472 NewCall->takeName(CI);
5473 CI->replaceAllUsesWith(NewCall);
5474 CI->eraseFromParent();
5475 return;
5476 }
5477 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5478 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5479 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5480 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5481 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5482 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5483 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5484 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5485 SmallVector<Value *, 16> Args(CI->args());
5486
5487 // Create AddrSpaceCast to shared_cluster if needed.
5488 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5489 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5491 Args[0] = Builder.CreateAddrSpaceCast(
5492 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5493
5494 // Attach the flag argument for cta_group, with a
5495 // default value of 0. This handles case (2) in
5496 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5497 size_t NumArgs = CI->arg_size();
5498 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5499 if (!FlagArg->getType()->isIntegerTy(1))
5500 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5501
5502 NewCall = Builder.CreateCall(NewFn, Args);
5503 NewCall->takeName(CI);
5504 CI->replaceAllUsesWith(NewCall);
5505 CI->eraseFromParent();
5506 return;
5507 }
5508 case Intrinsic::riscv_sha256sig0:
5509 case Intrinsic::riscv_sha256sig1:
5510 case Intrinsic::riscv_sha256sum0:
5511 case Intrinsic::riscv_sha256sum1:
5512 case Intrinsic::riscv_sm3p0:
5513 case Intrinsic::riscv_sm3p1: {
5514 // The i64 overload of these intrinsics was removed: truncate the operand to
5515 // i32, call the i32 intrinsic, and sign-extend the result back to i64.
5516 if (!CI->getType()->isIntegerTy(64))
5517 return;
5518
5519 Value *Arg =
5520 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5521
5522 NewCall = Builder.CreateCall(NewFn, Arg);
5523 Value *Res =
5524 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5525 NewCall->takeName(CI);
5526 CI->replaceAllUsesWith(Res);
5527 CI->eraseFromParent();
5528 return;
5529 }
5530
5531 case Intrinsic::x86_xop_vfrcz_ss:
5532 case Intrinsic::x86_xop_vfrcz_sd:
5533 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5534 break;
5535
5536 case Intrinsic::x86_xop_vpermil2pd:
5537 case Intrinsic::x86_xop_vpermil2ps:
5538 case Intrinsic::x86_xop_vpermil2pd_256:
5539 case Intrinsic::x86_xop_vpermil2ps_256: {
5540 SmallVector<Value *, 4> Args(CI->args());
5541 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5542 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5543 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5544 NewCall = Builder.CreateCall(NewFn, Args);
5545 break;
5546 }
5547
5548 case Intrinsic::x86_sse41_ptestc:
5549 case Intrinsic::x86_sse41_ptestz:
5550 case Intrinsic::x86_sse41_ptestnzc: {
5551 // The arguments for these intrinsics used to be v4f32, and changed
5552 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5553 // So, the only thing required is a bitcast for both arguments.
5554 // First, check the arguments have the old type.
5555 Value *Arg0 = CI->getArgOperand(0);
5556 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5557 return;
5558
5559 // Old intrinsic, add bitcasts
5560 Value *Arg1 = CI->getArgOperand(1);
5561
5562 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5563
5564 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5565 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5566
5567 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5568 break;
5569 }
5570
5571 case Intrinsic::x86_rdtscp: {
5572 // This used to take 1 argument. If we have no arguments, it is already
5573 // upgraded.
5574 if (CI->getNumOperands() == 0)
5575 return;
5576
5577 NewCall = Builder.CreateCall(NewFn);
5578 // Extract the second result and store it.
5579 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5580 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5581 // Replace the original call result with the first result of the new call.
5582 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5583
5584 NewCall->takeName(CI);
5585 CI->replaceAllUsesWith(TSC);
5586 CI->eraseFromParent();
5587 return;
5588 }
5589
5590 case Intrinsic::x86_sse41_insertps:
5591 case Intrinsic::x86_sse41_dppd:
5592 case Intrinsic::x86_sse41_dpps:
5593 case Intrinsic::x86_sse41_mpsadbw:
5594 case Intrinsic::x86_avx_dp_ps_256:
5595 case Intrinsic::x86_avx2_mpsadbw: {
5596 // Need to truncate the last argument from i32 to i8 -- this argument models
5597 // an inherently 8-bit immediate operand to these x86 instructions.
5598 SmallVector<Value *, 4> Args(CI->args());
5599
5600 // Replace the last argument with a trunc.
5601 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5602 NewCall = Builder.CreateCall(NewFn, Args);
5603 break;
5604 }
5605
5606 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5607 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5608 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5609 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5610 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5611 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5612 SmallVector<Value *, 4> Args(CI->args());
5613 unsigned NumElts =
5614 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5615 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5616
5617 NewCall = Builder.CreateCall(NewFn, Args);
5618 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5619
5620 NewCall->takeName(CI);
5621 CI->replaceAllUsesWith(Res);
5622 CI->eraseFromParent();
5623 return;
5624 }
5625
5626 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5627 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5628 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5629 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5630 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5631 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5632 SmallVector<Value *, 4> Args(CI->args());
5633 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5634 if (NewFn->getIntrinsicID() ==
5635 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5636 Args[1] = Builder.CreateBitCast(
5637 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5638
5639 NewCall = Builder.CreateCall(NewFn, Args);
5640 Value *Res = Builder.CreateBitCast(
5641 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5642
5643 NewCall->takeName(CI);
5644 CI->replaceAllUsesWith(Res);
5645 CI->eraseFromParent();
5646 return;
5647 }
5648 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5649 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5650 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5651 SmallVector<Value *, 4> Args(CI->args());
5652 unsigned NumElts =
5653 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5654 Args[1] = Builder.CreateBitCast(
5655 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5656 Args[2] = Builder.CreateBitCast(
5657 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5658
5659 NewCall = Builder.CreateCall(NewFn, Args);
5660 break;
5661 }
5662
5663 case Intrinsic::thread_pointer: {
5664 NewCall = Builder.CreateCall(NewFn, {});
5665 break;
5666 }
5667
5668 case Intrinsic::memcpy:
5669 case Intrinsic::memmove:
5670 case Intrinsic::memset: {
5671 // We have to make sure that the call signature is what we're expecting.
5672 // We only want to change the old signatures by removing the alignment arg:
5673 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5674 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5675 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5676 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5677 // Note: i8*'s in the above can be any pointer type
5678 if (CI->arg_size() != 5) {
5679 DefaultCase();
5680 return;
5681 }
5682 // Remove alignment argument (3), and add alignment attributes to the
5683 // dest/src pointers.
5684 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5685 CI->getArgOperand(2), CI->getArgOperand(4)};
5686 NewCall = Builder.CreateCall(NewFn, Args);
5687 AttributeList OldAttrs = CI->getAttributes();
5688 AttributeList NewAttrs = AttributeList::get(
5689 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5690 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5691 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5692 NewCall->setAttributes(NewAttrs);
5693 auto *MemCI = cast<MemIntrinsic>(NewCall);
5694 // All mem intrinsics support dest alignment.
5696 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5697 // Memcpy/Memmove also support source alignment.
5698 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5699 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5700 break;
5701 }
5702
5703 case Intrinsic::masked_load:
5704 case Intrinsic::masked_gather:
5705 case Intrinsic::masked_store:
5706 case Intrinsic::masked_scatter: {
5707 if (CI->arg_size() != 4) {
5708 DefaultCase();
5709 return;
5710 }
5711
5712 auto GetMaybeAlign = [](Value *Op) {
5713 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5714 uint64_t Val = CI->getZExtValue();
5715 if (Val == 0)
5716 return MaybeAlign();
5717 if (isPowerOf2_64(Val))
5718 return MaybeAlign(Val);
5719 }
5720 reportFatalUsageError("Invalid alignment argument");
5721 };
5722 auto GetAlign = [&](Value *Op) {
5723 MaybeAlign Align = GetMaybeAlign(Op);
5724 if (Align)
5725 return *Align;
5726 reportFatalUsageError("Invalid zero alignment argument");
5727 };
5728
5729 const DataLayout &DL = CI->getDataLayout();
5730 switch (NewFn->getIntrinsicID()) {
5731 case Intrinsic::masked_load:
5732 NewCall = Builder.CreateMaskedLoad(
5733 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5734 CI->getArgOperand(2), CI->getArgOperand(3));
5735 break;
5736 case Intrinsic::masked_gather:
5737 NewCall = Builder.CreateMaskedGather(
5738 CI->getType(), CI->getArgOperand(0),
5739 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5740 CI->getType()->getScalarType()),
5741 CI->getArgOperand(2), CI->getArgOperand(3));
5742 break;
5743 case Intrinsic::masked_store:
5744 NewCall = Builder.CreateMaskedStore(
5745 CI->getArgOperand(0), CI->getArgOperand(1),
5746 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5747 break;
5748 case Intrinsic::masked_scatter:
5749 NewCall = Builder.CreateMaskedScatter(
5750 CI->getArgOperand(0), CI->getArgOperand(1),
5751 DL.getValueOrABITypeAlignment(
5752 GetMaybeAlign(CI->getArgOperand(2)),
5753 CI->getArgOperand(0)->getType()->getScalarType()),
5754 CI->getArgOperand(3));
5755 break;
5756 default:
5757 llvm_unreachable("Unexpected intrinsic ID");
5758 }
5759 // Previous metadata is still valid.
5760 NewCall->copyMetadata(*CI);
5761 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5762 break;
5763 }
5764
5765 case Intrinsic::lifetime_start:
5766 case Intrinsic::lifetime_end: {
5767 if (CI->arg_size() != 2) {
5768 DefaultCase();
5769 return;
5770 }
5771
5772 Value *Ptr = CI->getArgOperand(1);
5773 // Try to strip pointer casts, such that the lifetime works on an alloca.
5774 Ptr = Ptr->stripPointerCasts();
5775 if (isa<AllocaInst>(Ptr)) {
5776 // Don't use NewFn, as we might have looked through an addrspacecast.
5777 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5778 NewCall = Builder.CreateLifetimeStart(Ptr);
5779 else
5780 NewCall = Builder.CreateLifetimeEnd(Ptr);
5781 break;
5782 }
5783
5784 // Otherwise remove the lifetime marker.
5785 CI->eraseFromParent();
5786 return;
5787 }
5788
5789 case Intrinsic::x86_avx512_vpdpbusd_128:
5790 case Intrinsic::x86_avx512_vpdpbusd_256:
5791 case Intrinsic::x86_avx512_vpdpbusd_512:
5792 case Intrinsic::x86_avx512_vpdpbusds_128:
5793 case Intrinsic::x86_avx512_vpdpbusds_256:
5794 case Intrinsic::x86_avx512_vpdpbusds_512:
5795 case Intrinsic::x86_avx2_vpdpbssd_128:
5796 case Intrinsic::x86_avx2_vpdpbssd_256:
5797 case Intrinsic::x86_avx10_vpdpbssd_512:
5798 case Intrinsic::x86_avx2_vpdpbssds_128:
5799 case Intrinsic::x86_avx2_vpdpbssds_256:
5800 case Intrinsic::x86_avx10_vpdpbssds_512:
5801 case Intrinsic::x86_avx2_vpdpbsud_128:
5802 case Intrinsic::x86_avx2_vpdpbsud_256:
5803 case Intrinsic::x86_avx10_vpdpbsud_512:
5804 case Intrinsic::x86_avx2_vpdpbsuds_128:
5805 case Intrinsic::x86_avx2_vpdpbsuds_256:
5806 case Intrinsic::x86_avx10_vpdpbsuds_512:
5807 case Intrinsic::x86_avx2_vpdpbuud_128:
5808 case Intrinsic::x86_avx2_vpdpbuud_256:
5809 case Intrinsic::x86_avx10_vpdpbuud_512:
5810 case Intrinsic::x86_avx2_vpdpbuuds_128:
5811 case Intrinsic::x86_avx2_vpdpbuuds_256:
5812 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5813 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5814 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5815 CI->getArgOperand(2)};
5816 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5817 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5818 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5819
5820 NewCall = Builder.CreateCall(NewFn, Args);
5821 break;
5822 }
5823 case Intrinsic::x86_avx512_vpdpwssd_128:
5824 case Intrinsic::x86_avx512_vpdpwssd_256:
5825 case Intrinsic::x86_avx512_vpdpwssd_512:
5826 case Intrinsic::x86_avx512_vpdpwssds_128:
5827 case Intrinsic::x86_avx512_vpdpwssds_256:
5828 case Intrinsic::x86_avx512_vpdpwssds_512:
5829 case Intrinsic::x86_avx2_vpdpwsud_128:
5830 case Intrinsic::x86_avx2_vpdpwsud_256:
5831 case Intrinsic::x86_avx10_vpdpwsud_512:
5832 case Intrinsic::x86_avx2_vpdpwsuds_128:
5833 case Intrinsic::x86_avx2_vpdpwsuds_256:
5834 case Intrinsic::x86_avx10_vpdpwsuds_512:
5835 case Intrinsic::x86_avx2_vpdpwusd_128:
5836 case Intrinsic::x86_avx2_vpdpwusd_256:
5837 case Intrinsic::x86_avx10_vpdpwusd_512:
5838 case Intrinsic::x86_avx2_vpdpwusds_128:
5839 case Intrinsic::x86_avx2_vpdpwusds_256:
5840 case Intrinsic::x86_avx10_vpdpwusds_512:
5841 case Intrinsic::x86_avx2_vpdpwuud_128:
5842 case Intrinsic::x86_avx2_vpdpwuud_256:
5843 case Intrinsic::x86_avx10_vpdpwuud_512:
5844 case Intrinsic::x86_avx2_vpdpwuuds_128:
5845 case Intrinsic::x86_avx2_vpdpwuuds_256:
5846 case Intrinsic::x86_avx10_vpdpwuuds_512:
5847 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5848 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5849 CI->getArgOperand(2)};
5850 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5851 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5852 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5853
5854 NewCall = Builder.CreateCall(NewFn, Args);
5855 break;
5856 }
5857 assert(NewCall && "Should have either set this variable or returned through "
5858 "the default case");
5859 NewCall->takeName(CI);
5860 CI->replaceAllUsesWith(NewCall);
5861 CI->eraseFromParent();
5862}
5863
5865 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5866
5867 // Check if this function should be upgraded and get the replacement function
5868 // if there is one.
5869 Function *NewFn;
5870 if (UpgradeIntrinsicFunction(F, NewFn)) {
5871 // Replace all users of the old function with the new function or new
5872 // instructions. Iterate with make_early_inc_range because the call is deleted.
5873 for (User *U : make_early_inc_range(F->users()))
5874 if (CallBase *CB = dyn_cast<CallBase>(U))
5875 UpgradeIntrinsicCall(CB, NewFn);
5876
5877 // Remove old function, no longer used, from the module.
5878 if (F != NewFn)
5879 F->eraseFromParent();
5880 }
5881}
5882
5884 const unsigned NumOperands = MD.getNumOperands();
5885 if (NumOperands == 0)
5886 return &MD; // Invalid, punt to a verifier error.
5887
5888 // Check if the tag uses struct-path aware TBAA format.
5889 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5890 return &MD;
5891
5892 auto &Context = MD.getContext();
5893 if (NumOperands == 3) {
5894 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5895 MDNode *ScalarType = MDNode::get(Context, Elts);
5896 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5897 Metadata *Elts2[] = {ScalarType, ScalarType,
5900 MD.getOperand(2)};
5901 return MDNode::get(Context, Elts2);
5902 }
5903 // Create a MDNode <MD, MD, offset 0>
5905 Type::getInt64Ty(Context)))};
5906 return MDNode::get(Context, Elts);
5907}
5908
5910 Instruction *&Temp) {
5911 if (Opc != Instruction::BitCast)
5912 return nullptr;
5913
5914 Temp = nullptr;
5915 Type *SrcTy = V->getType();
5916 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5917 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5918 LLVMContext &Context = V->getContext();
5919
5920 // We have no information about target data layout, so we assume that
5921 // the maximum pointer size is 64bit.
5922 Type *MidTy = Type::getInt64Ty(Context);
5923 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5924
5925 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5926 }
5927
5928 return nullptr;
5929}
5930
5932 if (Opc != Instruction::BitCast)
5933 return nullptr;
5934
5935 Type *SrcTy = C->getType();
5936 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5937 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5938 LLVMContext &Context = C->getContext();
5939
5940 // We have no information about target data layout, so we assume that
5941 // the maximum pointer size is 64bit.
5942 Type *MidTy = Type::getInt64Ty(Context);
5943
5945 DestTy);
5946 }
5947
5948 return nullptr;
5949}
5950
5951/// Check the debug info version number, if it is out-dated, drop the debug
5952/// info. Return true if module is modified.
5955 return false;
5956
5957 llvm::TimeTraceScope timeScope("Upgrade debug info");
5958 // We need to get metadata before the module is verified (i.e., getModuleFlag
5959 // makes assumptions that we haven't verified yet). Carefully extract the flag
5960 // from the metadata.
5961 unsigned Version = 0;
5962 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5963 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5964 if (Flag->getNumOperands() < 3)
5965 return false;
5966 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5967 return K->getString() == "Debug Info Version";
5968 return false;
5969 });
5970 if (OpIt != ModFlags->op_end()) {
5971 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5972 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5973 Version = CI->getZExtValue();
5974 }
5975 }
5976
5978 bool BrokenDebugInfo = false;
5979 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5980 report_fatal_error("Broken module found, compilation aborted!");
5981 if (!BrokenDebugInfo)
5982 // Everything is ok.
5983 return false;
5984 else {
5985 // Diagnose malformed debug info.
5987 M.getContext().diagnose(Diag);
5988 }
5989 }
5990 bool Modified = StripDebugInfo(M);
5992 // Diagnose a version mismatch.
5994 M.getContext().diagnose(DiagVersion);
5995 }
5996 return Modified;
5997}
5998
5999static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
6000 GlobalValue *GV, const Metadata *V) {
6001 Function *F = cast<Function>(GV);
6002
6003 constexpr StringLiteral DefaultValue = "1";
6004 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
6005 unsigned Length = 0;
6006
6007 if (F->hasFnAttribute(Attr)) {
6008 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
6009 // parse these elements placing them into Vect3
6010 StringRef S = F->getFnAttribute(Attr).getValueAsString();
6011 for (; Length < 3 && !S.empty(); Length++) {
6012 auto [Part, Rest] = S.split(',');
6013 Vect3[Length] = Part.trim();
6014 S = Rest;
6015 }
6016 }
6017
6018 const unsigned Dim = DimC - 'x';
6019 assert(Dim < 3 && "Unexpected dim char");
6020
6021 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
6022
6023 // local variable required for StringRef in Vect3 to point to.
6024 const std::string VStr = llvm::utostr(VInt);
6025 Vect3[Dim] = VStr;
6026 Length = std::max(Length, Dim + 1);
6027
6028 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
6029 F->addFnAttr(Attr, NewAttr);
6030}
6031
6032static inline bool isXYZ(StringRef S) {
6033 return S == "x" || S == "y" || S == "z";
6034}
6035
6037 const Metadata *V) {
6038 if (K == "kernel") {
6040 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
6041 return true;
6042 }
6043 if (K == "align") {
6044 // V is a bitfield specifying two 16-bit values. The alignment value is
6045 // specified in the low 16 bits and the index in the high bits. For the
6046 // index, 0 indicates the return value while higher values correspond to
6047 // each parameter (idx = param + 1).
6048 const uint64_t AlignIdxValuePair =
6049 mdconst::extract<ConstantInt>(V)->getZExtValue();
6050 const unsigned Idx = (AlignIdxValuePair >> 16);
6051 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
6052 cast<Function>(GV)->addAttributeAtIndex(
6053 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
6054 return true;
6055 }
6056 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
6057 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
6059 return true;
6060 }
6061 if (K == "minctasm") {
6062 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
6063 cast<Function>(GV)->addFnAttr(NVVMAttr::MinCTASm, llvm::utostr(CV));
6064 return true;
6065 }
6066 if (K == "maxnreg") {
6067 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
6068 cast<Function>(GV)->addFnAttr(NVVMAttr::MaxNReg, llvm::utostr(CV));
6069 return true;
6070 }
6071 if (K.consume_front("maxntid") && isXYZ(K)) {
6073 return true;
6074 }
6075 if (K.consume_front("reqntid") && isXYZ(K)) {
6077 return true;
6078 }
6079 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
6081 return true;
6082 }
6083 if (K == "grid_constant") {
6084 const auto Attr = Attribute::get(GV->getContext(), NVVMAttr::GridConstant);
6085 for (const auto &Op : cast<MDNode>(V)->operands()) {
6086 // For some reason, the index is 1-based in the metadata. Good thing we're
6087 // able to auto-upgrade it!
6088 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
6089 cast<Function>(GV)->addParamAttr(Index, Attr);
6090 }
6091 return true;
6092 }
6093
6094 return false;
6095}
6096
6098 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
6099 if (!NamedMD)
6100 return;
6101
6102 SmallVector<MDNode *, 8> NewNodes;
6104 for (MDNode *MD : NamedMD->operands()) {
6105 if (!SeenNodes.insert(MD).second)
6106 continue;
6107
6108 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
6109 if (!GV)
6110 continue;
6111
6112 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
6113
6114 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
6115 // Each nvvm.annotations metadata entry will be of the following form:
6116 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
6117 // start index = 1, to skip the global variable key
6118 // increment = 2, to skip the value for each property-value pairs
6119 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6120 MDString *K = cast<MDString>(MD->getOperand(j));
6121 const MDOperand &V = MD->getOperand(j + 1);
6122 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
6123 if (!Upgraded)
6124 NewOperands.append({K, V});
6125 }
6126
6127 if (NewOperands.size() > 1)
6128 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
6129 }
6130
6131 NamedMD->clearOperands();
6132 for (MDNode *N : NewNodes)
6133 NamedMD->addOperand(N);
6134}
6135
6136/// This checks for objc retain release marker which should be upgraded. It
6137/// returns true if module is modified.
6139 bool Changed = false;
6140 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
6141 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6142 if (ModRetainReleaseMarker) {
6143 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
6144 if (Op) {
6145 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
6146 if (ID) {
6147 SmallVector<StringRef, 4> ValueComp;
6148 ID->getString().split(ValueComp, "#");
6149 if (ValueComp.size() == 2) {
6150 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
6151 ID = MDString::get(M.getContext(), NewValue);
6152 }
6153 M.addModuleFlag(Module::Error, MarkerKey, ID);
6154 M.eraseNamedMetadata(ModRetainReleaseMarker);
6155 Changed = true;
6156 }
6157 }
6158 }
6159 return Changed;
6160}
6161
6163 // This lambda converts normal function calls to ARC runtime functions to
6164 // intrinsic calls.
6165 auto UpgradeToIntrinsic = [&](const char *OldFunc,
6166 llvm::Intrinsic::ID IntrinsicFunc) {
6167 Function *Fn = M.getFunction(OldFunc);
6168
6169 if (!Fn)
6170 return;
6171
6172 Function *NewFn =
6173 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
6174
6175 for (User *U : make_early_inc_range(Fn->users())) {
6177 if (!CI || CI->getCalledFunction() != Fn)
6178 continue;
6179
6180 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
6181 FunctionType *NewFuncTy = NewFn->getFunctionType();
6183
6184 // Don't upgrade the intrinsic if it's not valid to bitcast the return
6185 // value to the return type of the old function.
6186 if (NewFuncTy->getReturnType() != CI->getType() &&
6187 !CastInst::castIsValid(Instruction::BitCast, CI,
6188 NewFuncTy->getReturnType()))
6189 continue;
6190
6191 bool InvalidCast = false;
6192
6193 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
6194 Value *Arg = CI->getArgOperand(I);
6195
6196 // Bitcast argument to the parameter type of the new function if it's
6197 // not a variadic argument.
6198 if (I < NewFuncTy->getNumParams()) {
6199 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
6200 // to the parameter type of the new function.
6201 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
6202 NewFuncTy->getParamType(I))) {
6203 InvalidCast = true;
6204 break;
6205 }
6206 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
6207 }
6208 Args.push_back(Arg);
6209 }
6210
6211 if (InvalidCast)
6212 continue;
6213
6214 // Create a call instruction that calls the new function.
6215 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6216 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
6217 NewCall->takeName(CI);
6218
6219 // Bitcast the return value back to the type of the old call.
6220 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
6221
6222 if (!CI->use_empty())
6223 CI->replaceAllUsesWith(NewRetVal);
6224 CI->eraseFromParent();
6225 }
6226
6227 if (Fn->use_empty())
6228 Fn->eraseFromParent();
6229 };
6230
6231 // Unconditionally convert a call to "clang.arc.use" to a call to
6232 // "llvm.objc.clang.arc.use".
6233 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6234
6235 // Upgrade the retain release marker. If there is no need to upgrade
6236 // the marker, that means either the module is already new enough to contain
6237 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
6239 return;
6240
6241 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6242 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6243 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6244 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6245 {"objc_autoreleaseReturnValue",
6246 llvm::Intrinsic::objc_autoreleaseReturnValue},
6247 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6248 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6249 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6250 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6251 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6252 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6253 {"objc_release", llvm::Intrinsic::objc_release},
6254 {"objc_retain", llvm::Intrinsic::objc_retain},
6255 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6256 {"objc_retainAutoreleaseReturnValue",
6257 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6258 {"objc_retainAutoreleasedReturnValue",
6259 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6260 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6261 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6262 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6263 {"objc_unsafeClaimAutoreleasedReturnValue",
6264 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6265 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6266 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6267 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6268 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6269 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6270 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6271 {"objc_arc_annotation_topdown_bbstart",
6272 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6273 {"objc_arc_annotation_topdown_bbend",
6274 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6275 {"objc_arc_annotation_bottomup_bbstart",
6276 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6277 {"objc_arc_annotation_bottomup_bbend",
6278 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6279
6280 for (auto &I : RuntimeFuncs)
6281 UpgradeToIntrinsic(I.first, I.second);
6282}
6283
6285 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6286 if (!ModFlags)
6287 return false;
6288
6289 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6290 bool HasSwiftVersionFlag = false;
6291 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6292 uint32_t SwiftABIVersion;
6293 auto Int8Ty = Type::getInt8Ty(M.getContext());
6294 auto Int32Ty = Type::getInt32Ty(M.getContext());
6295
6296 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6297 MDNode *Op = ModFlags->getOperand(I);
6298 if (Op->getNumOperands() != 3)
6299 continue;
6300 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6301 if (!ID)
6302 continue;
6303 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6304 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6305 Type::getInt32Ty(M.getContext()), B)),
6306 MDString::get(M.getContext(), ID->getString()),
6307 Op->getOperand(2)};
6308 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6309 Changed = true;
6310 };
6311
6312 if (ID->getString() == "Objective-C Image Info Version")
6313 HasObjCFlag = true;
6314 if (ID->getString() == "Objective-C Class Properties")
6315 HasClassProperties = true;
6316 // Upgrade PIC from Error/Max to Min.
6317 if (ID->getString() == "PIC Level") {
6318 if (auto *Behavior =
6320 uint64_t V = Behavior->getLimitedValue();
6321 if (V == Module::Error || V == Module::Max)
6322 SetBehavior(Module::Min);
6323 }
6324 }
6325 // Upgrade "PIE Level" from Error to Max.
6326 if (ID->getString() == "PIE Level")
6327 if (auto *Behavior =
6329 if (Behavior->getLimitedValue() == Module::Error)
6330 SetBehavior(Module::Max);
6331
6332 // Upgrade branch protection and return address signing module flags. The
6333 // module flag behavior for these fields were Error and now they are Min.
6334 if (ID->getString() == "branch-target-enforcement" ||
6335 ID->getString().starts_with("sign-return-address")) {
6336 if (auto *Behavior =
6338 if (Behavior->getLimitedValue() == Module::Error) {
6339 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6340 Metadata *Ops[3] = {
6341 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6342 Op->getOperand(1), Op->getOperand(2)};
6343 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6344 Changed = true;
6345 }
6346 }
6347 }
6348
6349 // Upgrade Objective-C Image Info Section. Remove the whitespace in the
6350 // section name so that llvm-lto will not complain about mismatching
6351 // module flags that are functionally the same.
6352 if (ID->getString() == "Objective-C Image Info Section") {
6353 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6354 SmallVector<StringRef, 4> ValueComp;
6355 Value->getString().split(ValueComp, " ");
6356 if (ValueComp.size() != 1) {
6357 std::string NewValue;
6358 for (auto &S : ValueComp)
6359 NewValue += S.str();
6360 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6361 MDString::get(M.getContext(), NewValue)};
6362 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6363 Changed = true;
6364 }
6365 }
6366 }
6367
6368 // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
6369 // If the higher bits are set, it adds new module flag for swift info.
6370 if (ID->getString() == "Objective-C Garbage Collection") {
6371 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6372 if (Md) {
6373 assert(Md->getValue() && "Expected non-empty metadata");
6374 auto Type = Md->getValue()->getType();
6375 if (Type == Int8Ty)
6376 continue;
6377 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6378 if ((Val & 0xff) != Val) {
6379 HasSwiftVersionFlag = true;
6380 SwiftABIVersion = (Val & 0xff00) >> 8;
6381 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6382 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6383 }
6384 Metadata *Ops[3] = {
6385 ConstantAsMetadata::get(ConstantInt::get(Int32Ty,Module::Error)),
6386 Op->getOperand(1),
6387 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
6388 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6389 Changed = true;
6390 }
6391 }
6392
6393 if (ID->getString() == "amdgpu_code_object_version") {
6394 Metadata *Ops[3] = {
6395 Op->getOperand(0),
6396 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6397 Op->getOperand(2)};
6398 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6399 Changed = true;
6400 }
6401 }
6402
6403 // "Objective-C Class Properties" is recently added for Objective-C. We
6404 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
6405 // flag of value 0, so we can correctly downgrade this flag when trying to
6406 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6407 // this module flag.
6408 if (HasObjCFlag && !HasClassProperties) {
6409 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6410 (uint32_t)0);
6411 Changed = true;
6412 }
6413
6414 if (HasSwiftVersionFlag) {
6415 M.addModuleFlag(Module::Error, "Swift ABI Version",
6416 SwiftABIVersion);
6417 M.addModuleFlag(Module::Error, "Swift Major Version",
6418 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6419 M.addModuleFlag(Module::Error, "Swift Minor Version",
6420 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6421 Changed = true;
6422 }
6423
6424 return Changed;
6425}
6426
6428 NamedMDNode *CFIConsts = M.getNamedMetadata("cfi.functions");
6429 // If this metadata has operands, we expect all of them to be either from
6430 // before or from after the format change handled here, so we can bail out
6431 // fast if the first operand (if any) is of the new format.
6432 auto MatchesVersion = [](const MDNode *Op) {
6433 return Op->getNumOperands() >= 3 &&
6434 isa<ConstantAsMetadata>(Op->getOperand(2)) &&
6435 cast<ConstantAsMetadata>(Op->getOperand(2))
6436 ->getType()
6437 ->isIntegerTy(64);
6438 };
6439
6440 if (!CFIConsts || !CFIConsts->getNumOperands() ||
6441 MatchesVersion(CFIConsts->getOperand(0)))
6442 return false;
6443
6444 bool Changed = false;
6445 for (unsigned I = 0, E = CFIConsts->getNumOperands(); I != E; ++I) {
6446 MDNode *Op = CFIConsts->getOperand(I);
6447 assert(!MatchesVersion(Op) && "Unexpected mix of CFIConstant formats");
6448 assert(Op->getNumOperands() >= 2 &&
6449 "Expected at least 2 operands - name and linkage type");
6450 MDString *NameMD = dyn_cast<MDString>(Op->getOperand(0));
6451 StringRef Name = NameMD->getString();
6454
6456 Elts.push_back(Op->getOperand(0));
6457 Elts.push_back(Op->getOperand(1));
6459 ConstantInt::get(Type::getInt64Ty(M.getContext()), GUID)));
6460
6461 for (unsigned J = 2, EJ = Op->getNumOperands(); J != EJ; ++J)
6462 Elts.push_back(Op->getOperand(J));
6463
6464 CFIConsts->setOperand(I, MDNode::get(M.getContext(), Elts));
6465 Changed = true;
6466 }
6467
6468 return Changed;
6469}
6470
6472 auto TrimSpaces = [](StringRef Section) -> std::string {
6473 SmallVector<StringRef, 5> Components;
6474 Section.split(Components, ',');
6475
6476 SmallString<32> Buffer;
6477 raw_svector_ostream OS(Buffer);
6478
6479 for (auto Component : Components)
6480 OS << ',' << Component.trim();
6481
6482 return std::string(OS.str().substr(1));
6483 };
6484
6485 for (auto &GV : M.globals()) {
6486 if (!GV.hasSection())
6487 continue;
6488
6489 StringRef Section = GV.getSection();
6490
6491 if (!Section.starts_with("__DATA, __objc_catlist"))
6492 continue;
6493
6494 // __DATA, __objc_catlist, regular, no_dead_strip
6495 // __DATA,__objc_catlist,regular,no_dead_strip
6496 GV.setSection(TrimSpaces(Section));
6497 }
6498}
6499
6500namespace {
6501// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6502// callsites within a function that did not also have the strictfp attribute.
6503// Since 10.0, if strict FP semantics are needed within a function, the
6504// function must have the strictfp attribute and all calls within the function
6505// must also have the strictfp attribute. This latter restriction is
6506// necessary to prevent unwanted libcall simplification when a function is
6507// being cloned (such as for inlining).
6508//
6509// The "dangling" strictfp attribute usage was only used to prevent constant
6510// folding and other libcall simplification. The nobuiltin attribute on the
6511// callsite has the same effect.
6512struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6513 StrictFPUpgradeVisitor() = default;
6514
6515 void visitCallBase(CallBase &Call) {
6516 if (!Call.isStrictFP())
6517 return;
6519 return;
6520 // If we get here, the caller doesn't have the strictfp attribute
6521 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6522 Call.removeFnAttr(Attribute::StrictFP);
6523 Call.addFnAttr(Attribute::NoBuiltin);
6524 }
6525};
6526
6527/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6528struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6529 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6530 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6531
6532 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6533 if (!RMW.isFloatingPointOperation())
6534 return;
6535
6536 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6537 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6538 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6539 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6540 }
6541};
6542} // namespace
6543
6545 // If a function definition doesn't have the strictfp attribute,
6546 // convert any callsite strictfp attributes to nobuiltin.
6547 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6548 StrictFPUpgradeVisitor SFPV;
6549 SFPV.visit(F);
6550 }
6551
6552 // Remove all incompatible attributes from function.
6553 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6554 F.getReturnType(), F.getAttributes().getRetAttrs()));
6555 for (auto &Arg : F.args())
6556 Arg.removeAttrs(
6557 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6558
6559 bool AddingAttrs = false, RemovingAttrs = false;
6560 AttrBuilder AttrsToAdd(F.getContext());
6561 AttributeMask AttrsToRemove;
6562
6563 // Older versions of LLVM treated an "implicit-section-name" attribute
6564 // similarly to directly setting the section on a Function.
6565 if (Attribute A = F.getFnAttribute("implicit-section-name");
6566 A.isValid() && A.isStringAttribute()) {
6567 F.setSection(A.getValueAsString());
6568 AttrsToRemove.addAttribute("implicit-section-name");
6569 RemovingAttrs = true;
6570 }
6571
6572 if (Attribute A = F.getFnAttribute("nooutline");
6573 A.isValid() && A.isStringAttribute()) {
6574 AttrsToRemove.addAttribute("nooutline");
6575 AttrsToAdd.addAttribute(Attribute::NoOutline);
6576 AddingAttrs = RemovingAttrs = true;
6577 }
6578
6579 if (Attribute A = F.getFnAttribute("uniform-work-group-size");
6580 A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
6581 AttrsToRemove.addAttribute("uniform-work-group-size");
6582 RemovingAttrs = true;
6583 if (A.getValueAsString() == "true") {
6584 AttrsToAdd.addAttribute("uniform-work-group-size");
6585 AddingAttrs = true;
6586 }
6587 }
6588
6589 if (!F.empty()) {
6590 // For some reason this is called twice, and the first time is before any
6591 // instructions are loaded into the body.
6592
6593 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6594 A.isValid()) {
6595
6596 if (A.getValueAsBool()) {
6597 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6598 Visitor.visit(F);
6599 }
6600
6601 // We will leave behind dead attribute uses on external declarations, but
6602 // clang never added these to declarations anyway.
6603 AttrsToRemove.addAttribute("amdgpu-unsafe-fp-atomics");
6604 RemovingAttrs = true;
6605 }
6606 }
6607
6608 DenormalMode DenormalFPMath = DenormalMode::getIEEE();
6609 DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();
6610
6611 bool HandleDenormalMode = false;
6612
6613 if (Attribute Attr = F.getFnAttribute("denormal-fp-math"); Attr.isValid()) {
6614 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6615 if (ParsedMode.isValid()) {
6616 DenormalFPMath = ParsedMode;
6617 AttrsToRemove.addAttribute("denormal-fp-math");
6618 AddingAttrs = RemovingAttrs = true;
6619 HandleDenormalMode = true;
6620 }
6621 }
6622
6623 if (Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
6624 Attr.isValid()) {
6625 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6626 if (ParsedMode.isValid()) {
6627 DenormalFPMathF32 = ParsedMode;
6628 AttrsToRemove.addAttribute("denormal-fp-math-f32");
6629 AddingAttrs = RemovingAttrs = true;
6630 HandleDenormalMode = true;
6631 }
6632 }
6633
6634 if (HandleDenormalMode)
6635 AttrsToAdd.addDenormalFPEnvAttr(
6636 DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));
6637
6638 if (RemovingAttrs)
6639 F.removeFnAttrs(AttrsToRemove);
6640
6641 if (AddingAttrs)
6642 F.addFnAttrs(AttrsToAdd);
6643}
6644
6645// Check if the function attribute is not present and set it.
6647 StringRef Value) {
6648 if (!F.hasFnAttribute(FnAttrName))
6649 F.addFnAttr(FnAttrName, Value);
6650}
6651
6652// Check if the function attribute is not present and set it if needed.
6653// If the attribute is "false" then removes it.
6654// If the attribute is "true" resets it to a valueless attribute.
6655static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6656 if (!F.hasFnAttribute(FnAttrName)) {
6657 if (Set)
6658 F.addFnAttr(FnAttrName);
6659 } else {
6660 auto A = F.getFnAttribute(FnAttrName);
6661 if ("false" == A.getValueAsString())
6662 F.removeFnAttr(FnAttrName);
6663 else if ("true" == A.getValueAsString()) {
6664 F.removeFnAttr(FnAttrName);
6665 F.addFnAttr(FnAttrName);
6666 }
6667 }
6668}
6669
// Body of a routine operating on a module `M`: for Thumb/ARM/AArch64 triples
// it reads the branch-protection related module flags, mirrors them onto
// every function definition as function attributes, and finally rewrites each
// enabled module flag to the value 2 with Min merge behavior.
// NOTE(review): the line carrying this routine's signature is missing from
// this listing — confirm its name and parameters against the original file.
6671 Triple T(M.getTargetTriple());
// Other architectures are left untouched.
6672 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6673 return;
6674
// Raw values of the module flags of interest; each stays 0 when its flag is
// not found.
6675 uint64_t BTEValue = 0;
6676 uint64_t BPPLRValue = 0;
6677 uint64_t GCSValue = 0;
6678 uint64_t SRAValue = 0;
6679 uint64_t SRAALLValue = 0;
6680 uint64_t SRABKeyValue = 0;
6681
6682 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6683 if (ModFlags) {
6684 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6685 MDNode *Op = ModFlags->getOperand(I);
// Only three-operand entries are considered.
6686 if (Op->getNumOperands() != 3)
6687 continue;
6688
// Operand 1 must be the flag name (MDString) and operand 2 a constant
// integer value; anything else is skipped.
6689 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6690 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6691 if (!ID || !CI)
6692 continue;
6693
// Select the local that receives this flag's value, or nullptr for flags
// this routine does not care about.
6694 StringRef IDStr = ID->getString();
6695 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6696 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6697 : IDStr == "guarded-control-stack" ? &GCSValue
6698 : IDStr == "sign-return-address" ? &SRAValue
6699 : IDStr == "sign-return-address-all" ? &SRAALLValue
6700 : IDStr == "sign-return-address-with-bkey"
6701 ? &SRABKeyValue
6702 : nullptr;
6703 if (!ValPtr)
6704 continue;
6705
6706 *ValPtr = CI->getZExtValue();
// A value of 2 on any of these flags aborts the whole routine.
// NOTE(review): presumably 2 marks a module that was already processed
// (this routine itself writes 2 at the end) — confirm.
6707 if (*ValPtr == 2)
6708 return;
6709 }
6710 }
6711
// Only the exact value 1 enables a feature.
6712 bool BTE = BTEValue == 1;
6713 bool BPPLR = BPPLRValue == 1;
6714 bool GCS = GCSValue == 1;
6715 bool SRA = SRAValue == 1;
6716
// Signing scope defaults to "non-leaf" and the key to "a_key"; the "-all" and
// "-with-bkey" flags override them only when signing itself is enabled.
6717 StringRef SignTypeValue = "non-leaf";
6718 if (SRA && SRAALLValue == 1)
6719 SignTypeValue = "all";
6720
6721 StringRef SignKeyValue = "a_key";
6722 if (SRA && SRABKeyValue == 1)
6723 SignKeyValue = "b_key";
6724
6725 for (Function &F : M.getFunctionList()) {
// Declarations are skipped; only definitions receive attributes.
6726 if (F.isDeclaration())
6727 continue;
6728
6729 if (SRA) {
6730 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6731 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6732 } else {
// Signing is not enabled at module level: an explicit "none" is removed
// together with the key attribute.
6733 if (auto A = F.getFnAttribute("sign-return-address");
6734 A.isValid() && "none" == A.getValueAsString()) {
6735 F.removeFnAttr("sign-return-address");
6736 F.removeFnAttr("sign-return-address-key");
6737 }
6738 }
6739 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6740 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6741 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6742 }
6743
// Rewrite every enabled flag with value 2 and Min merge behavior.
6744 if (BTE)
6745 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6746 if (BPPLR)
6747 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6748 if (GCS)
6749 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6750 if (SRA) {
6751 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6752 if (SRAALLValue == 1)
6753 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6754 if (SRABKeyValue == 1)
6755 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6756 }
6757}
6758
6759static bool isOldLoopArgument(Metadata *MD) {
6760 auto *T = dyn_cast_or_null<MDTuple>(MD);
6761 if (!T)
6762 return false;
6763 if (T->getNumOperands() < 1)
6764 return false;
6765 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6766 if (!S)
6767 return false;
6768 return S->getString().starts_with("llvm.vectorizer.");
6769}
6770
6772 StringRef OldPrefix = "llvm.vectorizer.";
6773 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6774
6775 if (OldTag == "llvm.vectorizer.unroll")
6776 return MDString::get(C, "llvm.loop.interleave.count");
6777
6778 return MDString::get(
6779 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6780 .str());
6781}
6782
6784 auto *T = dyn_cast_or_null<MDTuple>(MD);
6785 if (!T)
6786 return MD;
6787 if (T->getNumOperands() < 1)
6788 return MD;
6789 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6790 if (!OldTag)
6791 return MD;
6792 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6793 return MD;
6794
6795 // This has an old tag. Upgrade it.
6797 Ops.reserve(T->getNumOperands());
6798 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6799 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6800 Ops.push_back(T->getOperand(I));
6801
6802 return MDTuple::get(T->getContext(), Ops);
6803}
6804
// Body of a routine taking `N` by reference: when `N` is an MDTuple with at
// least one operand accepted by isOldLoopArgument, it returns a new tuple in
// which every operand has been passed through upgradeLoopArgument; otherwise
// it returns `&N` unchanged.
// NOTE(review): the signature line and the declaration of `Ops` are missing
// from this listing — confirm both against the original file.
6806 auto *T = dyn_cast<MDTuple>(&N);
6807 if (!T)
6808 return &N;
6809
// Nothing to upgrade: hand back the original node.
6810 if (none_of(T->operands(), isOldLoopArgument))
6811 return &N;
6812
6814 Ops.reserve(T->getNumOperands());
6815 for (Metadata *MD : T->operands())
6816 Ops.push_back(upgradeLoopArgument(MD));
6817
6818 return MDTuple::get(T->getContext(), Ops);
6819}
6820
// Body of a routine that rewrites the data layout string `DL` for the target
// triple `TT`, adding components that older layouts lacked, and returns the
// result as a std::string. Target families are handled one after another;
// most branches return directly.
// NOTE(review): the signature line is missing from this listing — confirm the
// name and parameter types against the original file.
6822 Triple T(TT);
6823 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6824 // the address space of globals to 1. This does not apply to SPIRV Logical.
6825 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6826 !DL.contains("-G") && !DL.starts_with("G")) {
6827 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6828 }
6829
6830 if (T.isLoongArch64() || T.isRISCV64()) {
6831 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6832 auto I = DL.find("-n64-");
// I + 5 skips the five characters of the matched "-n64-".
6833 if (I != StringRef::npos)
6834 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6835 return DL.str();
6836 }
6837
6838 // AMDGPU data layout upgrades.
// From here on `Res` is the working copy; `DL` keeps the original text and is
// what the presence checks below are evaluated against.
6839 std::string Res = DL.str();
6840 if (T.isAMDGPU()) {
6841 // Define address spaces for constants.
6842 if (!DL.contains("-G") && !DL.starts_with("G"))
6843 Res.append(Res.empty() ? "G1" : "-G1");
6844
6845 // AMDGCN data layout upgrades.
6846 if (T.isAMDGCN()) {
6847
6848 // Add missing non-integral declarations.
6849 // This goes before adding new address spaces to prevent incoherent string
6850 // values.
6851 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6852 Res.append("-ni:7:8:9");
6853 // Update ni:7 to ni:7:8:9.
6854 if (DL.ends_with("ni:7"))
6855 Res.append(":8:9");
6856 if (DL.ends_with("ni:7:8"))
6857 Res.append(":9");
6858
6859 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6860 // resources) An empty data layout has already been upgraded to G1 by now.
6861 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6862 Res.append("-p7:160:256:256:32");
6863 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6864 Res.append("-p8:128:128:128:48");
// An older, shorter p8 entry is replaced in place with the four-field form.
6865 constexpr StringRef OldP8("-p8:128:128-");
6866 if (DL.contains(OldP8))
6867 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6868 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6869 Res.append("-p9:192:256:256:32");
6870 }
6871
6872 // Upgrade the ELF mangling mode.
6873 if (!DL.contains("m:e"))
6874 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6875
6876 return Res;
6877 }
6878
6879 if (T.isSystemZ() && !DL.empty()) {
6880 // Make sure the stack alignment is present.
// drop_front(1) discards the first character of DL before "E-S64" is
// prepended. NOTE(review): presumably DL always starts with "E" here —
// confirm.
6881 if (!DL.contains("-S64"))
6882 return "E-S64" + DL.drop_front(1).str();
6883 return DL.str();
6884 }
6885
// Shared by the AArch64 and X86 paths below; edits `Res` in place.
6886 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6887 // If the datalayout matches the expected format, add pointer size address
6888 // spaces to the datalayout.
6889 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6890 if (!DL.contains(AddrSpaces)) {
// NOTE(review): `Groups` is used below but its declaration is not visible in
// this listing (a line appears to be missing) — confirm.
6892 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6893 if (R.match(Res, &Groups))
6894 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6895 }
6896 };
6897
6898 // AArch64 data layout upgrades.
6899 if (T.isAArch64()) {
6900 // Add "-Fn32"
6901 if (!DL.empty() && !DL.contains("-Fn32"))
6902 Res.append("-Fn32");
6903 AddPtr32Ptr64AddrSpaces();
6904 return Res;
6905 }
6906
6907 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6908 T.isWasm()) {
6909 // Mips64 with o32 ABI did not add "-i128:128".
6910 // Add "-i128:128"
6911 std::string I64 = "-i64:64";
6912 std::string I128 = "-i128:128";
// The i128 entry is inserted directly after the existing "-i64:64" entry; a
// layout without that entry is left unchanged.
6913 if (!StringRef(Res).contains(I128)) {
6914 size_t Pos = Res.find(I64);
6915 if (Pos != size_t(-1))
6916 Res.insert(Pos + I64.size(), I128);
6917 }
6918 }
6919
6920 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
// Insert before "-S128" when present, otherwise append at the end.
6921 size_t Pos = Res.find("-S128");
6922 if (Pos == StringRef::npos)
6923 Pos = Res.size();
6924 Res.insert(Pos, "-f64:32:64");
6925 }
6926
// Everything below applies to X86 only.
6927 if (!T.isX86())
6928 return Res;
6929
6930 AddPtr32Ptr64AddrSpaces();
6931
6932 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6933 // for i128 operations prior to this being reflected in the data layout, and
6934 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6935 // boundaries, so although this is a breaking change, the upgrade is expected
6936 // to fix more IR than it breaks.
6937 // Intel MCU is an exception and uses 4-byte-alignment.
6938 if (!T.isOSIAMCU()) {
6939 std::string I128 = "-i128:128";
6940 if (StringRef Ref = Res; !Ref.contains(I128)) {
// NOTE(review): `Groups` is used below but its declaration is not visible in
// this listing (a line appears to be missing) — confirm.
// The regex splits the layout after the leading run of m/p/i components so
// the i128 entry lands between group 1 and group 3.
6942 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6943 if (R.match(Res, &Groups))
6944 Res = (Groups[1] + I128 + Groups[3]).str();
6945 }
6946 }
6947
6948 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6949 // Raising the alignment is safe because Clang did not produce f80 values in
6950 // the MSVC environment before this upgrade was added.
6951 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6952 StringRef Ref = Res;
6953 auto I = Ref.find("-f80:32-");
// I + 8 skips the eight characters of the matched "-f80:32-".
6954 if (I != StringRef::npos)
6955 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6956 }
6957
6958 return Res;
6959}
6960
6961void llvm::UpgradeAttributes(AttrBuilder &B) {
6962 StringRef FramePointer;
6963 Attribute A = B.getAttribute("no-frame-pointer-elim");
6964 if (A.isValid()) {
6965 // The value can be "true" or "false".
6966 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6967 B.removeAttribute("no-frame-pointer-elim");
6968 }
6969 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6970 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6971 if (FramePointer != "all")
6972 FramePointer = "non-leaf";
6973 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6974 }
6975 if (!FramePointer.empty())
6976 B.addAttribute("frame-pointer", FramePointer);
6977
6978 A = B.getAttribute("null-pointer-is-valid");
6979 if (A.isValid()) {
6980 // The value can be "true" or "false".
6981 bool NullPointerIsValid = A.getValueAsString() == "true";
6982 B.removeAttribute("null-pointer-is-valid");
6983 if (NullPointerIsValid)
6984 B.addAttribute(Attribute::NullPointerIsValid);
6985 }
6986
6987 A = B.getAttribute("uniform-work-group-size");
6988 if (A.isValid()) {
6989 StringRef Val = A.getValueAsString();
6990 if (!Val.empty()) {
6991 bool IsTrue = Val == "true";
6992 B.removeAttribute("uniform-work-group-size");
6993 if (IsTrue)
6994 B.addAttribute("uniform-work-group-size");
6995 }
6996 }
6997}
6998
6999void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
7000 // clang.arc.attachedcall bundles are now required to have an operand.
7001 // If they don't, it's okay to drop them entirely: when there is an operand,
7002 // the "attachedcall" is meaningful and required, but without an operand,
7003 // it's just a marker NOP. Dropping it merely prevents an optimization.
7004 erase_if(Bundles, [&](OperandBundleDef &OBD) {
7005 return OBD.getTag() == "clang.arc.attachedcall" &&
7006 OBD.inputs().empty();
7007 });
7008}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define R2(n)
This file contains the declarations for metadata subclasses.
#define T
#define T1
NVPTX address space definition.
uint64_t High
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:484
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ Add
*p = old + v
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ Min
*p = old <signed v ? old : v
@ And
*p = old & v
@ Xor
*p = old ^ v
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ Max
*p = old >signed v ? old : v
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DWARF expression.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
void setDebugLoc(DebugLoc Loc)
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setApproxFunc(bool B=true)
Definition FMF.h:93
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:867
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:246
const Function & getFunction() const
Definition Function.h:166
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Function.cpp:444
size_t arg_size() const
Definition Function.h:875
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
Argument * getArg(unsigned i) const
Definition Function.h:860
static LLVM_ABI GUID getGUIDAssumingExternalLinkage(StringRef GlobalName)
Return a 64-bit global unique ID constructed from the name of a global symbol.
Definition Globals.cpp:80
LinkageTypes getLinkage() const
uint64_t GUID
Declare a type to represent a global unique identifier for a global value.
static StringRef dropLLVMManglingEscape(StringRef Name)
If the given string begins with the GlobalValue name mangling escape character '\1',...
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:577
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2848
Base class for instruction visitors.
Definition InstVisitor.h:78
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1069
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1433
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1561
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1439
LLVMContext & getContext() const
Definition Metadata.h:1233
Tracking metadata reference owned by Metadata.
Definition Metadata.h:891
A single uniqued string.
Definition Metadata.h:722
LLVM_ABI StringRef getString() const
Definition Metadata.cpp:632
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1518
Metadata wrapper in the Value hierarchy.
Definition Metadata.h:184
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition Module.h:117
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition Module.h:138
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition Module.h:120
@ Min
Takes the min of the two values, which are required to be integers.
Definition Module.h:152
@ Max
Takes the max of the two values, which are required to be integers.
Definition Module.h:149
A tuple of MDNodes.
Definition Metadata.h:1749
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
Definition Metadata.h:1845
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
StringRef getTag() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition Type.cpp:889
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:888
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:736
static constexpr size_t npos
Definition StringRef.h:58
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:597
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:635
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition StringRef.h:850
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:477
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary values as strings.
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:310
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:288
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:309
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:307
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:368
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
static LLVM_ABI IntegerType * getInt16Ty(LLVMContext &C)
Definition Type.cpp:308
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:232
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:285
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:286
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:285
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:284
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:394
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:346
bool hasName() const
Definition Value.h:261
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:319
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:400
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
LLVM_ABI bool hasStructReturnType(ID id)
Returns true if id has a struct return type.
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
Definition Metadata.h:709
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:573
@ Length
Definition DWP.cpp:573
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call to the specified new function.
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading, and returns true if it requires upgrading.
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case of optionals) value is accepted.
Definition Casting.h:732
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition STLExtras.h:633
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
LLVM_ABI bool UpgradeCFIFunctionsMetadata(Module &M)
Upgrade the cfi.functions metadata node by calculating and inserting the GUID for each function entry...
LLVM_ABI void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number; if it is outdated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ DEBUG_METADATA_VERSION
Definition Metadata.h:54
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:862
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106