// LLVM 23.0.0git — source listing of AutoUpgrade.cpp (doxygen "documentation
// of this file" page).
1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/IR/AutoUpgrade.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/StringRef.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/CallingConv.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DebugInfo.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/InstVisitor.h"
31#include "llvm/IR/Instruction.h"
33#include "llvm/IR/Intrinsics.h"
34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
41#include "llvm/IR/LLVMContext.h"
42#include "llvm/IR/MDBuilder.h"
43#include "llvm/IR/Metadata.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/Value.h"
46#include "llvm/IR/Verifier.h"
51#include "llvm/Support/Regex.h"
54#include <cstdint>
55#include <cstring>
56#include <numeric>
57
58using namespace llvm;
59
60static cl::opt<bool>
61 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
62 cl::desc("Disable autoupgrade of debug info"));
63
64static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
65
66// Report a fatal error along with the
67// Call Instruction which caused the error
68[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
69 CallBase *CI) {
70 CI->print(llvm::errs());
71 llvm::errs() << "\n";
73}
74
75// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
76// changed their type from v4f32 to v2i64.
78 Function *&NewFn) {
79 // Check whether this is an old version of the function, which received
80 // v4f32 arguments.
81 Type *Arg0Type = F->getFunctionType()->getParamType(0);
82 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
83 return false;
84
85 // Yes, it's old, replace it with new version.
86 rename(F);
87 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
88 return true;
89}
90
91// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
92// arguments have changed their type from i32 to i8.
94 Function *&NewFn) {
95 // Check that the last argument is an i32.
96 Type *LastArgType = F->getFunctionType()->getParamType(
97 F->getFunctionType()->getNumParams() - 1);
98 if (!LastArgType->isIntegerTy(32))
99 return false;
100
101 // Move this function aside and map down.
102 rename(F);
103 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
104 return true;
105}
106
107// Upgrade the declaration of fp compare intrinsics that change return type
108// from scalar to vXi1 mask.
110 Function *&NewFn) {
111 // Check if the return type is a vector.
112 if (F->getReturnType()->isVectorTy())
113 return false;
114
115 rename(F);
116 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
117 return true;
118}
119
120// Upgrade the declaration of multiply and add bytes intrinsics whose input
121// arguments' types have changed from vectors of i32 to vectors of i8
123 Function *&NewFn) {
124 // check if input argument type is a vector of i8
125 Type *Arg1Type = F->getFunctionType()->getParamType(1);
126 Type *Arg2Type = F->getFunctionType()->getParamType(2);
127 if (Arg1Type->isVectorTy() &&
128 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
129 Arg2Type->isVectorTy() &&
130 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
131 return false;
132
133 rename(F);
134 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
135 return true;
136}
137
138// Upgrade the declaration of multipy and add words intrinsics whose input
139// arguments' types have changed to vectors of i32 to vectors of i16
141 Function *&NewFn) {
142 // check if input argument type is a vector of i16
143 Type *Arg1Type = F->getFunctionType()->getParamType(1);
144 Type *Arg2Type = F->getFunctionType()->getParamType(2);
145 if (Arg1Type->isVectorTy() &&
146 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
147 Arg2Type->isVectorTy() &&
148 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
149 return false;
150
151 rename(F);
152 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
153 return true;
154}
155
157 Function *&NewFn) {
158 if (F->getReturnType()->getScalarType()->isBFloatTy())
159 return false;
160
161 rename(F);
162 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
163 return true;
164}
165
167 Function *&NewFn) {
168 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
169 return false;
170
171 rename(F);
172 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
173 return true;
174}
175
177 // All of the intrinsics matches below should be marked with which llvm
178 // version started autoupgrading them. At some point in the future we would
179 // like to use this information to remove upgrade code for some older
180 // intrinsics. It is currently undecided how we will determine that future
181 // point.
182 if (Name.consume_front("avx."))
183 return (Name.starts_with("blend.p") || // Added in 3.7
184 Name == "cvt.ps2.pd.256" || // Added in 3.9
185 Name == "cvtdq2.pd.256" || // Added in 3.9
186 Name == "cvtdq2.ps.256" || // Added in 7.0
187 Name.starts_with("movnt.") || // Added in 3.2
188 Name.starts_with("sqrt.p") || // Added in 7.0
189 Name.starts_with("storeu.") || // Added in 3.9
190 Name.starts_with("vbroadcast.s") || // Added in 3.5
191 Name.starts_with("vbroadcastf128") || // Added in 4.0
192 Name.starts_with("vextractf128.") || // Added in 3.7
193 Name.starts_with("vinsertf128.") || // Added in 3.7
194 Name.starts_with("vperm2f128.") || // Added in 6.0
195 Name.starts_with("vpermil.")); // Added in 3.1
196
197 if (Name.consume_front("avx2."))
198 return (Name == "movntdqa" || // Added in 5.0
199 Name.starts_with("pabs.") || // Added in 6.0
200 Name.starts_with("padds.") || // Added in 8.0
201 Name.starts_with("paddus.") || // Added in 8.0
202 Name.starts_with("pblendd.") || // Added in 3.7
203 Name == "pblendw" || // Added in 3.7
204 Name.starts_with("pbroadcast") || // Added in 3.8
205 Name.starts_with("pcmpeq.") || // Added in 3.1
206 Name.starts_with("pcmpgt.") || // Added in 3.1
207 Name.starts_with("pmax") || // Added in 3.9
208 Name.starts_with("pmin") || // Added in 3.9
209 Name.starts_with("pmovsx") || // Added in 3.9
210 Name.starts_with("pmovzx") || // Added in 3.9
211 Name == "pmul.dq" || // Added in 7.0
212 Name == "pmulu.dq" || // Added in 7.0
213 Name.starts_with("psll.dq") || // Added in 3.7
214 Name.starts_with("psrl.dq") || // Added in 3.7
215 Name.starts_with("psubs.") || // Added in 8.0
216 Name.starts_with("psubus.") || // Added in 8.0
217 Name.starts_with("vbroadcast") || // Added in 3.8
218 Name == "vbroadcasti128" || // Added in 3.7
219 Name == "vextracti128" || // Added in 3.7
220 Name == "vinserti128" || // Added in 3.7
221 Name == "vperm2i128"); // Added in 6.0
222
223 if (Name.consume_front("avx512.")) {
224 if (Name.consume_front("mask."))
225 // 'avx512.mask.*'
226 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
227 Name.starts_with("and.") || // Added in 3.9
228 Name.starts_with("andn.") || // Added in 3.9
229 Name.starts_with("broadcast.s") || // Added in 3.9
230 Name.starts_with("broadcastf32x4.") || // Added in 6.0
231 Name.starts_with("broadcastf32x8.") || // Added in 6.0
232 Name.starts_with("broadcastf64x2.") || // Added in 6.0
233 Name.starts_with("broadcastf64x4.") || // Added in 6.0
234 Name.starts_with("broadcasti32x4.") || // Added in 6.0
235 Name.starts_with("broadcasti32x8.") || // Added in 6.0
236 Name.starts_with("broadcasti64x2.") || // Added in 6.0
237 Name.starts_with("broadcasti64x4.") || // Added in 6.0
238 Name.starts_with("cmp.b") || // Added in 5.0
239 Name.starts_with("cmp.d") || // Added in 5.0
240 Name.starts_with("cmp.q") || // Added in 5.0
241 Name.starts_with("cmp.w") || // Added in 5.0
242 Name.starts_with("compress.b") || // Added in 9.0
243 Name.starts_with("compress.d") || // Added in 9.0
244 Name.starts_with("compress.p") || // Added in 9.0
245 Name.starts_with("compress.q") || // Added in 9.0
246 Name.starts_with("compress.store.") || // Added in 7.0
247 Name.starts_with("compress.w") || // Added in 9.0
248 Name.starts_with("conflict.") || // Added in 9.0
249 Name.starts_with("cvtdq2pd.") || // Added in 4.0
250 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
251 Name == "cvtpd2dq.256" || // Added in 7.0
252 Name == "cvtpd2ps.256" || // Added in 7.0
253 Name == "cvtps2pd.128" || // Added in 7.0
254 Name == "cvtps2pd.256" || // Added in 7.0
255 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
256 Name == "cvtqq2ps.256" || // Added in 9.0
257 Name == "cvtqq2ps.512" || // Added in 9.0
258 Name == "cvttpd2dq.256" || // Added in 7.0
259 Name == "cvttps2dq.128" || // Added in 7.0
260 Name == "cvttps2dq.256" || // Added in 7.0
261 Name.starts_with("cvtudq2pd.") || // Added in 4.0
262 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
263 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
264 Name == "cvtuqq2ps.256" || // Added in 9.0
265 Name == "cvtuqq2ps.512" || // Added in 9.0
266 Name.starts_with("dbpsadbw.") || // Added in 7.0
267 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
268 Name.starts_with("expand.b") || // Added in 9.0
269 Name.starts_with("expand.d") || // Added in 9.0
270 Name.starts_with("expand.load.") || // Added in 7.0
271 Name.starts_with("expand.p") || // Added in 9.0
272 Name.starts_with("expand.q") || // Added in 9.0
273 Name.starts_with("expand.w") || // Added in 9.0
274 Name.starts_with("fpclass.p") || // Added in 7.0
275 Name.starts_with("insert") || // Added in 4.0
276 Name.starts_with("load.") || // Added in 3.9
277 Name.starts_with("loadu.") || // Added in 3.9
278 Name.starts_with("lzcnt.") || // Added in 5.0
279 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
280 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
281 Name.starts_with("movddup") || // Added in 3.9
282 Name.starts_with("move.s") || // Added in 4.0
283 Name.starts_with("movshdup") || // Added in 3.9
284 Name.starts_with("movsldup") || // Added in 3.9
285 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
286 Name.starts_with("or.") || // Added in 3.9
287 Name.starts_with("pabs.") || // Added in 6.0
288 Name.starts_with("packssdw.") || // Added in 5.0
289 Name.starts_with("packsswb.") || // Added in 5.0
290 Name.starts_with("packusdw.") || // Added in 5.0
291 Name.starts_with("packuswb.") || // Added in 5.0
292 Name.starts_with("padd.") || // Added in 4.0
293 Name.starts_with("padds.") || // Added in 8.0
294 Name.starts_with("paddus.") || // Added in 8.0
295 Name.starts_with("palignr.") || // Added in 3.9
296 Name.starts_with("pand.") || // Added in 3.9
297 Name.starts_with("pandn.") || // Added in 3.9
298 Name.starts_with("pavg") || // Added in 6.0
299 Name.starts_with("pbroadcast") || // Added in 6.0
300 Name.starts_with("pcmpeq.") || // Added in 3.9
301 Name.starts_with("pcmpgt.") || // Added in 3.9
302 Name.starts_with("perm.df.") || // Added in 3.9
303 Name.starts_with("perm.di.") || // Added in 3.9
304 Name.starts_with("permvar.") || // Added in 7.0
305 Name.starts_with("pmaddubs.w.") || // Added in 7.0
306 Name.starts_with("pmaddw.d.") || // Added in 7.0
307 Name.starts_with("pmax") || // Added in 4.0
308 Name.starts_with("pmin") || // Added in 4.0
309 Name == "pmov.qd.256" || // Added in 9.0
310 Name == "pmov.qd.512" || // Added in 9.0
311 Name == "pmov.wb.256" || // Added in 9.0
312 Name == "pmov.wb.512" || // Added in 9.0
313 Name.starts_with("pmovsx") || // Added in 4.0
314 Name.starts_with("pmovzx") || // Added in 4.0
315 Name.starts_with("pmul.dq.") || // Added in 4.0
316 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
317 Name.starts_with("pmulh.w.") || // Added in 7.0
318 Name.starts_with("pmulhu.w.") || // Added in 7.0
319 Name.starts_with("pmull.") || // Added in 4.0
320 Name.starts_with("pmultishift.qb.") || // Added in 8.0
321 Name.starts_with("pmulu.dq.") || // Added in 4.0
322 Name.starts_with("por.") || // Added in 3.9
323 Name.starts_with("prol.") || // Added in 8.0
324 Name.starts_with("prolv.") || // Added in 8.0
325 Name.starts_with("pror.") || // Added in 8.0
326 Name.starts_with("prorv.") || // Added in 8.0
327 Name.starts_with("pshuf.b.") || // Added in 4.0
328 Name.starts_with("pshuf.d.") || // Added in 3.9
329 Name.starts_with("pshufh.w.") || // Added in 3.9
330 Name.starts_with("pshufl.w.") || // Added in 3.9
331 Name.starts_with("psll.d") || // Added in 4.0
332 Name.starts_with("psll.q") || // Added in 4.0
333 Name.starts_with("psll.w") || // Added in 4.0
334 Name.starts_with("pslli") || // Added in 4.0
335 Name.starts_with("psllv") || // Added in 4.0
336 Name.starts_with("psra.d") || // Added in 4.0
337 Name.starts_with("psra.q") || // Added in 4.0
338 Name.starts_with("psra.w") || // Added in 4.0
339 Name.starts_with("psrai") || // Added in 4.0
340 Name.starts_with("psrav") || // Added in 4.0
341 Name.starts_with("psrl.d") || // Added in 4.0
342 Name.starts_with("psrl.q") || // Added in 4.0
343 Name.starts_with("psrl.w") || // Added in 4.0
344 Name.starts_with("psrli") || // Added in 4.0
345 Name.starts_with("psrlv") || // Added in 4.0
346 Name.starts_with("psub.") || // Added in 4.0
347 Name.starts_with("psubs.") || // Added in 8.0
348 Name.starts_with("psubus.") || // Added in 8.0
349 Name.starts_with("pternlog.") || // Added in 7.0
350 Name.starts_with("punpckh") || // Added in 3.9
351 Name.starts_with("punpckl") || // Added in 3.9
352 Name.starts_with("pxor.") || // Added in 3.9
353 Name.starts_with("shuf.f") || // Added in 6.0
354 Name.starts_with("shuf.i") || // Added in 6.0
355 Name.starts_with("shuf.p") || // Added in 4.0
356 Name.starts_with("sqrt.p") || // Added in 7.0
357 Name.starts_with("store.b.") || // Added in 3.9
358 Name.starts_with("store.d.") || // Added in 3.9
359 Name.starts_with("store.p") || // Added in 3.9
360 Name.starts_with("store.q.") || // Added in 3.9
361 Name.starts_with("store.w.") || // Added in 3.9
362 Name == "store.ss" || // Added in 7.0
363 Name.starts_with("storeu.") || // Added in 3.9
364 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
365 Name.starts_with("ucmp.") || // Added in 5.0
366 Name.starts_with("unpckh.") || // Added in 3.9
367 Name.starts_with("unpckl.") || // Added in 3.9
368 Name.starts_with("valign.") || // Added in 4.0
369 Name == "vcvtph2ps.128" || // Added in 11.0
370 Name == "vcvtph2ps.256" || // Added in 11.0
371 Name.starts_with("vextract") || // Added in 4.0
372 Name.starts_with("vfmadd.") || // Added in 7.0
373 Name.starts_with("vfmaddsub.") || // Added in 7.0
374 Name.starts_with("vfnmadd.") || // Added in 7.0
375 Name.starts_with("vfnmsub.") || // Added in 7.0
376 Name.starts_with("vpdpbusd.") || // Added in 7.0
377 Name.starts_with("vpdpbusds.") || // Added in 7.0
378 Name.starts_with("vpdpwssd.") || // Added in 7.0
379 Name.starts_with("vpdpwssds.") || // Added in 7.0
380 Name.starts_with("vpermi2var.") || // Added in 7.0
381 Name.starts_with("vpermil.p") || // Added in 3.9
382 Name.starts_with("vpermilvar.") || // Added in 4.0
383 Name.starts_with("vpermt2var.") || // Added in 7.0
384 Name.starts_with("vpmadd52") || // Added in 7.0
385 Name.starts_with("vpshld.") || // Added in 7.0
386 Name.starts_with("vpshldv.") || // Added in 8.0
387 Name.starts_with("vpshrd.") || // Added in 7.0
388 Name.starts_with("vpshrdv.") || // Added in 8.0
389 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
390 Name.starts_with("xor.")); // Added in 3.9
391
392 if (Name.consume_front("mask3."))
393 // 'avx512.mask3.*'
394 return (Name.starts_with("vfmadd.") || // Added in 7.0
395 Name.starts_with("vfmaddsub.") || // Added in 7.0
396 Name.starts_with("vfmsub.") || // Added in 7.0
397 Name.starts_with("vfmsubadd.") || // Added in 7.0
398 Name.starts_with("vfnmsub.")); // Added in 7.0
399
400 if (Name.consume_front("maskz."))
401 // 'avx512.maskz.*'
402 return (Name.starts_with("pternlog.") || // Added in 7.0
403 Name.starts_with("vfmadd.") || // Added in 7.0
404 Name.starts_with("vfmaddsub.") || // Added in 7.0
405 Name.starts_with("vpdpbusd.") || // Added in 7.0
406 Name.starts_with("vpdpbusds.") || // Added in 7.0
407 Name.starts_with("vpdpwssd.") || // Added in 7.0
408 Name.starts_with("vpdpwssds.") || // Added in 7.0
409 Name.starts_with("vpermt2var.") || // Added in 7.0
410 Name.starts_with("vpmadd52") || // Added in 7.0
411 Name.starts_with("vpshldv.") || // Added in 8.0
412 Name.starts_with("vpshrdv.")); // Added in 8.0
413
414 // 'avx512.*'
415 return (Name == "movntdqa" || // Added in 5.0
416 Name == "pmul.dq.512" || // Added in 7.0
417 Name == "pmulu.dq.512" || // Added in 7.0
418 Name.starts_with("broadcastm") || // Added in 6.0
419 Name.starts_with("cmp.p") || // Added in 12.0
420 Name.starts_with("cvtb2mask.") || // Added in 7.0
421 Name.starts_with("cvtd2mask.") || // Added in 7.0
422 Name.starts_with("cvtmask2") || // Added in 5.0
423 Name.starts_with("cvtq2mask.") || // Added in 7.0
424 Name == "cvtusi2sd" || // Added in 7.0
425 Name.starts_with("cvtw2mask.") || // Added in 7.0
426 Name == "kand.w" || // Added in 7.0
427 Name == "kandn.w" || // Added in 7.0
428 Name == "knot.w" || // Added in 7.0
429 Name == "kor.w" || // Added in 7.0
430 Name == "kortestc.w" || // Added in 7.0
431 Name == "kortestz.w" || // Added in 7.0
432 Name.starts_with("kunpck") || // added in 6.0
433 Name == "kxnor.w" || // Added in 7.0
434 Name == "kxor.w" || // Added in 7.0
435 Name.starts_with("padds.") || // Added in 8.0
436 Name.starts_with("pbroadcast") || // Added in 3.9
437 Name.starts_with("prol") || // Added in 8.0
438 Name.starts_with("pror") || // Added in 8.0
439 Name.starts_with("psll.dq") || // Added in 3.9
440 Name.starts_with("psrl.dq") || // Added in 3.9
441 Name.starts_with("psubs.") || // Added in 8.0
442 Name.starts_with("ptestm") || // Added in 6.0
443 Name.starts_with("ptestnm") || // Added in 6.0
444 Name.starts_with("storent.") || // Added in 3.9
445 Name.starts_with("vbroadcast.s") || // Added in 7.0
446 Name.starts_with("vpshld.") || // Added in 8.0
447 Name.starts_with("vpshrd.")); // Added in 8.0
448 }
449
450 if (Name.consume_front("fma."))
451 return (Name.starts_with("vfmadd.") || // Added in 7.0
452 Name.starts_with("vfmsub.") || // Added in 7.0
453 Name.starts_with("vfmsubadd.") || // Added in 7.0
454 Name.starts_with("vfnmadd.") || // Added in 7.0
455 Name.starts_with("vfnmsub.")); // Added in 7.0
456
457 if (Name.consume_front("fma4."))
458 return Name.starts_with("vfmadd.s"); // Added in 7.0
459
460 if (Name.consume_front("sse."))
461 return (Name == "add.ss" || // Added in 4.0
462 Name == "cvtsi2ss" || // Added in 7.0
463 Name == "cvtsi642ss" || // Added in 7.0
464 Name == "div.ss" || // Added in 4.0
465 Name == "mul.ss" || // Added in 4.0
466 Name.starts_with("sqrt.p") || // Added in 7.0
467 Name == "sqrt.ss" || // Added in 7.0
468 Name.starts_with("storeu.") || // Added in 3.9
469 Name == "sub.ss"); // Added in 4.0
470
471 if (Name.consume_front("sse2."))
472 return (Name == "add.sd" || // Added in 4.0
473 Name == "cvtdq2pd" || // Added in 3.9
474 Name == "cvtdq2ps" || // Added in 7.0
475 Name == "cvtps2pd" || // Added in 3.9
476 Name == "cvtsi2sd" || // Added in 7.0
477 Name == "cvtsi642sd" || // Added in 7.0
478 Name == "cvtss2sd" || // Added in 7.0
479 Name == "div.sd" || // Added in 4.0
480 Name == "mul.sd" || // Added in 4.0
481 Name.starts_with("padds.") || // Added in 8.0
482 Name.starts_with("paddus.") || // Added in 8.0
483 Name.starts_with("pcmpeq.") || // Added in 3.1
484 Name.starts_with("pcmpgt.") || // Added in 3.1
485 Name == "pmaxs.w" || // Added in 3.9
486 Name == "pmaxu.b" || // Added in 3.9
487 Name == "pmins.w" || // Added in 3.9
488 Name == "pminu.b" || // Added in 3.9
489 Name == "pmulu.dq" || // Added in 7.0
490 Name.starts_with("pshuf") || // Added in 3.9
491 Name.starts_with("psll.dq") || // Added in 3.7
492 Name.starts_with("psrl.dq") || // Added in 3.7
493 Name.starts_with("psubs.") || // Added in 8.0
494 Name.starts_with("psubus.") || // Added in 8.0
495 Name.starts_with("sqrt.p") || // Added in 7.0
496 Name == "sqrt.sd" || // Added in 7.0
497 Name == "storel.dq" || // Added in 3.9
498 Name.starts_with("storeu.") || // Added in 3.9
499 Name == "sub.sd"); // Added in 4.0
500
501 if (Name.consume_front("sse41."))
502 return (Name.starts_with("blendp") || // Added in 3.7
503 Name == "movntdqa" || // Added in 5.0
504 Name == "pblendw" || // Added in 3.7
505 Name == "pmaxsb" || // Added in 3.9
506 Name == "pmaxsd" || // Added in 3.9
507 Name == "pmaxud" || // Added in 3.9
508 Name == "pmaxuw" || // Added in 3.9
509 Name == "pminsb" || // Added in 3.9
510 Name == "pminsd" || // Added in 3.9
511 Name == "pminud" || // Added in 3.9
512 Name == "pminuw" || // Added in 3.9
513 Name.starts_with("pmovsx") || // Added in 3.8
514 Name.starts_with("pmovzx") || // Added in 3.9
515 Name == "pmuldq"); // Added in 7.0
516
517 if (Name.consume_front("sse42."))
518 return Name == "crc32.64.8"; // Added in 3.4
519
520 if (Name.consume_front("sse4a."))
521 return Name.starts_with("movnt."); // Added in 3.9
522
523 if (Name.consume_front("ssse3."))
524 return (Name == "pabs.b.128" || // Added in 6.0
525 Name == "pabs.d.128" || // Added in 6.0
526 Name == "pabs.w.128"); // Added in 6.0
527
528 if (Name.consume_front("xop."))
529 return (Name == "vpcmov" || // Added in 3.8
530 Name == "vpcmov.256" || // Added in 5.0
531 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
532 Name.starts_with("vprot")); // Added in 8.0
533
534 return (Name == "addcarry.u32" || // Added in 8.0
535 Name == "addcarry.u64" || // Added in 8.0
536 Name == "addcarryx.u32" || // Added in 8.0
537 Name == "addcarryx.u64" || // Added in 8.0
538 Name == "subborrow.u32" || // Added in 8.0
539 Name == "subborrow.u64" || // Added in 8.0
540 Name.starts_with("vcvtph2ps.")); // Added in 11.0
541}
542
544 Function *&NewFn) {
545 // Only handle intrinsics that start with "x86.".
546 if (!Name.consume_front("x86."))
547 return false;
548
549 if (shouldUpgradeX86Intrinsic(F, Name)) {
550 NewFn = nullptr;
551 return true;
552 }
553
554 if (Name == "rdtscp") { // Added in 8.0
555 // If this intrinsic has 0 operands, it's the new version.
556 if (F->getFunctionType()->getNumParams() == 0)
557 return false;
558
559 rename(F);
560 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
561 Intrinsic::x86_rdtscp);
562 return true;
563 }
564
566
567 // SSE4.1 ptest functions may have an old signature.
568 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
570 .Case("c", Intrinsic::x86_sse41_ptestc)
571 .Case("z", Intrinsic::x86_sse41_ptestz)
572 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
575 return upgradePTESTIntrinsic(F, ID, NewFn);
576
577 return false;
578 }
579
580 // Several blend and other instructions with masks used the wrong number of
581 // bits.
582
583 // Added in 3.6
585 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
586 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
587 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
588 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
589 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
590 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
593 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
594
595 if (Name.consume_front("avx512.")) {
596 if (Name.consume_front("mask.cmp.")) {
597 // Added in 7.0
599 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
600 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
601 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
602 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
603 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
604 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
607 return upgradeX86MaskedFPCompare(F, ID, NewFn);
608 } else if (Name.starts_with("vpdpbusd.") ||
609 Name.starts_with("vpdpbusds.")) {
610 // Added in 21.1
612 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
613 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
614 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
615 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
616 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
617 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
620 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
621 } else if (Name.starts_with("vpdpwssd.") ||
622 Name.starts_with("vpdpwssds.")) {
623 // Added in 21.1
625 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
626 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
627 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
628 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
629 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
630 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
633 return upgradeX86MultiplyAddWords(F, ID, NewFn);
634 }
635 return false; // No other 'x86.avx512.*'.
636 }
637
638 if (Name.consume_front("avx2.")) {
639 if (Name.consume_front("vpdpb")) {
640 // Added in 21.1
642 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
643 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
644 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
645 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
646 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
647 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
648 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
649 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
650 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
651 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
652 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
653 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
656 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
657 } else if (Name.consume_front("vpdpw")) {
658 // Added in 21.1
660 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
661 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
662 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
663 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
664 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
665 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
666 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
667 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
668 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
669 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
670 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
671 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
674 return upgradeX86MultiplyAddWords(F, ID, NewFn);
675 }
676 return false; // No other 'x86.avx2.*'
677 }
678
679 if (Name.consume_front("avx10.")) {
680 if (Name.consume_front("vpdpb")) {
681 // Added in 21.1
683 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
684 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
685 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
686 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
687 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
688 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
691 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
692 } else if (Name.consume_front("vpdpw")) {
694 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
695 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
696 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
697 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
698 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
699 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
702 return upgradeX86MultiplyAddWords(F, ID, NewFn);
703 }
704 return false; // No other 'x86.avx10.*'
705 }
706
707 if (Name.consume_front("avx512bf16.")) {
708 // Added in 9.0
710 .Case("cvtne2ps2bf16.128",
711 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
712 .Case("cvtne2ps2bf16.256",
713 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
714 .Case("cvtne2ps2bf16.512",
715 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
716 .Case("mask.cvtneps2bf16.128",
717 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
718 .Case("cvtneps2bf16.256",
719 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
720 .Case("cvtneps2bf16.512",
721 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
724 return upgradeX86BF16Intrinsic(F, ID, NewFn);
725
726 // Added in 9.0
728 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
729 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
730 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
733 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
734 return false; // No other 'x86.avx512bf16.*'.
735 }
736
737 if (Name.consume_front("xop.")) {
739 if (Name.starts_with("vpermil2")) { // Added in 3.9
740 // Upgrade any XOP PERMIL2 index operand still using a float/double
741 // vector.
742 auto Idx = F->getFunctionType()->getParamType(2);
743 if (Idx->isFPOrFPVectorTy()) {
744 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
745 unsigned EltSize = Idx->getScalarSizeInBits();
746 if (EltSize == 64 && IdxSize == 128)
747 ID = Intrinsic::x86_xop_vpermil2pd;
748 else if (EltSize == 32 && IdxSize == 128)
749 ID = Intrinsic::x86_xop_vpermil2ps;
750 else if (EltSize == 64 && IdxSize == 256)
751 ID = Intrinsic::x86_xop_vpermil2pd_256;
752 else
753 ID = Intrinsic::x86_xop_vpermil2ps_256;
754 }
755 } else if (F->arg_size() == 2)
756 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
758 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
759 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
761
763 rename(F);
764 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
765 return true;
766 }
767 return false; // No other 'x86.xop.*'
768 }
769
770 if (Name == "seh.recoverfp") {
771 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
772 Intrinsic::eh_recoverfp);
773 return true;
774 }
775
776 return false;
777}
778
779// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
780// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
// NOTE(review): this Doxygen listing elides hyperlinked source lines — the
// function signature and several StringSwitch headers / Default() lines are
// missing below. Comments describe only what the visible lines establish.
// On a match, the replacement declaration is stored into NewFn (when one is
// needed) and true is returned; false means "no upgrade for this name".
782 StringRef Name,
783 Function *&NewFn) {
// Target-independent upgrades first; rbit maps to the generic
// llvm.bitreverse, overloaded on the first argument's type.
784 if (Name.starts_with("rbit")) {
785 // '(arm|aarch64).rbit'.
787 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
788 return true;
789 }
790
791 if (Name == "thread.pointer") {
792 // '(arm|aarch64).thread.pointer'.
794 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
795 return true;
796 }
797
// Strip the optional "neon." component up front; Neon is remembered so the
// target-specific sections below can distinguish neon vs. non-neon names.
798 bool Neon = Name.consume_front("neon.");
799 if (Neon) {
800 // '(arm|aarch64).neon.*'.
801 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
802 // v16i8 respectively.
803 if (Name.consume_front("bfdot.")) {
804 // (arm|aarch64).neon.bfdot.*'.
807 .Cases({"v2f32.v8i8", "v4f32.v16i8"},
808 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
809 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
// OperandWidth is derived from the return vector width (64 or 128 bits);
// the bf16 operand vector has OperandWidth/16 elements.
812 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
813 assert((OperandWidth == 64 || OperandWidth == 128) &&
814 "Unexpected operand width");
815 LLVMContext &Ctx = F->getParent()->getContext();
816 std::array<Type *, 2> Tys{
817 {F->getReturnType(),
818 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
819 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
820 return true;
821 }
822 return false; // No other '(arm|aarch64).neon.bfdot.*'.
823 }
824
825 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
826 // anymore and accept v8bf16 instead of v16i8.
827 if (Name.consume_front("bfm")) {
828 // (arm|aarch64).neon.bfm*'.
829 if (Name.consume_back(".v4f32.v16i8")) {
830 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
833 .Case("mla",
834 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
835 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
836 .Case("lalb",
837 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
838 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
839 .Case("lalt",
840 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
841 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
844 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
845 return true;
846 }
847 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
848 }
849 return false; // No other '(arm|aarch64).neon.bfm*.
850 }
851 // Continue on to Aarch64 Neon or Arm Neon.
852 }
853 // Continue on to Arm or Aarch64.
854
// Target-specific handling: 'arm.*' here, 'aarch64.*' in the else arm.
855 if (IsArm) {
856 // 'arm.*'.
857 if (Neon) {
858 // 'arm.neon.*'.
// Saturating/rounding vector ops map 1:1 onto generic intrinsics,
// overloaded on the first argument type.
860 .StartsWith("vclz.", Intrinsic::ctlz)
861 .StartsWith("vcnt.", Intrinsic::ctpop)
862 .StartsWith("vqadds.", Intrinsic::sadd_sat)
863 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
864 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
865 .StartsWith("vqsubu.", Intrinsic::usub_sat)
866 .StartsWith("vrinta.", Intrinsic::round)
867 .StartsWith("vrintn.", Intrinsic::roundeven)
868 .StartsWith("vrintm.", Intrinsic::floor)
869 .StartsWith("vrintp.", Intrinsic::ceil)
870 .StartsWith("vrintx.", Intrinsic::rint)
871 .StartsWith("vrintz.", Intrinsic::trunc)
874 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
875 F->arg_begin()->getType());
876 return true;
877 }
878
879 if (Name.consume_front("vst")) {
880 // 'arm.neon.vst*'.
// Group 1 of the regex is "1".."4" for plain stores or "2lane".."4lane"
// for lane stores; the table index is recovered from the arg count.
881 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
883 if (vstRegex.match(Name, &Groups)) {
884 static const Intrinsic::ID StoreInts[] = {
885 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
886 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
887
888 static const Intrinsic::ID StoreLaneInts[] = {
889 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
890 Intrinsic::arm_neon_vst4lane};
891
892 auto fArgs = F->getFunctionType()->params();
893 Type *Tys[] = {fArgs[0], fArgs[1]};
894 if (Groups[1].size() == 1)
896 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
897 else
899 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
900 return true;
901 }
902 return false; // No other 'arm.neon.vst*'.
903 }
904
905 return false; // No other 'arm.neon.*'.
906 }
907
908 if (Name.consume_front("mve.")) {
909 // 'arm.mve.*'.
910 if (Name == "vctp64") {
911 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
912 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
913 // the function and deal with it below in UpgradeIntrinsicCall.
914 rename(F);
915 return true;
916 }
917 return false; // Not 'arm.mve.vctp64'.
918 }
919
920 if (Name.starts_with("vrintn.v")) {
922 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
923 return true;
924 }
925
926 // These too are changed to accept a v2i1 instead of the old v4i1.
// The returns below answer "does this name need upgrading"; no NewFn is
// set — the renamed call is rewritten later (cf. the vctp64 comment).
927 if (Name.consume_back(".v4i1")) {
928 // 'arm.mve.*.v4i1'.
929 if (Name.consume_back(".predicated.v2i64.v4i32"))
930 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
931 return Name == "mull.int" || Name == "vqdmull";
932
933 if (Name.consume_back(".v2i64")) {
934 // 'arm.mve.*.v2i64.v4i1'
935 bool IsGather = Name.consume_front("vldr.gather.");
936 if (IsGather || Name.consume_front("vstr.scatter.")) {
937 if (Name.consume_front("base.")) {
938 // Optional 'wb.' prefix.
939 Name.consume_front("wb.");
940 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
941 // predicated.v2i64.v2i64.v4i1'.
942 return Name == "predicated.v2i64";
943 }
944
945 if (Name.consume_front("offset.predicated."))
946 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
947 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
948
949 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
950 return false;
951 }
952
953 return false; // No other 'arm.mve.*.v2i64.v4i1'.
954 }
955 return false; // No other 'arm.mve.*.v4i1'.
956 }
957 return false; // No other 'arm.mve.*'.
958 }
959
960 if (Name.consume_front("cde.vcx")) {
961 // 'arm.cde.vcx*'.
962 if (Name.consume_back(".predicated.v2i64.v4i1"))
963 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
964 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
965 Name == "3q" || Name == "3qa";
966
967 return false; // No other 'arm.cde.vcx*'.
968 }
969 } else {
970 // 'aarch64.*'.
971 if (Neon) {
972 // 'aarch64.neon.*'.
974 .StartsWith("frintn", Intrinsic::roundeven)
975 .StartsWith("rbit", Intrinsic::bitreverse)
978 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
979 F->arg_begin()->getType())
980 return true;
981 }
982
983 if (Name.starts_with("addp")) {
984 // 'aarch64.neon.addp*'.
985 if (F->arg_size() != 2)
986 return false; // Invalid IR.
987 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
// Only the floating-point variant is redirected (to faddp); integer
// addp falls through unchanged.
988 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
990 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
991 return true;
992 }
993 }
994
995 // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
996 if (Name.starts_with("bfcvt")) {
// NewFn == nullptr with a true return signals "expand the call in
// UpgradeIntrinsicCall" rather than "redeclare".
997 NewFn = nullptr;
998 return true;
999 }
1000
1001 return false; // No other 'aarch64.neon.*'.
1002 }
1003 if (Name.consume_front("sve.")) {
1004 // 'aarch64.sve.*'.
1005 if (Name.consume_front("bf")) {
1006 if (Name.consume_back(".lane")) {
1007 // 'aarch64.sve.bf*.lane'.
1010 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1011 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1012 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1015 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1016 return true;
1017 }
1018 return false; // No other 'aarch64.sve.bf*.lane'.
1019 }
1020 return false; // No other 'aarch64.sve.bf*'.
1021 }
1022
1023 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
1024 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1025 NewFn = nullptr;
1026 return true;
1027 }
1028
1029 if (Name.consume_front("addqv")) {
1030 // 'aarch64.sve.addqv'.
1031 if (!F->getReturnType()->isFPOrFPVectorTy())
1032 return false;
1033
1034 auto Args = F->getFunctionType()->params();
1035 Type *Tys[] = {F->getReturnType(), Args[1]};
1037 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1038 return true;
1039 }
1040
1041 if (Name.consume_front("ld")) {
1042 // 'aarch64.sve.ld*'.
1043 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1044 if (LdRegex.match(Name)) {
1045 Type *ScalarTy =
1046 cast<VectorType>(F->getReturnType())->getElementType();
1047 ElementCount EC =
1048 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
1049 Type *Ty = VectorType::get(ScalarTy, EC);
1050 static const Intrinsic::ID LoadIDs[] = {
1051 Intrinsic::aarch64_sve_ld2_sret,
1052 Intrinsic::aarch64_sve_ld3_sret,
1053 Intrinsic::aarch64_sve_ld4_sret,
1054 };
// Name[0] is '2', '3' or '4' (guaranteed by LdRegex), indexing LoadIDs.
1055 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1056 LoadIDs[Name[0] - '2'], Ty);
1057 return true;
1058 }
1059 return false; // No other 'aarch64.sve.ld*'.
1060 }
1061
1062 if (Name.consume_front("tuple.")) {
1063 // 'aarch64.sve.tuple.*'.
1064 if (Name.starts_with("get")) {
1065 // 'aarch64.sve.tuple.get*'.
1066 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1068 F->getParent(), Intrinsic::vector_extract, Tys);
1069 return true;
1070 }
1071
1072 if (Name.starts_with("set")) {
1073 // 'aarch64.sve.tuple.set*'.
1074 auto Args = F->getFunctionType()->params();
1075 Type *Tys[] = {Args[0], Args[2], Args[1]};
1077 F->getParent(), Intrinsic::vector_insert, Tys);
1078 return true;
1079 }
1080
1081 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1082 if (CreateTupleRegex.match(Name)) {
1083 // 'aarch64.sve.tuple.create*'.
1084 auto Args = F->getFunctionType()->params();
1085 Type *Tys[] = {F->getReturnType(), Args[1]};
1087 F->getParent(), Intrinsic::vector_insert, Tys);
1088 return true;
1089 }
1090 return false; // No other 'aarch64.sve.tuple.*'.
1091 }
1092
1093 if (Name.starts_with("rev.nxv")) {
1094 // 'aarch64.sve.rev.<Ty>'
1096 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
1097 return true;
1098 }
1099
1100 return false; // No other 'aarch64.sve.*'.
1101 }
1102 }
1103 return false; // No other 'arm.*', 'aarch64.*'.
1104}
1105
// Decides whether an 'nvvm.cp.async.bulk.tensor.g2s.*' intrinsic declaration
// needs upgrading, returning the matched intrinsic ID when it does.
// NOTE(review): the signature line and the final default-return line are
// elided in this listing; presumably the fall-through returns
// Intrinsic::not_intrinsic — confirm against the full source.
1107 StringRef Name) {
1108 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
1111 .Case("im2col.3d",
1112 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1113 .Case("im2col.4d",
1114 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1115 .Case("im2col.5d",
1116 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1117 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1118 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1119 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1120 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1121 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1123
1125 return ID;
1126
1127 // These intrinsics may need upgrade for two reasons:
1128 // (1) When the address-space of the first argument is shared[AS=3]
1129 // (and we upgrade it to use shared_cluster address-space[AS=7])
1130 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1132 return ID;
1133
1134 // (2) When there are only two boolean flag arguments at the end:
1135 //
1136 // The last three parameters of the older version of these
1137 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1138 //
1139 // The newer version reads as:
1140 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1141 //
1142 // So, when the type of the [N-3]rd argument is "not i1", then
1143 // it is the older version and we need to upgrade.
1144 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1145 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
1146 if (!ArgType->isIntegerTy(1))
1147 return ID;
1148 }
1149
1151}
1152
// Decides whether an 'nvvm.mapa.shared.cluster' or 'nvvm.cp.async.bulk.*'
// intrinsic needs the shared -> shared_cluster address-space upgrade,
// returning the new intrinsic ID when it does.
// NOTE(review): the signature line and the final default-return line are
// elided in this listing — confirm against the full source.
1154 StringRef Name) {
1155 if (Name.consume_front("mapa.shared.cluster"))
// Upgrade only when the return pointer is still in the old (shared)
// address space; the constant on the elided line 1157 names that space.
1156 if (F->getReturnType()->getPointerAddressSpace() ==
1158 return Intrinsic::nvvm_mapa_shared_cluster;
1159
1160 if (Name.consume_front("cp.async.bulk.")) {
1163 .Case("global.to.shared.cluster",
1164 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1165 .Case("shared.cta.to.cluster",
1166 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1168
1170 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1172 return ID;
1173 }
1174
1176}
1177
// Maps deprecated 'nvvm.{fma.rn,fmax,fmin,neg}.*bf16*' names (which used an
// integer return type) to their current intrinsic IDs.
// NOTE(review): the signature line and each StringSwitch's Default() line are
// elided in this listing; presumably unmatched names yield
// Intrinsic::not_intrinsic — confirm against the full source.
1179 if (Name.consume_front("fma.rn."))
1180 return StringSwitch<Intrinsic::ID>(Name)
1181 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1182 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1183 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1184 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1186
1187 if (Name.consume_front("fmax."))
1188 return StringSwitch<Intrinsic::ID>(Name)
1189 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1190 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1191 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1192 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1193 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1194 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1195 .Case("ftz.nan.xorsign.abs.bf16",
1196 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1197 .Case("ftz.nan.xorsign.abs.bf16x2",
1198 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1199 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1200 .Case("ftz.xorsign.abs.bf16x2",
1201 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1202 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1203 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1204 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1205 .Case("nan.xorsign.abs.bf16x2",
1206 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1207 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1208 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1210
1211 if (Name.consume_front("fmin."))
1212 return StringSwitch<Intrinsic::ID>(Name)
1213 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1214 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1215 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1216 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1217 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1218 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1219 .Case("ftz.nan.xorsign.abs.bf16",
1220 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1221 .Case("ftz.nan.xorsign.abs.bf16x2",
1222 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1223 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1224 .Case("ftz.xorsign.abs.bf16x2",
1225 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1226 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1227 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1228 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1229 .Case("nan.xorsign.abs.bf16x2",
1230 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1231 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1232 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1234
1235 if (Name.consume_front("neg."))
1236 return StringSwitch<Intrinsic::ID>(Name)
1237 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1238 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1240
1242}
1243
// Consumes one known NVVM address-space component — local/shared/global/
// constant/param — from the front of Name; returns true iff one matched
// (and was stripped). The signature line is elided in this listing.
1245 return Name.consume_front("local") || Name.consume_front("shared") ||
1246 Name.consume_front("global") || Name.consume_front("constant") ||
1247 Name.consume_front("param");
1248}
1249
// Checks that a deprecated 'llvm.convert.{to,from}.fp16' declaration has
// types for which the replacement cast pair is legal:
//   to.fp16:   fptrunc(param0 -> half) then bitcast(half -> return type)
//   from.fp16: bitcast(param0 -> half) then fpext(half -> return type)
// Returns false for any other name. (Signature line elided in this listing.)
1251 const FunctionType *FuncTy) {
1252 Type *HalfTy = Type::getHalfTy(FuncTy->getContext());
1253 if (Name.starts_with("to.fp16")) {
1254 return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0),
1255 HalfTy) &&
1256 CastInst::castIsValid(Instruction::BitCast, HalfTy,
1257 FuncTy->getReturnType());
1258 }
1259
1260 if (Name.starts_with("from.fp16")) {
1261 return CastInst::castIsValid(Instruction::BitCast, FuncTy->getParamType(0),
1262 HalfTy) &&
1263 CastInst::castIsValid(Instruction::FPExt, HalfTy,
1264 FuncTy->getReturnType());
1265 }
1266
1267 return false;
1268}
1269
// Core upgrade dispatcher. Examines an intrinsic declaration F whose name is
// deprecated and either (a) records a replacement declaration in NewFn and
// returns true, or (b) returns true with NewFn == nullptr when the calls
// themselves must be expanded later (see the trailing comment at the end of
// this function). Returns false when no upgrade applies.
// NOTE(review): this Doxygen listing elides hyperlinked source lines — the
// signature, many StringSwitch headers, Default() terminators and several
// 'NewFn =' left-hand sides are missing below.
1271 bool CanUpgradeDebugIntrinsicsToRecords) {
1272 assert(F && "Illegal to upgrade a non-existent Function.");
1273
1274 StringRef Name = F->getName();
1275
1276 // Quickly eliminate it, if it's not a candidate.
1277 if (!Name.consume_front("llvm.") || Name.empty())
1278 return false;
1279
// Dispatch on the first character after the "llvm." prefix so the bulk of
// the string matching only runs for plausible candidates.
1280 switch (Name[0]) {
1281 default: break;
1282 case 'a': {
1283 bool IsArm = Name.consume_front("arm.");
1284 if (IsArm || Name.consume_front("aarch64.")) {
1285 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1286 return true;
1287 break;
1288 }
1289
1290 if (Name.consume_front("amdgcn.")) {
1291 if (Name == "alignbit") {
1292 // Target specific intrinsic became redundant
1294 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1295 return true;
1296 }
1297
1298 if (Name.consume_front("atomic.")) {
1299 if (Name.starts_with("inc") || Name.starts_with("dec") ||
1300 Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1301 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1302 // and usub_sat so there's no new declaration.
1303 NewFn = nullptr;
1304 return true;
1305 }
1306 break; // No other 'amdgcn.atomic.*'
1307 }
1308
1309 // Legacy wmma iu intrinsics without the optional clamp operand.
1310 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
1311 F->arg_size() == 7) {
1312 NewFn = nullptr;
1313 return true;
1314 }
1315 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
1316 F->arg_size() == 8) {
1317 NewFn = nullptr;
1318 return true;
1319 }
1320
1321 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1322 Name.consume_front("flat.atomic.")) {
1323 if (Name.starts_with("fadd") ||
1324 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1325 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1326 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1327 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1328 // declaration.
1329 NewFn = nullptr;
1330 return true;
1331 }
1332 }
1333
1334 if (Name.starts_with("ldexp.")) {
1335 // Target specific intrinsic became redundant
1337 F->getParent(), Intrinsic::ldexp,
1338 {F->getReturnType(), F->getArg(1)->getType()});
1339 return true;
1340 }
1341 break; // No other 'amdgcn.*'
1342 }
1343
1344 break;
1345 }
1346 case 'c': {
1347 if (F->arg_size() == 1) {
1348 if (Name.consume_front("convert.")) {
1349 if (convertIntrinsicValidType(Name, F->getFunctionType())) {
1350 NewFn = nullptr;
1351 return true;
1352 }
1353 }
1354
1356 .StartsWith("ctlz.", Intrinsic::ctlz)
1357 .StartsWith("cttz.", Intrinsic::cttz)
1360 rename(F);
1361 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1362 F->arg_begin()->getType());
1363 return true;
1364 }
1365 }
1366
1367 if (F->arg_size() == 2 && Name == "coro.end") {
1368 rename(F);
1369 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1370 Intrinsic::coro_end);
1371 return true;
1372 }
1373
1374 break;
1375 }
1376 case 'd':
1377 if (Name.consume_front("dbg.")) {
1378 // Mark debug intrinsics for upgrade to new debug format.
1379 if (CanUpgradeDebugIntrinsicsToRecords) {
1380 if (Name == "addr" || Name == "value" || Name == "assign" ||
1381 Name == "declare" || Name == "label") {
1382 // There's no function to replace these with.
1383 NewFn = nullptr;
1384 // But we do want these to get upgraded.
1385 return true;
1386 }
1387 }
1388 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1389 // converted to DbgVariableRecords later.
1390 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1391 rename(F);
1392 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1393 Intrinsic::dbg_value);
1394 return true;
1395 }
1396 break; // No other 'dbg.*'.
1397 }
1398 break;
1399 case 'e':
1400 if (Name.consume_front("experimental.vector.")) {
1403 // Skip over extract.last.active, otherwise it will be 'upgraded'
1404 // to a regular vector extract which is a different operation.
1405 .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1406 .StartsWith("extract.", Intrinsic::vector_extract)
1407 .StartsWith("insert.", Intrinsic::vector_insert)
1408 .StartsWith("reverse.", Intrinsic::vector_reverse)
1409 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1410 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1411 .StartsWith("partial.reduce.add",
1412 Intrinsic::vector_partial_reduce_add)
1415 const auto *FT = F->getFunctionType();
// The overload type list depends on which generic vector intrinsic
// was matched (extract overloads the return type, insert the
// inserted type, etc.).
1417 if (ID == Intrinsic::vector_extract ||
1418 ID == Intrinsic::vector_interleave2)
1419 // Extracting overloads the return type.
1420 Tys.push_back(FT->getReturnType());
1421 if (ID != Intrinsic::vector_interleave2)
1422 Tys.push_back(FT->getParamType(0));
1423 if (ID == Intrinsic::vector_insert ||
1424 ID == Intrinsic::vector_partial_reduce_add)
1425 // Inserting overloads the inserted type.
1426 Tys.push_back(FT->getParamType(1));
1427 rename(F);
1428 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1429 return true;
1430 }
1431
1432 if (Name.consume_front("reduce.")) {
1434 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1435 if (R.match(Name, &Groups))
1437 .Case("add", Intrinsic::vector_reduce_add)
1438 .Case("mul", Intrinsic::vector_reduce_mul)
1439 .Case("and", Intrinsic::vector_reduce_and)
1440 .Case("or", Intrinsic::vector_reduce_or)
1441 .Case("xor", Intrinsic::vector_reduce_xor)
1442 .Case("smax", Intrinsic::vector_reduce_smax)
1443 .Case("smin", Intrinsic::vector_reduce_smin)
1444 .Case("umax", Intrinsic::vector_reduce_umax)
1445 .Case("umin", Intrinsic::vector_reduce_umin)
1446 .Case("fmax", Intrinsic::vector_reduce_fmax)
1447 .Case("fmin", Intrinsic::vector_reduce_fmin)
1449
// The "v2." variants (fadd/fmul) carry an extra leading scalar
// operand, hence the V2 flag selecting Args[1] below.
1450 bool V2 = false;
1452 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1453 Groups.clear();
1454 V2 = true;
1455 if (R2.match(Name, &Groups))
1457 .Case("fadd", Intrinsic::vector_reduce_fadd)
1458 .Case("fmul", Intrinsic::vector_reduce_fmul)
1460 }
1462 rename(F);
1463 auto Args = F->getFunctionType()->params();
1464 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1465 {Args[V2 ? 1 : 0]});
1466 return true;
1467 }
1468 break; // No other 'experimental.vector.reduce.*'.
1469 }
1470
1471 if (Name.consume_front("splice"))
1472 return true;
1473 break; // No other 'experimental.vector.*'.
1474 }
1475 if (Name.consume_front("experimental.stepvector.")) {
1476 Intrinsic::ID ID = Intrinsic::stepvector;
1477 rename(F);
1479 F->getParent(), ID, F->getFunctionType()->getReturnType());
1480 return true;
1481 }
1482 break; // No other 'e*'.
1483 case 'f':
1484 if (Name.starts_with("flt.rounds")) {
1485 rename(F);
1486 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1487 Intrinsic::get_rounding);
1488 return true;
1489 }
1490 break;
1491 case 'i':
1492 if (Name.starts_with("invariant.group.barrier")) {
1493 // Rename invariant.group.barrier to launder.invariant.group
1494 auto Args = F->getFunctionType()->params();
1495 Type* ObjectPtr[1] = {Args[0]};
1496 rename(F);
1498 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1499 return true;
1500 }
1501 break;
1502 case 'l':
1503 if ((Name.starts_with("lifetime.start") ||
1504 Name.starts_with("lifetime.end")) &&
1505 F->arg_size() == 2) {
1506 Intrinsic::ID IID = Name.starts_with("lifetime.start")
1507 ? Intrinsic::lifetime_start
1508 : Intrinsic::lifetime_end;
1509 rename(F);
1510 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1511 F->getArg(0)->getType());
1512 return true;
1513 }
1514 break;
1515 case 'm': {
1516 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1517 // alignment parameter to embedding the alignment as an attribute of
1518 // the pointer args.
1519 if (unsigned ID = StringSwitch<unsigned>(Name)
1520 .StartsWith("memcpy.", Intrinsic::memcpy)
1521 .StartsWith("memmove.", Intrinsic::memmove)
1522 .Default(0)) {
1523 if (F->arg_size() == 5) {
1524 rename(F);
1525 // Get the types of dest, src, and len
1526 ArrayRef<Type *> ParamTypes =
1527 F->getFunctionType()->params().slice(0, 3);
1528 NewFn =
1529 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1530 return true;
1531 }
1532 }
1533 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1534 rename(F);
1535 // Get the types of dest, and len
1536 const auto *FT = F->getFunctionType();
1537 Type *ParamTypes[2] = {
1538 FT->getParamType(0), // Dest
1539 FT->getParamType(2) // len
1540 };
1541 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1542 Intrinsic::memset, ParamTypes);
1543 return true;
1544 }
1545
1546 unsigned MaskedID =
1548 .StartsWith("masked.load", Intrinsic::masked_load)
1549 .StartsWith("masked.gather", Intrinsic::masked_gather)
1550 .StartsWith("masked.store", Intrinsic::masked_store)
1551 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1552 .Default(0);
1553 if (MaskedID && F->arg_size() == 4) {
1554 rename(F);
// Loads/gathers overload on {return, pointer}; stores/scatters on
// {value, pointer} — hence the two getOrInsertDeclaration forms.
1555 if (MaskedID == Intrinsic::masked_load ||
1556 MaskedID == Intrinsic::masked_gather) {
1558 F->getParent(), MaskedID,
1559 {F->getReturnType(), F->getArg(0)->getType()});
1560 return true;
1561 }
1563 F->getParent(), MaskedID,
1564 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1565 return true;
1566 }
1567 break;
1568 }
1569 case 'n': {
1570 if (Name.consume_front("nvvm.")) {
1571 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1572 if (F->arg_size() == 1) {
1573 Intrinsic::ID IID =
1575 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1576 .Case("clz.i", Intrinsic::ctlz)
1577 .Case("popc.i", Intrinsic::ctpop)
1579 if (IID != Intrinsic::not_intrinsic) {
1580 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1581 {F->getReturnType()});
1582 return true;
1583 }
1584 } else if (F->arg_size() == 2) {
1585 Intrinsic::ID IID =
1587 .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
1588 .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
1589 .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
1590 .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
1592 if (IID != Intrinsic::not_intrinsic) {
1593 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1594 {F->getReturnType()});
1595 return true;
1596 }
1597 }
1598
1599 // Check for nvvm intrinsics that need a return type adjustment.
// (The elided line 1601 presumably computes IID via the bf16 name
// mapper above — confirm against the full source.)
1600 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1602 if (IID != Intrinsic::not_intrinsic) {
1603 NewFn = nullptr;
1604 return true;
1605 }
1606 }
1607
1608 // Upgrade Distributed Shared Memory Intrinsics
1610 if (IID != Intrinsic::not_intrinsic) {
1611 rename(F);
1612 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1613 return true;
1614 }
1615
1616 // Upgrade TMA copy G2S Intrinsics
1618 if (IID != Intrinsic::not_intrinsic) {
1619 rename(F);
1620 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1621 return true;
1622 }
1623
1624 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1625 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1626 //
1627 // TODO: We could add lohi.i2d.
1628 bool Expand = false;
1629 if (Name.consume_front("abs."))
1630 // nvvm.abs.{i,ii}
1631 Expand =
1632 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1633 else if (Name.consume_front("fabs."))
1634 // nvvm.fabs.{f,ftz.f,d}
1635 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1636 else if (Name.consume_front("ex2.approx."))
1637 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1638 Expand =
1639 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1640 else if (Name.consume_front("atomic.load."))
1641 // nvvm.atomic.load.add.{f32,f64}.p
1642 // nvvm.atomic.load.{inc,dec}.32.p
1643 Expand = StringSwitch<bool>(Name)
1644 .StartsWith("add.f32.p", true)
1645 .StartsWith("add.f64.p", true)
1646 .StartsWith("inc.32.p", true)
1647 .StartsWith("dec.32.p", true)
1648 .Default(false);
1649 else if (Name.consume_front("bitcast."))
1650 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1651 Expand =
1652 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1653 else if (Name.consume_front("rotate."))
1654 // nvvm.rotate.{b32,b64,right.b64}
1655 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1656 else if (Name.consume_front("ptr.gen.to."))
1657 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1658 Expand = consumeNVVMPtrAddrSpace(Name);
1659 else if (Name.consume_front("ptr."))
1660 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1661 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1662 else if (Name.consume_front("ldg.global."))
1663 // nvvm.ldg.global.{i,p,f}
1664 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1665 Name.starts_with("p."));
1666 else
1667 Expand = StringSwitch<bool>(Name)
1668 .Case("barrier0", true)
1669 .Case("barrier.n", true)
1670 .Case("barrier.sync.cnt", true)
1671 .Case("barrier.sync", true)
1672 .Case("barrier", true)
1673 .Case("bar.sync", true)
1674 .Case("barrier0.popc", true)
1675 .Case("barrier0.and", true)
1676 .Case("barrier0.or", true)
1677 .Case("clz.ll", true)
1678 .Case("popc.ll", true)
1679 .Case("h2f", true)
1680 .Case("swap.lo.hi.b64", true)
1681 .Case("tanh.approx.f32", true)
1682 .Default(false);
1683
1684 if (Expand) {
1685 NewFn = nullptr;
1686 return true;
1687 }
1688 break; // No other 'nvvm.*'.
1689 }
1690 break;
1691 }
1692 case 'o':
1693 if (Name.starts_with("objectsize.")) {
1694 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1695 if (F->arg_size() == 2 || F->arg_size() == 3) {
1696 rename(F);
1697 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1698 Intrinsic::objectsize, Tys);
1699 return true;
1700 }
1701 }
1702 break;
1703
1704 case 'p':
1705 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1706 rename(F);
1708 F->getParent(), Intrinsic::ptr_annotation,
1709 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1710 return true;
1711 }
1712 break;
1713
1714 case 'r': {
1715 if (Name.consume_front("riscv.")) {
1718 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1719 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1720 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1721 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
// Upgrade only when the third parameter is not yet i32 (the old
// declarations used a wider immediate type).
1724 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1725 rename(F);
1726 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1727 return true;
1728 }
1729 break; // No other applicable upgrades.
1730 }
1731
1733 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1734 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1737 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1738 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1739 rename(F);
1740 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1741 return true;
1742 }
1743 break; // No other applicable upgrades.
1744 }
1745
1747 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1748 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1749 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1750 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1751 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1752 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1755 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1756 rename(F);
1757 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1758 return true;
1759 }
1760 break; // No other applicable upgrades.
1761 }
1762
1763 // Replace llvm.riscv.clmul with llvm.clmul.
1764 if (Name == "clmul.i32" || Name == "clmul.i64") {
1766 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1767 return true;
1768 }
1769
1770 break; // No other 'riscv.*' intrinsics
1771 }
1772 } break;
1773
1774 case 's':
1775 if (Name == "stackprotectorcheck") {
1776 NewFn = nullptr;
1777 return true;
1778 }
1779 break;
1780
1781 case 't':
1782 if (Name == "thread.pointer") {
1784 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1785 return true;
1786 }
1787 break;
1788
1789 case 'v': {
1790 if (Name == "var.annotation" && F->arg_size() == 4) {
1791 rename(F);
1793 F->getParent(), Intrinsic::var_annotation,
1794 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1795 return true;
1796 }
1797 if (Name.consume_front("vector.splice")) {
1798 if (Name.starts_with(".left") || Name.starts_with(".right"))
1799 break;
1800 return true;
1801 }
1802 break;
1803 }
1804
1805 case 'w':
1806 if (Name.consume_front("wasm.")) {
1809 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1810 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1811 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1814 rename(F);
1815 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1816 F->getReturnType());
1817 return true;
1818 }
1819
1820 if (Name.consume_front("dot.i8x16.i7x16.")) {
1822 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1823 .Case("add.signed",
1824 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1827 rename(F);
1828 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1829 return true;
1830 }
1831 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1832 }
1833 break; // No other 'wasm.*'.
1834 }
1835 break;
1836
1837 case 'x':
1838 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1839 return true;
1840 }
1841
1842 auto *ST = dyn_cast<StructType>(F->getReturnType());
1843 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1844 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1845 // Replace return type with literal non-packed struct. Only do this for
1846 // intrinsics declared to return a struct, not for intrinsics with
1847 // overloaded return type, in which case the exact struct type will be
1848 // mangled into the name.
1851 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1852 auto *FT = F->getFunctionType();
1853 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1854 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1855 std::string Name = F->getName().str();
1856 rename(F);
1857 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1858 Name, F->getParent());
1859
1860 // The new function may also need remangling.
1861 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1862 NewFn = *Result;
1863 return true;
1864 }
1865 }
1866
1867 // Remangle our intrinsic since we upgrade the mangling
// (The elided line 1868 presumably computes Result via
// remangleIntrinsicFunction — cf. line 1861 above; confirm in full source.)
1869 if (Result != std::nullopt) {
1870 NewFn = *Result;
1871 return true;
1872 }
1873
1874 // This may not belong here. This function is effectively being overloaded
1875 // to both detect an intrinsic which needs upgrading, and to provide the
1876 // upgraded form of the intrinsic. We should perhaps have two separate
1877 // functions for this.
1878 return false;
1879}
1880
1882 bool CanUpgradeDebugIntrinsicsToRecords) {
1883 NewFn = nullptr;
1884 bool Upgraded =
1885 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1886
1887 // Upgrade intrinsic attributes. This does not change the function.
1888 if (NewFn)
1889 F = NewFn;
1890 if (Intrinsic::ID id = F->getIntrinsicID()) {
1891 // Only do this if the intrinsic signature is valid.
1892 SmallVector<Type *> OverloadTys;
1893 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1894 F->setAttributes(
1895 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1896 }
1897 return Upgraded;
1898}
1899
1901 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1902 GV->getName() == "llvm.global_dtors")) ||
1903 !GV->hasInitializer())
1904 return nullptr;
1906 if (!ATy)
1907 return nullptr;
1909 if (!STy || STy->getNumElements() != 2)
1910 return nullptr;
1911
1912 LLVMContext &C = GV->getContext();
1913 IRBuilder<> IRB(C);
1914 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1915 IRB.getPtrTy());
1916 Constant *Init = GV->getInitializer();
1917 unsigned N = Init->getNumOperands();
1918 std::vector<Constant *> NewCtors(N);
1919 for (unsigned i = 0; i != N; ++i) {
1920 auto Ctor = cast<Constant>(Init->getOperand(i));
1921 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1922 Ctor->getAggregateElement(1),
1924 }
1925 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1926
1927 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1928 NewInit, GV->getName());
1929}
1930
1931// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1932// to byte shuffles.
1934 unsigned Shift) {
1935 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1936 unsigned NumElts = ResultTy->getNumElements() * 8;
1937
1938 // Bitcast from a 64-bit element type to a byte element type.
1939 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1940 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1941
1942 // We'll be shuffling in zeroes.
1943 Value *Res = Constant::getNullValue(VecTy);
1944
1945 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1946 // we'll just return the zero vector.
1947 if (Shift < 16) {
1948 int Idxs[64];
1949 // 256/512-bit version is split into 2/4 16-byte lanes.
1950 for (unsigned l = 0; l != NumElts; l += 16)
1951 for (unsigned i = 0; i != 16; ++i) {
1952 unsigned Idx = NumElts + i - Shift;
1953 if (Idx < NumElts)
1954 Idx -= NumElts - 16; // end of lane, switch operand.
1955 Idxs[l + i] = Idx + l;
1956 }
1957
1958 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1959 }
1960
1961 // Bitcast back to a 64-bit element type.
1962 return Builder.CreateBitCast(Res, ResultTy, "cast");
1963}
1964
1965// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1966// to byte shuffles.
1968 unsigned Shift) {
1969 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1970 unsigned NumElts = ResultTy->getNumElements() * 8;
1971
1972 // Bitcast from a 64-bit element type to a byte element type.
1973 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1974 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1975
1976 // We'll be shuffling in zeroes.
1977 Value *Res = Constant::getNullValue(VecTy);
1978
1979 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1980 // we'll just return the zero vector.
1981 if (Shift < 16) {
1982 int Idxs[64];
1983 // 256/512-bit version is split into 2/4 16-byte lanes.
1984 for (unsigned l = 0; l != NumElts; l += 16)
1985 for (unsigned i = 0; i != 16; ++i) {
1986 unsigned Idx = i + Shift;
1987 if (Idx >= 16)
1988 Idx += NumElts - 16; // end of lane, switch operand.
1989 Idxs[l + i] = Idx + l;
1990 }
1991
1992 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1993 }
1994
1995 // Bitcast back to a 64-bit element type.
1996 return Builder.CreateBitCast(Res, ResultTy, "cast");
1997}
1998
1999static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2000 unsigned NumElts) {
2001 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
2003 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
2004 Mask = Builder.CreateBitCast(Mask, MaskTy);
2005
2006 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2007 // i8 and we need to extract down to the right number of elements.
2008 if (NumElts <= 4) {
2009 int Indices[4];
2010 for (unsigned i = 0; i != NumElts; ++i)
2011 Indices[i] = i;
2012 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
2013 "extract");
2014 }
2015
2016 return Mask;
2017}
2018
2019static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2020 Value *Op1) {
2021 // If the mask is all ones just emit the first operation.
2022 if (const auto *C = dyn_cast<Constant>(Mask))
2023 if (C->isAllOnesValue())
2024 return Op0;
2025
2026 Mask = getX86MaskVec(Builder, Mask,
2027 cast<FixedVectorType>(Op0->getType())->getNumElements());
2028 return Builder.CreateSelect(Mask, Op0, Op1);
2029}
2030
2031static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2032 Value *Op1) {
2033 // If the mask is all ones just emit the first operation.
2034 if (const auto *C = dyn_cast<Constant>(Mask))
2035 if (C->isAllOnesValue())
2036 return Op0;
2037
2038 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2039 Mask->getType()->getIntegerBitWidth());
2040 Mask = Builder.CreateBitCast(Mask, MaskTy);
2041 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2042 return Builder.CreateSelect(Mask, Op0, Op1);
2043}
2044
2045// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2046// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2047// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
2049 Value *Op1, Value *Shift,
2050 Value *Passthru, Value *Mask,
2051 bool IsVALIGN) {
2052 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2053
2054 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2055 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2056 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2057 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2058
2059 // Mask the immediate for VALIGN.
2060 if (IsVALIGN)
2061 ShiftVal &= (NumElts - 1);
2062
2063 // If palignr is shifting the pair of vectors more than the size of two
2064 // lanes, emit zero.
2065 if (ShiftVal >= 32)
2067
2068 // If palignr is shifting the pair of input vectors more than one lane,
2069 // but less than two lanes, convert to shifting in zeroes.
2070 if (ShiftVal > 16) {
2071 ShiftVal -= 16;
2072 Op1 = Op0;
2074 }
2075
2076 int Indices[64];
2077 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2078 for (unsigned l = 0; l < NumElts; l += 16) {
2079 for (unsigned i = 0; i != 16; ++i) {
2080 unsigned Idx = ShiftVal + i;
2081 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2082 Idx += NumElts - 16; // End of lane, switch operand.
2083 Indices[l + i] = Idx + l;
2084 }
2085 }
2086
2087 Value *Align = Builder.CreateShuffleVector(
2088 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2089
2090 return emitX86Select(Builder, Mask, Align, Passthru);
2091}
2092
2094 bool ZeroMask, bool IndexForm) {
2095 Type *Ty = CI.getType();
2096 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2097 unsigned EltWidth = Ty->getScalarSizeInBits();
2098 bool IsFloat = Ty->isFPOrFPVectorTy();
2099 Intrinsic::ID IID;
2100 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2101 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2102 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2103 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2104 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2105 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2106 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2107 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2108 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2109 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2110 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2111 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2112 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2113 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2114 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2115 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2116 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2117 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2118 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2119 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2120 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2121 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2122 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2123 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2124 else if (VecWidth == 128 && EltWidth == 16)
2125 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2126 else if (VecWidth == 256 && EltWidth == 16)
2127 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2128 else if (VecWidth == 512 && EltWidth == 16)
2129 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2130 else if (VecWidth == 128 && EltWidth == 8)
2131 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2132 else if (VecWidth == 256 && EltWidth == 8)
2133 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2134 else if (VecWidth == 512 && EltWidth == 8)
2135 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2136 else
2137 llvm_unreachable("Unexpected intrinsic");
2138
2139 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2140 CI.getArgOperand(2) };
2141
2142 // If this isn't index form we need to swap operand 0 and 1.
2143 if (!IndexForm)
2144 std::swap(Args[0], Args[1]);
2145
2146 Value *V = Builder.CreateIntrinsic(IID, Args);
2147 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2148 : Builder.CreateBitCast(CI.getArgOperand(1),
2149 Ty);
2150 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2151}
2152
2154 Intrinsic::ID IID) {
2155 Type *Ty = CI.getType();
2156 Value *Op0 = CI.getOperand(0);
2157 Value *Op1 = CI.getOperand(1);
2158 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2159
2160 if (CI.arg_size() == 4) { // For masked intrinsics.
2161 Value *VecSrc = CI.getOperand(2);
2162 Value *Mask = CI.getOperand(3);
2163 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2164 }
2165 return Res;
2166}
2167
2169 bool IsRotateRight) {
2170 Type *Ty = CI.getType();
2171 Value *Src = CI.getArgOperand(0);
2172 Value *Amt = CI.getArgOperand(1);
2173
2174 // Amount may be scalar immediate, in which case create a splat vector.
2175 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2176 // we only care about the lowest log2 bits anyway.
2177 if (Amt->getType() != Ty) {
2178 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2179 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2180 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2181 }
2182
2183 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2184 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2185
2186 if (CI.arg_size() == 4) { // For masked intrinsics.
2187 Value *VecSrc = CI.getOperand(2);
2188 Value *Mask = CI.getOperand(3);
2189 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2190 }
2191 return Res;
2192}
2193
2194static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2195 bool IsSigned) {
2196 Type *Ty = CI.getType();
2197 Value *LHS = CI.getArgOperand(0);
2198 Value *RHS = CI.getArgOperand(1);
2199
2200 CmpInst::Predicate Pred;
2201 switch (Imm) {
2202 case 0x0:
2203 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2204 break;
2205 case 0x1:
2206 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2207 break;
2208 case 0x2:
2209 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2210 break;
2211 case 0x3:
2212 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2213 break;
2214 case 0x4:
2215 Pred = ICmpInst::ICMP_EQ;
2216 break;
2217 case 0x5:
2218 Pred = ICmpInst::ICMP_NE;
2219 break;
2220 case 0x6:
2221 return Constant::getNullValue(Ty); // FALSE
2222 case 0x7:
2223 return Constant::getAllOnesValue(Ty); // TRUE
2224 default:
2225 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2226 }
2227
2228 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2229 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2230 return Ext;
2231}
2232
2234 bool IsShiftRight, bool ZeroMask) {
2235 Type *Ty = CI.getType();
2236 Value *Op0 = CI.getArgOperand(0);
2237 Value *Op1 = CI.getArgOperand(1);
2238 Value *Amt = CI.getArgOperand(2);
2239
2240 if (IsShiftRight)
2241 std::swap(Op0, Op1);
2242
2243 // Amount may be scalar immediate, in which case create a splat vector.
2244 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2245 // we only care about the lowest log2 bits anyway.
2246 if (Amt->getType() != Ty) {
2247 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2248 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2249 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2250 }
2251
2252 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2253 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2254
2255 unsigned NumArgs = CI.arg_size();
2256 if (NumArgs >= 4) { // For masked intrinsics.
2257 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2258 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2259 CI.getArgOperand(0);
2260 Value *Mask = CI.getOperand(NumArgs - 1);
2261 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2262 }
2263 return Res;
2264}
2265
2267 Value *Mask, bool Aligned) {
2268 const Align Alignment =
2269 Aligned
2270 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2271 : Align(1);
2272
2273 // If the mask is all ones just emit a regular store.
2274 if (const auto *C = dyn_cast<Constant>(Mask))
2275 if (C->isAllOnesValue())
2276 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2277
2278 // Convert the mask from an integer type to a vector of i1.
2279 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2280 Mask = getX86MaskVec(Builder, Mask, NumElts);
2281 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2282}
2283
2285 Value *Passthru, Value *Mask, bool Aligned) {
2286 Type *ValTy = Passthru->getType();
2287 const Align Alignment =
2288 Aligned
2289 ? Align(
2291 8)
2292 : Align(1);
2293
2294 // If the mask is all ones just emit a regular store.
2295 if (const auto *C = dyn_cast<Constant>(Mask))
2296 if (C->isAllOnesValue())
2297 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2298
2299 // Convert the mask from an integer type to a vector of i1.
2300 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2301 Mask = getX86MaskVec(Builder, Mask, NumElts);
2302 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2303}
2304
2305static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2306 Type *Ty = CI.getType();
2307 Value *Op0 = CI.getArgOperand(0);
2308 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2309 {Op0, Builder.getInt1(false)});
2310 if (CI.arg_size() == 3)
2311 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2312 return Res;
2313}
2314
2315static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2316 Type *Ty = CI.getType();
2317
2318 // Arguments have a vXi32 type so cast to vXi64.
2319 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2320 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2321
2322 if (IsSigned) {
2323 // Shift left then arithmetic shift right.
2324 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2325 LHS = Builder.CreateShl(LHS, ShiftAmt);
2326 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2327 RHS = Builder.CreateShl(RHS, ShiftAmt);
2328 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2329 } else {
2330 // Clear the upper bits.
2331 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2332 LHS = Builder.CreateAnd(LHS, Mask);
2333 RHS = Builder.CreateAnd(RHS, Mask);
2334 }
2335
2336 Value *Res = Builder.CreateMul(LHS, RHS);
2337
2338 if (CI.arg_size() == 4)
2339 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2340
2341 return Res;
2342}
2343
2344// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
2346 Value *Mask) {
2347 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2348 if (Mask) {
2349 const auto *C = dyn_cast<Constant>(Mask);
2350 if (!C || !C->isAllOnesValue())
2351 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2352 }
2353
2354 if (NumElts < 8) {
2355 int Indices[8];
2356 for (unsigned i = 0; i != NumElts; ++i)
2357 Indices[i] = i;
2358 for (unsigned i = NumElts; i != 8; ++i)
2359 Indices[i] = NumElts + i % NumElts;
2360 Vec = Builder.CreateShuffleVector(Vec,
2362 Indices);
2363 }
2364 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2365}
2366
2368 unsigned CC, bool Signed) {
2369 Value *Op0 = CI.getArgOperand(0);
2370 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2371
2372 Value *Cmp;
2373 if (CC == 3) {
2375 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2376 } else if (CC == 7) {
2378 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2379 } else {
2381 switch (CC) {
2382 default: llvm_unreachable("Unknown condition code");
2383 case 0: Pred = ICmpInst::ICMP_EQ; break;
2384 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2385 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2386 case 4: Pred = ICmpInst::ICMP_NE; break;
2387 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2388 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2389 }
2390 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2391 }
2392
2393 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2394
2395 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2396}
2397
2398// Replace a masked intrinsic with an older unmasked intrinsic.
2400 Intrinsic::ID IID) {
2401 Value *Rep =
2402 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2403 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2404}
2405
2407 Value* A = CI.getArgOperand(0);
2408 Value* B = CI.getArgOperand(1);
2409 Value* Src = CI.getArgOperand(2);
2410 Value* Mask = CI.getArgOperand(3);
2411
2412 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2413 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2414 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2415 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2416 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2417 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2418}
2419
2421 Value* Op = CI.getArgOperand(0);
2422 Type* ReturnOp = CI.getType();
2423 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2424 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2425 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2426}
2427
2428// Replace intrinsic with unmasked version and a select.
2430 CallBase &CI, Value *&Rep) {
2431 Name = Name.substr(12); // Remove avx512.mask.
2432
2433 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2434 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2435 Intrinsic::ID IID;
2436 if (Name.starts_with("max.p")) {
2437 if (VecWidth == 128 && EltWidth == 32)
2438 IID = Intrinsic::x86_sse_max_ps;
2439 else if (VecWidth == 128 && EltWidth == 64)
2440 IID = Intrinsic::x86_sse2_max_pd;
2441 else if (VecWidth == 256 && EltWidth == 32)
2442 IID = Intrinsic::x86_avx_max_ps_256;
2443 else if (VecWidth == 256 && EltWidth == 64)
2444 IID = Intrinsic::x86_avx_max_pd_256;
2445 else
2446 llvm_unreachable("Unexpected intrinsic");
2447 } else if (Name.starts_with("min.p")) {
2448 if (VecWidth == 128 && EltWidth == 32)
2449 IID = Intrinsic::x86_sse_min_ps;
2450 else if (VecWidth == 128 && EltWidth == 64)
2451 IID = Intrinsic::x86_sse2_min_pd;
2452 else if (VecWidth == 256 && EltWidth == 32)
2453 IID = Intrinsic::x86_avx_min_ps_256;
2454 else if (VecWidth == 256 && EltWidth == 64)
2455 IID = Intrinsic::x86_avx_min_pd_256;
2456 else
2457 llvm_unreachable("Unexpected intrinsic");
2458 } else if (Name.starts_with("pshuf.b.")) {
2459 if (VecWidth == 128)
2460 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2461 else if (VecWidth == 256)
2462 IID = Intrinsic::x86_avx2_pshuf_b;
2463 else if (VecWidth == 512)
2464 IID = Intrinsic::x86_avx512_pshuf_b_512;
2465 else
2466 llvm_unreachable("Unexpected intrinsic");
2467 } else if (Name.starts_with("pmul.hr.sw.")) {
2468 if (VecWidth == 128)
2469 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2470 else if (VecWidth == 256)
2471 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2472 else if (VecWidth == 512)
2473 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2474 else
2475 llvm_unreachable("Unexpected intrinsic");
2476 } else if (Name.starts_with("pmulh.w.")) {
2477 if (VecWidth == 128)
2478 IID = Intrinsic::x86_sse2_pmulh_w;
2479 else if (VecWidth == 256)
2480 IID = Intrinsic::x86_avx2_pmulh_w;
2481 else if (VecWidth == 512)
2482 IID = Intrinsic::x86_avx512_pmulh_w_512;
2483 else
2484 llvm_unreachable("Unexpected intrinsic");
2485 } else if (Name.starts_with("pmulhu.w.")) {
2486 if (VecWidth == 128)
2487 IID = Intrinsic::x86_sse2_pmulhu_w;
2488 else if (VecWidth == 256)
2489 IID = Intrinsic::x86_avx2_pmulhu_w;
2490 else if (VecWidth == 512)
2491 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2492 else
2493 llvm_unreachable("Unexpected intrinsic");
2494 } else if (Name.starts_with("pmaddw.d.")) {
2495 if (VecWidth == 128)
2496 IID = Intrinsic::x86_sse2_pmadd_wd;
2497 else if (VecWidth == 256)
2498 IID = Intrinsic::x86_avx2_pmadd_wd;
2499 else if (VecWidth == 512)
2500 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2501 else
2502 llvm_unreachable("Unexpected intrinsic");
2503 } else if (Name.starts_with("pmaddubs.w.")) {
2504 if (VecWidth == 128)
2505 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2506 else if (VecWidth == 256)
2507 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2508 else if (VecWidth == 512)
2509 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2510 else
2511 llvm_unreachable("Unexpected intrinsic");
2512 } else if (Name.starts_with("packsswb.")) {
2513 if (VecWidth == 128)
2514 IID = Intrinsic::x86_sse2_packsswb_128;
2515 else if (VecWidth == 256)
2516 IID = Intrinsic::x86_avx2_packsswb;
2517 else if (VecWidth == 512)
2518 IID = Intrinsic::x86_avx512_packsswb_512;
2519 else
2520 llvm_unreachable("Unexpected intrinsic");
2521 } else if (Name.starts_with("packssdw.")) {
2522 if (VecWidth == 128)
2523 IID = Intrinsic::x86_sse2_packssdw_128;
2524 else if (VecWidth == 256)
2525 IID = Intrinsic::x86_avx2_packssdw;
2526 else if (VecWidth == 512)
2527 IID = Intrinsic::x86_avx512_packssdw_512;
2528 else
2529 llvm_unreachable("Unexpected intrinsic");
2530 } else if (Name.starts_with("packuswb.")) {
2531 if (VecWidth == 128)
2532 IID = Intrinsic::x86_sse2_packuswb_128;
2533 else if (VecWidth == 256)
2534 IID = Intrinsic::x86_avx2_packuswb;
2535 else if (VecWidth == 512)
2536 IID = Intrinsic::x86_avx512_packuswb_512;
2537 else
2538 llvm_unreachable("Unexpected intrinsic");
2539 } else if (Name.starts_with("packusdw.")) {
2540 if (VecWidth == 128)
2541 IID = Intrinsic::x86_sse41_packusdw;
2542 else if (VecWidth == 256)
2543 IID = Intrinsic::x86_avx2_packusdw;
2544 else if (VecWidth == 512)
2545 IID = Intrinsic::x86_avx512_packusdw_512;
2546 else
2547 llvm_unreachable("Unexpected intrinsic");
2548 } else if (Name.starts_with("vpermilvar.")) {
2549 if (VecWidth == 128 && EltWidth == 32)
2550 IID = Intrinsic::x86_avx_vpermilvar_ps;
2551 else if (VecWidth == 128 && EltWidth == 64)
2552 IID = Intrinsic::x86_avx_vpermilvar_pd;
2553 else if (VecWidth == 256 && EltWidth == 32)
2554 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2555 else if (VecWidth == 256 && EltWidth == 64)
2556 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2557 else if (VecWidth == 512 && EltWidth == 32)
2558 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2559 else if (VecWidth == 512 && EltWidth == 64)
2560 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2561 else
2562 llvm_unreachable("Unexpected intrinsic");
2563 } else if (Name == "cvtpd2dq.256") {
2564 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2565 } else if (Name == "cvtpd2ps.256") {
2566 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2567 } else if (Name == "cvttpd2dq.256") {
2568 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2569 } else if (Name == "cvttps2dq.128") {
2570 IID = Intrinsic::x86_sse2_cvttps2dq;
2571 } else if (Name == "cvttps2dq.256") {
2572 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2573 } else if (Name.starts_with("permvar.")) {
2574 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2575 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2576 IID = Intrinsic::x86_avx2_permps;
2577 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2578 IID = Intrinsic::x86_avx2_permd;
2579 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2580 IID = Intrinsic::x86_avx512_permvar_df_256;
2581 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2582 IID = Intrinsic::x86_avx512_permvar_di_256;
2583 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2584 IID = Intrinsic::x86_avx512_permvar_sf_512;
2585 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2586 IID = Intrinsic::x86_avx512_permvar_si_512;
2587 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2588 IID = Intrinsic::x86_avx512_permvar_df_512;
2589 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2590 IID = Intrinsic::x86_avx512_permvar_di_512;
2591 else if (VecWidth == 128 && EltWidth == 16)
2592 IID = Intrinsic::x86_avx512_permvar_hi_128;
2593 else if (VecWidth == 256 && EltWidth == 16)
2594 IID = Intrinsic::x86_avx512_permvar_hi_256;
2595 else if (VecWidth == 512 && EltWidth == 16)
2596 IID = Intrinsic::x86_avx512_permvar_hi_512;
2597 else if (VecWidth == 128 && EltWidth == 8)
2598 IID = Intrinsic::x86_avx512_permvar_qi_128;
2599 else if (VecWidth == 256 && EltWidth == 8)
2600 IID = Intrinsic::x86_avx512_permvar_qi_256;
2601 else if (VecWidth == 512 && EltWidth == 8)
2602 IID = Intrinsic::x86_avx512_permvar_qi_512;
2603 else
2604 llvm_unreachable("Unexpected intrinsic");
2605 } else if (Name.starts_with("dbpsadbw.")) {
2606 if (VecWidth == 128)
2607 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2608 else if (VecWidth == 256)
2609 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2610 else if (VecWidth == 512)
2611 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2612 else
2613 llvm_unreachable("Unexpected intrinsic");
2614 } else if (Name.starts_with("pmultishift.qb.")) {
2615 if (VecWidth == 128)
2616 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2617 else if (VecWidth == 256)
2618 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2619 else if (VecWidth == 512)
2620 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2621 else
2622 llvm_unreachable("Unexpected intrinsic");
2623 } else if (Name.starts_with("conflict.")) {
2624 if (Name[9] == 'd' && VecWidth == 128)
2625 IID = Intrinsic::x86_avx512_conflict_d_128;
2626 else if (Name[9] == 'd' && VecWidth == 256)
2627 IID = Intrinsic::x86_avx512_conflict_d_256;
2628 else if (Name[9] == 'd' && VecWidth == 512)
2629 IID = Intrinsic::x86_avx512_conflict_d_512;
2630 else if (Name[9] == 'q' && VecWidth == 128)
2631 IID = Intrinsic::x86_avx512_conflict_q_128;
2632 else if (Name[9] == 'q' && VecWidth == 256)
2633 IID = Intrinsic::x86_avx512_conflict_q_256;
2634 else if (Name[9] == 'q' && VecWidth == 512)
2635 IID = Intrinsic::x86_avx512_conflict_q_512;
2636 else
2637 llvm_unreachable("Unexpected intrinsic");
2638 } else if (Name.starts_with("pavg.")) {
2639 if (Name[5] == 'b' && VecWidth == 128)
2640 IID = Intrinsic::x86_sse2_pavg_b;
2641 else if (Name[5] == 'b' && VecWidth == 256)
2642 IID = Intrinsic::x86_avx2_pavg_b;
2643 else if (Name[5] == 'b' && VecWidth == 512)
2644 IID = Intrinsic::x86_avx512_pavg_b_512;
2645 else if (Name[5] == 'w' && VecWidth == 128)
2646 IID = Intrinsic::x86_sse2_pavg_w;
2647 else if (Name[5] == 'w' && VecWidth == 256)
2648 IID = Intrinsic::x86_avx2_pavg_w;
2649 else if (Name[5] == 'w' && VecWidth == 512)
2650 IID = Intrinsic::x86_avx512_pavg_w_512;
2651 else
2652 llvm_unreachable("Unexpected intrinsic");
2653 } else
2654 return false;
2655
2656 SmallVector<Value *, 4> Args(CI.args());
2657 Args.pop_back();
2658 Args.pop_back();
2659 Rep = Builder.CreateIntrinsic(IID, Args);
2660 unsigned NumArgs = CI.arg_size();
2661 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2662 CI.getArgOperand(NumArgs - 2));
2663 return true;
2664}
2665
2666/// Upgrade comment in call to inline asm that represents an objc retain release
2667/// marker.
2668void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2669 size_t Pos;
2670 if (AsmStr->find("mov\tfp") == 0 &&
2671 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2672 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2673 AsmStr->replace(Pos, 1, ";");
2674 }
2675}
2676
                                       Function *F, IRBuilder<> &Builder) {
  // Replacement value for the upgraded NVVM intrinsic call; remains null if
  // no rule below matches.
  Value *Rep = nullptr;

  if (Name == "abs.i" || Name == "abs.ll") {
    // Integer absolute value lowered as: select(arg >= 0, arg, -arg).
    Value *Arg = CI->getArgOperand(0);
    Value *Neg = Builder.CreateNeg(Arg, "neg");
    Value *Cmp = Builder.CreateICmpSGE(
        Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
    Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
    // The legacy bf16 abs operated on integer bit patterns: bitcast to
    // bfloat / <2 x bfloat>, apply nvvm_fabs, then bitcast back to the
    // call's original type.
    Type *Ty = (Name == "abs.bf16")
                   ? Builder.getBFloatTy()
                   : FixedVectorType::get(Builder.getBFloatTy(), 2);
    Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
    Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
    Rep = Builder.CreateBitCast(Abs, CI->getType());
  } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
    // Pick the flush-to-zero flavor only for the ".ftz.f" spelling.
    Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
                                               : Intrinsic::nvvm_fabs;
    Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  } else if (Name.consume_front("ex2.approx.")) {
    // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
    Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
                                                : Intrinsic::nvvm_ex2_approx;
    Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  } else if (Name.starts_with("atomic.load.add.f32.p") ||
             Name.starts_with("atomic.load.add.f64.p")) {
    // Legacy floating-point atomic add becomes a native atomicrmw fadd.
    Value *Ptr = CI->getArgOperand(0);
    Value *Val = CI->getArgOperand(1);
    Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
  } else if (Name.starts_with("atomic.load.inc.32.p") ||
             Name.starts_with("atomic.load.dec.32.p")) {
    // inc/dec map onto the wrapping unsigned atomicrmw operations.
    Value *Ptr = CI->getArgOperand(0);
    Value *Val = CI->getArgOperand(1);
    auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
    Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
  } else if (Name == "clz.ll") {
    // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
                                          {Arg, Builder.getFalse()},
                                          /*FMFSource=*/nullptr, "ctlz");
    Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
  } else if (Name == "popc.ll") {
    // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
    // i64.
    Value *Arg = CI->getArgOperand(0);
    Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
                                          Arg, /*FMFSource=*/nullptr, "ctpop");
    Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  } else if (Name == "h2f") {
    // Half-to-float: reinterpret the argument's bits as half, then fpext.
    Value *Cast =
        Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
    Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
  } else if (Name.consume_front("bitcast.") &&
             (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
              Name == "d2ll")) {
    // Same-width reinterpretations become plain bitcasts.
    Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
  } else if (Name == "rotate.b32") {
    // Rotate-left is a funnel shift left with both inputs equal.
    Value *Arg = CI->getOperand(0);
    Value *ShiftAmt = CI->getOperand(1);
    Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
                                  {Arg, Arg, ShiftAmt});
  } else if (Name == "rotate.b64") {
    // 64-bit rotate-left; the legacy shift amount is narrower than i64, so
    // zero-extend it before the funnel shift.
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "rotate.right.b64") {
    // 64-bit rotate-right via funnel shift right.
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
                                  {Arg, Arg, ZExtShiftAmt});
  } else if (Name == "swap.lo.hi.b64") {
    // Swapping the two 32-bit halves of an i64 is a rotate by 32.
    Type *Int64Ty = Builder.getInt64Ty();
    Value *Arg = CI->getOperand(0);
    Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
                                  {Arg, Arg, Builder.getInt64(32)});
  } else if ((Name.consume_front("ptr.gen.to.") &&
              consumeNVVMPtrAddrSpace(Name)) ||
             (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
              Name.starts_with(".to.gen"))) {
    // Generic<->specific address-space conversions become addrspacecasts.
    Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
  } else if (Name.consume_front("ldg.global")) {
    Value *Ptr = CI->getArgOperand(0);
    // The second operand carries the pointer alignment as a constant int.
    Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
    // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
    Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
    Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
    // Tag the load as invariant and return it directly, bypassing the common
    // Rep-return path at the bottom of this function.
    MDNode *MD = MDNode::get(Builder.getContext(), {});
    LD->setMetadata(LLVMContext::MD_invariant_load, MD);
    return LD;
  } else if (Name == "tanh.approx.f32") {
    // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
    FastMathFlags FMF;
    FMF.setApproxFunc();
    Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
                                       FMF);
  } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
    // "barrier0" has no operand; "barrier.n"/"bar.sync" pass one through.
    Value *Arg =
        Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
                                  {}, {Arg});
  } else if (Name == "barrier") {
    Rep = Builder.CreateIntrinsic(
        Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
        {CI->getArgOperand(0), CI->getArgOperand(1)});
  } else if (Name == "barrier.sync") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
                                  {CI->getArgOperand(0)});
  } else if (Name == "barrier.sync.cnt") {
    Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
                                  {CI->getArgOperand(0), CI->getArgOperand(1)});
  } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
             Name == "barrier0.or") {
    // Barrier reductions take an i1 predicate; the legacy form passed an i32,
    // so compare it against zero first.
    Value *C = CI->getArgOperand(0);
    C = Builder.CreateICmpNE(C, Builder.getInt32(0));

    Intrinsic::ID IID =
        .Case("barrier0.popc",
              Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
            .Case("barrier0.and",
                  Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
            .Case("barrier0.or",
                  Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
    Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
    // Widen the i1 reduction result back to the call's original return type.
    Rep = Builder.CreateZExt(Bar, CI->getType());
  } else {
    // NOTE(review): IID is computed from Name just above this check (that
    // line is elided in this view). Only intrinsics whose new declaration
    // does not return bf16 bits-as-integer are renamed and re-emitted here.
    if (IID != Intrinsic::not_intrinsic &&
        !F->getReturnType()->getScalarType()->isBFloatTy()) {
      rename(F);
      Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
      for (size_t I = 0; I < NewFn->arg_size(); ++I) {
        Value *Arg = CI->getArgOperand(I);
        Type *OldType = Arg->getType();
        Type *NewType = NewFn->getArg(I)->getType();
        // Legacy bf16 intrinsics took integer bit patterns; bitcast those
        // arguments to the bfloat types the new declaration expects.
        Args.push_back(
            (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
                ? Builder.CreateBitCast(Arg, NewType)
                : Arg);
      }
      Rep = Builder.CreateCall(NewFn, Args);
      // If the old call produced integer bits, cast the bfloat result back.
      if (F->getReturnType()->isIntegerTy())
        Rep = Builder.CreateBitCast(Rep, F->getReturnType());
    }
  }

  return Rep;
}
2835
2837 IRBuilder<> &Builder) {
2838 LLVMContext &C = F->getContext();
2839 Value *Rep = nullptr;
2840
2841 if (Name.starts_with("sse4a.movnt.")) {
2843 Elts.push_back(
2844 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2845 MDNode *Node = MDNode::get(C, Elts);
2846
2847 Value *Arg0 = CI->getArgOperand(0);
2848 Value *Arg1 = CI->getArgOperand(1);
2849
2850 // Nontemporal (unaligned) store of the 0'th element of the float/double
2851 // vector.
2852 Value *Extract =
2853 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2854
2855 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2856 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2857 } else if (Name.starts_with("avx.movnt.") ||
2858 Name.starts_with("avx512.storent.")) {
2860 Elts.push_back(
2861 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2862 MDNode *Node = MDNode::get(C, Elts);
2863
2864 Value *Arg0 = CI->getArgOperand(0);
2865 Value *Arg1 = CI->getArgOperand(1);
2866
2867 StoreInst *SI = Builder.CreateAlignedStore(
2868 Arg1, Arg0,
2870 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2871 } else if (Name == "sse2.storel.dq") {
2872 Value *Arg0 = CI->getArgOperand(0);
2873 Value *Arg1 = CI->getArgOperand(1);
2874
2875 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2876 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2877 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2878 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2879 } else if (Name.starts_with("sse.storeu.") ||
2880 Name.starts_with("sse2.storeu.") ||
2881 Name.starts_with("avx.storeu.")) {
2882 Value *Arg0 = CI->getArgOperand(0);
2883 Value *Arg1 = CI->getArgOperand(1);
2884 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2885 } else if (Name == "avx512.mask.store.ss") {
2886 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2887 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2888 Mask, false);
2889 } else if (Name.starts_with("avx512.mask.store")) {
2890 // "avx512.mask.storeu." or "avx512.mask.store."
2891 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2892 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2893 CI->getArgOperand(2), Aligned);
2894 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2895 // Upgrade packed integer vector compare intrinsics to compare instructions.
2896 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2897 bool CmpEq = Name[9] == 'e';
2898 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2899 CI->getArgOperand(0), CI->getArgOperand(1));
2900 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2901 } else if (Name.starts_with("avx512.broadcastm")) {
2902 Type *ExtTy = Type::getInt32Ty(C);
2903 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2904 ExtTy = Type::getInt64Ty(C);
2905 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2906 ExtTy->getPrimitiveSizeInBits();
2907 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2908 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2909 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2910 Value *Vec = CI->getArgOperand(0);
2911 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2912 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2913 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2914 } else if (Name.starts_with("avx.sqrt.p") ||
2915 Name.starts_with("sse2.sqrt.p") ||
2916 Name.starts_with("sse.sqrt.p")) {
2917 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2918 {CI->getArgOperand(0)});
2919 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2920 if (CI->arg_size() == 4 &&
2921 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2922 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2923 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2924 : Intrinsic::x86_avx512_sqrt_pd_512;
2925
2926 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2927 Rep = Builder.CreateIntrinsic(IID, Args);
2928 } else {
2929 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2930 {CI->getArgOperand(0)});
2931 }
2932 Rep =
2933 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2934 } else if (Name.starts_with("avx512.ptestm") ||
2935 Name.starts_with("avx512.ptestnm")) {
2936 Value *Op0 = CI->getArgOperand(0);
2937 Value *Op1 = CI->getArgOperand(1);
2938 Value *Mask = CI->getArgOperand(2);
2939 Rep = Builder.CreateAnd(Op0, Op1);
2940 llvm::Type *Ty = Op0->getType();
2942 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2945 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2946 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2947 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2948 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2949 ->getNumElements();
2950 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2951 Rep =
2952 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2953 } else if (Name.starts_with("avx512.kunpck")) {
2954 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2955 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2956 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2957 int Indices[64];
2958 for (unsigned i = 0; i != NumElts; ++i)
2959 Indices[i] = i;
2960
2961 // First extract half of each vector. This gives better codegen than
2962 // doing it in a single shuffle.
2963 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2964 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2965 // Concat the vectors.
2966 // NOTE: Operands have to be swapped to match intrinsic definition.
2967 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2968 Rep = Builder.CreateBitCast(Rep, CI->getType());
2969 } else if (Name == "avx512.kand.w") {
2970 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2971 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2972 Rep = Builder.CreateAnd(LHS, RHS);
2973 Rep = Builder.CreateBitCast(Rep, CI->getType());
2974 } else if (Name == "avx512.kandn.w") {
2975 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2976 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2977 LHS = Builder.CreateNot(LHS);
2978 Rep = Builder.CreateAnd(LHS, RHS);
2979 Rep = Builder.CreateBitCast(Rep, CI->getType());
2980 } else if (Name == "avx512.kor.w") {
2981 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2982 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2983 Rep = Builder.CreateOr(LHS, RHS);
2984 Rep = Builder.CreateBitCast(Rep, CI->getType());
2985 } else if (Name == "avx512.kxor.w") {
2986 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2987 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2988 Rep = Builder.CreateXor(LHS, RHS);
2989 Rep = Builder.CreateBitCast(Rep, CI->getType());
2990 } else if (Name == "avx512.kxnor.w") {
2991 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2992 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2993 LHS = Builder.CreateNot(LHS);
2994 Rep = Builder.CreateXor(LHS, RHS);
2995 Rep = Builder.CreateBitCast(Rep, CI->getType());
2996 } else if (Name == "avx512.knot.w") {
2997 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2998 Rep = Builder.CreateNot(Rep);
2999 Rep = Builder.CreateBitCast(Rep, CI->getType());
3000 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3001 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3002 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3003 Rep = Builder.CreateOr(LHS, RHS);
3004 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3005 Value *C;
3006 if (Name[14] == 'c')
3007 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
3008 else
3009 C = ConstantInt::getNullValue(Builder.getInt16Ty());
3010 Rep = Builder.CreateICmpEQ(Rep, C);
3011 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3012 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3013 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3014 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3015 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3016 Type *I32Ty = Type::getInt32Ty(C);
3017 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
3018 ConstantInt::get(I32Ty, 0));
3019 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
3020 ConstantInt::get(I32Ty, 0));
3021 Value *EltOp;
3022 if (Name.contains(".add."))
3023 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3024 else if (Name.contains(".sub."))
3025 EltOp = Builder.CreateFSub(Elt0, Elt1);
3026 else if (Name.contains(".mul."))
3027 EltOp = Builder.CreateFMul(Elt0, Elt1);
3028 else
3029 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3030 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3031 ConstantInt::get(I32Ty, 0));
3032 } else if (Name.starts_with("avx512.mask.pcmp")) {
3033 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3034 bool CmpEq = Name[16] == 'e';
3035 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3036 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3037 Type *OpTy = CI->getArgOperand(0)->getType();
3038 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3039 Intrinsic::ID IID;
3040 switch (VecWidth) {
3041 default:
3042 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3043 break;
3044 case 128:
3045 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3046 break;
3047 case 256:
3048 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3049 break;
3050 case 512:
3051 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3052 break;
3053 }
3054
3055 Rep =
3056 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3057 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3058 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3059 Type *OpTy = CI->getArgOperand(0)->getType();
3060 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3061 unsigned EltWidth = OpTy->getScalarSizeInBits();
3062 Intrinsic::ID IID;
3063 if (VecWidth == 128 && EltWidth == 32)
3064 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3065 else if (VecWidth == 256 && EltWidth == 32)
3066 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3067 else if (VecWidth == 512 && EltWidth == 32)
3068 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3069 else if (VecWidth == 128 && EltWidth == 64)
3070 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3071 else if (VecWidth == 256 && EltWidth == 64)
3072 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3073 else if (VecWidth == 512 && EltWidth == 64)
3074 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3075 else
3076 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3077
3078 Rep =
3079 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3080 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3081 } else if (Name.starts_with("avx512.cmp.p")) {
3082 SmallVector<Value *, 4> Args(CI->args());
3083 Type *OpTy = Args[0]->getType();
3084 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3085 unsigned EltWidth = OpTy->getScalarSizeInBits();
3086 Intrinsic::ID IID;
3087 if (VecWidth == 128 && EltWidth == 32)
3088 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3089 else if (VecWidth == 256 && EltWidth == 32)
3090 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3091 else if (VecWidth == 512 && EltWidth == 32)
3092 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3093 else if (VecWidth == 128 && EltWidth == 64)
3094 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3095 else if (VecWidth == 256 && EltWidth == 64)
3096 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3097 else if (VecWidth == 512 && EltWidth == 64)
3098 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3099 else
3100 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3101
3103 if (VecWidth == 512)
3104 std::swap(Mask, Args.back());
3105 Args.push_back(Mask);
3106
3107 Rep = Builder.CreateIntrinsic(IID, Args);
3108 } else if (Name.starts_with("avx512.mask.cmp.")) {
3109 // Integer compare intrinsics.
3110 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3111 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3112 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3113 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3114 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3115 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3116 Name.starts_with("avx512.cvtw2mask.") ||
3117 Name.starts_with("avx512.cvtd2mask.") ||
3118 Name.starts_with("avx512.cvtq2mask.")) {
3119 Value *Op = CI->getArgOperand(0);
3120 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3121 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3122 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3123 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3124 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3125 Name.starts_with("avx512.mask.pabs")) {
3126 Rep = upgradeAbs(Builder, *CI);
3127 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3128 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3129 Name.starts_with("avx512.mask.pmaxs")) {
3130 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3131 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3132 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3133 Name.starts_with("avx512.mask.pmaxu")) {
3134 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3135 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3136 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3137 Name.starts_with("avx512.mask.pmins")) {
3138 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3139 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3140 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3141 Name.starts_with("avx512.mask.pminu")) {
3142 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3143 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3144 Name == "avx512.pmulu.dq.512" ||
3145 Name.starts_with("avx512.mask.pmulu.dq.")) {
3146 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3147 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3148 Name == "avx512.pmul.dq.512" ||
3149 Name.starts_with("avx512.mask.pmul.dq.")) {
3150 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3151 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3152 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3153 Rep =
3154 Builder.CreateSIToFP(CI->getArgOperand(1),
3155 cast<VectorType>(CI->getType())->getElementType());
3156 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3157 } else if (Name == "avx512.cvtusi2sd") {
3158 Rep =
3159 Builder.CreateUIToFP(CI->getArgOperand(1),
3160 cast<VectorType>(CI->getType())->getElementType());
3161 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3162 } else if (Name == "sse2.cvtss2sd") {
3163 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3164 Rep = Builder.CreateFPExt(
3165 Rep, cast<VectorType>(CI->getType())->getElementType());
3166 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3167 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3168 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3169 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3170 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3171 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3172 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3173 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3174 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3175 Name == "avx512.mask.cvtqq2ps.256" ||
3176 Name == "avx512.mask.cvtqq2ps.512" ||
3177 Name == "avx512.mask.cvtuqq2ps.256" ||
3178 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3179 Name == "avx.cvt.ps2.pd.256" ||
3180 Name == "avx512.mask.cvtps2pd.128" ||
3181 Name == "avx512.mask.cvtps2pd.256") {
3182 auto *DstTy = cast<FixedVectorType>(CI->getType());
3183 Rep = CI->getArgOperand(0);
3184 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3185
3186 unsigned NumDstElts = DstTy->getNumElements();
3187 if (NumDstElts < SrcTy->getNumElements()) {
3188 assert(NumDstElts == 2 && "Unexpected vector size");
3189 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3190 }
3191
3192 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3193 bool IsUnsigned = Name.contains("cvtu");
3194 if (IsPS2PD)
3195 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3196 else if (CI->arg_size() == 4 &&
3197 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3198 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3199 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3200 : Intrinsic::x86_avx512_sitofp_round;
3201 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3202 {Rep, CI->getArgOperand(3)});
3203 } else {
3204 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3205 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3206 }
3207
3208 if (CI->arg_size() >= 3)
3209 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3210 CI->getArgOperand(1));
3211 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3212 Name.starts_with("vcvtph2ps.")) {
3213 auto *DstTy = cast<FixedVectorType>(CI->getType());
3214 Rep = CI->getArgOperand(0);
3215 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3216 unsigned NumDstElts = DstTy->getNumElements();
3217 if (NumDstElts != SrcTy->getNumElements()) {
3218 assert(NumDstElts == 4 && "Unexpected vector size");
3219 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3220 }
3221 Rep = Builder.CreateBitCast(
3222 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3223 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3224 if (CI->arg_size() >= 3)
3225 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3226 CI->getArgOperand(1));
3227 } else if (Name.starts_with("avx512.mask.load")) {
3228 // "avx512.mask.loadu." or "avx512.mask.load."
3229 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3230 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3231 CI->getArgOperand(2), Aligned);
3232 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3233 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3234 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3235 ResultTy->getNumElements());
3236
3237 Rep = Builder.CreateIntrinsic(
3238 Intrinsic::masked_expandload, ResultTy,
3239 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3240 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3241 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3242 Value *MaskVec =
3243 getX86MaskVec(Builder, CI->getArgOperand(2),
3244 cast<FixedVectorType>(ResultTy)->getNumElements());
3245
3246 Rep = Builder.CreateIntrinsic(
3247 Intrinsic::masked_compressstore, ResultTy,
3248 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3249 } else if (Name.starts_with("avx512.mask.compress.") ||
3250 Name.starts_with("avx512.mask.expand.")) {
3251 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3252
3253 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3254 ResultTy->getNumElements());
3255
3256 bool IsCompress = Name[12] == 'c';
3257 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3258 : Intrinsic::x86_avx512_mask_expand;
3259 Rep = Builder.CreateIntrinsic(
3260 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3261 } else if (Name.starts_with("xop.vpcom")) {
3262 bool IsSigned;
3263 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3264 Name.ends_with("uq"))
3265 IsSigned = false;
3266 else if (Name.ends_with("b") || Name.ends_with("w") ||
3267 Name.ends_with("d") || Name.ends_with("q"))
3268 IsSigned = true;
3269 else
3270 reportFatalUsageErrorWithCI("Intrinsic has unknown suffix", CI);
3271
3272 unsigned Imm;
3273 if (CI->arg_size() == 3) {
3274 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3275 } else {
3276 Name = Name.substr(9); // strip off "xop.vpcom"
3277 if (Name.starts_with("lt"))
3278 Imm = 0;
3279 else if (Name.starts_with("le"))
3280 Imm = 1;
3281 else if (Name.starts_with("gt"))
3282 Imm = 2;
3283 else if (Name.starts_with("ge"))
3284 Imm = 3;
3285 else if (Name.starts_with("eq"))
3286 Imm = 4;
3287 else if (Name.starts_with("ne"))
3288 Imm = 5;
3289 else if (Name.starts_with("false"))
3290 Imm = 6;
3291 else if (Name.starts_with("true"))
3292 Imm = 7;
3293 else
3294 llvm_unreachable("Unknown condition");
3295 }
3296
3297 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3298 } else if (Name.starts_with("xop.vpcmov")) {
3299 Value *Sel = CI->getArgOperand(2);
3300 Value *NotSel = Builder.CreateNot(Sel);
3301 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3302 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3303 Rep = Builder.CreateOr(Sel0, Sel1);
3304 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3305 Name.starts_with("avx512.mask.prol")) {
3306 Rep = upgradeX86Rotate(Builder, *CI, false);
3307 } else if (Name.starts_with("avx512.pror") ||
3308 Name.starts_with("avx512.mask.pror")) {
3309 Rep = upgradeX86Rotate(Builder, *CI, true);
3310 } else if (Name.starts_with("avx512.vpshld.") ||
3311 Name.starts_with("avx512.mask.vpshld") ||
3312 Name.starts_with("avx512.maskz.vpshld")) {
3313 bool ZeroMask = Name[11] == 'z';
3314 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3315 } else if (Name.starts_with("avx512.vpshrd.") ||
3316 Name.starts_with("avx512.mask.vpshrd") ||
3317 Name.starts_with("avx512.maskz.vpshrd")) {
3318 bool ZeroMask = Name[11] == 'z';
3319 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3320 } else if (Name == "sse42.crc32.64.8") {
3321 Value *Trunc0 =
3322 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3323 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3324 {Trunc0, CI->getArgOperand(1)});
3325 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3326 } else if (Name.starts_with("avx.vbroadcast.s") ||
3327 Name.starts_with("avx512.vbroadcast.s")) {
3328 // Replace broadcasts with a series of insertelements.
3329 auto *VecTy = cast<FixedVectorType>(CI->getType());
3330 Type *EltTy = VecTy->getElementType();
3331 unsigned EltNum = VecTy->getNumElements();
3332 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3333 Type *I32Ty = Type::getInt32Ty(C);
3334 Rep = PoisonValue::get(VecTy);
3335 for (unsigned I = 0; I < EltNum; ++I)
3336 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3337 } else if (Name.starts_with("sse41.pmovsx") ||
3338 Name.starts_with("sse41.pmovzx") ||
3339 Name.starts_with("avx2.pmovsx") ||
3340 Name.starts_with("avx2.pmovzx") ||
3341 Name.starts_with("avx512.mask.pmovsx") ||
3342 Name.starts_with("avx512.mask.pmovzx")) {
3343 auto *DstTy = cast<FixedVectorType>(CI->getType());
3344 unsigned NumDstElts = DstTy->getNumElements();
3345
3346 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3347 SmallVector<int, 8> ShuffleMask(NumDstElts);
3348 for (unsigned i = 0; i != NumDstElts; ++i)
3349 ShuffleMask[i] = i;
3350
3351 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3352
3353 bool DoSext = Name.contains("pmovsx");
3354 Rep =
3355 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3356 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3357 if (CI->arg_size() == 3)
3358 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3359 CI->getArgOperand(1));
3360 } else if (Name == "avx512.mask.pmov.qd.256" ||
3361 Name == "avx512.mask.pmov.qd.512" ||
3362 Name == "avx512.mask.pmov.wb.256" ||
3363 Name == "avx512.mask.pmov.wb.512") {
3364 Type *Ty = CI->getArgOperand(1)->getType();
3365 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3366 Rep =
3367 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3368 } else if (Name.starts_with("avx.vbroadcastf128") ||
3369 Name == "avx2.vbroadcasti128") {
3370 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3371 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3372 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3373 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3374 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3375 if (NumSrcElts == 2)
3376 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3377 else
3378 Rep = Builder.CreateShuffleVector(Load,
3379 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3380 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3381 Name.starts_with("avx512.mask.shuf.f")) {
3382 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3383 Type *VT = CI->getType();
3384 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3385 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3386 unsigned ControlBitsMask = NumLanes - 1;
3387 unsigned NumControlBits = NumLanes / 2;
3388 SmallVector<int, 8> ShuffleMask(0);
3389
3390 for (unsigned l = 0; l != NumLanes; ++l) {
3391 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3392 // We actually need the other source.
3393 if (l >= NumLanes / 2)
3394 LaneMask += NumLanes;
3395 for (unsigned i = 0; i != NumElementsInLane; ++i)
3396 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3397 }
3398 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3399 CI->getArgOperand(1), ShuffleMask);
3400 Rep =
3401 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3402 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3403 Name.starts_with("avx512.mask.broadcasti")) {
3404 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3405 ->getNumElements();
3406 unsigned NumDstElts =
3407 cast<FixedVectorType>(CI->getType())->getNumElements();
3408
3409 SmallVector<int, 8> ShuffleMask(NumDstElts);
3410 for (unsigned i = 0; i != NumDstElts; ++i)
3411 ShuffleMask[i] = i % NumSrcElts;
3412
3413 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3414 CI->getArgOperand(0), ShuffleMask);
3415 Rep =
3416 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3417 } else if (Name.starts_with("avx2.pbroadcast") ||
3418 Name.starts_with("avx2.vbroadcast") ||
3419 Name.starts_with("avx512.pbroadcast") ||
3420 Name.starts_with("avx512.mask.broadcast.s")) {
3421 // Replace vp?broadcasts with a vector shuffle.
3422 Value *Op = CI->getArgOperand(0);
3423 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3424 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3427 Rep = Builder.CreateShuffleVector(Op, M);
3428
3429 if (CI->arg_size() == 3)
3430 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3431 CI->getArgOperand(1));
3432 } else if (Name.starts_with("sse2.padds.") ||
3433 Name.starts_with("avx2.padds.") ||
3434 Name.starts_with("avx512.padds.") ||
3435 Name.starts_with("avx512.mask.padds.")) {
3436 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3437 } else if (Name.starts_with("sse2.psubs.") ||
3438 Name.starts_with("avx2.psubs.") ||
3439 Name.starts_with("avx512.psubs.") ||
3440 Name.starts_with("avx512.mask.psubs.")) {
3441 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3442 } else if (Name.starts_with("sse2.paddus.") ||
3443 Name.starts_with("avx2.paddus.") ||
3444 Name.starts_with("avx512.mask.paddus.")) {
3445 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3446 } else if (Name.starts_with("sse2.psubus.") ||
3447 Name.starts_with("avx2.psubus.") ||
3448 Name.starts_with("avx512.mask.psubus.")) {
3449 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3450 } else if (Name.starts_with("avx512.mask.palignr.")) {
3451 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3452 CI->getArgOperand(1), CI->getArgOperand(2),
3453 CI->getArgOperand(3), CI->getArgOperand(4),
3454 false);
3455 } else if (Name.starts_with("avx512.mask.valign.")) {
3457 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3458 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3459 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3460 // 128/256-bit shift left specified in bits.
3461 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3462 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3463 Shift / 8); // Shift is in bits.
3464 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3465 // 128/256-bit shift right specified in bits.
3466 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3467 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3468 Shift / 8); // Shift is in bits.
3469 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3470 Name == "avx512.psll.dq.512") {
3471 // 128/256/512-bit shift left specified in bytes.
3472 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3473 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3474 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3475 Name == "avx512.psrl.dq.512") {
3476 // 128/256/512-bit shift right specified in bytes.
3477 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3478 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3479 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3480 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3481 Name.starts_with("avx2.pblendd.")) {
3482 Value *Op0 = CI->getArgOperand(0);
3483 Value *Op1 = CI->getArgOperand(1);
3484 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3485 auto *VecTy = cast<FixedVectorType>(CI->getType());
3486 unsigned NumElts = VecTy->getNumElements();
3487
3488 SmallVector<int, 16> Idxs(NumElts);
3489 for (unsigned i = 0; i != NumElts; ++i)
3490 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3491
3492 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3493 } else if (Name.starts_with("avx.vinsertf128.") ||
3494 Name == "avx2.vinserti128" ||
3495 Name.starts_with("avx512.mask.insert")) {
3496 Value *Op0 = CI->getArgOperand(0);
3497 Value *Op1 = CI->getArgOperand(1);
3498 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3499 unsigned DstNumElts =
3500 cast<FixedVectorType>(CI->getType())->getNumElements();
3501 unsigned SrcNumElts =
3502 cast<FixedVectorType>(Op1->getType())->getNumElements();
3503 unsigned Scale = DstNumElts / SrcNumElts;
3504
3505 // Mask off the high bits of the immediate value; hardware ignores those.
3506 Imm = Imm % Scale;
3507
3508 // Extend the second operand into a vector the size of the destination.
3509 SmallVector<int, 8> Idxs(DstNumElts);
3510 for (unsigned i = 0; i != SrcNumElts; ++i)
3511 Idxs[i] = i;
3512 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3513 Idxs[i] = SrcNumElts;
3514 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3515
3516 // Insert the second operand into the first operand.
3517
3518 // Note that there is no guarantee that instruction lowering will actually
3519 // produce a vinsertf128 instruction for the created shuffles. In
3520 // particular, the 0 immediate case involves no lane changes, so it can
3521 // be handled as a blend.
3522
3523 // Example of shuffle mask for 32-bit elements:
3524 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3525 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3526
3527 // First fill with identity mask.
3528 for (unsigned i = 0; i != DstNumElts; ++i)
3529 Idxs[i] = i;
3530 // Then replace the elements where we need to insert.
3531 for (unsigned i = 0; i != SrcNumElts; ++i)
3532 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3533 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3534
3535 // If the intrinsic has a mask operand, handle that.
3536 if (CI->arg_size() == 5)
3537 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3538 CI->getArgOperand(3));
3539 } else if (Name.starts_with("avx.vextractf128.") ||
3540 Name == "avx2.vextracti128" ||
3541 Name.starts_with("avx512.mask.vextract")) {
3542 Value *Op0 = CI->getArgOperand(0);
3543 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3544 unsigned DstNumElts =
3545 cast<FixedVectorType>(CI->getType())->getNumElements();
3546 unsigned SrcNumElts =
3547 cast<FixedVectorType>(Op0->getType())->getNumElements();
3548 unsigned Scale = SrcNumElts / DstNumElts;
3549
3550 // Mask off the high bits of the immediate value; hardware ignores those.
3551 Imm = Imm % Scale;
3552
3553 // Get indexes for the subvector of the input vector.
3554 SmallVector<int, 8> Idxs(DstNumElts);
3555 for (unsigned i = 0; i != DstNumElts; ++i) {
3556 Idxs[i] = i + (Imm * DstNumElts);
3557 }
3558 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3559
3560 // If the intrinsic has a mask operand, handle that.
3561 if (CI->arg_size() == 4)
3562 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3563 CI->getArgOperand(2));
3564 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3565 Name.starts_with("avx512.mask.perm.di.")) {
3566 Value *Op0 = CI->getArgOperand(0);
3567 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3568 auto *VecTy = cast<FixedVectorType>(CI->getType());
3569 unsigned NumElts = VecTy->getNumElements();
3570
3571 SmallVector<int, 8> Idxs(NumElts);
3572 for (unsigned i = 0; i != NumElts; ++i)
3573 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3574
3575 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3576
3577 if (CI->arg_size() == 4)
3578 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3579 CI->getArgOperand(2));
3580 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3581 // The immediate permute control byte looks like this:
3582 // [1:0] - select 128 bits from sources for low half of destination
3583 // [2] - ignore
3584 // [3] - zero low half of destination
3585 // [5:4] - select 128 bits from sources for high half of destination
3586 // [6] - ignore
3587 // [7] - zero high half of destination
3588
3589 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3590
3591 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3592 unsigned HalfSize = NumElts / 2;
3593 SmallVector<int, 8> ShuffleMask(NumElts);
3594
3595 // Determine which operand(s) are actually in use for this instruction.
3596 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3597 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3598
3599 // If needed, replace operands based on zero mask.
3600 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3601 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3602
3603 // Permute low half of result.
3604 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3605 for (unsigned i = 0; i < HalfSize; ++i)
3606 ShuffleMask[i] = StartIndex + i;
3607
3608 // Permute high half of result.
3609 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3610 for (unsigned i = 0; i < HalfSize; ++i)
3611 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3612
3613 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3614
3615 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3616 Name.starts_with("avx512.mask.vpermil.p") ||
3617 Name.starts_with("avx512.mask.pshuf.d.")) {
3618 Value *Op0 = CI->getArgOperand(0);
3619 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3620 auto *VecTy = cast<FixedVectorType>(CI->getType());
3621 unsigned NumElts = VecTy->getNumElements();
3622 // Calculate the size of each index in the immediate.
3623 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3624 unsigned IdxMask = ((1 << IdxSize) - 1);
3625
3626 SmallVector<int, 8> Idxs(NumElts);
3627 // Lookup the bits for this element, wrapping around the immediate every
3628 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3629 // to offset by the first index of each group.
3630 for (unsigned i = 0; i != NumElts; ++i)
3631 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3632
3633 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3634
3635 if (CI->arg_size() == 4)
3636 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3637 CI->getArgOperand(2));
3638 } else if (Name == "sse2.pshufl.w" ||
3639 Name.starts_with("avx512.mask.pshufl.w.")) {
3640 Value *Op0 = CI->getArgOperand(0);
3641 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3642 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3643
3644 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3645 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3646
3647 SmallVector<int, 16> Idxs(NumElts);
3648 for (unsigned l = 0; l != NumElts; l += 8) {
3649 for (unsigned i = 0; i != 4; ++i)
3650 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3651 for (unsigned i = 4; i != 8; ++i)
3652 Idxs[i + l] = i + l;
3653 }
3654
3655 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3656
3657 if (CI->arg_size() == 4)
3658 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3659 CI->getArgOperand(2));
3660 } else if (Name == "sse2.pshufh.w" ||
3661 Name.starts_with("avx512.mask.pshufh.w.")) {
3662 Value *Op0 = CI->getArgOperand(0);
3663 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3664 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3665
3666 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3667 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3668
3669 SmallVector<int, 16> Idxs(NumElts);
3670 for (unsigned l = 0; l != NumElts; l += 8) {
3671 for (unsigned i = 0; i != 4; ++i)
3672 Idxs[i + l] = i + l;
3673 for (unsigned i = 0; i != 4; ++i)
3674 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3675 }
3676
3677 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3678
3679 if (CI->arg_size() == 4)
3680 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3681 CI->getArgOperand(2));
3682 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3683 Value *Op0 = CI->getArgOperand(0);
3684 Value *Op1 = CI->getArgOperand(1);
3685 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3686 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3687
3688 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3689 unsigned HalfLaneElts = NumLaneElts / 2;
3690
3691 SmallVector<int, 16> Idxs(NumElts);
3692 for (unsigned i = 0; i != NumElts; ++i) {
3693 // Base index is the starting element of the lane.
3694 Idxs[i] = i - (i % NumLaneElts);
3695 // If we are half way through the lane switch to the other source.
3696 if ((i % NumLaneElts) >= HalfLaneElts)
3697 Idxs[i] += NumElts;
3698 // Now select the specific element. By adding HalfLaneElts bits from
3699 // the immediate. Wrapping around the immediate every 8-bits.
3700 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3701 }
3702
3703 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3704
3705 Rep =
3706 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3707 } else if (Name.starts_with("avx512.mask.movddup") ||
3708 Name.starts_with("avx512.mask.movshdup") ||
3709 Name.starts_with("avx512.mask.movsldup")) {
3710 Value *Op0 = CI->getArgOperand(0);
3711 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3712 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3713
3714 unsigned Offset = 0;
3715 if (Name.starts_with("avx512.mask.movshdup."))
3716 Offset = 1;
3717
3718 SmallVector<int, 16> Idxs(NumElts);
3719 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3720 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3721 Idxs[i + l + 0] = i + l + Offset;
3722 Idxs[i + l + 1] = i + l + Offset;
3723 }
3724
3725 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3726
3727 Rep =
3728 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3729 } else if (Name.starts_with("avx512.mask.punpckl") ||
3730 Name.starts_with("avx512.mask.unpckl.")) {
3731 Value *Op0 = CI->getArgOperand(0);
3732 Value *Op1 = CI->getArgOperand(1);
3733 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3734 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3735
3736 SmallVector<int, 64> Idxs(NumElts);
3737 for (int l = 0; l != NumElts; l += NumLaneElts)
3738 for (int i = 0; i != NumLaneElts; ++i)
3739 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3740
3741 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3742
3743 Rep =
3744 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3745 } else if (Name.starts_with("avx512.mask.punpckh") ||
3746 Name.starts_with("avx512.mask.unpckh.")) {
3747 Value *Op0 = CI->getArgOperand(0);
3748 Value *Op1 = CI->getArgOperand(1);
3749 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3750 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3751
3752 SmallVector<int, 64> Idxs(NumElts);
3753 for (int l = 0; l != NumElts; l += NumLaneElts)
3754 for (int i = 0; i != NumLaneElts; ++i)
3755 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3756
3757 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3758
3759 Rep =
3760 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3761 } else if (Name.starts_with("avx512.mask.and.") ||
3762 Name.starts_with("avx512.mask.pand.")) {
3763 VectorType *FTy = cast<VectorType>(CI->getType());
3765 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3766 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3767 Rep = Builder.CreateBitCast(Rep, FTy);
3768 Rep =
3769 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3770 } else if (Name.starts_with("avx512.mask.andn.") ||
3771 Name.starts_with("avx512.mask.pandn.")) {
3772 VectorType *FTy = cast<VectorType>(CI->getType());
3774 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3775 Rep = Builder.CreateAnd(Rep,
3776 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3777 Rep = Builder.CreateBitCast(Rep, FTy);
3778 Rep =
3779 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3780 } else if (Name.starts_with("avx512.mask.or.") ||
3781 Name.starts_with("avx512.mask.por.")) {
3782 VectorType *FTy = cast<VectorType>(CI->getType());
3784 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3785 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3786 Rep = Builder.CreateBitCast(Rep, FTy);
3787 Rep =
3788 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3789 } else if (Name.starts_with("avx512.mask.xor.") ||
3790 Name.starts_with("avx512.mask.pxor.")) {
3791 VectorType *FTy = cast<VectorType>(CI->getType());
3793 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3794 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3795 Rep = Builder.CreateBitCast(Rep, FTy);
3796 Rep =
3797 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3798 } else if (Name.starts_with("avx512.mask.padd.")) {
3799 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3800 Rep =
3801 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3802 } else if (Name.starts_with("avx512.mask.psub.")) {
3803 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3804 Rep =
3805 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3806 } else if (Name.starts_with("avx512.mask.pmull.")) {
3807 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3808 Rep =
3809 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3810 } else if (Name.starts_with("avx512.mask.add.p")) {
3811 if (Name.ends_with(".512")) {
3812 Intrinsic::ID IID;
3813 if (Name[17] == 's')
3814 IID = Intrinsic::x86_avx512_add_ps_512;
3815 else
3816 IID = Intrinsic::x86_avx512_add_pd_512;
3817
3818 Rep = Builder.CreateIntrinsic(
3819 IID,
3820 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3821 } else {
3822 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3823 }
3824 Rep =
3825 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3826 } else if (Name.starts_with("avx512.mask.div.p")) {
3827 if (Name.ends_with(".512")) {
3828 Intrinsic::ID IID;
3829 if (Name[17] == 's')
3830 IID = Intrinsic::x86_avx512_div_ps_512;
3831 else
3832 IID = Intrinsic::x86_avx512_div_pd_512;
3833
3834 Rep = Builder.CreateIntrinsic(
3835 IID,
3836 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3837 } else {
3838 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3839 }
3840 Rep =
3841 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3842 } else if (Name.starts_with("avx512.mask.mul.p")) {
3843 if (Name.ends_with(".512")) {
3844 Intrinsic::ID IID;
3845 if (Name[17] == 's')
3846 IID = Intrinsic::x86_avx512_mul_ps_512;
3847 else
3848 IID = Intrinsic::x86_avx512_mul_pd_512;
3849
3850 Rep = Builder.CreateIntrinsic(
3851 IID,
3852 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3853 } else {
3854 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3855 }
3856 Rep =
3857 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3858 } else if (Name.starts_with("avx512.mask.sub.p")) {
3859 if (Name.ends_with(".512")) {
3860 Intrinsic::ID IID;
3861 if (Name[17] == 's')
3862 IID = Intrinsic::x86_avx512_sub_ps_512;
3863 else
3864 IID = Intrinsic::x86_avx512_sub_pd_512;
3865
3866 Rep = Builder.CreateIntrinsic(
3867 IID,
3868 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3869 } else {
3870 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3871 }
3872 Rep =
3873 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3874 } else if ((Name.starts_with("avx512.mask.max.p") ||
3875 Name.starts_with("avx512.mask.min.p")) &&
3876 Name.drop_front(18) == ".512") {
3877 bool IsDouble = Name[17] == 'd';
3878 bool IsMin = Name[13] == 'i';
3879 static const Intrinsic::ID MinMaxTbl[2][2] = {
3880 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3881 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3882 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3883
3884 Rep = Builder.CreateIntrinsic(
3885 IID,
3886 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3887 Rep =
3888 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3889 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3890 Rep =
3891 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3892 {CI->getArgOperand(0), Builder.getInt1(false)});
3893 Rep =
3894 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3895 } else if (Name.starts_with("avx512.mask.psll")) {
3896 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3897 bool IsVariable = Name[16] == 'v';
3898 char Size = Name[16] == '.' ? Name[17]
3899 : Name[17] == '.' ? Name[18]
3900 : Name[18] == '.' ? Name[19]
3901 : Name[20];
3902
3903 Intrinsic::ID IID;
3904 if (IsVariable && Name[17] != '.') {
3905 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3906 IID = Intrinsic::x86_avx2_psllv_q;
3907 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3908 IID = Intrinsic::x86_avx2_psllv_q_256;
3909 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3910 IID = Intrinsic::x86_avx2_psllv_d;
3911 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3912 IID = Intrinsic::x86_avx2_psllv_d_256;
3913 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3914 IID = Intrinsic::x86_avx512_psllv_w_128;
3915 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3916 IID = Intrinsic::x86_avx512_psllv_w_256;
3917 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3918 IID = Intrinsic::x86_avx512_psllv_w_512;
3919 else
3920 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3921 } else if (Name.ends_with(".128")) {
3922 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3923 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3924 : Intrinsic::x86_sse2_psll_d;
3925 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3926 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3927 : Intrinsic::x86_sse2_psll_q;
3928 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3929 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3930 : Intrinsic::x86_sse2_psll_w;
3931 else
3932 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3933 } else if (Name.ends_with(".256")) {
3934 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3935 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3936 : Intrinsic::x86_avx2_psll_d;
3937 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3938 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3939 : Intrinsic::x86_avx2_psll_q;
3940 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3941 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3942 : Intrinsic::x86_avx2_psll_w;
3943 else
3944 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3945 } else {
3946 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3947 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3948 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3949 : Intrinsic::x86_avx512_psll_d_512;
3950 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3951 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3952 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3953 : Intrinsic::x86_avx512_psll_q_512;
3954 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3955 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3956 : Intrinsic::x86_avx512_psll_w_512;
3957 else
3958 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3959 }
3960
3961 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3962 } else if (Name.starts_with("avx512.mask.psrl")) {
3963 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3964 bool IsVariable = Name[16] == 'v';
3965 char Size = Name[16] == '.' ? Name[17]
3966 : Name[17] == '.' ? Name[18]
3967 : Name[18] == '.' ? Name[19]
3968 : Name[20];
3969
3970 Intrinsic::ID IID;
3971 if (IsVariable && Name[17] != '.') {
3972 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3973 IID = Intrinsic::x86_avx2_psrlv_q;
3974 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3975 IID = Intrinsic::x86_avx2_psrlv_q_256;
3976 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3977 IID = Intrinsic::x86_avx2_psrlv_d;
3978 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3979 IID = Intrinsic::x86_avx2_psrlv_d_256;
3980 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3981 IID = Intrinsic::x86_avx512_psrlv_w_128;
3982 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3983 IID = Intrinsic::x86_avx512_psrlv_w_256;
3984 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3985 IID = Intrinsic::x86_avx512_psrlv_w_512;
3986 else
3987 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3988 } else if (Name.ends_with(".128")) {
3989 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3990 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3991 : Intrinsic::x86_sse2_psrl_d;
3992 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3993 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3994 : Intrinsic::x86_sse2_psrl_q;
3995 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3996 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3997 : Intrinsic::x86_sse2_psrl_w;
3998 else
3999 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4000 } else if (Name.ends_with(".256")) {
4001 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4002 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4003 : Intrinsic::x86_avx2_psrl_d;
4004 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4005 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4006 : Intrinsic::x86_avx2_psrl_q;
4007 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4008 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4009 : Intrinsic::x86_avx2_psrl_w;
4010 else
4011 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4012 } else {
4013 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4014 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4015 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4016 : Intrinsic::x86_avx512_psrl_d_512;
4017 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4018 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4019 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4020 : Intrinsic::x86_avx512_psrl_q_512;
4021 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
4022 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4023 : Intrinsic::x86_avx512_psrl_w_512;
4024 else
4025 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4026 }
4027
4028 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4029 } else if (Name.starts_with("avx512.mask.psra")) {
4030 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4031 bool IsVariable = Name[16] == 'v';
4032 char Size = Name[16] == '.' ? Name[17]
4033 : Name[17] == '.' ? Name[18]
4034 : Name[18] == '.' ? Name[19]
4035 : Name[20];
4036
4037 Intrinsic::ID IID;
4038 if (IsVariable && Name[17] != '.') {
4039 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4040 IID = Intrinsic::x86_avx2_psrav_d;
4041 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4042 IID = Intrinsic::x86_avx2_psrav_d_256;
4043 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4044 IID = Intrinsic::x86_avx512_psrav_w_128;
4045 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4046 IID = Intrinsic::x86_avx512_psrav_w_256;
4047 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4048 IID = Intrinsic::x86_avx512_psrav_w_512;
4049 else
4050 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4051 } else if (Name.ends_with(".128")) {
4052 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4053 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4054 : Intrinsic::x86_sse2_psra_d;
4055 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4056 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4057 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4058 : Intrinsic::x86_avx512_psra_q_128;
4059 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4060 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4061 : Intrinsic::x86_sse2_psra_w;
4062 else
4063 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4064 } else if (Name.ends_with(".256")) {
4065 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4066 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4067 : Intrinsic::x86_avx2_psra_d;
4068 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4069 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4070 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4071 : Intrinsic::x86_avx512_psra_q_256;
4072 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4073 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4074 : Intrinsic::x86_avx2_psra_w;
4075 else
4076 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4077 } else {
4078 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4079 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4080 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4081 : Intrinsic::x86_avx512_psra_d_512;
4082 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4083 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4084 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4085 : Intrinsic::x86_avx512_psra_q_512;
4086 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4087 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4088 : Intrinsic::x86_avx512_psra_w_512;
4089 else
4090 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4091 }
4092
4093 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4094 } else if (Name.starts_with("avx512.mask.move.s")) {
4095 Rep = upgradeMaskedMove(Builder, *CI);
4096 } else if (Name.starts_with("avx512.cvtmask2")) {
4097 Rep = upgradeMaskToInt(Builder, *CI);
4098 } else if (Name.ends_with(".movntdqa")) {
4100 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4101
4102 LoadInst *LI = Builder.CreateAlignedLoad(
4103 CI->getType(), CI->getArgOperand(0),
4105 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4106 Rep = LI;
4107 } else if (Name.starts_with("fma.vfmadd.") ||
4108 Name.starts_with("fma.vfmsub.") ||
4109 Name.starts_with("fma.vfnmadd.") ||
4110 Name.starts_with("fma.vfnmsub.")) {
4111 bool NegMul = Name[6] == 'n';
4112 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4113 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4114
4115 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4116 CI->getArgOperand(2)};
4117
4118 if (IsScalar) {
4119 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4120 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4121 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4122 }
4123
4124 if (NegMul && !IsScalar)
4125 Ops[0] = Builder.CreateFNeg(Ops[0]);
4126 if (NegMul && IsScalar)
4127 Ops[1] = Builder.CreateFNeg(Ops[1]);
4128 if (NegAcc)
4129 Ops[2] = Builder.CreateFNeg(Ops[2]);
4130
4131 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4132
4133 if (IsScalar)
4134 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4135 } else if (Name.starts_with("fma4.vfmadd.s")) {
4136 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4137 CI->getArgOperand(2)};
4138
4139 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4140 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4141 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4142
4143 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4144
4145 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4146 Rep, (uint64_t)0);
4147 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4148 Name.starts_with("avx512.maskz.vfmadd.s") ||
4149 Name.starts_with("avx512.mask3.vfmadd.s") ||
4150 Name.starts_with("avx512.mask3.vfmsub.s") ||
4151 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4152 bool IsMask3 = Name[11] == '3';
4153 bool IsMaskZ = Name[11] == 'z';
4154 // Drop the "avx512.mask." to make it easier.
4155 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4156 bool NegMul = Name[2] == 'n';
4157 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4158
4159 Value *A = CI->getArgOperand(0);
4160 Value *B = CI->getArgOperand(1);
4161 Value *C = CI->getArgOperand(2);
4162
4163 if (NegMul && (IsMask3 || IsMaskZ))
4164 A = Builder.CreateFNeg(A);
4165 if (NegMul && !(IsMask3 || IsMaskZ))
4166 B = Builder.CreateFNeg(B);
4167 if (NegAcc)
4168 C = Builder.CreateFNeg(C);
4169
4170 A = Builder.CreateExtractElement(A, (uint64_t)0);
4171 B = Builder.CreateExtractElement(B, (uint64_t)0);
4172 C = Builder.CreateExtractElement(C, (uint64_t)0);
4173
4174 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4175 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4176 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4177
4178 Intrinsic::ID IID;
4179 if (Name.back() == 'd')
4180 IID = Intrinsic::x86_avx512_vfmadd_f64;
4181 else
4182 IID = Intrinsic::x86_avx512_vfmadd_f32;
4183 Rep = Builder.CreateIntrinsic(IID, Ops);
4184 } else {
4185 Rep = Builder.CreateFMA(A, B, C);
4186 }
4187
4188 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4189 : IsMask3 ? C
4190 : A;
4191
4192 // For Mask3 with NegAcc, we need to create a new extractelement that
4193 // avoids the negation above.
4194 if (NegAcc && IsMask3)
4195 PassThru =
4196 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4197
4198 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4199 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4200 (uint64_t)0);
4201 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4202 Name.starts_with("avx512.mask.vfnmadd.p") ||
4203 Name.starts_with("avx512.mask.vfnmsub.p") ||
4204 Name.starts_with("avx512.mask3.vfmadd.p") ||
4205 Name.starts_with("avx512.mask3.vfmsub.p") ||
4206 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4207 Name.starts_with("avx512.maskz.vfmadd.p")) {
4208 bool IsMask3 = Name[11] == '3';
4209 bool IsMaskZ = Name[11] == 'z';
4210 // Drop the "avx512.mask." to make it easier.
4211 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4212 bool NegMul = Name[2] == 'n';
4213 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4214
4215 Value *A = CI->getArgOperand(0);
4216 Value *B = CI->getArgOperand(1);
4217 Value *C = CI->getArgOperand(2);
4218
4219 if (NegMul && (IsMask3 || IsMaskZ))
4220 A = Builder.CreateFNeg(A);
4221 if (NegMul && !(IsMask3 || IsMaskZ))
4222 B = Builder.CreateFNeg(B);
4223 if (NegAcc)
4224 C = Builder.CreateFNeg(C);
4225
4226 if (CI->arg_size() == 5 &&
4227 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4228 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4229 Intrinsic::ID IID;
4230 // Check the character before ".512" in string.
4231 if (Name[Name.size() - 5] == 's')
4232 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4233 else
4234 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4235
4236 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4237 } else {
4238 Rep = Builder.CreateFMA(A, B, C);
4239 }
4240
4241 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4242 : IsMask3 ? CI->getArgOperand(2)
4243 : CI->getArgOperand(0);
4244
4245 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4246 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4247 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4248 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4249 Intrinsic::ID IID;
4250 if (VecWidth == 128 && EltWidth == 32)
4251 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4252 else if (VecWidth == 256 && EltWidth == 32)
4253 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4254 else if (VecWidth == 128 && EltWidth == 64)
4255 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4256 else if (VecWidth == 256 && EltWidth == 64)
4257 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4258 else
4259 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4260
4261 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4262 CI->getArgOperand(2)};
4263 Ops[2] = Builder.CreateFNeg(Ops[2]);
4264 Rep = Builder.CreateIntrinsic(IID, Ops);
4265 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4266 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4267 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4268 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4269 bool IsMask3 = Name[11] == '3';
4270 bool IsMaskZ = Name[11] == 'z';
4271 // Drop the "avx512.mask." to make it easier.
4272 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4273 bool IsSubAdd = Name[3] == 's';
4274 if (CI->arg_size() == 5) {
4275 Intrinsic::ID IID;
4276 // Check the character before ".512" in string.
4277 if (Name[Name.size() - 5] == 's')
4278 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4279 else
4280 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4281
4282 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4283 CI->getArgOperand(2), CI->getArgOperand(4)};
4284 if (IsSubAdd)
4285 Ops[2] = Builder.CreateFNeg(Ops[2]);
4286
4287 Rep = Builder.CreateIntrinsic(IID, Ops);
4288 } else {
4289 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4290
4291 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4292 CI->getArgOperand(2)};
4293
4295 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4296 Value *Odd = Builder.CreateCall(FMA, Ops);
4297 Ops[2] = Builder.CreateFNeg(Ops[2]);
4298 Value *Even = Builder.CreateCall(FMA, Ops);
4299
4300 if (IsSubAdd)
4301 std::swap(Even, Odd);
4302
4303 SmallVector<int, 32> Idxs(NumElts);
4304 for (int i = 0; i != NumElts; ++i)
4305 Idxs[i] = i + (i % 2) * NumElts;
4306
4307 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4308 }
4309
4310 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4311 : IsMask3 ? CI->getArgOperand(2)
4312 : CI->getArgOperand(0);
4313
4314 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4315 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4316 Name.starts_with("avx512.maskz.pternlog.")) {
4317 bool ZeroMask = Name[11] == 'z';
4318 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4319 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4320 Intrinsic::ID IID;
4321 if (VecWidth == 128 && EltWidth == 32)
4322 IID = Intrinsic::x86_avx512_pternlog_d_128;
4323 else if (VecWidth == 256 && EltWidth == 32)
4324 IID = Intrinsic::x86_avx512_pternlog_d_256;
4325 else if (VecWidth == 512 && EltWidth == 32)
4326 IID = Intrinsic::x86_avx512_pternlog_d_512;
4327 else if (VecWidth == 128 && EltWidth == 64)
4328 IID = Intrinsic::x86_avx512_pternlog_q_128;
4329 else if (VecWidth == 256 && EltWidth == 64)
4330 IID = Intrinsic::x86_avx512_pternlog_q_256;
4331 else if (VecWidth == 512 && EltWidth == 64)
4332 IID = Intrinsic::x86_avx512_pternlog_q_512;
4333 else
4334 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4335
4336 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4337 CI->getArgOperand(2), CI->getArgOperand(3)};
4338 Rep = Builder.CreateIntrinsic(IID, Args);
4339 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4340 : CI->getArgOperand(0);
4341 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4342 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4343 Name.starts_with("avx512.maskz.vpmadd52")) {
4344 bool ZeroMask = Name[11] == 'z';
4345 bool High = Name[20] == 'h' || Name[21] == 'h';
4346 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4347 Intrinsic::ID IID;
4348 if (VecWidth == 128 && !High)
4349 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4350 else if (VecWidth == 256 && !High)
4351 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4352 else if (VecWidth == 512 && !High)
4353 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4354 else if (VecWidth == 128 && High)
4355 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4356 else if (VecWidth == 256 && High)
4357 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4358 else if (VecWidth == 512 && High)
4359 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4360 else
4361 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4362
4363 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4364 CI->getArgOperand(2)};
4365 Rep = Builder.CreateIntrinsic(IID, Args);
4366 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4367 : CI->getArgOperand(0);
4368 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4369 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4370 Name.starts_with("avx512.mask.vpermt2var.") ||
4371 Name.starts_with("avx512.maskz.vpermt2var.")) {
4372 bool ZeroMask = Name[11] == 'z';
4373 bool IndexForm = Name[17] == 'i';
4374 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4375 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4376 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4377 Name.starts_with("avx512.mask.vpdpbusds.") ||
4378 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4379 bool ZeroMask = Name[11] == 'z';
4380 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4381 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4382 Intrinsic::ID IID;
4383 if (VecWidth == 128 && !IsSaturating)
4384 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4385 else if (VecWidth == 256 && !IsSaturating)
4386 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4387 else if (VecWidth == 512 && !IsSaturating)
4388 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4389 else if (VecWidth == 128 && IsSaturating)
4390 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4391 else if (VecWidth == 256 && IsSaturating)
4392 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4393 else if (VecWidth == 512 && IsSaturating)
4394 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4395 else
4396 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4397
4398 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4399 CI->getArgOperand(2)};
4400
4401 // Input arguments types were incorrectly set to vectors of i32 before but
4402 // they should be vectors of i8. Insert bit cast when encountering the old
4403 // types
4404 if (Args[1]->getType()->isVectorTy() &&
4405 cast<VectorType>(Args[1]->getType())
4406 ->getElementType()
4407 ->isIntegerTy(32) &&
4408 Args[2]->getType()->isVectorTy() &&
4409 cast<VectorType>(Args[2]->getType())
4410 ->getElementType()
4411 ->isIntegerTy(32)) {
4412 Type *NewArgType = nullptr;
4413 if (VecWidth == 128)
4414 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4415 else if (VecWidth == 256)
4416 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4417 else if (VecWidth == 512)
4418 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4419 else
4420 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4421 CI);
4422
4423 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4424 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4425 }
4426
4427 Rep = Builder.CreateIntrinsic(IID, Args);
4428 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4429 : CI->getArgOperand(0);
4430 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4431 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4432 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4433 Name.starts_with("avx512.mask.vpdpwssds.") ||
4434 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4435 bool ZeroMask = Name[11] == 'z';
4436 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4437 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4438 Intrinsic::ID IID;
4439 if (VecWidth == 128 && !IsSaturating)
4440 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4441 else if (VecWidth == 256 && !IsSaturating)
4442 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4443 else if (VecWidth == 512 && !IsSaturating)
4444 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4445 else if (VecWidth == 128 && IsSaturating)
4446 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4447 else if (VecWidth == 256 && IsSaturating)
4448 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4449 else if (VecWidth == 512 && IsSaturating)
4450 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4451 else
4452 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4453
4454 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4455 CI->getArgOperand(2)};
4456
4457 // Input arguments types were incorrectly set to vectors of i32 before but
4458 // they should be vectors of i16. Insert bit cast when encountering the old
4459 // types
4460 if (Args[1]->getType()->isVectorTy() &&
4461 cast<VectorType>(Args[1]->getType())
4462 ->getElementType()
4463 ->isIntegerTy(32) &&
4464 Args[2]->getType()->isVectorTy() &&
4465 cast<VectorType>(Args[2]->getType())
4466 ->getElementType()
4467 ->isIntegerTy(32)) {
4468 Type *NewArgType = nullptr;
4469 if (VecWidth == 128)
4470 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4471 else if (VecWidth == 256)
4472 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4473 else if (VecWidth == 512)
4474 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4475 else
4476 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4477 CI);
4478
4479 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4480 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4481 }
4482
4483 Rep = Builder.CreateIntrinsic(IID, Args);
4484 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4485 : CI->getArgOperand(0);
4486 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4487 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4488 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4489 Name == "subborrow.u32" || Name == "subborrow.u64") {
4490 Intrinsic::ID IID;
4491 if (Name[0] == 'a' && Name.back() == '2')
4492 IID = Intrinsic::x86_addcarry_32;
4493 else if (Name[0] == 'a' && Name.back() == '4')
4494 IID = Intrinsic::x86_addcarry_64;
4495 else if (Name[0] == 's' && Name.back() == '2')
4496 IID = Intrinsic::x86_subborrow_32;
4497 else if (Name[0] == 's' && Name.back() == '4')
4498 IID = Intrinsic::x86_subborrow_64;
4499 else
4500 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4501
4502 // Make a call with 3 operands.
4503 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4504 CI->getArgOperand(2)};
4505 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4506
4507 // Extract the second result and store it.
4508 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4509 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4510 // Replace the original call result with the first result of the new call.
4511 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4512
4513 CI->replaceAllUsesWith(CF);
4514 Rep = nullptr;
4515 } else if (Name.starts_with("avx512.mask.") &&
4516 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4517 // Rep will be updated by the call in the condition.
4518 } else
4519 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4520
4521 return Rep;
4522}
4523
4525 Function *F, IRBuilder<> &Builder) {
4526 if (Name.starts_with("neon.bfcvt")) {
4527 if (Name.starts_with("neon.bfcvtn2")) {
4528 SmallVector<int, 32> LoMask(4);
4529 std::iota(LoMask.begin(), LoMask.end(), 0);
4530 SmallVector<int, 32> ConcatMask(8);
4531 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4532 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4533 Value *Trunc =
4534 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4535 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4536 } else if (Name.starts_with("neon.bfcvtn")) {
4537 SmallVector<int, 32> ConcatMask(8);
4538 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4539 Type *V4BF16 =
4540 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4541 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4542 dbgs() << "Trunc: " << *Trunc << "\n";
4543 return Builder.CreateShuffleVector(
4544 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4545 } else {
4546 return Builder.CreateFPTrunc(CI->getOperand(0),
4547 Type::getBFloatTy(F->getContext()));
4548 }
4549 } else if (Name.starts_with("sve.fcvt")) {
4550 Intrinsic::ID NewID =
4552 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4553 .Case("sve.fcvtnt.bf16f32",
4554 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4556 if (NewID == Intrinsic::not_intrinsic)
4557 llvm_unreachable("Unhandled Intrinsic!");
4558
4559 SmallVector<Value *, 3> Args(CI->args());
4560
4561 // The original intrinsics incorrectly used a predicate based on the
4562 // smallest element type rather than the largest.
4563 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4564 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4565
4566 if (Args[1]->getType() != BadPredTy)
4567 llvm_unreachable("Unexpected predicate type!");
4568
4569 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4570 BadPredTy, Args[1]);
4571 Args[1] = Builder.CreateIntrinsic(
4572 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4573
4574 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4575 CI->getName());
4576 }
4577
4578 llvm_unreachable("Unhandled Intrinsic!");
4579}
4580
                                      IRBuilder<> &Builder) {
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
                                          CI->getArgOperand(0),
                                          /*FMFSource=*/nullptr, CI->getName());
    Value *C1 = Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_v2i,
        {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_i2v,
        {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    // These MVE/CDE intrinsics on 64-bit element vectors were originally
    // declared with a v4i1 predicate; they now take v2i1. Recreate the call
    // with v2i1 overload types, converting any i1-vector operands through the
    // MVE pred_v2i / pred_i2v round trip.
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
    // Build the overload-type list expected by the v2i1 form of each
    // intrinsic; which operand types participate differs per intrinsic.
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(),
             CI->getOperand(1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
             CI->getOperand(2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    // Forward the original operands, narrowing any i1-vector predicate
    // operands from v4i1 to v2i1 via pred_v2i / pred_i2v.
    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateIntrinsic(
            Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
        Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
      }
      Ops.push_back(Op);
    }

    return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
                                   CI->getName());
  }
  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
4665
// These are expected to have the arguments:
// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
//
// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
//
                                         Function *F, IRBuilder<> &Builder) {
  // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
  // for compatibility.
  auto UpgradeLegacyWMMAIUIntrinsicCall =
      [](Function *F, CallBase *CI, IRBuilder<> &Builder,
         ArrayRef<Type *> OverloadTys) -> Value * {
    // Prepare arguments, append clamp=0 for compatibility
    SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
    Args.push_back(Builder.getFalse());

    // Insert the declaration for the right overload types
        F->getParent(), F->getIntrinsicID(), OverloadTys);

    // Copy operand bundles if any
    CI->getOperandBundlesAsDefs(Bundles);

    // Create the new call and copy calling properties
    auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
    NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
    NewCall->setCallingConv(CI->getCallingConv());
    NewCall->setAttributes(CI->getAttributes());
    NewCall->setDebugLoc(CI->getDebugLoc());
    NewCall->copyMetadata(*CI);
    return NewCall;
  };

  if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
    assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
                                  "intrinsic should have 7 arguments");
    Type *T1 = CI->getArgOperand(4)->getType();
    Type *T2 = CI->getArgOperand(1)->getType();
    return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
  }
  if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
    assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
                                  "intrinsic should have 8 arguments");
    Type *T1 = CI->getArgOperand(4)->getType();
    Type *T2 = CI->getArgOperand(1)->getType();
    Type *T3 = CI->getArgOperand(3)->getType();
    Type *T4 = CI->getArgOperand(5)->getType();
    return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
  }

  // Map the legacy atomic intrinsic name prefix onto the atomicrmw opcode the
  // instruction form uses.
  AtomicRMWInst::BinOp RMWOp =
          .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
          .StartsWith("ds.fmin", AtomicRMWInst::FMin)
          .StartsWith("ds.fmax", AtomicRMWInst::FMax)
          .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
          .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
          .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
          .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
          .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
          .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
          .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
          .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
          .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
          .StartsWith("atomic.csub", AtomicRMWInst::USubSat);

  unsigned NumOperands = CI->getNumOperands();
  if (NumOperands < 3) // Malformed bitcode.
    return nullptr;

  Value *Ptr = CI->getArgOperand(0);
  PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy) // Malformed.
    return nullptr;

  Value *Val = CI->getArgOperand(1);
  if (Val->getType() != CI->getType()) // Malformed.
    return nullptr;

  ConstantInt *OrderArg = nullptr;
  bool IsVolatile = false;

  // These should have 5 arguments (plus the callee). A separate version of the
  // ds_fadd intrinsic was defined for bf16 which was missing arguments.
  if (NumOperands > 3)
    OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));

  // Ignore scope argument at 3

  if (NumOperands > 5) {
    // A non-constant or non-zero volatile flag is treated as volatile.
    ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
    IsVolatile = !VolatileArg || !VolatileArg->isZero();
  }

  if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
    Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());

  LLVMContext &Ctx = F->getContext();

  // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
  Type *RetTy = CI->getType();
  if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
    if (VT->getElementType()->isIntegerTy(16)) {
      VectorType *AsBF16 =
          VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
      Val = Builder.CreateBitCast(Val, AsBF16);
    }
  }

  // The scope argument never really worked correctly. Use agent as the most
  // conservative option which should still always produce the instruction.
  SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);

  unsigned AddrSpace = PtrTy->getAddressSpace();
  if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
    // Non-LDS legacy intrinsics implied no fine-grained memory access;
    // preserve that with metadata on the new atomicrmw.
    MDNode *EmptyMD = MDNode::get(F->getContext(), {});
    RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
    if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
      RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
  }

  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
    // Flat atomics are known not to touch private memory; record that as a
    // noalias.addrspace range.
    MDBuilder MDB(F->getContext());
    MDNode *RangeNotPrivate =
    RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
  }

  if (IsVolatile)
    RMW->setVolatile(true);

  // Cast back in case the result type was rewritten (e.g. v2i16 -> v2bf16).
  return Builder.CreateBitCast(RMW, RetTy);
}
4806
4807/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4808/// plain MDNode, as it's the verifier's job to check these are the correct
4809/// types later.
4810static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4811 if (Op < CI->arg_size()) {
4812 if (MetadataAsValue *MAV =
4814 Metadata *MD = MAV->getMetadata();
4815 return dyn_cast_if_present<MDNode>(MD);
4816 }
4817 }
4818 return nullptr;
4819}
4820
4821/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4822static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4823 if (Op < CI->arg_size())
4825 return MAV->getMetadata();
4826 return nullptr;
4827}
4828
4830 // The MDNode attached to this instruction might not be the correct type,
4831 // as the verifier has not yet be run. Fetch it as a bare MDNode.
4832 return I->getDebugLoc().getAsMDNode();
4833}
4834
/// Convert debug intrinsic calls to non-instruction debug records.
/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
/// \p CI - The debug intrinsic call.
  DbgRecord *DR = nullptr;
  if (Name == "label") {
                               CI->getDebugLoc());
  } else if (Name == "assign") {
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
        unwrapMAVMetadataOp(CI, 4),
        /*The address is a Value ref, it will be stored as a Metadata */
        unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
  } else if (Name == "declare") {
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
        getDebugLocSafe(CI));
  } else if (Name == "addr") {
    // Upgrade dbg.addr to dbg.value with DW_OP_deref.
    MDNode *ExprNode = unwrapMAVOp(CI, 2);
    // Don't try to add something to the expression if it's not an expression.
    // Instead, allow the verifier to fail later.
    if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
      ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
    }
        unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
        getDebugLocSafe(CI));
  } else if (Name == "value") {
    // An old version of dbg.value had an extra offset argument.
    unsigned VarOp = 1;
    unsigned ExprOp = 2;
    if (CI->arg_size() == 4) {
      // Nonzero offset dbg.values get dropped without a replacement.
      if (!Offset || !Offset->isNullValue())
        return;
      // Skip over the offset argument when reading variable and expression.
      VarOp = 2;
      ExprOp = 3;
    }
        unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
        nullptr, getDebugLocSafe(CI));
  }
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
  // Attach the new record immediately before the old intrinsic call.
  CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
}
4887
  if (!Offset)
    reportFatalUsageError("Invalid llvm.vector.splice offset argument");
  int64_t OffsetVal = Offset->getSExtValue();
  // The signed offset of the old llvm.vector.splice selects between the new
  // left/right forms, which take the offset's magnitude as an i32.
  return Builder.CreateIntrinsic(OffsetVal >= 0
                                     ? Intrinsic::vector_splice_left
                                     : Intrinsic::vector_splice_right,
                                 CI->getType(),
                                 {CI->getArgOperand(0), CI->getArgOperand(1),
                                  Builder.getInt32(std::abs(OffsetVal))});
}
4900
4902 Function *F, IRBuilder<> &Builder) {
4903 if (Name.starts_with("to.fp16")) {
4904 Value *Cast =
4905 Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
4906 return Builder.CreateBitCast(Cast, CI->getType());
4907 }
4908
4909 if (Name.starts_with("from.fp16")) {
4910 Value *Cast =
4911 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
4912 return Builder.CreateFPExt(Cast, CI->getType());
4913 }
4914
4915 return nullptr;
4916}
4917
4918/// Upgrade a call to an old intrinsic. All argument and return casting must be
4919/// provided to seamlessly integrate with existing context.
///
/// Two modes: when \p NewFn is null the call is expanded inline (per-target
/// helpers produce a replacement value \c Rep) and the old call is erased;
/// otherwise the arguments are re-marshalled to fit \p NewFn's signature.
4921 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4922 // checks the callee's function type matches. It's likely we need to handle
4923 // type changes here.
// NOTE(review): the declaration of F (dyn_cast<Function> of the called
// operand, per the comment above) is on a line elided from this view.
4925 if (!F)
4926 return;
4927
4928 LLVMContext &C = CI->getContext();
4929 IRBuilder<> Builder(C);
// Preserve fast-math flags of the old call on any IR built here.
4930 if (isa<FPMathOperator>(CI))
4931 Builder.setFastMathFlags(CI->getFastMathFlags());
4932 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4933
// No replacement declaration: dispatch on the stripped intrinsic name and
// expand the call inline. The old call is always erased at the end.
4934 if (!NewFn) {
4935 // Get the Function's name.
4936 StringRef Name = F->getName();
4937 if (!Name.consume_front("llvm."))
4938 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
4939
4940 bool IsX86 = Name.consume_front("x86.");
4941 bool IsNVVM = Name.consume_front("nvvm.");
4942 bool IsAArch64 = Name.consume_front("aarch64.");
4943 bool IsARM = Name.consume_front("arm.");
4944 bool IsAMDGCN = Name.consume_front("amdgcn.");
4945 bool IsDbg = Name.consume_front("dbg.");
4946 bool IsOldSplice =
4947 (Name.consume_front("experimental.vector.splice") ||
4948 Name.consume_front("vector.splice")) &&
4949 !(Name.starts_with(".left") || Name.starts_with(".right"))
4950 Value *Rep = nullptr;
4951
4952 if (!IsX86 && Name == "stackprotectorcheck") {
4953 Rep = nullptr;
4954 } else if (IsNVVM) {
4955 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4956 } else if (IsX86) {
4957 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4958 } else if (IsAArch64) {
4959 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4960 } else if (IsARM) {
4961 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4962 } else if (IsAMDGCN) {
4963 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4964 } else if (IsDbg) {
// NOTE(review): the dbg.* upgrade call on the elided line converts the
// intrinsic to a DbgRecord and produces no replacement value.
4966 } else if (IsOldSplice) {
4967 Rep = upgradeVectorSplice(CI, Builder);
4968 } else if (Name.consume_front("convert.")) {
4969 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
4970 } else {
4971 llvm_unreachable("Unknown function for CallBase upgrade.");
4972 }
4973
4974 if (Rep)
4975 CI->replaceAllUsesWith(Rep);
4976 CI->eraseFromParent();
4977 return;
4978 }
4979
// Fallback shared by the switch below: handles pure renames (identical
// function type), named-to-literal struct return changes, or — when the
// signatures truly differ — leaves a malformed call for the verifier.
4980 const auto &DefaultCase = [&]() -> void {
4981 if (F == NewFn)
4982 return;
4983
4984 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4985 // Handle generic mangling change.
4986 assert(
4987 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4988 "Unknown function for CallBase upgrade and isn't just a name change");
4989 CI->setCalledFunction(NewFn);
4990 return;
4991 }
4992
4993 // This must be an upgrade from a named to a literal struct.
4994 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4995 assert(OldST != NewFn->getReturnType() &&
4996 "Return type must have changed");
4997 assert(OldST->getNumElements() ==
4998 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4999 "Must have same number of elements");
5000
5001 SmallVector<Value *> Args(CI->args());
5002 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5003 NewCI->setAttributes(CI->getAttributes());
// Repackage the literal-struct result element-by-element into the old
// named struct type so existing users keep their type.
5004 Value *Res = PoisonValue::get(OldST);
5005 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5006 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5007 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5008 }
5009 CI->replaceAllUsesWith(Res);
5010 CI->eraseFromParent();
5011 return;
5012 }
5013
5014 // We're probably about to produce something invalid. Let the verifier catch
5015 // it instead of dying here.
5016 CI->setCalledOperand(
5018 return;
5019 };
5020 CallInst *NewCall = nullptr;
5021 switch (NewFn->getIntrinsicID()) {
5022 default: {
5023 DefaultCase();
5024 return;
5025 }
5026 case Intrinsic::arm_neon_vst1:
5027 case Intrinsic::arm_neon_vst2:
5028 case Intrinsic::arm_neon_vst3:
5029 case Intrinsic::arm_neon_vst4:
5030 case Intrinsic::arm_neon_vst2lane:
5031 case Intrinsic::arm_neon_vst3lane:
5032 case Intrinsic::arm_neon_vst4lane: {
5033 SmallVector<Value *, 4> Args(CI->args());
5034 NewCall = Builder.CreateCall(NewFn, Args);
5035 break;
5036 }
5037 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5038 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5039 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
// The lane immediate changed type; rebuild it as i32 with the same value.
5040 LLVMContext &Ctx = F->getParent()->getContext();
5041 SmallVector<Value *, 4> Args(CI->args());
5042 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
5043 cast<ConstantInt>(Args[3])->getZExtValue());
5044 NewCall = Builder.CreateCall(NewFn, Args);
5045 break;
5046 }
5047 case Intrinsic::aarch64_sve_ld3_sret:
5048 case Intrinsic::aarch64_sve_ld4_sret:
5049 case Intrinsic::aarch64_sve_ld2_sret: {
5050 StringRef Name = F->getName();
5051 Name = Name.substr(5);
5052 unsigned N = StringSwitch<unsigned>(Name)
5053 .StartsWith("aarch64.sve.ld2", 2)
5054 .StartsWith("aarch64.sve.ld3", 3)
5055 .StartsWith("aarch64.sve.ld4", 4)
5056 .Default(0);
5057 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5058 unsigned MinElts = RetTy->getMinNumElements() / N;
5059 SmallVector<Value *, 2> Args(CI->args());
5060 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
// Pack the N sub-vectors of the sret result back into the old wide
// scalable-vector return type.
5061 Value *Ret = llvm::PoisonValue::get(RetTy);
5062 for (unsigned I = 0; I < N; I++) {
5063 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5064 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5065 }
5066 NewCall = dyn_cast<CallInst>(Ret);
5067 break;
5068 }
5069
5070 case Intrinsic::coro_end: {
// coro.end gained a trailing token argument; default it to none.
5071 SmallVector<Value *, 3> Args(CI->args());
5072 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5073 NewCall = Builder.CreateCall(NewFn, Args);
5074 break;
5075 }
5076
5077 case Intrinsic::vector_extract: {
5078 StringRef Name = F->getName();
5079 Name = Name.substr(5); // Strip llvm
5080 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5081 DefaultCase();
5082 return;
5083 }
// Tuple index becomes an element offset: index * elements-per-subvector.
5084 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5085 unsigned MinElts = RetTy->getMinNumElements();
5086 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5087 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5088 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5089 break;
5090 }
5091
5092 case Intrinsic::vector_insert: {
5093 StringRef Name = F->getName();
5094 Name = Name.substr(5);
5095 if (!Name.starts_with("aarch64.sve.tuple")) {
5096 DefaultCase();
5097 return;
5098 }
5099 if (Name.starts_with("aarch64.sve.tuple.set")) {
5100 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5101 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5102 Value *NewIdx =
5103 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5104 NewCall = Builder.CreateCall(
5105 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5106 break;
5107 }
5108 if (Name.starts_with("aarch64.sve.tuple.create")) {
5109 unsigned N = StringSwitch<unsigned>(Name)
5110 .StartsWith("aarch64.sve.tuple.create2", 2)
5111 .StartsWith("aarch64.sve.tuple.create3", 3)
5112 .StartsWith("aarch64.sve.tuple.create4", 4)
5113 .Default(0);
5114 assert(N > 1 && "Create is expected to be between 2-4");
// Concatenate the N argument vectors into one wide scalable vector.
5115 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5116 Value *Ret = llvm::PoisonValue::get(RetTy);
5117 unsigned MinElts = RetTy->getMinNumElements() / N;
5118 for (unsigned I = 0; I < N; I++) {
5119 Value *V = CI->getArgOperand(I);
5120 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5121 }
5122 NewCall = dyn_cast<CallInst>(Ret);
5123 }
5124 break;
5125 }
5126
5127 case Intrinsic::arm_neon_bfdot:
5128 case Intrinsic::arm_neon_bfmmla:
5129 case Intrinsic::arm_neon_bfmlalb:
5130 case Intrinsic::arm_neon_bfmlalt:
5131 case Intrinsic::aarch64_neon_bfdot:
5132 case Intrinsic::aarch64_neon_bfmmla:
5133 case Intrinsic::aarch64_neon_bfmlalb:
5134 case Intrinsic::aarch64_neon_bfmlalt: {
// The bf16 operands used to be passed as integer vectors; bitcast the two
// data operands to the proper bfloat vector type.
5136 assert(CI->arg_size() == 3 &&
5137 "Mismatch between function args and call args");
5138 size_t OperandWidth =
5140 assert((OperandWidth == 64 || OperandWidth == 128) &&
5141 "Unexpected operand width");
5142 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5143 auto Iter = CI->args().begin();
5144 Args.push_back(*Iter++);
5145 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5146 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5147 NewCall = Builder.CreateCall(NewFn, Args);
5148 break;
5149 }
5150
5151 case Intrinsic::bitreverse:
5152 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5153 break;
5154
5155 case Intrinsic::ctlz:
5156 case Intrinsic::cttz: {
5157 if (CI->arg_size() != 1) {
5158 DefaultCase();
5159 return;
5160 }
5161
// Old single-argument form: add the is_zero_poison=false flag.
5162 NewCall =
5163 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5164 break;
5165 }
5166
5167 case Intrinsic::objectsize: {
// Fill in the flags that later versions of llvm.objectsize added.
5168 Value *NullIsUnknownSize =
5169 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5170 Value *Dynamic =
5171 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5172 NewCall = Builder.CreateCall(
5173 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5174 break;
5175 }
5176
5177 case Intrinsic::ctpop:
5178 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5179 break;
5180 case Intrinsic::dbg_value: {
5181 StringRef Name = F->getName();
5182 Name = Name.substr(5); // Strip llvm.
5183 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5184 if (Name.starts_with("dbg.addr")) {
5186 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5187 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5188 NewCall =
5189 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5190 MetadataAsValue::get(C, Expr)});
5191 break;
5192 }
5193
5194 // Upgrade from the old version that had an extra offset argument.
5195 assert(CI->arg_size() == 4);
5196 // Drop nonzero offsets instead of attempting to upgrade them.
5198 if (Offset->isNullValue()) {
5199 NewCall = Builder.CreateCall(
5200 NewFn,
5201 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5202 break;
5203 }
5204 CI->eraseFromParent();
5205 return;
5206 }
5207
5208 case Intrinsic::ptr_annotation:
5209 // Upgrade from versions that lacked the annotation attribute argument.
5210 if (CI->arg_size() != 4) {
5211 DefaultCase();
5212 return;
5213 }
5214
5215 // Create a new call with an added null annotation attribute argument.
5216 NewCall = Builder.CreateCall(
5217 NewFn,
5218 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5219 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5220 NewCall->takeName(CI);
5221 CI->replaceAllUsesWith(NewCall);
5222 CI->eraseFromParent();
5223 return;
5224
5225 case Intrinsic::var_annotation:
5226 // Upgrade from versions that lacked the annotation attribute argument.
5227 if (CI->arg_size() != 4) {
5228 DefaultCase();
5229 return;
5230 }
5231 // Create a new call with an added null annotation attribute argument.
5232 NewCall = Builder.CreateCall(
5233 NewFn,
5234 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5235 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5236 NewCall->takeName(CI);
5237 CI->replaceAllUsesWith(NewCall);
5238 CI->eraseFromParent();
5239 return;
5240
5241 case Intrinsic::riscv_aes32dsi:
5242 case Intrinsic::riscv_aes32dsmi:
5243 case Intrinsic::riscv_aes32esi:
5244 case Intrinsic::riscv_aes32esmi:
5245 case Intrinsic::riscv_sm4ks:
5246 case Intrinsic::riscv_sm4ed: {
5247 // The last argument to these intrinsics used to be i8 and changed to i32.
5248 // The type overload for sm4ks and sm4ed was removed.
5249 Value *Arg2 = CI->getArgOperand(2);
5250 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5251 return;
5252
5253 Value *Arg0 = CI->getArgOperand(0);
5254 Value *Arg1 = CI->getArgOperand(1);
5255 if (CI->getType()->isIntegerTy(64)) {
5256 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5257 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5258 }
5259
5260 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5261 cast<ConstantInt>(Arg2)->getZExtValue());
5262
5263 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5264 Value *Res = NewCall;
5265 if (Res->getType() != CI->getType())
5266 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5267 NewCall->takeName(CI);
5268 CI->replaceAllUsesWith(Res);
5269 CI->eraseFromParent();
5270 return;
5271 }
5272 case Intrinsic::nvvm_mapa_shared_cluster: {
5273 // Create a new call with the correct address space.
5274 NewCall =
5275 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5276 Value *Res = NewCall;
5277 Res = Builder.CreateAddrSpaceCast(
5278 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5279 NewCall->takeName(CI);
5280 CI->replaceAllUsesWith(Res);
5281 CI->eraseFromParent();
5282 return;
5283 }
5284 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5285 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5286 // Create a new call with the correct address space.
5287 SmallVector<Value *, 4> Args(CI->args());
5288 Args[0] = Builder.CreateAddrSpaceCast(
5289 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5290
5291 NewCall = Builder.CreateCall(NewFn, Args);
5292 NewCall->takeName(CI);
5293 CI->replaceAllUsesWith(NewCall);
5294 CI->eraseFromParent();
5295 return;
5296 }
5297 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5298 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5299 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5300 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5301 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5302 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5303 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5304 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5305 SmallVector<Value *, 16> Args(CI->args());
5306
5307 // Create AddrSpaceCast to shared_cluster if needed.
5308 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5309 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5311 Args[0] = Builder.CreateAddrSpaceCast(
5312 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5313
5314 // Attach the flag argument for cta_group, with a
5315 // default value of 0. This handles case (2) in
5316 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5317 size_t NumArgs = CI->arg_size();
5318 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5319 if (!FlagArg->getType()->isIntegerTy(1))
5320 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5321
5322 NewCall = Builder.CreateCall(NewFn, Args);
5323 NewCall->takeName(CI);
5324 CI->replaceAllUsesWith(NewCall);
5325 CI->eraseFromParent();
5326 return;
5327 }
5328 case Intrinsic::riscv_sha256sig0:
5329 case Intrinsic::riscv_sha256sig1:
5330 case Intrinsic::riscv_sha256sum0:
5331 case Intrinsic::riscv_sha256sum1:
5332 case Intrinsic::riscv_sm3p0:
5333 case Intrinsic::riscv_sm3p1: {
5334 // The last argument to these intrinsics used to be i8 and changed to i32.
5335 // The type overload for sm4ks and sm4ed was removed.
5336 if (!CI->getType()->isIntegerTy(64))
5337 return;
5338
5339 Value *Arg =
5340 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5341
5342 NewCall = Builder.CreateCall(NewFn, Arg);
5343 Value *Res =
5344 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5345 NewCall->takeName(CI);
5346 CI->replaceAllUsesWith(Res);
5347 CI->eraseFromParent();
5348 return;
5349 }
5350
5351 case Intrinsic::x86_xop_vfrcz_ss:
5352 case Intrinsic::x86_xop_vfrcz_sd:
5353 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5354 break;
5355
5356 case Intrinsic::x86_xop_vpermil2pd:
5357 case Intrinsic::x86_xop_vpermil2ps:
5358 case Intrinsic::x86_xop_vpermil2pd_256:
5359 case Intrinsic::x86_xop_vpermil2ps_256: {
// The selector operand changed from an FP vector to the matching integer
// vector type; a bitcast is sufficient.
5360 SmallVector<Value *, 4> Args(CI->args());
5361 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5362 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5363 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5364 NewCall = Builder.CreateCall(NewFn, Args);
5365 break;
5366 }
5367
5368 case Intrinsic::x86_sse41_ptestc:
5369 case Intrinsic::x86_sse41_ptestz:
5370 case Intrinsic::x86_sse41_ptestnzc: {
5371 // The arguments for these intrinsics used to be v4f32, and changed
5372 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5373 // So, the only thing required is a bitcast for both arguments.
5374 // First, check the arguments have the old type.
5375 Value *Arg0 = CI->getArgOperand(0);
5376 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5377 return;
5378
5379 // Old intrinsic, add bitcasts
5380 Value *Arg1 = CI->getArgOperand(1);
5381
5382 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5383
5384 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5385 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5386
5387 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5388 break;
5389 }
5390
5391 case Intrinsic::x86_rdtscp: {
5392 // This used to take 1 arguments. If we have no arguments, it is already
5393 // upgraded.
5394 if (CI->getNumOperands() == 0)
5395 return;
5396
5397 NewCall = Builder.CreateCall(NewFn);
5398 // Extract the second result and store it.
5399 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5400 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5401 // Replace the original call result with the first result of the new call.
5402 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5403
5404 NewCall->takeName(CI);
5405 CI->replaceAllUsesWith(TSC);
5406 CI->eraseFromParent();
5407 return;
5408 }
5409
5410 case Intrinsic::x86_sse41_insertps:
5411 case Intrinsic::x86_sse41_dppd:
5412 case Intrinsic::x86_sse41_dpps:
5413 case Intrinsic::x86_sse41_mpsadbw:
5414 case Intrinsic::x86_avx_dp_ps_256:
5415 case Intrinsic::x86_avx2_mpsadbw: {
5416 // Need to truncate the last argument from i32 to i8 -- this argument models
5417 // an inherently 8-bit immediate operand to these x86 instructions.
5418 SmallVector<Value *, 4> Args(CI->args());
5419
5420 // Replace the last argument with a trunc.
5421 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5422 NewCall = Builder.CreateCall(NewFn, Args);
5423 break;
5424 }
5425
5426 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5427 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5428 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5429 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5430 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5431 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
// The scalar mask argument becomes a <N x i1> vector; the 1-bit result is
// widened back to the old integer mask for existing users.
5432 SmallVector<Value *, 4> Args(CI->args());
5433 unsigned NumElts =
5434 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5435 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5436
5437 NewCall = Builder.CreateCall(NewFn, Args);
5438 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5439
5440 NewCall->takeName(CI);
5441 CI->replaceAllUsesWith(Res);
5442 CI->eraseFromParent();
5443 return;
5444 }
5445
5446 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5447 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5448 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5449 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5450 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5451 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
// bf16 results used to be modeled as i16 vectors; bitcast in and out.
5452 SmallVector<Value *, 4> Args(CI->args());
5453 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5454 if (NewFn->getIntrinsicID() ==
5455 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5456 Args[1] = Builder.CreateBitCast(
5457 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5458
5459 NewCall = Builder.CreateCall(NewFn, Args);
5460 Value *Res = Builder.CreateBitCast(
5461 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5462
5463 NewCall->takeName(CI);
5464 CI->replaceAllUsesWith(Res);
5465 CI->eraseFromParent();
5466 return;
5467 }
5468 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5469 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5470 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
// The two bf16 inputs were previously i16 vectors of twice the result's
// element count; bitcast them to bfloat vectors.
5471 SmallVector<Value *, 4> Args(CI->args());
5472 unsigned NumElts =
5473 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5474 Args[1] = Builder.CreateBitCast(
5475 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5476 Args[2] = Builder.CreateBitCast(
5477 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5478
5479 NewCall = Builder.CreateCall(NewFn, Args);
5480 break;
5481 }
5482
5483 case Intrinsic::thread_pointer: {
5484 NewCall = Builder.CreateCall(NewFn, {});
5485 break;
5486 }
5487
5488 case Intrinsic::memcpy:
5489 case Intrinsic::memmove:
5490 case Intrinsic::memset: {
5491 // We have to make sure that the call signature is what we're expecting.
5492 // We only want to change the old signatures by removing the alignment arg:
5493 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5494 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5495 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5496 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5497 // Note: i8*'s in the above can be any pointer type
5498 if (CI->arg_size() != 5) {
5499 DefaultCase();
5500 return;
5501 }
5502 // Remove alignment argument (3), and add alignment attributes to the
5503 // dest/src pointers.
5504 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5505 CI->getArgOperand(2), CI->getArgOperand(4)};
5506 NewCall = Builder.CreateCall(NewFn, Args);
5507 AttributeList OldAttrs = CI->getAttributes();
5508 AttributeList NewAttrs = AttributeList::get(
5509 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5510 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5511 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5512 NewCall->setAttributes(NewAttrs);
5513 auto *MemCI = cast<MemIntrinsic>(NewCall);
5514 // All mem intrinsics support dest alignment.
// NOTE(review): the ConstantInt *Align* extracted from old operand 3 is
// declared on a line elided from this view.
5516 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5517 // Memcpy/Memmove also support source alignment.
5518 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5519 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5520 break;
5521 }
5522
5523 case Intrinsic::masked_load:
5524 case Intrinsic::masked_gather:
5525 case Intrinsic::masked_store:
5526 case Intrinsic::masked_scatter: {
// The explicit alignment operand moved into an attribute/parameter of the
// IRBuilder helpers; zero alignment falls back to the ABI alignment for
// gathers/scatters and is a usage error for load/store.
5527 if (CI->arg_size() != 4) {
5528 DefaultCase();
5529 return;
5530 }
5531
5532 auto GetMaybeAlign = [](Value *Op) {
5533 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5534 uint64_t Val = CI->getZExtValue();
5535 if (Val == 0)
5536 return MaybeAlign();
5537 if (isPowerOf2_64(Val))
5538 return MaybeAlign(Val);
5539 }
5540 reportFatalUsageError("Invalid alignment argument");
5541 };
5542 auto GetAlign = [&](Value *Op) {
5543 MaybeAlign Align = GetMaybeAlign(Op);
5544 if (Align)
5545 return *Align;
5546 reportFatalUsageError("Invalid zero alignment argument");
5547 };
5548
5549 const DataLayout &DL = CI->getDataLayout();
5550 switch (NewFn->getIntrinsicID()) {
5551 case Intrinsic::masked_load:
5552 NewCall = Builder.CreateMaskedLoad(
5553 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5554 CI->getArgOperand(2), CI->getArgOperand(3));
5555 break;
5556 case Intrinsic::masked_gather:
5557 NewCall = Builder.CreateMaskedGather(
5558 CI->getType(), CI->getArgOperand(0),
5559 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5560 CI->getType()->getScalarType()),
5561 CI->getArgOperand(2), CI->getArgOperand(3));
5562 break;
5563 case Intrinsic::masked_store:
5564 NewCall = Builder.CreateMaskedStore(
5565 CI->getArgOperand(0), CI->getArgOperand(1),
5566 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5567 break;
5568 case Intrinsic::masked_scatter:
5569 NewCall = Builder.CreateMaskedScatter(
5570 CI->getArgOperand(0), CI->getArgOperand(1),
5571 DL.getValueOrABITypeAlignment(
5572 GetMaybeAlign(CI->getArgOperand(2)),
5573 CI->getArgOperand(0)->getType()->getScalarType()),
5574 CI->getArgOperand(3));
5575 break;
5576 default:
5577 llvm_unreachable("Unexpected intrinsic ID");
5578 }
5579 // Previous metadata is still valid.
5580 NewCall->copyMetadata(*CI);
5581 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5582 break;
5583 }
5584
5585 case Intrinsic::lifetime_start:
5586 case Intrinsic::lifetime_end: {
5587 if (CI->arg_size() != 2) {
5588 DefaultCase();
5589 return;
5590 }
5591
5592 Value *Ptr = CI->getArgOperand(1);
5593 // Try to strip pointer casts, such that the lifetime works on an alloca.
5594 Ptr = Ptr->stripPointerCasts();
5595 if (isa<AllocaInst>(Ptr)) {
5596 // Don't use NewFn, as we might have looked through an addrspacecast.
5597 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5598 NewCall = Builder.CreateLifetimeStart(Ptr)
5599 else
5600 NewCall = Builder.CreateLifetimeEnd(Ptr);
5601 break;
5602 }
5603
5604 // Otherwise remove the lifetime marker.
5605 CI->eraseFromParent();
5606 return;
5607 }
5608
5609 case Intrinsic::x86_avx512_vpdpbusd_128:
5610 case Intrinsic::x86_avx512_vpdpbusd_256:
5611 case Intrinsic::x86_avx512_vpdpbusd_512:
5612 case Intrinsic::x86_avx512_vpdpbusds_128:
5613 case Intrinsic::x86_avx512_vpdpbusds_256:
5614 case Intrinsic::x86_avx512_vpdpbusds_512:
5615 case Intrinsic::x86_avx2_vpdpbssd_128:
5616 case Intrinsic::x86_avx2_vpdpbssd_256:
5617 case Intrinsic::x86_avx10_vpdpbssd_512:
5618 case Intrinsic::x86_avx2_vpdpbssds_128:
5619 case Intrinsic::x86_avx2_vpdpbssds_256:
5620 case Intrinsic::x86_avx10_vpdpbssds_512:
5621 case Intrinsic::x86_avx2_vpdpbsud_128:
5622 case Intrinsic::x86_avx2_vpdpbsud_256:
5623 case Intrinsic::x86_avx10_vpdpbsud_512:
5624 case Intrinsic::x86_avx2_vpdpbsuds_128:
5625 case Intrinsic::x86_avx2_vpdpbsuds_256:
5626 case Intrinsic::x86_avx10_vpdpbsuds_512:
5627 case Intrinsic::x86_avx2_vpdpbuud_128:
5628 case Intrinsic::x86_avx2_vpdpbuud_256:
5629 case Intrinsic::x86_avx10_vpdpbuud_512:
5630 case Intrinsic::x86_avx2_vpdpbuuds_128:
5631 case Intrinsic::x86_avx2_vpdpbuuds_256:
5632 case Intrinsic::x86_avx10_vpdpbuuds_512: {
// Byte dot-product inputs changed from i32 vectors to i8 vectors of the
// same total bit width; bitcast operands 1 and 2.
5633 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5634 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5635 CI->getArgOperand(2)};
5636 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5637 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5638 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5639
5640 NewCall = Builder.CreateCall(NewFn, Args);
5641 break;
5642 }
5643 case Intrinsic::x86_avx512_vpdpwssd_128:
5644 case Intrinsic::x86_avx512_vpdpwssd_256:
5645 case Intrinsic::x86_avx512_vpdpwssd_512:
5646 case Intrinsic::x86_avx512_vpdpwssds_128:
5647 case Intrinsic::x86_avx512_vpdpwssds_256:
5648 case Intrinsic::x86_avx512_vpdpwssds_512:
5649 case Intrinsic::x86_avx2_vpdpwsud_128:
5650 case Intrinsic::x86_avx2_vpdpwsud_256:
5651 case Intrinsic::x86_avx10_vpdpwsud_512:
5652 case Intrinsic::x86_avx2_vpdpwsuds_128:
5653 case Intrinsic::x86_avx2_vpdpwsuds_256:
5654 case Intrinsic::x86_avx10_vpdpwsuds_512:
5655 case Intrinsic::x86_avx2_vpdpwusd_128:
5656 case Intrinsic::x86_avx2_vpdpwusd_256:
5657 case Intrinsic::x86_avx10_vpdpwusd_512:
5658 case Intrinsic::x86_avx2_vpdpwusds_128:
5659 case Intrinsic::x86_avx2_vpdpwusds_256:
5660 case Intrinsic::x86_avx10_vpdpwusds_512:
5661 case Intrinsic::x86_avx2_vpdpwuud_128:
5662 case Intrinsic::x86_avx2_vpdpwuud_256:
5663 case Intrinsic::x86_avx10_vpdpwuud_512:
5664 case Intrinsic::x86_avx2_vpdpwuuds_128:
5665 case Intrinsic::x86_avx2_vpdpwuuds_256:
5666 case Intrinsic::x86_avx10_vpdpwuuds_512:
// Word dot-product analogue of the byte case above: i16 element vectors.
5667 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5668 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5669 CI->getArgOperand(2)};
5670 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5671 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5672 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5673
5674 NewCall = Builder.CreateCall(NewFn, Args);
5675 break;
5676 }
// Common tail for all `break` cases: NewCall replaces the old call.
5677 assert(NewCall && "Should have either set this variable or returned through "
5678 "the default case");
5679 NewCall->takeName(CI);
5680 CI->replaceAllUsesWith(NewCall);
5681 CI->eraseFromParent();
5682}
5683
// Upgrade every call site of a deprecated intrinsic declaration, then drop
// the old declaration itself (unless the upgrade kept the same Function).
5685 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5686
5687 // Check if this function should be upgraded and get the replacement function
5688 // if there is one.
5689 Function *NewFn;
5690 if (UpgradeIntrinsicFunction(F, NewFn)) {
5691 // Replace all users of the old function with the new function or new
5692 // instructions. This is not a range loop because the call is deleted.
5693 for (User *U : make_early_inc_range(F->users()))
5694 if (CallBase *CB = dyn_cast<CallBase>(U))
5695 UpgradeIntrinsicCall(CB, NewFn);
5696
5697 // Remove old function, no longer used, from the module.
5698 if (F != NewFn)
5699 F->eraseFromParent();
5700 }
5701}
5702
// Upgrade a scalar-format TBAA tag to the struct-path aware format by
// synthesizing <base, access, offset 0> nodes. Tags already in struct-path
// form (first operand is an MDNode) are returned unchanged.
5704 const unsigned NumOperands = MD.getNumOperands();
5705 if (NumOperands == 0)
5706 return &MD; // Invalid, punt to a verifier error.
5707
5708 // Check if the tag uses struct-path aware TBAA format.
5709 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5710 return &MD;
5711
5712 auto &Context = MD.getContext();
// Three operands: old scalar tag with a "const" flag in operand 2.
5713 if (NumOperands == 3) {
5714 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5715 MDNode *ScalarType = MDNode::get(Context, Elts);
5716 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
// NOTE(review): the offset-0 ConstantAsMetadata operand is on lines
// elided from this view.
5717 Metadata *Elts2[] = {ScalarType, ScalarType,
5720 MD.getOperand(2)};
5721 return MDNode::get(Context, Elts2);
5722 }
5723 // Create a MDNode <MD, MD, offset 0>
5725 Type::getInt64Ty(Context)))};
5726 return MDNode::get(Context, Elts);
5727}
5728
// (Tail of the signature; the leading parameters are on an elided line.)
// Upgrade an illegal bitcast between pointers of different address spaces
// into a ptrtoint/inttoptr pair; `Temp` receives the intermediate
// instruction so the caller can insert both. Returns null when no upgrade
// applies.
5730 Instruction *&Temp) {
5731 if (Opc != Instruction::BitCast)
5732 return nullptr;
5733
5734 Temp = nullptr;
5735 Type *SrcTy = V->getType();
// Only cross-address-space pointer bitcasts need (and get) rewriting.
5736 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5737 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5738 LLVMContext &Context = V->getContext();
5739
5740 // We have no information about target data layout, so we assume that
5741 // the maximum pointer size is 64bit.
5742 Type *MidTy = Type::getInt64Ty(Context);
5743 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5744
5745 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5746 }
5747
5748 return nullptr;
5749}
5750
// Constant-expression twin of UpgradeBitCastInst above: rewrites an illegal
// cross-address-space bitcast constant into ptrtoint/inttoptr constant
// expressions. Returns null when no upgrade applies.
// (The function's signature line is elided from this view.)
5752 if (Opc != Instruction::BitCast)
5753 return nullptr;
5754
5755 Type *SrcTy = C->getType();
5756 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5757 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5758 LLVMContext &Context = C->getContext();
5759
5760 // We have no information about target data layout, so we assume that
5761 // the maximum pointer size is 64bit.
5762 Type *MidTy = Type::getInt64Ty(Context);
5763
// NOTE(review): the ConstantExpr::getIntToPtr(getPtrToInt(...)) call is on
// an elided line; only its trailing argument is visible below.
5765 DestTy);
5766 }
5767
5768 return nullptr;
5769}
5770
5771/// Check the debug info version number, if it is out-dated, drop the debug
5772/// info. Return true if module is modified.
///
/// Reads the "Debug Info Version" module flag directly from the raw
/// metadata (the module may not be verifiable yet), optionally verifies the
/// module, and strips malformed or out-of-date debug info, diagnosing via
/// the context. (The signature and the DisableAutoUpgradeDebugInfo early
/// exit are on lines elided from this view.)
5775 return false;
5776
5777 llvm::TimeTraceScope timeScope("Upgrade debug info");
5778 // We need to get metadata before the module is verified (i.e., getModuleFlag
5779 // makes assumptions that we haven't verified yet). Carefully extract the flag
5780 // from the metadata.
5781 unsigned Version = 0;
5782 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5783 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5784 if (Flag->getNumOperands() < 3)
5785 return false;
5786 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5787 return K->getString() == "Debug Info Version";
5788 return false;
5789 });
5790 if (OpIt != ModFlags->op_end()) {
5791 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5792 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5793 Version = CI->getZExtValue();
5794 }
5795 }
5796
// NOTE(review): the guard choosing between verify and strip is on an elided
// line; the verify path below aborts on broken IR but only *diagnoses*
// broken debug info before stripping it.
5798 bool BrokenDebugInfo = false;
5799 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5800 report_fatal_error("Broken module found, compilation aborted!");
5801 if (!BrokenDebugInfo)
5802 // Everything is ok.
5803 return false;
5804 else {
5805 // Diagnose malformed debug info.
5807 M.getContext().diagnose(Diag);
5808 }
5809 }
5810 bool Modified = StripDebugInfo(M);
5812 // Diagnose a version mismatch.
5814 M.getContext().diagnose(DiagVersion);
5815 }
5816 return Modified;
5817}
5818
5819static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5820 GlobalValue *GV, const Metadata *V) {
5821 Function *F = cast<Function>(GV);
5822
5823 constexpr StringLiteral DefaultValue = "1";
5824 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5825 unsigned Length = 0;
5826
5827 if (F->hasFnAttribute(Attr)) {
5828 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5829 // parse these elements placing them into Vect3
5830 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5831 for (; Length < 3 && !S.empty(); Length++) {
5832 auto [Part, Rest] = S.split(',');
5833 Vect3[Length] = Part.trim();
5834 S = Rest;
5835 }
5836 }
5837
5838 const unsigned Dim = DimC - 'x';
5839 assert(Dim < 3 && "Unexpected dim char");
5840
5841 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5842
5843 // local variable required for StringRef in Vect3 to point to.
5844 const std::string VStr = llvm::utostr(VInt);
5845 Vect3[Dim] = VStr;
5846 Length = std::max(Length, Dim + 1);
5847
5848 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5849 F->addFnAttr(Attr, NewAttr);
5850}
5851
5852static inline bool isXYZ(StringRef S) {
5853 return S == "x" || S == "y" || S == "z";
5854}
5855
5857 const Metadata *V) {
5858 if (K == "kernel") {
5860 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5861 return true;
5862 }
5863 if (K == "align") {
 5864 // V is a bitfield specifying two 16-bit values. The alignment value is
 5865 // specified in the low 16 bits, the index is specified in the high bits. For the
5866 // index, 0 indicates the return value while higher values correspond to
5867 // each parameter (idx = param + 1).
5868 const uint64_t AlignIdxValuePair =
5869 mdconst::extract<ConstantInt>(V)->getZExtValue();
5870 const unsigned Idx = (AlignIdxValuePair >> 16);
5871 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5872 cast<Function>(GV)->addAttributeAtIndex(
5873 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5874 return true;
5875 }
5876 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5877 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5878 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5879 return true;
5880 }
5881 if (K == "minctasm") {
5882 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5883 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5884 return true;
5885 }
5886 if (K == "maxnreg") {
5887 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5888 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5889 return true;
5890 }
5891 if (K.consume_front("maxntid") && isXYZ(K)) {
5892 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5893 return true;
5894 }
5895 if (K.consume_front("reqntid") && isXYZ(K)) {
5896 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5897 return true;
5898 }
5899 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5900 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5901 return true;
5902 }
5903 if (K == "grid_constant") {
5904 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5905 for (const auto &Op : cast<MDNode>(V)->operands()) {
5906 // For some reason, the index is 1-based in the metadata. Good thing we're
5907 // able to auto-upgrade it!
5908 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5909 cast<Function>(GV)->addParamAttr(Index, Attr);
5910 }
5911 return true;
5912 }
5913
5914 return false;
5915}
5916
5918 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5919 if (!NamedMD)
5920 return;
5921
5922 SmallVector<MDNode *, 8> NewNodes;
5924 for (MDNode *MD : NamedMD->operands()) {
5925 if (!SeenNodes.insert(MD).second)
5926 continue;
5927
5928 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5929 if (!GV)
5930 continue;
5931
5932 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5933
5934 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5935 // Each nvvm.annotations metadata entry will be of the following form:
5936 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5937 // start index = 1, to skip the global variable key
5938 // increment = 2, to skip the value for each property-value pairs
5939 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5940 MDString *K = cast<MDString>(MD->getOperand(j));
5941 const MDOperand &V = MD->getOperand(j + 1);
5942 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5943 if (!Upgraded)
5944 NewOperands.append({K, V});
5945 }
5946
5947 if (NewOperands.size() > 1)
5948 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5949 }
5950
5951 NamedMD->clearOperands();
5952 for (MDNode *N : NewNodes)
5953 NamedMD->addOperand(N);
5954}
5955
5956/// This checks for objc retain release marker which should be upgraded. It
5957/// returns true if module is modified.
5959 bool Changed = false;
5960 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5961 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5962 if (ModRetainReleaseMarker) {
5963 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5964 if (Op) {
5965 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5966 if (ID) {
5967 SmallVector<StringRef, 4> ValueComp;
5968 ID->getString().split(ValueComp, "#");
5969 if (ValueComp.size() == 2) {
5970 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5971 ID = MDString::get(M.getContext(), NewValue);
5972 }
5973 M.addModuleFlag(Module::Error, MarkerKey, ID);
5974 M.eraseNamedMetadata(ModRetainReleaseMarker);
5975 Changed = true;
5976 }
5977 }
5978 }
5979 return Changed;
5980}
5981
5983 // This lambda converts normal function calls to ARC runtime functions to
5984 // intrinsic calls.
5985 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5986 llvm::Intrinsic::ID IntrinsicFunc) {
5987 Function *Fn = M.getFunction(OldFunc);
5988
5989 if (!Fn)
5990 return;
5991
5992 Function *NewFn =
5993 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5994
5995 for (User *U : make_early_inc_range(Fn->users())) {
5997 if (!CI || CI->getCalledFunction() != Fn)
5998 continue;
5999
6000 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
6001 FunctionType *NewFuncTy = NewFn->getFunctionType();
6003
6004 // Don't upgrade the intrinsic if it's not valid to bitcast the return
6005 // value to the return type of the old function.
6006 if (NewFuncTy->getReturnType() != CI->getType() &&
6007 !CastInst::castIsValid(Instruction::BitCast, CI,
6008 NewFuncTy->getReturnType()))
6009 continue;
6010
6011 bool InvalidCast = false;
6012
6013 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
6014 Value *Arg = CI->getArgOperand(I);
6015
6016 // Bitcast argument to the parameter type of the new function if it's
6017 // not a variadic argument.
6018 if (I < NewFuncTy->getNumParams()) {
6019 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
6020 // to the parameter type of the new function.
6021 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
6022 NewFuncTy->getParamType(I))) {
6023 InvalidCast = true;
6024 break;
6025 }
6026 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
6027 }
6028 Args.push_back(Arg);
6029 }
6030
6031 if (InvalidCast)
6032 continue;
6033
6034 // Create a call instruction that calls the new function.
6035 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6036 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
6037 NewCall->takeName(CI);
6038
6039 // Bitcast the return value back to the type of the old call.
6040 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
6041
6042 if (!CI->use_empty())
6043 CI->replaceAllUsesWith(NewRetVal);
6044 CI->eraseFromParent();
6045 }
6046
6047 if (Fn->use_empty())
6048 Fn->eraseFromParent();
6049 };
6050
6051 // Unconditionally convert a call to "clang.arc.use" to a call to
6052 // "llvm.objc.clang.arc.use".
6053 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6054
6055 // Upgrade the retain release marker. If there is no need to upgrade
6056 // the marker, that means either the module is already new enough to contain
6057 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
6059 return;
6060
6061 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6062 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6063 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6064 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6065 {"objc_autoreleaseReturnValue",
6066 llvm::Intrinsic::objc_autoreleaseReturnValue},
6067 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6068 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6069 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6070 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6071 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6072 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6073 {"objc_release", llvm::Intrinsic::objc_release},
6074 {"objc_retain", llvm::Intrinsic::objc_retain},
6075 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6076 {"objc_retainAutoreleaseReturnValue",
6077 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6078 {"objc_retainAutoreleasedReturnValue",
6079 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6080 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6081 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6082 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6083 {"objc_unsafeClaimAutoreleasedReturnValue",
6084 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6085 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6086 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6087 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6088 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6089 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6090 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6091 {"objc_arc_annotation_topdown_bbstart",
6092 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6093 {"objc_arc_annotation_topdown_bbend",
6094 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6095 {"objc_arc_annotation_bottomup_bbstart",
6096 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6097 {"objc_arc_annotation_bottomup_bbend",
6098 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6099
6100 for (auto &I : RuntimeFuncs)
6101 UpgradeToIntrinsic(I.first, I.second);
6102}
6103
6105 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6106 if (!ModFlags)
6107 return false;
6108
6109 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6110 bool HasSwiftVersionFlag = false;
6111 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6112 uint32_t SwiftABIVersion;
6113 auto Int8Ty = Type::getInt8Ty(M.getContext());
6114 auto Int32Ty = Type::getInt32Ty(M.getContext());
6115
6116 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6117 MDNode *Op = ModFlags->getOperand(I);
6118 if (Op->getNumOperands() != 3)
6119 continue;
6120 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6121 if (!ID)
6122 continue;
6123 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6124 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6125 Type::getInt32Ty(M.getContext()), B)),
6126 MDString::get(M.getContext(), ID->getString()),
6127 Op->getOperand(2)};
6128 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6129 Changed = true;
6130 };
6131
6132 if (ID->getString() == "Objective-C Image Info Version")
6133 HasObjCFlag = true;
6134 if (ID->getString() == "Objective-C Class Properties")
6135 HasClassProperties = true;
6136 // Upgrade PIC from Error/Max to Min.
6137 if (ID->getString() == "PIC Level") {
6138 if (auto *Behavior =
6140 uint64_t V = Behavior->getLimitedValue();
6141 if (V == Module::Error || V == Module::Max)
6142 SetBehavior(Module::Min);
6143 }
6144 }
6145 // Upgrade "PIE Level" from Error to Max.
6146 if (ID->getString() == "PIE Level")
6147 if (auto *Behavior =
6149 if (Behavior->getLimitedValue() == Module::Error)
6150 SetBehavior(Module::Max);
6151
6152 // Upgrade branch protection and return address signing module flags. The
6153 // module flag behavior for these fields were Error and now they are Min.
6154 if (ID->getString() == "branch-target-enforcement" ||
6155 ID->getString().starts_with("sign-return-address")) {
6156 if (auto *Behavior =
6158 if (Behavior->getLimitedValue() == Module::Error) {
6159 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6160 Metadata *Ops[3] = {
6161 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6162 Op->getOperand(1), Op->getOperand(2)};
6163 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6164 Changed = true;
6165 }
6166 }
6167 }
6168
 6169 // Upgrade Objective-C Image Info Section. Removed the whitespace in the
6170 // section name so that llvm-lto will not complain about mismatching
6171 // module flags that is functionally the same.
6172 if (ID->getString() == "Objective-C Image Info Section") {
6173 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6174 SmallVector<StringRef, 4> ValueComp;
6175 Value->getString().split(ValueComp, " ");
6176 if (ValueComp.size() != 1) {
6177 std::string NewValue;
6178 for (auto &S : ValueComp)
6179 NewValue += S.str();
6180 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6181 MDString::get(M.getContext(), NewValue)};
6182 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6183 Changed = true;
6184 }
6185 }
6186 }
6187
6188 // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
6189 // If the higher bits are set, it adds new module flag for swift info.
6190 if (ID->getString() == "Objective-C Garbage Collection") {
6191 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6192 if (Md) {
6193 assert(Md->getValue() && "Expected non-empty metadata");
6194 auto Type = Md->getValue()->getType();
6195 if (Type == Int8Ty)
6196 continue;
6197 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6198 if ((Val & 0xff) != Val) {
6199 HasSwiftVersionFlag = true;
6200 SwiftABIVersion = (Val & 0xff00) >> 8;
6201 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6202 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6203 }
6204 Metadata *Ops[3] = {
6206 Op->getOperand(1),
6207 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
6208 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6209 Changed = true;
6210 }
6211 }
6212
6213 if (ID->getString() == "amdgpu_code_object_version") {
6214 Metadata *Ops[3] = {
6215 Op->getOperand(0),
6216 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6217 Op->getOperand(2)};
6218 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6219 Changed = true;
6220 }
6221 }
6222
6223 // "Objective-C Class Properties" is recently added for Objective-C. We
6224 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
 6225 // flag of value 0, so we can correctly downgrade this flag when trying to
6226 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6227 // this module flag.
6228 if (HasObjCFlag && !HasClassProperties) {
6229 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6230 (uint32_t)0);
6231 Changed = true;
6232 }
6233
6234 if (HasSwiftVersionFlag) {
6235 M.addModuleFlag(Module::Error, "Swift ABI Version",
6236 SwiftABIVersion);
6237 M.addModuleFlag(Module::Error, "Swift Major Version",
6238 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6239 M.addModuleFlag(Module::Error, "Swift Minor Version",
6240 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6241 Changed = true;
6242 }
6243
6244 return Changed;
6245}
6246
6248 auto TrimSpaces = [](StringRef Section) -> std::string {
6249 SmallVector<StringRef, 5> Components;
6250 Section.split(Components, ',');
6251
6252 SmallString<32> Buffer;
6253 raw_svector_ostream OS(Buffer);
6254
6255 for (auto Component : Components)
6256 OS << ',' << Component.trim();
6257
6258 return std::string(OS.str().substr(1));
6259 };
6260
6261 for (auto &GV : M.globals()) {
6262 if (!GV.hasSection())
6263 continue;
6264
6265 StringRef Section = GV.getSection();
6266
6267 if (!Section.starts_with("__DATA, __objc_catlist"))
6268 continue;
6269
6270 // __DATA, __objc_catlist, regular, no_dead_strip
6271 // __DATA,__objc_catlist,regular,no_dead_strip
6272 GV.setSection(TrimSpaces(Section));
6273 }
6274}
6275
6276namespace {
6277// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6278// callsites within a function that did not also have the strictfp attribute.
6279// Since 10.0, if strict FP semantics are needed within a function, the
6280// function must have the strictfp attribute and all calls within the function
6281// must also have the strictfp attribute. This latter restriction is
6282// necessary to prevent unwanted libcall simplification when a function is
6283// being cloned (such as for inlining).
6284//
6285// The "dangling" strictfp attribute usage was only used to prevent constant
6286// folding and other libcall simplification. The nobuiltin attribute on the
6287// callsite has the same effect.
6288struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6289 StrictFPUpgradeVisitor() = default;
6290
6291 void visitCallBase(CallBase &Call) {
6292 if (!Call.isStrictFP())
6293 return;
6295 return;
6296 // If we get here, the caller doesn't have the strictfp attribute
6297 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6298 Call.removeFnAttr(Attribute::StrictFP);
6299 Call.addFnAttr(Attribute::NoBuiltin);
6300 }
6301};
6302
6303/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6304struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6305 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6306 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6307
6308 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6309 if (!RMW.isFloatingPointOperation())
6310 return;
6311
6312 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6313 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6314 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6315 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6316 }
6317};
6318} // namespace
6319
6321 // If a function definition doesn't have the strictfp attribute,
6322 // convert any callsite strictfp attributes to nobuiltin.
6323 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6324 StrictFPUpgradeVisitor SFPV;
6325 SFPV.visit(F);
6326 }
6327
6328 // Remove all incompatibile attributes from function.
6329 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6330 F.getReturnType(), F.getAttributes().getRetAttrs()));
6331 for (auto &Arg : F.args())
6332 Arg.removeAttrs(
6333 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6334
6335 bool AddingAttrs = false, RemovingAttrs = false;
6336 AttrBuilder AttrsToAdd(F.getContext());
6337 AttributeMask AttrsToRemove;
6338
6339 // Older versions of LLVM treated an "implicit-section-name" attribute
6340 // similarly to directly setting the section on a Function.
6341 if (Attribute A = F.getFnAttribute("implicit-section-name");
6342 A.isValid() && A.isStringAttribute()) {
6343 F.setSection(A.getValueAsString());
6344 AttrsToRemove.addAttribute("implicit-section-name");
6345 RemovingAttrs = true;
6346 }
6347
6348 if (Attribute A = F.getFnAttribute("nooutline");
6349 A.isValid() && A.isStringAttribute()) {
6350 AttrsToRemove.addAttribute("nooutline");
6351 AttrsToAdd.addAttribute(Attribute::NoOutline);
6352 AddingAttrs = RemovingAttrs = true;
6353 }
6354
6355 if (Attribute A = F.getFnAttribute("uniform-work-group-size");
6356 A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
6357 AttrsToRemove.addAttribute("uniform-work-group-size");
6358 RemovingAttrs = true;
6359 if (A.getValueAsString() == "true") {
6360 AttrsToAdd.addAttribute("uniform-work-group-size");
6361 AddingAttrs = true;
6362 }
6363 }
6364
6365 if (!F.empty()) {
6366 // For some reason this is called twice, and the first time is before any
6367 // instructions are loaded into the body.
6368
6369 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6370 A.isValid()) {
6371
6372 if (A.getValueAsBool()) {
6373 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6374 Visitor.visit(F);
6375 }
6376
6377 // We will leave behind dead attribute uses on external declarations, but
6378 // clang never added these to declarations anyway.
6379 AttrsToRemove.addAttribute("amdgpu-unsafe-fp-atomics");
6380 RemovingAttrs = true;
6381 }
6382 }
6383
6384 DenormalMode DenormalFPMath = DenormalMode::getIEEE();
6385 DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();
6386
6387 bool HandleDenormalMode = false;
6388
6389 if (Attribute Attr = F.getFnAttribute("denormal-fp-math"); Attr.isValid()) {
6390 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6391 if (ParsedMode.isValid()) {
6392 DenormalFPMath = ParsedMode;
6393 AttrsToRemove.addAttribute("denormal-fp-math");
6394 AddingAttrs = RemovingAttrs = true;
6395 HandleDenormalMode = true;
6396 }
6397 }
6398
6399 if (Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
6400 Attr.isValid()) {
6401 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6402 if (ParsedMode.isValid()) {
6403 DenormalFPMathF32 = ParsedMode;
6404 AttrsToRemove.addAttribute("denormal-fp-math-f32");
6405 AddingAttrs = RemovingAttrs = true;
6406 HandleDenormalMode = true;
6407 }
6408 }
6409
6410 if (HandleDenormalMode)
6411 AttrsToAdd.addDenormalFPEnvAttr(
6412 DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));
6413
6414 if (RemovingAttrs)
6415 F.removeFnAttrs(AttrsToRemove);
6416
6417 if (AddingAttrs)
6418 F.addFnAttrs(AttrsToAdd);
6419}
6420
6421// Check if the function attribute is not present and set it.
6423 StringRef Value) {
6424 if (!F.hasFnAttribute(FnAttrName))
6425 F.addFnAttr(FnAttrName, Value);
6426}
6427
6428// Check if the function attribute is not present and set it if needed.
6429// If the attribute is "false" then removes it.
6430// If the attribute is "true" resets it to a valueless attribute.
6431static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6432 if (!F.hasFnAttribute(FnAttrName)) {
6433 if (Set)
6434 F.addFnAttr(FnAttrName);
6435 } else {
6436 auto A = F.getFnAttribute(FnAttrName);
6437 if ("false" == A.getValueAsString())
6438 F.removeFnAttr(FnAttrName);
6439 else if ("true" == A.getValueAsString()) {
6440 F.removeFnAttr(FnAttrName);
6441 F.addFnAttr(FnAttrName);
6442 }
6443 }
6444}
6445
6447 Triple T(M.getTargetTriple());
6448 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6449 return;
6450
6451 uint64_t BTEValue = 0;
6452 uint64_t BPPLRValue = 0;
6453 uint64_t GCSValue = 0;
6454 uint64_t SRAValue = 0;
6455 uint64_t SRAALLValue = 0;
6456 uint64_t SRABKeyValue = 0;
6457
6458 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6459 if (ModFlags) {
6460 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6461 MDNode *Op = ModFlags->getOperand(I);
6462 if (Op->getNumOperands() != 3)
6463 continue;
6464
6465 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6466 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6467 if (!ID || !CI)
6468 continue;
6469
6470 StringRef IDStr = ID->getString();
6471 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6472 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6473 : IDStr == "guarded-control-stack" ? &GCSValue
6474 : IDStr == "sign-return-address" ? &SRAValue
6475 : IDStr == "sign-return-address-all" ? &SRAALLValue
6476 : IDStr == "sign-return-address-with-bkey"
6477 ? &SRABKeyValue
6478 : nullptr;
6479 if (!ValPtr)
6480 continue;
6481
6482 *ValPtr = CI->getZExtValue();
6483 if (*ValPtr == 2)
6484 return;
6485 }
6486 }
6487
6488 bool BTE = BTEValue == 1;
6489 bool BPPLR = BPPLRValue == 1;
6490 bool GCS = GCSValue == 1;
6491 bool SRA = SRAValue == 1;
6492
6493 StringRef SignTypeValue = "non-leaf";
6494 if (SRA && SRAALLValue == 1)
6495 SignTypeValue = "all";
6496
6497 StringRef SignKeyValue = "a_key";
6498 if (SRA && SRABKeyValue == 1)
6499 SignKeyValue = "b_key";
6500
6501 for (Function &F : M.getFunctionList()) {
6502 if (F.isDeclaration())
6503 continue;
6504
6505 if (SRA) {
6506 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6507 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6508 } else {
6509 if (auto A = F.getFnAttribute("sign-return-address");
6510 A.isValid() && "none" == A.getValueAsString()) {
6511 F.removeFnAttr("sign-return-address");
6512 F.removeFnAttr("sign-return-address-key");
6513 }
6514 }
6515 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6516 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6517 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6518 }
6519
6520 if (BTE)
6521 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6522 if (BPPLR)
6523 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6524 if (GCS)
6525 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6526 if (SRA) {
6527 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6528 if (SRAALLValue == 1)
6529 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6530 if (SRABKeyValue == 1)
6531 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6532 }
6533}
6534
6535static bool isOldLoopArgument(Metadata *MD) {
6536 auto *T = dyn_cast_or_null<MDTuple>(MD);
6537 if (!T)
6538 return false;
6539 if (T->getNumOperands() < 1)
6540 return false;
6541 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6542 if (!S)
6543 return false;
6544 return S->getString().starts_with("llvm.vectorizer.");
6545}
6546
6548 StringRef OldPrefix = "llvm.vectorizer.";
6549 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6550
6551 if (OldTag == "llvm.vectorizer.unroll")
6552 return MDString::get(C, "llvm.loop.interleave.count");
6553
6554 return MDString::get(
6555 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6556 .str());
6557}
6558
6560 auto *T = dyn_cast_or_null<MDTuple>(MD);
6561 if (!T)
6562 return MD;
6563 if (T->getNumOperands() < 1)
6564 return MD;
6565 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6566 if (!OldTag)
6567 return MD;
6568 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6569 return MD;
6570
6571 // This has an old tag. Upgrade it.
6573 Ops.reserve(T->getNumOperands());
6574 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6575 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6576 Ops.push_back(T->getOperand(I));
6577
6578 return MDTuple::get(T->getContext(), Ops);
6579}
6580
6582 auto *T = dyn_cast<MDTuple>(&N);
6583 if (!T)
6584 return &N;
6585
6586 if (none_of(T->operands(), isOldLoopArgument))
6587 return &N;
6588
6590 Ops.reserve(T->getNumOperands());
6591 for (Metadata *MD : T->operands())
6592 Ops.push_back(upgradeLoopArgument(MD));
6593
6594 return MDTuple::get(T->getContext(), Ops);
6595}
6596
6598 Triple T(TT);
6599 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6600 // the address space of globals to 1. This does not apply to SPIRV Logical.
6601 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6602 !DL.contains("-G") && !DL.starts_with("G")) {
6603 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6604 }
6605
6606 if (T.isLoongArch64() || T.isRISCV64()) {
6607 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6608 auto I = DL.find("-n64-");
6609 if (I != StringRef::npos)
6610 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6611 return DL.str();
6612 }
6613
6614 // AMDGPU data layout upgrades.
6615 std::string Res = DL.str();
6616 if (T.isAMDGPU()) {
6617 // Define address spaces for constants.
6618 if (!DL.contains("-G") && !DL.starts_with("G"))
6619 Res.append(Res.empty() ? "G1" : "-G1");
6620
6621 // AMDGCN data layout upgrades.
6622 if (T.isAMDGCN()) {
6623
6624 // Add missing non-integral declarations.
6625 // This goes before adding new address spaces to prevent incoherent string
6626 // values.
6627 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6628 Res.append("-ni:7:8:9");
6629 // Update ni:7 to ni:7:8:9.
6630 if (DL.ends_with("ni:7"))
6631 Res.append(":8:9");
6632 if (DL.ends_with("ni:7:8"))
6633 Res.append(":9");
6634
6635 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6636 // resources) An empty data layout has already been upgraded to G1 by now.
6637 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6638 Res.append("-p7:160:256:256:32");
6639 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6640 Res.append("-p8:128:128:128:48");
6641 constexpr StringRef OldP8("-p8:128:128-");
6642 if (DL.contains(OldP8))
6643 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6644 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6645 Res.append("-p9:192:256:256:32");
6646 }
6647
6648 // Upgrade the ELF mangling mode.
6649 if (!DL.contains("m:e"))
6650 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6651
6652 return Res;
6653 }
6654
6655 if (T.isSystemZ() && !DL.empty()) {
6656 // Make sure the stack alignment is present.
6657 if (!DL.contains("-S64"))
6658 return "E-S64" + DL.drop_front(1).str();
6659 return DL.str();
6660 }
6661
6662 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6663 // If the datalayout matches the expected format, add pointer size address
6664 // spaces to the datalayout.
6665 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6666 if (!DL.contains(AddrSpaces)) {
6668 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6669 if (R.match(Res, &Groups))
6670 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6671 }
6672 };
6673
6674 // AArch64 data layout upgrades.
6675 if (T.isAArch64()) {
6676 // Add "-Fn32"
6677 if (!DL.empty() && !DL.contains("-Fn32"))
6678 Res.append("-Fn32");
6679 AddPtr32Ptr64AddrSpaces();
6680 return Res;
6681 }
6682
6683 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6684 T.isWasm()) {
6685 // Mips64 with o32 ABI did not add "-i128:128".
6686 // Add "-i128:128"
6687 std::string I64 = "-i64:64";
6688 std::string I128 = "-i128:128";
6689 if (!StringRef(Res).contains(I128)) {
6690 size_t Pos = Res.find(I64);
6691 if (Pos != size_t(-1))
6692 Res.insert(Pos + I64.size(), I128);
6693 }
6694 }
6695
6696 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6697 size_t Pos = Res.find("-S128");
6698 if (Pos == StringRef::npos)
6699 Pos = Res.size();
6700 Res.insert(Pos, "-f64:32:64");
6701 }
6702
6703 if (!T.isX86())
6704 return Res;
6705
6706 AddPtr32Ptr64AddrSpaces();
6707
6708 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6709 // for i128 operations prior to this being reflected in the data layout, and
6710 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6711 // boundaries, so although this is a breaking change, the upgrade is expected
6712 // to fix more IR than it breaks.
6713 // Intel MCU is an exception and uses 4-byte-alignment.
6714 if (!T.isOSIAMCU()) {
6715 std::string I128 = "-i128:128";
6716 if (StringRef Ref = Res; !Ref.contains(I128)) {
6718 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6719 if (R.match(Res, &Groups))
6720 Res = (Groups[1] + I128 + Groups[3]).str();
6721 }
6722 }
6723
6724 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6725 // Raising the alignment is safe because Clang did not produce f80 values in
6726 // the MSVC environment before this upgrade was added.
6727 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6728 StringRef Ref = Res;
6729 auto I = Ref.find("-f80:32-");
6730 if (I != StringRef::npos)
6731 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6732 }
6733
6734 return Res;
6735}
6736
6737void llvm::UpgradeAttributes(AttrBuilder &B) {
6738 StringRef FramePointer;
6739 Attribute A = B.getAttribute("no-frame-pointer-elim");
6740 if (A.isValid()) {
6741 // The value can be "true" or "false".
6742 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6743 B.removeAttribute("no-frame-pointer-elim");
6744 }
6745 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6746 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6747 if (FramePointer != "all")
6748 FramePointer = "non-leaf";
6749 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6750 }
6751 if (!FramePointer.empty())
6752 B.addAttribute("frame-pointer", FramePointer);
6753
6754 A = B.getAttribute("null-pointer-is-valid");
6755 if (A.isValid()) {
6756 // The value can be "true" or "false".
6757 bool NullPointerIsValid = A.getValueAsString() == "true";
6758 B.removeAttribute("null-pointer-is-valid");
6759 if (NullPointerIsValid)
6760 B.addAttribute(Attribute::NullPointerIsValid);
6761 }
6762
6763 A = B.getAttribute("uniform-work-group-size");
6764 if (A.isValid()) {
6765 StringRef Val = A.getValueAsString();
6766 if (!Val.empty()) {
6767 bool IsTrue = Val == "true";
6768 B.removeAttribute("uniform-work-group-size");
6769 if (IsTrue)
6770 B.addAttribute("uniform-work-group-size");
6771 }
6772 }
6773}
6774
6775void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6776 // clang.arc.attachedcall bundles are now required to have an operand.
6777 // If they don't, it's okay to drop them entirely: when there is an operand,
6778 // the "attachedcall" is meaningful and required, but without an operand,
6779 // it's just a marker NOP. Dropping it merely prevents an optimization.
6780 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6781 return OBD.getTag() == "clang.arc.attachedcall" &&
6782 OBD.inputs().empty();
6783 });
6784}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define R2(n)
This file contains the declarations for metadata subclasses.
#define T
#define T1
NVPTX address space definition.
uint64_t High
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DWARF expression.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setApproxFunc(bool B=true)
Definition FMF.h:96
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:802
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:246
const Function & getFunction() const
Definition Function.h:166
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Function.cpp:450
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
Argument * getArg(unsigned i) const
Definition Function.h:886
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:604
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2787
Base class for instruction visitors.
Definition InstVisitor.h:78
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
LLVMContext & getContext() const
Definition Metadata.h:1244
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
A single uniqued string.
Definition Metadata.h:722
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1529
Metadata wrapper in the Value hierarchy.
Definition Metadata.h:184
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition Module.h:117
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition Module.h:138
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition Module.h:120
@ Min
Takes the min of the two values, which are required to be integers.
Definition Module.h:152
@ Max
Takes the max of the two values, which are required to be integers.
Definition Module.h:149
A tuple of MDNodes.
Definition Metadata.h:1760
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
Definition Metadata.h:1856
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
StringRef getTag() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition Type.cpp:824
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
static constexpr size_t npos
Definition StringRef.h:57
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition StringRef.h:844
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:413
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:297
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:273
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:296
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:153
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:145
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:294
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:352
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:270
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:225
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:284
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:283
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:282
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:397
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:708
bool use_empty() const
Definition Value.h:346
bool hasName() const
Definition Value.h:262
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type contraints specified by the ....
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
Definition Metadata.h:709
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
@ Length
Definition DWP.cpp:532
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
Op::Description Desc
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ DEBUG_METADATA_VERSION
Definition Metadata.h:54
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106