LLVM 23.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

61static cl::opt<bool>
62 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
63 cl::desc("Disable autoupgrade of debug info"));
64
65static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
66
67// Report a fatal error along with the
68// Call Instruction which caused the error
69[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
70 CallBase *CI) {
71 CI->print(llvm::errs());
72 llvm::errs() << "\n";
74}
75
76// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
77// changed their type from v4f32 to v2i64.
79 Function *&NewFn) {
80 // Check whether this is an old version of the function, which received
81 // v4f32 arguments.
82 Type *Arg0Type = F->getFunctionType()->getParamType(0);
83 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
84 return false;
85
86 // Yes, it's old, replace it with new version.
87 rename(F);
88 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
89 return true;
90}
91
92// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
93// arguments have changed their type from i32 to i8.
95 Function *&NewFn) {
96 // Check that the last argument is an i32.
97 Type *LastArgType = F->getFunctionType()->getParamType(
98 F->getFunctionType()->getNumParams() - 1);
99 if (!LastArgType->isIntegerTy(32))
100 return false;
101
102 // Move this function aside and map down.
103 rename(F);
104 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
105 return true;
106}
107
108// Upgrade the declaration of fp compare intrinsics that change return type
109// from scalar to vXi1 mask.
111 Function *&NewFn) {
112 // Check if the return type is a vector.
113 if (F->getReturnType()->isVectorTy())
114 return false;
115
116 rename(F);
117 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
118 return true;
119}
120
121// Upgrade the declaration of multiply and add bytes intrinsics whose input
122// arguments' types have changed from vectors of i32 to vectors of i8
124 Function *&NewFn) {
125 // check if input argument type is a vector of i8
126 Type *Arg1Type = F->getFunctionType()->getParamType(1);
127 Type *Arg2Type = F->getFunctionType()->getParamType(2);
128 if (Arg1Type->isVectorTy() &&
129 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
130 Arg2Type->isVectorTy() &&
131 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
132 return false;
133
134 rename(F);
135 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
136 return true;
137}
138
139// Upgrade the declaration of multipy and add words intrinsics whose input
140// arguments' types have changed to vectors of i32 to vectors of i16
142 Function *&NewFn) {
143 // check if input argument type is a vector of i16
144 Type *Arg1Type = F->getFunctionType()->getParamType(1);
145 Type *Arg2Type = F->getFunctionType()->getParamType(2);
146 if (Arg1Type->isVectorTy() &&
147 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
148 Arg2Type->isVectorTy() &&
149 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
150 return false;
151
152 rename(F);
153 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
154 return true;
155}
156
158 Function *&NewFn) {
159 if (F->getReturnType()->getScalarType()->isBFloatTy())
160 return false;
161
162 rename(F);
163 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
164 return true;
165}
166
168 Function *&NewFn) {
169 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
170 return false;
171
172 rename(F);
173 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
174 return true;
175}
176
178 // All of the intrinsics matches below should be marked with which llvm
179 // version started autoupgrading them. At some point in the future we would
180 // like to use this information to remove upgrade code for some older
181 // intrinsics. It is currently undecided how we will determine that future
182 // point.
183 if (Name.consume_front("avx."))
184 return (Name.starts_with("blend.p") || // Added in 3.7
185 Name == "cvt.ps2.pd.256" || // Added in 3.9
186 Name == "cvtdq2.pd.256" || // Added in 3.9
187 Name == "cvtdq2.ps.256" || // Added in 7.0
188 Name.starts_with("movnt.") || // Added in 3.2
189 Name.starts_with("sqrt.p") || // Added in 7.0
190 Name.starts_with("storeu.") || // Added in 3.9
191 Name.starts_with("vbroadcast.s") || // Added in 3.5
192 Name.starts_with("vbroadcastf128") || // Added in 4.0
193 Name.starts_with("vextractf128.") || // Added in 3.7
194 Name.starts_with("vinsertf128.") || // Added in 3.7
195 Name.starts_with("vperm2f128.") || // Added in 6.0
196 Name.starts_with("vpermil.")); // Added in 3.1
197
198 if (Name.consume_front("avx2."))
199 return (Name == "movntdqa" || // Added in 5.0
200 Name.starts_with("pabs.") || // Added in 6.0
201 Name.starts_with("padds.") || // Added in 8.0
202 Name.starts_with("paddus.") || // Added in 8.0
203 Name.starts_with("pblendd.") || // Added in 3.7
204 Name == "pblendw" || // Added in 3.7
205 Name.starts_with("pbroadcast") || // Added in 3.8
206 Name.starts_with("pcmpeq.") || // Added in 3.1
207 Name.starts_with("pcmpgt.") || // Added in 3.1
208 Name.starts_with("pmax") || // Added in 3.9
209 Name.starts_with("pmin") || // Added in 3.9
210 Name.starts_with("pmovsx") || // Added in 3.9
211 Name.starts_with("pmovzx") || // Added in 3.9
212 Name == "pmul.dq" || // Added in 7.0
213 Name == "pmulu.dq" || // Added in 7.0
214 Name.starts_with("psll.dq") || // Added in 3.7
215 Name.starts_with("psrl.dq") || // Added in 3.7
216 Name.starts_with("psubs.") || // Added in 8.0
217 Name.starts_with("psubus.") || // Added in 8.0
218 Name.starts_with("vbroadcast") || // Added in 3.8
219 Name == "vbroadcasti128" || // Added in 3.7
220 Name == "vextracti128" || // Added in 3.7
221 Name == "vinserti128" || // Added in 3.7
222 Name == "vperm2i128"); // Added in 6.0
223
224 if (Name.consume_front("avx512.")) {
225 if (Name.consume_front("mask."))
226 // 'avx512.mask.*'
227 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
228 Name.starts_with("and.") || // Added in 3.9
229 Name.starts_with("andn.") || // Added in 3.9
230 Name.starts_with("broadcast.s") || // Added in 3.9
231 Name.starts_with("broadcastf32x4.") || // Added in 6.0
232 Name.starts_with("broadcastf32x8.") || // Added in 6.0
233 Name.starts_with("broadcastf64x2.") || // Added in 6.0
234 Name.starts_with("broadcastf64x4.") || // Added in 6.0
235 Name.starts_with("broadcasti32x4.") || // Added in 6.0
236 Name.starts_with("broadcasti32x8.") || // Added in 6.0
237 Name.starts_with("broadcasti64x2.") || // Added in 6.0
238 Name.starts_with("broadcasti64x4.") || // Added in 6.0
239 Name.starts_with("cmp.b") || // Added in 5.0
240 Name.starts_with("cmp.d") || // Added in 5.0
241 Name.starts_with("cmp.q") || // Added in 5.0
242 Name.starts_with("cmp.w") || // Added in 5.0
243 Name.starts_with("compress.b") || // Added in 9.0
244 Name.starts_with("compress.d") || // Added in 9.0
245 Name.starts_with("compress.p") || // Added in 9.0
246 Name.starts_with("compress.q") || // Added in 9.0
247 Name.starts_with("compress.store.") || // Added in 7.0
248 Name.starts_with("compress.w") || // Added in 9.0
249 Name.starts_with("conflict.") || // Added in 9.0
250 Name.starts_with("cvtdq2pd.") || // Added in 4.0
251 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
252 Name == "cvtpd2dq.256" || // Added in 7.0
253 Name == "cvtpd2ps.256" || // Added in 7.0
254 Name == "cvtps2pd.128" || // Added in 7.0
255 Name == "cvtps2pd.256" || // Added in 7.0
256 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
257 Name == "cvtqq2ps.256" || // Added in 9.0
258 Name == "cvtqq2ps.512" || // Added in 9.0
259 Name == "cvttpd2dq.256" || // Added in 7.0
260 Name == "cvttps2dq.128" || // Added in 7.0
261 Name == "cvttps2dq.256" || // Added in 7.0
262 Name.starts_with("cvtudq2pd.") || // Added in 4.0
263 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
264 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
265 Name == "cvtuqq2ps.256" || // Added in 9.0
266 Name == "cvtuqq2ps.512" || // Added in 9.0
267 Name.starts_with("dbpsadbw.") || // Added in 7.0
268 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
269 Name.starts_with("expand.b") || // Added in 9.0
270 Name.starts_with("expand.d") || // Added in 9.0
271 Name.starts_with("expand.load.") || // Added in 7.0
272 Name.starts_with("expand.p") || // Added in 9.0
273 Name.starts_with("expand.q") || // Added in 9.0
274 Name.starts_with("expand.w") || // Added in 9.0
275 Name.starts_with("fpclass.p") || // Added in 7.0
276 Name.starts_with("insert") || // Added in 4.0
277 Name.starts_with("load.") || // Added in 3.9
278 Name.starts_with("loadu.") || // Added in 3.9
279 Name.starts_with("lzcnt.") || // Added in 5.0
280 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
281 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
282 Name.starts_with("movddup") || // Added in 3.9
283 Name.starts_with("move.s") || // Added in 4.0
284 Name.starts_with("movshdup") || // Added in 3.9
285 Name.starts_with("movsldup") || // Added in 3.9
286 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
287 Name.starts_with("or.") || // Added in 3.9
288 Name.starts_with("pabs.") || // Added in 6.0
289 Name.starts_with("packssdw.") || // Added in 5.0
290 Name.starts_with("packsswb.") || // Added in 5.0
291 Name.starts_with("packusdw.") || // Added in 5.0
292 Name.starts_with("packuswb.") || // Added in 5.0
293 Name.starts_with("padd.") || // Added in 4.0
294 Name.starts_with("padds.") || // Added in 8.0
295 Name.starts_with("paddus.") || // Added in 8.0
296 Name.starts_with("palignr.") || // Added in 3.9
297 Name.starts_with("pand.") || // Added in 3.9
298 Name.starts_with("pandn.") || // Added in 3.9
299 Name.starts_with("pavg") || // Added in 6.0
300 Name.starts_with("pbroadcast") || // Added in 6.0
301 Name.starts_with("pcmpeq.") || // Added in 3.9
302 Name.starts_with("pcmpgt.") || // Added in 3.9
303 Name.starts_with("perm.df.") || // Added in 3.9
304 Name.starts_with("perm.di.") || // Added in 3.9
305 Name.starts_with("permvar.") || // Added in 7.0
306 Name.starts_with("pmaddubs.w.") || // Added in 7.0
307 Name.starts_with("pmaddw.d.") || // Added in 7.0
308 Name.starts_with("pmax") || // Added in 4.0
309 Name.starts_with("pmin") || // Added in 4.0
310 Name == "pmov.qd.256" || // Added in 9.0
311 Name == "pmov.qd.512" || // Added in 9.0
312 Name == "pmov.wb.256" || // Added in 9.0
313 Name == "pmov.wb.512" || // Added in 9.0
314 Name.starts_with("pmovsx") || // Added in 4.0
315 Name.starts_with("pmovzx") || // Added in 4.0
316 Name.starts_with("pmul.dq.") || // Added in 4.0
317 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
318 Name.starts_with("pmulh.w.") || // Added in 7.0
319 Name.starts_with("pmulhu.w.") || // Added in 7.0
320 Name.starts_with("pmull.") || // Added in 4.0
321 Name.starts_with("pmultishift.qb.") || // Added in 8.0
322 Name.starts_with("pmulu.dq.") || // Added in 4.0
323 Name.starts_with("por.") || // Added in 3.9
324 Name.starts_with("prol.") || // Added in 8.0
325 Name.starts_with("prolv.") || // Added in 8.0
326 Name.starts_with("pror.") || // Added in 8.0
327 Name.starts_with("prorv.") || // Added in 8.0
328 Name.starts_with("pshuf.b.") || // Added in 4.0
329 Name.starts_with("pshuf.d.") || // Added in 3.9
330 Name.starts_with("pshufh.w.") || // Added in 3.9
331 Name.starts_with("pshufl.w.") || // Added in 3.9
332 Name.starts_with("psll.d") || // Added in 4.0
333 Name.starts_with("psll.q") || // Added in 4.0
334 Name.starts_with("psll.w") || // Added in 4.0
335 Name.starts_with("pslli") || // Added in 4.0
336 Name.starts_with("psllv") || // Added in 4.0
337 Name.starts_with("psra.d") || // Added in 4.0
338 Name.starts_with("psra.q") || // Added in 4.0
339 Name.starts_with("psra.w") || // Added in 4.0
340 Name.starts_with("psrai") || // Added in 4.0
341 Name.starts_with("psrav") || // Added in 4.0
342 Name.starts_with("psrl.d") || // Added in 4.0
343 Name.starts_with("psrl.q") || // Added in 4.0
344 Name.starts_with("psrl.w") || // Added in 4.0
345 Name.starts_with("psrli") || // Added in 4.0
346 Name.starts_with("psrlv") || // Added in 4.0
347 Name.starts_with("psub.") || // Added in 4.0
348 Name.starts_with("psubs.") || // Added in 8.0
349 Name.starts_with("psubus.") || // Added in 8.0
350 Name.starts_with("pternlog.") || // Added in 7.0
351 Name.starts_with("punpckh") || // Added in 3.9
352 Name.starts_with("punpckl") || // Added in 3.9
353 Name.starts_with("pxor.") || // Added in 3.9
354 Name.starts_with("shuf.f") || // Added in 6.0
355 Name.starts_with("shuf.i") || // Added in 6.0
356 Name.starts_with("shuf.p") || // Added in 4.0
357 Name.starts_with("sqrt.p") || // Added in 7.0
358 Name.starts_with("store.b.") || // Added in 3.9
359 Name.starts_with("store.d.") || // Added in 3.9
360 Name.starts_with("store.p") || // Added in 3.9
361 Name.starts_with("store.q.") || // Added in 3.9
362 Name.starts_with("store.w.") || // Added in 3.9
363 Name == "store.ss" || // Added in 7.0
364 Name.starts_with("storeu.") || // Added in 3.9
365 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
366 Name.starts_with("ucmp.") || // Added in 5.0
367 Name.starts_with("unpckh.") || // Added in 3.9
368 Name.starts_with("unpckl.") || // Added in 3.9
369 Name.starts_with("valign.") || // Added in 4.0
370 Name == "vcvtph2ps.128" || // Added in 11.0
371 Name == "vcvtph2ps.256" || // Added in 11.0
372 Name.starts_with("vextract") || // Added in 4.0
373 Name.starts_with("vfmadd.") || // Added in 7.0
374 Name.starts_with("vfmaddsub.") || // Added in 7.0
375 Name.starts_with("vfnmadd.") || // Added in 7.0
376 Name.starts_with("vfnmsub.") || // Added in 7.0
377 Name.starts_with("vpdpbusd.") || // Added in 7.0
378 Name.starts_with("vpdpbusds.") || // Added in 7.0
379 Name.starts_with("vpdpwssd.") || // Added in 7.0
380 Name.starts_with("vpdpwssds.") || // Added in 7.0
381 Name.starts_with("vpermi2var.") || // Added in 7.0
382 Name.starts_with("vpermil.p") || // Added in 3.9
383 Name.starts_with("vpermilvar.") || // Added in 4.0
384 Name.starts_with("vpermt2var.") || // Added in 7.0
385 Name.starts_with("vpmadd52") || // Added in 7.0
386 Name.starts_with("vpshld.") || // Added in 7.0
387 Name.starts_with("vpshldv.") || // Added in 8.0
388 Name.starts_with("vpshrd.") || // Added in 7.0
389 Name.starts_with("vpshrdv.") || // Added in 8.0
390 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
391 Name.starts_with("xor.")); // Added in 3.9
392
393 if (Name.consume_front("mask3."))
394 // 'avx512.mask3.*'
395 return (Name.starts_with("vfmadd.") || // Added in 7.0
396 Name.starts_with("vfmaddsub.") || // Added in 7.0
397 Name.starts_with("vfmsub.") || // Added in 7.0
398 Name.starts_with("vfmsubadd.") || // Added in 7.0
399 Name.starts_with("vfnmsub.")); // Added in 7.0
400
401 if (Name.consume_front("maskz."))
402 // 'avx512.maskz.*'
403 return (Name.starts_with("pternlog.") || // Added in 7.0
404 Name.starts_with("vfmadd.") || // Added in 7.0
405 Name.starts_with("vfmaddsub.") || // Added in 7.0
406 Name.starts_with("vpdpbusd.") || // Added in 7.0
407 Name.starts_with("vpdpbusds.") || // Added in 7.0
408 Name.starts_with("vpdpwssd.") || // Added in 7.0
409 Name.starts_with("vpdpwssds.") || // Added in 7.0
410 Name.starts_with("vpermt2var.") || // Added in 7.0
411 Name.starts_with("vpmadd52") || // Added in 7.0
412 Name.starts_with("vpshldv.") || // Added in 8.0
413 Name.starts_with("vpshrdv.")); // Added in 8.0
414
415 // 'avx512.*'
416 return (Name == "movntdqa" || // Added in 5.0
417 Name == "pmul.dq.512" || // Added in 7.0
418 Name == "pmulu.dq.512" || // Added in 7.0
419 Name.starts_with("broadcastm") || // Added in 6.0
420 Name.starts_with("cmp.p") || // Added in 12.0
421 Name.starts_with("cvtb2mask.") || // Added in 7.0
422 Name.starts_with("cvtd2mask.") || // Added in 7.0
423 Name.starts_with("cvtmask2") || // Added in 5.0
424 Name.starts_with("cvtq2mask.") || // Added in 7.0
425 Name == "cvtusi2sd" || // Added in 7.0
426 Name.starts_with("cvtw2mask.") || // Added in 7.0
427 Name == "kand.w" || // Added in 7.0
428 Name == "kandn.w" || // Added in 7.0
429 Name == "knot.w" || // Added in 7.0
430 Name == "kor.w" || // Added in 7.0
431 Name == "kortestc.w" || // Added in 7.0
432 Name == "kortestz.w" || // Added in 7.0
433 Name.starts_with("kunpck") || // added in 6.0
434 Name == "kxnor.w" || // Added in 7.0
435 Name == "kxor.w" || // Added in 7.0
436 Name.starts_with("padds.") || // Added in 8.0
437 Name.starts_with("pbroadcast") || // Added in 3.9
438 Name.starts_with("prol") || // Added in 8.0
439 Name.starts_with("pror") || // Added in 8.0
440 Name.starts_with("psll.dq") || // Added in 3.9
441 Name.starts_with("psrl.dq") || // Added in 3.9
442 Name.starts_with("psubs.") || // Added in 8.0
443 Name.starts_with("ptestm") || // Added in 6.0
444 Name.starts_with("ptestnm") || // Added in 6.0
445 Name.starts_with("storent.") || // Added in 3.9
446 Name.starts_with("vbroadcast.s") || // Added in 7.0
447 Name.starts_with("vpshld.") || // Added in 8.0
448 Name.starts_with("vpshrd.")); // Added in 8.0
449 }
450
451 if (Name.consume_front("fma."))
452 return (Name.starts_with("vfmadd.") || // Added in 7.0
453 Name.starts_with("vfmsub.") || // Added in 7.0
454 Name.starts_with("vfmsubadd.") || // Added in 7.0
455 Name.starts_with("vfnmadd.") || // Added in 7.0
456 Name.starts_with("vfnmsub.")); // Added in 7.0
457
458 if (Name.consume_front("fma4."))
459 return Name.starts_with("vfmadd.s"); // Added in 7.0
460
461 if (Name.consume_front("sse."))
462 return (Name == "add.ss" || // Added in 4.0
463 Name == "cvtsi2ss" || // Added in 7.0
464 Name == "cvtsi642ss" || // Added in 7.0
465 Name == "div.ss" || // Added in 4.0
466 Name == "mul.ss" || // Added in 4.0
467 Name.starts_with("sqrt.p") || // Added in 7.0
468 Name == "sqrt.ss" || // Added in 7.0
469 Name.starts_with("storeu.") || // Added in 3.9
470 Name == "sub.ss"); // Added in 4.0
471
472 if (Name.consume_front("sse2."))
473 return (Name == "add.sd" || // Added in 4.0
474 Name == "cvtdq2pd" || // Added in 3.9
475 Name == "cvtdq2ps" || // Added in 7.0
476 Name == "cvtps2pd" || // Added in 3.9
477 Name == "cvtsi2sd" || // Added in 7.0
478 Name == "cvtsi642sd" || // Added in 7.0
479 Name == "cvtss2sd" || // Added in 7.0
480 Name == "div.sd" || // Added in 4.0
481 Name == "mul.sd" || // Added in 4.0
482 Name.starts_with("padds.") || // Added in 8.0
483 Name.starts_with("paddus.") || // Added in 8.0
484 Name.starts_with("pcmpeq.") || // Added in 3.1
485 Name.starts_with("pcmpgt.") || // Added in 3.1
486 Name == "pmaxs.w" || // Added in 3.9
487 Name == "pmaxu.b" || // Added in 3.9
488 Name == "pmins.w" || // Added in 3.9
489 Name == "pminu.b" || // Added in 3.9
490 Name == "pmulu.dq" || // Added in 7.0
491 Name.starts_with("pshuf") || // Added in 3.9
492 Name.starts_with("psll.dq") || // Added in 3.7
493 Name.starts_with("psrl.dq") || // Added in 3.7
494 Name.starts_with("psubs.") || // Added in 8.0
495 Name.starts_with("psubus.") || // Added in 8.0
496 Name.starts_with("sqrt.p") || // Added in 7.0
497 Name == "sqrt.sd" || // Added in 7.0
498 Name == "storel.dq" || // Added in 3.9
499 Name.starts_with("storeu.") || // Added in 3.9
500 Name == "sub.sd"); // Added in 4.0
501
502 if (Name.consume_front("sse41."))
503 return (Name.starts_with("blendp") || // Added in 3.7
504 Name == "movntdqa" || // Added in 5.0
505 Name == "pblendw" || // Added in 3.7
506 Name == "pmaxsb" || // Added in 3.9
507 Name == "pmaxsd" || // Added in 3.9
508 Name == "pmaxud" || // Added in 3.9
509 Name == "pmaxuw" || // Added in 3.9
510 Name == "pminsb" || // Added in 3.9
511 Name == "pminsd" || // Added in 3.9
512 Name == "pminud" || // Added in 3.9
513 Name == "pminuw" || // Added in 3.9
514 Name.starts_with("pmovsx") || // Added in 3.8
515 Name.starts_with("pmovzx") || // Added in 3.9
516 Name == "pmuldq"); // Added in 7.0
517
518 if (Name.consume_front("sse42."))
519 return Name == "crc32.64.8"; // Added in 3.4
520
521 if (Name.consume_front("sse4a."))
522 return Name.starts_with("movnt."); // Added in 3.9
523
524 if (Name.consume_front("ssse3."))
525 return (Name == "pabs.b.128" || // Added in 6.0
526 Name == "pabs.d.128" || // Added in 6.0
527 Name == "pabs.w.128"); // Added in 6.0
528
529 if (Name.consume_front("xop."))
530 return (Name == "vpcmov" || // Added in 3.8
531 Name == "vpcmov.256" || // Added in 5.0
532 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
533 Name.starts_with("vprot")); // Added in 8.0
534
535 return (Name == "addcarry.u32" || // Added in 8.0
536 Name == "addcarry.u64" || // Added in 8.0
537 Name == "addcarryx.u32" || // Added in 8.0
538 Name == "addcarryx.u64" || // Added in 8.0
539 Name == "subborrow.u32" || // Added in 8.0
540 Name == "subborrow.u64" || // Added in 8.0
541 Name.starts_with("vcvtph2ps.")); // Added in 11.0
542}
543
545 Function *&NewFn) {
546 // Only handle intrinsics that start with "x86.".
547 if (!Name.consume_front("x86."))
548 return false;
549
550 if (shouldUpgradeX86Intrinsic(F, Name)) {
551 NewFn = nullptr;
552 return true;
553 }
554
555 if (Name == "rdtscp") { // Added in 8.0
556 // If this intrinsic has 0 operands, it's the new version.
557 if (F->getFunctionType()->getNumParams() == 0)
558 return false;
559
560 rename(F);
561 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
562 Intrinsic::x86_rdtscp);
563 return true;
564 }
565
567
568 // SSE4.1 ptest functions may have an old signature.
569 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
571 .Case("c", Intrinsic::x86_sse41_ptestc)
572 .Case("z", Intrinsic::x86_sse41_ptestz)
573 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
576 return upgradePTESTIntrinsic(F, ID, NewFn);
577
578 return false;
579 }
580
581 // Several blend and other instructions with masks used the wrong number of
582 // bits.
583
584 // Added in 3.6
586 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
587 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
588 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
589 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
590 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
591 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
594 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
595
596 if (Name.consume_front("avx512.")) {
597 if (Name.consume_front("mask.cmp.")) {
598 // Added in 7.0
600 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
608 return upgradeX86MaskedFPCompare(F, ID, NewFn);
609 } else if (Name.starts_with("vpdpbusd.") ||
610 Name.starts_with("vpdpbusds.")) {
611 // Added in 21.1
613 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
614 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
615 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
616 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
617 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
618 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
621 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
622 } else if (Name.starts_with("vpdpwssd.") ||
623 Name.starts_with("vpdpwssds.")) {
624 // Added in 21.1
626 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
627 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
628 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
629 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
630 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
631 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
634 return upgradeX86MultiplyAddWords(F, ID, NewFn);
635 }
636 return false; // No other 'x86.avx512.*'.
637 }
638
639 if (Name.consume_front("avx2.")) {
640 if (Name.consume_front("vpdpb")) {
641 // Added in 21.1
643 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
644 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
645 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
646 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
647 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
648 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
649 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
650 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
651 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
652 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
653 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
654 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
657 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
658 } else if (Name.consume_front("vpdpw")) {
659 // Added in 21.1
661 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
662 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
663 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
664 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
665 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
666 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
667 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
668 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
669 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
670 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
671 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
672 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
675 return upgradeX86MultiplyAddWords(F, ID, NewFn);
676 }
677 return false; // No other 'x86.avx2.*'
678 }
679
680 if (Name.consume_front("avx10.")) {
681 if (Name.consume_front("vpdpb")) {
682 // Added in 21.1
684 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
685 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
686 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
687 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
688 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
689 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
692 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
693 } else if (Name.consume_front("vpdpw")) {
695 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
696 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
697 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
698 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
699 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
700 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
703 return upgradeX86MultiplyAddWords(F, ID, NewFn);
704 }
705 return false; // No other 'x86.avx10.*'
706 }
707
708 if (Name.consume_front("avx512bf16.")) {
709 // Added in 9.0
711 .Case("cvtne2ps2bf16.128",
712 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .Case("cvtne2ps2bf16.256",
714 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .Case("cvtne2ps2bf16.512",
716 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .Case("mask.cvtneps2bf16.128",
718 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .Case("cvtneps2bf16.256",
720 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .Case("cvtneps2bf16.512",
722 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
725 return upgradeX86BF16Intrinsic(F, ID, NewFn);
726
727 // Added in 9.0
729 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
734 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
735 return false; // No other 'x86.avx512bf16.*'.
736 }
737
738 if (Name.consume_front("xop.")) {
740 if (Name.starts_with("vpermil2")) { // Added in 3.9
741 // Upgrade any XOP PERMIL2 index operand still using a float/double
742 // vector.
743 auto Idx = F->getFunctionType()->getParamType(2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
753 else
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
755 }
756 } else if (F->arg_size() == 2)
757 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
759 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
760 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
762
764 rename(F);
765 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
766 return true;
767 }
768 return false; // No other 'x86.xop.*'
769 }
770
771 if (Name == "seh.recoverfp") {
772 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
773 Intrinsic::eh_recoverfp);
774 return true;
775 }
776
777 return false;
778}
779
// Upgrades deprecated ARM/AArch64 intrinsic declarations to their current
// forms. On a recognized name this either sets NewFn to the replacement
// declaration (renaming F to "<name>.old" first where a call-site rewrite in
// UpgradeIntrinsicCall is needed), or sets NewFn = nullptr when the call is
// expanded inline later, and returns true. Returns false when no upgrade
// applies.
// NOTE(review): this listing was extracted from a rendered code viewer; the
// leading numbers on each line are the extractor's artifacts and several
// original lines (including the function's signature, at listing line 782)
// are missing. Verify against upstream AutoUpgrade.cpp before editing.
780// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
781// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
783 StringRef Name,
784 Function *&NewFn) {
// '(arm|aarch64).rbit' maps directly onto the generic llvm.bitreverse.
785 if (Name.starts_with("rbit")) {
786 // '(arm|aarch64).rbit'.
788 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
789 return true;
790 }
791
792 if (Name == "thread.pointer") {
793 // '(arm|aarch64).thread.pointer'.
795 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
796 return true;
797 }
798
// Strip the "neon." component once; the flag is reused in both the IsArm
// and !IsArm branches below.
799 bool Neon = Name.consume_front("neon.");
800 if (Neon) {
801 // '(arm|aarch64).neon.*'.
802 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
803 // v16i8 respectively.
804 if (Name.consume_front("bfdot.")) {
805 // (arm|aarch64).neon.bfdot.*'.
808 .Cases({"v2f32.v8i8", "v4f32.v16i8"},
809 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
810 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
// The bf16 operand vector length is derived from the return width:
// 64-bit result -> v4bf16, 128-bit result -> v8bf16 (width / 16 lanes).
813 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
814 assert((OperandWidth == 64 || OperandWidth == 128) &&
815 "Unexpected operand width");
816 LLVMContext &Ctx = F->getParent()->getContext();
817 std::array<Type *, 2> Tys{
818 {F->getReturnType(),
819 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
820 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
821 return true;
822 }
823 return false; // No other '(arm|aarch64).neon.bfdot.*'.
824 }
825
826 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
827 // anymore and accept v8bf16 instead of v16i8.
828 if (Name.consume_front("bfm")) {
829 // (arm|aarch64).neon.bfm*'.
830 if (Name.consume_back(".v4f32.v16i8")) {
831 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
834 .Case("mla",
835 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
836 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
837 .Case("lalb",
838 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
839 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
840 .Case("lalt",
841 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
842 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
845 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
846 return true;
847 }
848 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
849 }
850 return false; // No other '(arm|aarch64).neon.bfm*.
851 }
852 // Continue on to Aarch64 Neon or Arm Neon.
853 }
854 // Continue on to Arm or Aarch64.
855
856 if (IsArm) {
857 // 'arm.*'.
858 if (Neon) {
859 // 'arm.neon.*'.
// Target NEON ops with direct generic-intrinsic equivalents; overloaded
// on the (single) argument type below.
861 .StartsWith("vclz.", Intrinsic::ctlz)
862 .StartsWith("vcnt.", Intrinsic::ctpop)
863 .StartsWith("vqadds.", Intrinsic::sadd_sat)
864 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
865 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
866 .StartsWith("vqsubu.", Intrinsic::usub_sat)
867 .StartsWith("vrinta.", Intrinsic::round)
868 .StartsWith("vrintn.", Intrinsic::roundeven)
869 .StartsWith("vrintm.", Intrinsic::floor)
870 .StartsWith("vrintp.", Intrinsic::ceil)
871 .StartsWith("vrintx.", Intrinsic::rint)
872 .StartsWith("vrintz.", Intrinsic::trunc)
875 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
876 F->arg_begin()->getType());
877 return true;
878 }
879
880 if (Name.consume_front("vst")) {
881 // 'arm.neon.vst*'.
// Matches "1".."4" (plain stores) or "2lane".."4lane" (lane stores),
// each followed by a ".v<type>" suffix; Groups[1] holds the count part.
882 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
884 if (vstRegex.match(Name, &Groups)) {
885 static const Intrinsic::ID StoreInts[] = {
886 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
887 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
888
889 static const Intrinsic::ID StoreLaneInts[] = {
890 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
891 Intrinsic::arm_neon_vst4lane};
892
// The intrinsic is selected by arity: vstN has N+2 params, vstNlane
// has N+3, so the table index is derived from fArgs.size().
893 auto fArgs = F->getFunctionType()->params();
894 Type *Tys[] = {fArgs[0], fArgs[1]};
895 if (Groups[1].size() == 1)
897 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
898 else
900 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
901 return true;
902 }
903 return false; // No other 'arm.neon.vst*'.
904 }
905
906 return false; // No other 'arm.neon.*'.
907 }
908
909 if (Name.consume_front("mve.")) {
910 // 'arm.mve.*'.
911 if (Name == "vctp64") {
912 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
913 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
914 // the function and deal with it below in UpgradeIntrinsicCall.
915 rename(F);
916 return true;
917 }
918 return false; // Not 'arm.mve.vctp64'.
919 }
920
921 if (Name.starts_with("vrintn.v")) {
923 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
924 return true;
925 }
926
927 // These too are changed to accept a v2i1 instead of the old v4i1.
// The consume_back/consume_front chain below peels suffix/prefix
// components so the final equality tests see only the middle piece;
// returning true here defers the rewrite to UpgradeIntrinsicCall.
928 if (Name.consume_back(".v4i1")) {
929 // 'arm.mve.*.v4i1'.
930 if (Name.consume_back(".predicated.v2i64.v4i32"))
931 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
932 return Name == "mull.int" || Name == "vqdmull";
933
934 if (Name.consume_back(".v2i64")) {
935 // 'arm.mve.*.v2i64.v4i1'
936 bool IsGather = Name.consume_front("vldr.gather.");
937 if (IsGather || Name.consume_front("vstr.scatter.")) {
938 if (Name.consume_front("base.")) {
939 // Optional 'wb.' prefix.
940 Name.consume_front("wb.");
941 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
942 // predicated.v2i64.v2i64.v4i1'.
943 return Name == "predicated.v2i64";
944 }
945
946 if (Name.consume_front("offset.predicated."))
947 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
948 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
949
950 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
951 return false;
952 }
953
954 return false; // No other 'arm.mve.*.v2i64.v4i1'.
955 }
956 return false; // No other 'arm.mve.*.v4i1'.
957 }
958 return false; // No other 'arm.mve.*'.
959 }
960
961 if (Name.consume_front("cde.vcx")) {
962 // 'arm.cde.vcx*'.
963 if (Name.consume_back(".predicated.v2i64.v4i1"))
964 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
965 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
966 Name == "3q" || Name == "3qa";
967
968 return false; // No other 'arm.cde.vcx*'.
969 }
970 } else {
971 // 'aarch64.*'.
972 if (Neon) {
973 // 'aarch64.neon.*'.
975 .StartsWith("frintn", Intrinsic::roundeven)
976 .StartsWith("rbit", Intrinsic::bitreverse)
979 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
980 F->arg_begin()->getType());
981 return true;
982 }
983
984 if (Name.starts_with("addp")) {
985 // 'aarch64.neon.addp*'.
986 if (F->arg_size() != 2)
987 return false; // Invalid IR.
// Only the floating-point variant is renamed (addp -> faddp); integer
// addp falls through unchanged.
988 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
989 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
991 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
992 return true;
993 }
994 }
995
996 // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
997 if (Name.starts_with("bfcvt")) {
998 NewFn = nullptr;
999 return true;
1000 }
1001
1002 return false; // No other 'aarch64.neon.*'.
1003 }
1004 if (Name.consume_front("sve.")) {
1005 // 'aarch64.sve.*'.
1006 if (Name.consume_front("bf")) {
1007 if (Name.consume_back(".lane")) {
1008 // 'aarch64.sve.bf*.lane'.
1011 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1012 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1013 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1016 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1017 return true;
1018 }
1019 return false; // No other 'aarch64.sve.bf*.lane'.
1020 }
1021 return false; // No other 'aarch64.sve.bf*'.
1022 }
1023
1024 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
1025 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1026 NewFn = nullptr;
1027 return true;
1028 }
1029
1030 if (Name.consume_front("addqv")) {
1031 // 'aarch64.sve.addqv'.
// Only the FP form needs renaming to faddqv; integer addqv is left
// alone.
1032 if (!F->getReturnType()->isFPOrFPVectorTy())
1033 return false;
1034
1035 auto Args = F->getFunctionType()->params();
1036 Type *Tys[] = {F->getReturnType(), Args[1]};
1038 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1039 return true;
1040 }
1041
1042 if (Name.consume_front("ld")) {
1043 // 'aarch64.sve.ld*'.
1044 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1045 if (LdRegex.match(Name)) {
1046 Type *ScalarTy =
1047 cast<VectorType>(F->getReturnType())->getElementType();
1048 ElementCount EC =
1049 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
1050 Type *Ty = VectorType::get(ScalarTy, EC);
1051 static const Intrinsic::ID LoadIDs[] = {
1052 Intrinsic::aarch64_sve_ld2_sret,
1053 Intrinsic::aarch64_sve_ld3_sret,
1054 Intrinsic::aarch64_sve_ld4_sret,
1055 };
// Name[0] is '2'..'4' (guaranteed by LdRegex), so it indexes LoadIDs.
1056 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1057 LoadIDs[Name[0] - '2'], Ty);
1058 return true;
1059 }
1060 return false; // No other 'aarch64.sve.ld*'.
1061 }
1062
1063 if (Name.consume_front("tuple.")) {
1064 // 'aarch64.sve.tuple.*'.
1065 if (Name.starts_with("get")) {
1066 // 'aarch64.sve.tuple.get*'.
1067 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1069 F->getParent(), Intrinsic::vector_extract, Tys);
1070 return true;
1071 }
1072
1073 if (Name.starts_with("set")) {
1074 // 'aarch64.sve.tuple.set*'.
1075 auto Args = F->getFunctionType()->params();
// vector_insert overloads are (result, subvector, ...) so the tuple
// and inserted-value parameter order is swapped here deliberately.
1076 Type *Tys[] = {Args[0], Args[2], Args[1]};
1078 F->getParent(), Intrinsic::vector_insert, Tys);
1079 return true;
1080 }
1081
1082 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1083 if (CreateTupleRegex.match(Name)) {
1084 // 'aarch64.sve.tuple.create*'.
1085 auto Args = F->getFunctionType()->params();
1086 Type *Tys[] = {F->getReturnType(), Args[1]};
1088 F->getParent(), Intrinsic::vector_insert, Tys);
1089 return true;
1090 }
1091 return false; // No other 'aarch64.sve.tuple.*'.
1092 }
1093
1094 if (Name.starts_with("rev.nxv")) {
1095 // 'aarch64.sve.rev.<Ty>'
1097 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
1098 return true;
1099 }
1100
1101 return false; // No other 'aarch64.sve.*'.
1102 }
1103 }
1104 return false; // No other 'arm.*', 'aarch64.*'.
1105}
1106
// Decides whether an 'nvvm.cp.async.bulk.tensor.g2s.*' intrinsic needs
// upgrading and, if so, returns the replacement intrinsic ID; otherwise
// returns the fall-through value on the (missing) final line — presumably
// Intrinsic::not_intrinsic. NOTE(review): the function signature (listing
// line 1107) and several lines are absent from this extracted listing;
// confirm details against upstream AutoUpgrade.cpp.
1108 StringRef Name) {
1109 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
1112 .Case("im2col.3d",
1113 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1114 .Case("im2col.4d",
1115 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1116 .Case("im2col.5d",
1117 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1118 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1119 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1120 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1121 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1122 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1124
1126 return ID;
1127
1128 // These intrinsics may need upgrade for two reasons:
1129 // (1) When the address-space of the first argument is shared[AS=3]
1130 // (and we upgrade it to use shared_cluster address-space[AS=7])
1131 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1133 return ID;
1134
1135 // (2) When there are only two boolean flag arguments at the end:
1136 //
1137 // The last three parameters of the older version of these
1138 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1139 //
1140 // The newer version reads as:
1141 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1142 //
1143 // So, when the type of the [N-3]rd argument is "not i1", then
1144 // it is the older version and we need to upgrade.
1145 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1146 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
1147 if (!ArgType->isIntegerTy(1))
1148 return ID;
1149 }
1150
1152}
1153
// Decides whether an 'nvvm.mapa.shared.cluster' or 'nvvm.cp.async.bulk.*'
// intrinsic needs upgrading (when its pointer operand/result still uses the
// plain shared address space rather than shared_cluster) and returns the
// replacement ID. NOTE(review): the signature (listing line 1154) and the
// address-space constant operands are missing from this extracted listing.
1155 StringRef Name) {
1156 if (Name.consume_front("mapa.shared.cluster"))
1157 if (F->getReturnType()->getPointerAddressSpace() ==
1159 return Intrinsic::nvvm_mapa_shared_cluster;
1160
1161 if (Name.consume_front("cp.async.bulk.")) {
1164 .Case("global.to.shared.cluster",
1165 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1166 .Case("shared.cta.to.cluster",
1167 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1169
1171 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1173 return ID;
1174 }
1175
1177}
1178
// Maps legacy nvvm bf16/bf16x2 math intrinsic names (fma.rn.*, fmax.*,
// fmin.*, neg.*) to their current intrinsic IDs. Callers use the result to
// adjust declarations whose return type is not yet bfloat. NOTE(review):
// the function's signature line (listing 1179) and the StringSwitch
// .Default(...) terminators are missing from this extracted listing.
1180 if (Name.consume_front("fma.rn."))
1181 return StringSwitch<Intrinsic::ID>(Name)
1182 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1183 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1184 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1185 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1187
1188 if (Name.consume_front("fmax."))
1189 return StringSwitch<Intrinsic::ID>(Name)
1190 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1191 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1192 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1193 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1194 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1195 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1196 .Case("ftz.nan.xorsign.abs.bf16",
1197 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1198 .Case("ftz.nan.xorsign.abs.bf16x2",
1199 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1200 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1201 .Case("ftz.xorsign.abs.bf16x2",
1202 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1203 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1204 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1205 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1206 .Case("nan.xorsign.abs.bf16x2",
1207 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1208 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1209 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1211
1212 if (Name.consume_front("fmin."))
1213 return StringSwitch<Intrinsic::ID>(Name)
1214 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1215 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1216 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1217 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1218 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1219 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1220 .Case("ftz.nan.xorsign.abs.bf16",
1221 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1222 .Case("ftz.nan.xorsign.abs.bf16x2",
1223 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1224 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1225 .Case("ftz.xorsign.abs.bf16x2",
1226 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1227 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1228 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1229 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1230 .Case("nan.xorsign.abs.bf16x2",
1231 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1232 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1233 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1235
1236 if (Name.consume_front("neg."))
1237 return StringSwitch<Intrinsic::ID>(Name)
1238 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1239 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1241
1243}
1244
// Consumes one nvvm address-space component ("local", "shared", "global",
// "constant" or "param") from the front of Name, returning true if one was
// present. Note consume_front mutates Name in place, and || short-circuits,
// so at most one component is consumed. NOTE(review): the signature line
// (listing 1245) is missing from this extracted listing.
1246 return Name.consume_front("local") || Name.consume_front("shared") ||
1247 Name.consume_front("global") || Name.consume_front("constant") ||
1248 Name.consume_front("param");
1249}
1250
// Checks that a legacy 'llvm.convert.{to,from}.fp16*' declaration has types
// that can legally be rewritten as the corresponding cast pair:
// to.fp16 = FPTrunc(param -> half) then BitCast(half -> return type);
// from.fp16 = BitCast(param -> half) then FPExt(half -> return type).
// Returns false for any other name. NOTE(review): the signature line
// (listing 1251) is missing from this extracted listing.
1252 const FunctionType *FuncTy) {
1253 Type *HalfTy = Type::getHalfTy(FuncTy->getContext());
1254 if (Name.starts_with("to.fp16")) {
1255 return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0),
1256 HalfTy) &&
1257 CastInst::castIsValid(Instruction::BitCast, HalfTy,
1258 FuncTy->getReturnType());
1259 }
1260
1261 if (Name.starts_with("from.fp16")) {
1262 return CastInst::castIsValid(Instruction::BitCast, FuncTy->getParamType(0),
1263 HalfTy) &&
1264 CastInst::castIsValid(Instruction::FPExt, HalfTy,
1265 FuncTy->getReturnType());
1266 }
1267
1268 return false;
1269}
1270
// Core dispatch for intrinsic upgrades. Strips the "llvm." prefix and
// switches on the first character of the remaining name, delegating to the
// per-target helpers above (ARM/AArch64, NVVM, RISC-V, WebAssembly, X86) or
// handling generic intrinsics inline. Contract (per the visible call sites):
// returns true when F needs upgrading, with NewFn set either to the new
// declaration or to nullptr when the call itself is rewritten/expanded later
// in UpgradeIntrinsicCall. rename(F) gives the old declaration a ".old"
// suffix so the new declaration can take the canonical name.
// NOTE(review): this listing is doc-extracted; the signature (listing line
// 1271) and many interior lines (StringSwitch headers, .Default lines,
// assignment left-hand sides) are missing. Treat as a reading aid only.
1272 bool CanUpgradeDebugIntrinsicsToRecords) {
1273 assert(F && "Illegal to upgrade a non-existent Function.");
1274
1275 StringRef Name = F->getName();
1276
1277 // Quickly eliminate it, if it's not a candidate.
1278 if (!Name.consume_front("llvm.") || Name.empty())
1279 return false;
1280
1281 switch (Name[0]) {
1282 default: break;
1283 case 'a': {
1284 bool IsArm = Name.consume_front("arm.");
1285 if (IsArm || Name.consume_front("aarch64.")) {
1286 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1287 return true;
1288 break;
1289 }
1290
1291 if (Name.consume_front("amdgcn.")) {
1292 if (Name == "alignbit") {
1293 // Target specific intrinsic became redundant
1295 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1296 return true;
1297 }
1298
1299 if (Name.consume_front("atomic.")) {
1300 if (Name.starts_with("inc") || Name.starts_with("dec") ||
1301 Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1302 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1303 // and usub_sat so there's no new declaration.
1304 NewFn = nullptr;
1305 return true;
1306 }
1307 break; // No other 'amdgcn.atomic.*'
1308 }
1309
1310 // Legacy wmma iu intrinsics without the optional clamp operand.
1311 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
1312 F->arg_size() == 7) {
1313 NewFn = nullptr;
1314 return true;
1315 }
1316 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
1317 F->arg_size() == 8) {
1318 NewFn = nullptr;
1319 return true;
1320 }
1321
1322 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1323 Name.consume_front("flat.atomic.")) {
1324 if (Name.starts_with("fadd") ||
1325 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1326 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1327 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1328 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1329 // declaration.
1330 NewFn = nullptr;
1331 return true;
1332 }
1333 }
1334
1335 if (Name.starts_with("ldexp.")) {
1336 // Target specific intrinsic became redundant
1338 F->getParent(), Intrinsic::ldexp,
1339 {F->getReturnType(), F->getArg(1)->getType()});
1340 return true;
1341 }
1342 break; // No other 'amdgcn.*'
1343 }
1344
1345 break;
1346 }
1347 case 'c': {
1348 if (F->arg_size() == 1) {
1349 if (Name.consume_front("convert.")) {
1350 if (convertIntrinsicValidType(Name, F->getFunctionType())) {
1351 NewFn = nullptr;
1352 return true;
1353 }
1354 }
1355
// ctlz./cttz. gained an i1 "is_zero_poison" operand; single-argument
// declarations are renamed and re-created, the call is fixed up later.
1357 .StartsWith("ctlz.", Intrinsic::ctlz)
1358 .StartsWith("cttz.", Intrinsic::cttz)
1361 rename(F);
1362 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1363 F->arg_begin()->getType());
1364 return true;
1365 }
1366 }
1367
1368 if (F->arg_size() == 2 && Name == "coro.end") {
1369 rename(F);
1370 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1371 Intrinsic::coro_end);
1372 return true;
1373 }
1374
1375 break;
1376 }
1377 case 'd':
1378 if (Name.consume_front("dbg.")) {
1379 // Mark debug intrinsics for upgrade to new debug format.
1380 if (CanUpgradeDebugIntrinsicsToRecords) {
1381 if (Name == "addr" || Name == "value" || Name == "assign" ||
1382 Name == "declare" || Name == "label") {
1383 // There's no function to replace these with.
1384 NewFn = nullptr;
1385 // But we do want these to get upgraded.
1386 return true;
1387 }
1388 }
1389 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1390 // converted to DbgVariableRecords later.
1391 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1392 rename(F);
1393 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1394 Intrinsic::dbg_value);
1395 return true;
1396 }
1397 break; // No other 'dbg.*'.
1398 }
1399 break;
1400 case 'e':
1401 if (Name.consume_front("experimental.vector.")) {
1404 // Skip over extract.last.active, otherwise it will be 'upgraded'
1405 // to a regular vector extract which is a different operation.
1406 .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1407 .StartsWith("extract.", Intrinsic::vector_extract)
1408 .StartsWith("insert.", Intrinsic::vector_insert)
1409 .StartsWith("reverse.", Intrinsic::vector_reverse)
1410 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1411 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1412 .StartsWith("partial.reduce.add",
1413 Intrinsic::vector_partial_reduce_add)
// Rebuild the overload type list per-intrinsic: extract/interleave2
// overload on the return type, most overload on the first parameter,
// and insert/partial.reduce.add additionally on the second parameter.
1416 const auto *FT = F->getFunctionType();
1418 if (ID == Intrinsic::vector_extract ||
1419 ID == Intrinsic::vector_interleave2)
1420 // Extracting overloads the return type.
1421 Tys.push_back(FT->getReturnType());
1422 if (ID != Intrinsic::vector_interleave2)
1423 Tys.push_back(FT->getParamType(0));
1424 if (ID == Intrinsic::vector_insert ||
1425 ID == Intrinsic::vector_partial_reduce_add)
1426 // Inserting overloads the inserted type.
1427 Tys.push_back(FT->getParamType(1));
1428 rename(F);
1429 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1430 return true;
1431 }
1432
1433 if (Name.consume_front("reduce.")) {
// First regex matches the integer/generic reductions
// ("<op>.<elemty>"); the v2 regex below matches the ordered FP
// fadd/fmul variants ("v2.<op>.<f|i><bits>"), which carry a start
// value, hence the Args[V2 ? 1 : 0] overload selection further down.
1435 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1436 if (R.match(Name, &Groups))
1438 .Case("add", Intrinsic::vector_reduce_add)
1439 .Case("mul", Intrinsic::vector_reduce_mul)
1440 .Case("and", Intrinsic::vector_reduce_and)
1441 .Case("or", Intrinsic::vector_reduce_or)
1442 .Case("xor", Intrinsic::vector_reduce_xor)
1443 .Case("smax", Intrinsic::vector_reduce_smax)
1444 .Case("smin", Intrinsic::vector_reduce_smin)
1445 .Case("umax", Intrinsic::vector_reduce_umax)
1446 .Case("umin", Intrinsic::vector_reduce_umin)
1447 .Case("fmax", Intrinsic::vector_reduce_fmax)
1448 .Case("fmin", Intrinsic::vector_reduce_fmin)
1450
1451 bool V2 = false;
1453 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1454 Groups.clear();
1455 V2 = true;
1456 if (R2.match(Name, &Groups))
1458 .Case("fadd", Intrinsic::vector_reduce_fadd)
1459 .Case("fmul", Intrinsic::vector_reduce_fmul)
1461 }
1463 rename(F);
1464 auto Args = F->getFunctionType()->params();
1465 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1466 {Args[V2 ? 1 : 0]});
1467 return true;
1468 }
1469 break; // No other 'expermental.vector.reduce.*'.
1470 }
1471
1472 if (Name.consume_front("splice"))
1473 return true;
1474 break; // No other 'experimental.vector.*'.
1475 }
1476 if (Name.consume_front("experimental.stepvector.")) {
1477 Intrinsic::ID ID = Intrinsic::stepvector;
1478 rename(F);
1480 F->getParent(), ID, F->getFunctionType()->getReturnType());
1481 return true;
1482 }
1483 break; // No other 'e*'.
1484 case 'f':
1485 if (Name.starts_with("flt.rounds")) {
1486 rename(F);
1487 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1488 Intrinsic::get_rounding);
1489 return true;
1490 }
1491 break;
1492 case 'i':
1493 if (Name.starts_with("invariant.group.barrier")) {
1494 // Rename invariant.group.barrier to launder.invariant.group
1495 auto Args = F->getFunctionType()->params();
1496 Type* ObjectPtr[1] = {Args[0]};
1497 rename(F);
1499 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1500 return true;
1501 }
1502 break;
1503 case 'l':
1504 if ((Name.starts_with("lifetime.start") ||
1505 Name.starts_with("lifetime.end")) &&
1506 F->arg_size() == 2) {
1507 Intrinsic::ID IID = Name.starts_with("lifetime.start")
1508 ? Intrinsic::lifetime_start
1509 : Intrinsic::lifetime_end;
1510 rename(F);
1511 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1512 F->getArg(0)->getType());
1513 return true;
1514 }
1515 break;
1516 case 'm': {
1517 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1518 // alignment parameter to embedding the alignment as an attribute of
1519 // the pointer args.
1520 if (unsigned ID = StringSwitch<unsigned>(Name)
1521 .StartsWith("memcpy.", Intrinsic::memcpy)
1522 .StartsWith("memmove.", Intrinsic::memmove)
1523 .Default(0)) {
1524 if (F->arg_size() == 5) {
1525 rename(F);
1526 // Get the types of dest, src, and len
1527 ArrayRef<Type *> ParamTypes =
1528 F->getFunctionType()->params().slice(0, 3);
1529 NewFn =
1530 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1531 return true;
1532 }
1533 }
1534 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1535 rename(F);
1536 // Get the types of dest, and len
1537 const auto *FT = F->getFunctionType();
1538 Type *ParamTypes[2] = {
1539 FT->getParamType(0), // Dest
1540 FT->getParamType(2) // len
1541 };
1542 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1543 Intrinsic::memset, ParamTypes);
1544 return true;
1545 }
1546
// Four-argument masked.* declarations predate the pointer-alignment
// attribute form; load/gather overload on (return, ptr), store/scatter
// on (value, ptr).
1547 unsigned MaskedID =
1549 .StartsWith("masked.load", Intrinsic::masked_load)
1550 .StartsWith("masked.gather", Intrinsic::masked_gather)
1551 .StartsWith("masked.store", Intrinsic::masked_store)
1552 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1553 .Default(0);
1554 if (MaskedID && F->arg_size() == 4) {
1555 rename(F);
1556 if (MaskedID == Intrinsic::masked_load ||
1557 MaskedID == Intrinsic::masked_gather) {
1559 F->getParent(), MaskedID,
1560 {F->getReturnType(), F->getArg(0)->getType()});
1561 return true;
1562 }
1564 F->getParent(), MaskedID,
1565 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1566 return true;
1567 }
1568 break;
1569 }
1570 case 'n': {
1571 if (Name.consume_front("nvvm.")) {
1572 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1573 if (F->arg_size() == 1) {
1574 Intrinsic::ID IID =
1576 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1577 .Case("clz.i", Intrinsic::ctlz)
1578 .Case("popc.i", Intrinsic::ctpop)
1580 if (IID != Intrinsic::not_intrinsic) {
1581 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1582 {F->getReturnType()});
1583 return true;
1584 }
1585 } else if (F->arg_size() == 2) {
1586 Intrinsic::ID IID =
1588 .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
1589 .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
1590 .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
1591 .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
1593 if (IID != Intrinsic::not_intrinsic) {
1594 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1595 {F->getReturnType()});
1596 return true;
1597 }
1598 }
1599
1600 // Check for nvvm intrinsics that need a return type adjustment.
1601 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1603 if (IID != Intrinsic::not_intrinsic) {
1604 NewFn = nullptr;
1605 return true;
1606 }
1607 }
1608
1609 // Upgrade Distributed Shared Memory Intrinsics
1611 if (IID != Intrinsic::not_intrinsic) {
1612 rename(F);
1613 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1614 return true;
1615 }
1616
1617 // Upgrade TMA copy G2S Intrinsics
1619 if (IID != Intrinsic::not_intrinsic) {
1620 rename(F);
1621 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1622 return true;
1623 }
1624
1625 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1626 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1627 //
1628 // TODO: We could add lohi.i2d.
1629 bool Expand = false;
1630 if (Name.consume_front("abs."))
1631 // nvvm.abs.{i,ii}
1632 Expand =
1633 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1634 else if (Name.consume_front("fabs."))
1635 // nvvm.fabs.{f,ftz.f,d}
1636 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1637 else if (Name.consume_front("ex2.approx."))
1638 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1639 Expand =
1640 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1641 else if (Name.consume_front("atomic.load."))
1642 // nvvm.atomic.load.add.{f32,f64}.p
1643 // nvvm.atomic.load.{inc,dec}.32.p
1644 Expand = StringSwitch<bool>(Name)
1645 .StartsWith("add.f32.p", true)
1646 .StartsWith("add.f64.p", true)
1647 .StartsWith("inc.32.p", true)
1648 .StartsWith("dec.32.p", true)
1649 .Default(false);
1650 else if (Name.consume_front("bitcast."))
1651 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1652 Expand =
1653 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1654 else if (Name.consume_front("rotate."))
1655 // nvvm.rotate.{b32,b64,right.b64}
1656 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1657 else if (Name.consume_front("ptr.gen.to."))
1658 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1659 Expand = consumeNVVMPtrAddrSpace(Name);
1660 else if (Name.consume_front("ptr."))
1661 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1662 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1663 else if (Name.consume_front("ldg.global."))
1664 // nvvm.ldg.global.{i,p,f}
1665 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1666 Name.starts_with("p."));
1667 else
1668 Expand = StringSwitch<bool>(Name)
1669 .Case("barrier0", true)
1670 .Case("barrier.n", true)
1671 .Case("barrier.sync.cnt", true)
1672 .Case("barrier.sync", true)
1673 .Case("barrier", true)
1674 .Case("bar.sync", true)
1675 .Case("barrier0.popc", true)
1676 .Case("barrier0.and", true)
1677 .Case("barrier0.or", true)
1678 .Case("clz.ll", true)
1679 .Case("popc.ll", true)
1680 .Case("h2f", true)
1681 .Case("swap.lo.hi.b64", true)
1682 .Case("tanh.approx.f32", true)
1683 .Default(false)
1684
1685 if (Expand) {
1686 NewFn = nullptr;
1687 return true;
1688 }
1689 break; // No other 'nvvm.*'.
1690 }
1691 break;
1692 }
1693 case 'o':
1694 if (Name.starts_with("objectsize.")) {
1695 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1696 if (F->arg_size() == 2 || F->arg_size() == 3) {
1697 rename(F);
1698 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1699 Intrinsic::objectsize, Tys);
1700 return true;
1701 }
1702 }
1703 break;
1704
1705 case 'p':
1706 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1707 rename(F);
1709 F->getParent(), Intrinsic::ptr_annotation,
1710 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1711 return true;
1712 }
1713 break;
1714
1715 case 'r': {
1716 if (Name.consume_front("riscv.")) {
// aes32* historically took an i32 immediate as the third operand on
// RV64 builds with an i64 type; only non-i32 forms need re-declaring.
1719 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1720 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1721 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1722 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1725 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1726 rename(F);
1727 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1728 return true;
1729 }
1730 break; // No other applicable upgrades.
1731 }
1732
1734 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1735 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1738 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1739 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1740 rename(F);
1741 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1742 return true;
1743 }
1744 break; // No other applicable upgrades.
1745 }
1746
1748 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1749 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1750 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1751 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1752 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1753 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1756 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1757 rename(F);
1758 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1759 return true;
1760 }
1761 break; // No other applicable upgrades.
1762 }
1763
1764 // Replace llvm.riscv.clmul with llvm.clmul.
1765 if (Name == "clmul.i32" || Name == "clmul.i64") {
1767 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1768 return true;
1769 }
1770
1771 break; // No other 'riscv.*' intrinsics
1772 }
1773 } break;
1774
1775 case 's':
1776 if (Name == "stackprotectorcheck") {
1777 NewFn = nullptr;
1778 return true;
1779 }
1780 break;
1781
1782 case 't':
1783 if (Name == "thread.pointer") {
1785 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1786 return true;
1787 }
1788 break;
1789
1790 case 'v': {
1791 if (Name == "var.annotation" && F->arg_size() == 4) {
1792 rename(F);
1794 F->getParent(), Intrinsic::var_annotation,
1795 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1796 return true;
1797 }
1798 if (Name.consume_front("vector.splice")) {
1799 if (Name.starts_with(".left") || Name.starts_with(".right"))
1800 break;
1801 return true;
1802 }
1803 break;
1804 }
1805
1806 case 'w':
1807 if (Name.consume_front("wasm.")) {
1810 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1811 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1812 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1815 rename(F);
1816 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1817 F->getReturnType());
1818 return true;
1819 }
1820
1821 if (Name.consume_front("dot.i8x16.i7x16.")) {
1823 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1824 .Case("add.signed",
1825 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1828 rename(F);
1829 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1830 return true;
1831 }
1832 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1833 }
1834 break; // No other 'wasm.*'.
1835 }
1836 break;
1837
1838 case 'x':
1839 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1840 return true;
1841 }
1842
// Fallback path: non-name-based upgrades that apply to any intrinsic.
1843 auto *ST = dyn_cast<StructType>(F->getReturnType());
1844 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1845 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1846 // Replace return type with literal non-packed struct. Only do this for
1847 // intrinsics declared to return a struct, not for intrinsics with
1848 // overloaded return type, in which case the exact struct type will be
1849 // mangled into the name.
1852 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1853 auto *FT = F->getFunctionType();
1854 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1855 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1856 std::string Name = F->getName().str();
1857 rename(F);
1858 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1859 Name, F->getParent());
1860
1861 // The new function may also need remangling.
1862 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1863 NewFn = *Result;
1864 return true;
1865 }
1866 }
1867
1868 // Remangle our intrinsic since we upgrade the mangling
1870 if (Result != std::nullopt) {
1871 NewFn = *Result;
1872 return true;
1873 }
1874
1875 // This may not belong here. This function is effectively being overloaded
1876 // to both detect an intrinsic which needs upgrading, and to provide the
1877 // upgraded form of the intrinsic. We should perhaps have two separate
1878 // functions for this.
1879 return false;
1880}
1881
// Public entry point: determine whether intrinsic F needs auto-upgrading.
// On success, NewFn receives the replacement declaration (it may remain
// null when the upgrade happens at each call site instead).
// NOTE(review): the opening line of this signature (original line 1882) was
// elided by the extraction — presumably `bool llvm::UpgradeIntrinsicFunction(
// Function *F, Function *&NewFn, ...` — confirm against upstream.
1883 bool CanUpgradeDebugIntrinsicsToRecords) {
1884 NewFn = nullptr;
1885 bool Upgraded =
1886 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1887
1888 // Upgrade intrinsic attributes. This does not change the function.
// If a replacement was produced, refresh attributes on the new declaration.
1889 if (NewFn)
1890 F = NewFn;
1891 if (Intrinsic::ID id = F->getIntrinsicID()) {
1892 // Only do this if the intrinsic signature is valid.
1893 SmallVector<Type *> OverloadTys;
1894 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1895 F->setAttributes(
1896 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1897 }
1898 return Upgraded;
1899}
1900
// Upgrades llvm.global_ctors / llvm.global_dtors from the legacy 2-field
// element struct to the current 3-field form (priority, function, plus an
// extra pointer field appended below). Returns the replacement global, or
// nullptr when GV is not one of those arrays or already has the new shape.
// NOTE(review): the signature line and a few interior lines (original 1901,
// 1906, 1909, 1924) were elided by the extraction; the dyn_casts producing
// ATy/STy and the third ConstantStruct field are among the missing lines.
1902 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1903 GV->getName() == "llvm.global_dtors")) ||
1904 !GV->hasInitializer())
1905 return nullptr;
1907 if (!ATy)
1908 return nullptr;
// Only the legacy 2-element struct form needs upgrading.
1910 if (!STy || STy->getNumElements() != 2)
1911 return nullptr;
1912
1913 LLVMContext &C = GV->getContext();
1914 IRBuilder<> IRB(C);
// New element type: the two original fields plus a trailing pointer field.
1915 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1916 IRB.getPtrTy())
1917 Constant *Init = GV->getInitializer();
1918 unsigned N = Init->getNumOperands();
1919 std::vector<Constant *> NewCtors(N);
// Rebuild every entry with the widened struct type.
1920 for (unsigned i = 0; i != N; ++i) {
1921 auto Ctor = cast<Constant>(Init->getOperand(i));
1922 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1923 Ctor->getAggregateElement(1),
1925 }
1926 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1927
1928 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1929 NewInit, GV->getName());
1930}
1931
1932// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1933// to byte shuffles.
// Lowers a PSLLDQ (shift left by whole bytes) intrinsic on Op into a
// shufflevector that pulls bytes from a zero vector, handling each 16-byte
// lane independently. 'Shift' is the byte count of the shift.
// NOTE(review): the signature's first line (original 1934) was elided —
// presumably `static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder,
// Value *Op,` — confirm against upstream.
1935 unsigned Shift) {
1936 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// The operand uses 64-bit elements (see bitcast below), so bytes = elts * 8.
1937 unsigned NumElts = ResultTy->getNumElements() * 8;
1938
1939 // Bitcast from a 64-bit element type to a byte element type.
1940 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1941 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1942
1943 // We'll be shuffling in zeroes.
1944 Value *Res = Constant::getNullValue(VecTy);
1945
1946 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1947 // we'll just return the zero vector.
1948 if (Shift < 16) {
1949 int Idxs[64];
1950 // 256/512-bit version is split into 2/4 16-byte lanes.
1951 for (unsigned l = 0; l != NumElts; l += 16)
1952 for (unsigned i = 0; i != 16; ++i) {
// Index into the second shuffle operand (Op) by default; indices below
// NumElts fall back to the zero vector at the start of the lane.
1953 unsigned Idx = NumElts + i - Shift;
1954 if (Idx < NumElts)
1955 Idx -= NumElts - 16; // end of lane, switch operand.
1956 Idxs[l + i] = Idx + l;
1957 }
1958
1959 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1960 }
1961
1962 // Bitcast back to a 64-bit element type.
1963 return Builder.CreateBitCast(Res, ResultTy, "cast");
1964}
1965
1966// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1967// to byte shuffles.
// Lowers a PSRLDQ (shift right by whole bytes) intrinsic on Op into a
// shufflevector against a zero vector, one 16-byte lane at a time.
// 'Shift' is the byte count of the shift.
// NOTE(review): the signature's first line (original 1968) was elided —
// presumably `static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder,
// Value *Op,` — confirm against upstream.
1969 unsigned Shift) {
1970 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// The operand uses 64-bit elements (see bitcast below), so bytes = elts * 8.
1971 unsigned NumElts = ResultTy->getNumElements() * 8;
1972
1973 // Bitcast from a 64-bit element type to a byte element type.
1974 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1975 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1976
1977 // We'll be shuffling in zeroes.
1978 Value *Res = Constant::getNullValue(VecTy);
1979
1980 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1981 // we'll just return the zero vector.
1982 if (Shift < 16) {
1983 int Idxs[64];
1984 // 256/512-bit version is split into 2/4 16-byte lanes.
1985 for (unsigned l = 0; l != NumElts; l += 16)
1986 for (unsigned i = 0; i != 16; ++i) {
// Indices >= 16 cross into the zero vector (the second shuffle operand).
1987 unsigned Idx = i + Shift;
1988 if (Idx >= 16)
1989 Idx += NumElts - 16; // end of lane, switch operand.
1990 Idxs[l + i] = Idx + l;
1991 }
1992
1993 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1994 }
1995
1996 // Bitcast back to a 64-bit element type.
1997 return Builder.CreateBitCast(Res, ResultTy, "cast");
1998}
1999
// Converts an x86 integer mask value into a <NumElts x i1> vector suitable
// for use with select/masked-load/masked-store. NumElts must be a power of
// two (asserted below).
// NOTE(review): one interior line (original 2003) was elided — presumably the
// FixedVectorType::get(...) start of the MaskTy construction; confirm
// against upstream.
2000 static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2001 unsigned NumElts) {
2002 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
2004 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
// Reinterpret the integer mask as a bit-vector of i1.
2005 Mask = Builder.CreateBitCast(Mask, MaskTy);
2006
2007 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2008 // i8 and we need to extract down to the right number of elements.
2009 if (NumElts <= 4) {
2010 int Indices[4];
2011 for (unsigned i = 0; i != NumElts; ++i)
2012 Indices[i] = i;
2013 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
2014 "extract");
2015 }
2016
2017 return Mask;
2018}
2019
2020static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2021 Value *Op1) {
2022 // If the mask is all ones just emit the first operation.
2023 if (const auto *C = dyn_cast<Constant>(Mask))
2024 if (C->isAllOnesValue())
2025 return Op0;
2026
2027 Mask = getX86MaskVec(Builder, Mask,
2028 cast<FixedVectorType>(Op0->getType())->getNumElements());
2029 return Builder.CreateSelect(Mask, Op0, Op1);
2030}
2031
2032static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2033 Value *Op1) {
2034 // If the mask is all ones just emit the first operation.
2035 if (const auto *C = dyn_cast<Constant>(Mask))
2036 if (C->isAllOnesValue())
2037 return Op0;
2038
2039 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2040 Mask->getType()->getIntegerBitWidth());
2041 Mask = Builder.CreateBitCast(Mask, MaskTy);
2042 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2043 return Builder.CreateSelect(Mask, Op0, Op1);
2044}
2045
2046// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2047// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2048// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// Upgrades masked PALIGNR and VALIGND/Q intrinsics to a shufflevector plus
// mask select. See the comment above: PALIGNR shifts away large immediates
// while VALIGN masks them, and only PALIGNR works in 128-bit lanes.
// NOTE(review): the first signature line (original 2049) and two interior
// lines (2067, 2074 — presumably the zero-vector returns/assignment) were
// elided by the extraction; confirm against upstream.
2050 Value *Op1, Value *Shift,
2051 Value *Passthru, Value *Mask,
2052 bool IsVALIGN) {
2053 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2054
2055 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2056 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2057 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2058 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2059
2060 // Mask the immediate for VALIGN.
2061 if (IsVALIGN)
2062 ShiftVal &= (NumElts - 1);
2063
2064 // If palignr is shifting the pair of vectors more than the size of two
2065 // lanes, emit zero.
2066 if (ShiftVal >= 32)
2068
2069 // If palignr is shifting the pair of input vectors more than one lane,
2070 // but less than two lanes, convert to shifting in zeroes.
2071 if (ShiftVal > 16) {
2072 ShiftVal -= 16;
2073 Op1 = Op0;
2075 }
2076
2077 int Indices[64];
2078 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2079 for (unsigned l = 0; l < NumElts; l += 16) {
2080 for (unsigned i = 0; i != 16; ++i) {
2081 unsigned Idx = ShiftVal + i;
2082 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2083 Idx += NumElts - 16; // End of lane, switch operand.
2084 Indices[l + i] = Idx + l;
2085 }
2086 }
2087
// Op1 supplies the low elements, Op0 the high ones, matching palignr.
2088 Value *Align = Builder.CreateShuffleVector(
2089 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2090
2091 return emitX86Select(Builder, Mask, Align, Passthru);
2092}
2093
2095 bool ZeroMask, bool IndexForm) {
2096 Type *Ty = CI.getType();
2097 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2098 unsigned EltWidth = Ty->getScalarSizeInBits();
2099 bool IsFloat = Ty->isFPOrFPVectorTy();
2100 Intrinsic::ID IID;
2101 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2102 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2103 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2104 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2105 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2106 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2107 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2108 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2109 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2110 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2111 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2112 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2113 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2114 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2115 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2116 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2117 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2118 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2119 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2120 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2121 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2122 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2123 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2124 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2125 else if (VecWidth == 128 && EltWidth == 16)
2126 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2127 else if (VecWidth == 256 && EltWidth == 16)
2128 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2129 else if (VecWidth == 512 && EltWidth == 16)
2130 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2131 else if (VecWidth == 128 && EltWidth == 8)
2132 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2133 else if (VecWidth == 256 && EltWidth == 8)
2134 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2135 else if (VecWidth == 512 && EltWidth == 8)
2136 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2137 else
2138 llvm_unreachable("Unexpected intrinsic");
2139
2140 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2141 CI.getArgOperand(2) };
2142
2143 // If this isn't index form we need to swap operand 0 and 1.
2144 if (!IndexForm)
2145 std::swap(Args[0], Args[1]);
2146
2147 Value *V = Builder.CreateIntrinsic(IID, Args);
2148 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2149 : Builder.CreateBitCast(CI.getArgOperand(1),
2150 Ty);
2151 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2152}
2153
2155 Intrinsic::ID IID) {
2156 Type *Ty = CI.getType();
2157 Value *Op0 = CI.getOperand(0);
2158 Value *Op1 = CI.getOperand(1);
2159 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2160
2161 if (CI.arg_size() == 4) { // For masked intrinsics.
2162 Value *VecSrc = CI.getOperand(2);
2163 Value *Mask = CI.getOperand(3);
2164 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2165 }
2166 return Res;
2167}
2168
2170 bool IsRotateRight) {
2171 Type *Ty = CI.getType();
2172 Value *Src = CI.getArgOperand(0);
2173 Value *Amt = CI.getArgOperand(1);
2174
2175 // Amount may be scalar immediate, in which case create a splat vector.
2176 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2177 // we only care about the lowest log2 bits anyway.
2178 if (Amt->getType() != Ty) {
2179 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2180 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2181 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2182 }
2183
2184 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2185 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2186
2187 if (CI.arg_size() == 4) { // For masked intrinsics.
2188 Value *VecSrc = CI.getOperand(2);
2189 Value *Mask = CI.getOperand(3);
2190 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2191 }
2192 return Res;
2193}
2194
2195static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2196 bool IsSigned) {
2197 Type *Ty = CI.getType();
2198 Value *LHS = CI.getArgOperand(0);
2199 Value *RHS = CI.getArgOperand(1);
2200
2201 CmpInst::Predicate Pred;
2202 switch (Imm) {
2203 case 0x0:
2204 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2205 break;
2206 case 0x1:
2207 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2208 break;
2209 case 0x2:
2210 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2211 break;
2212 case 0x3:
2213 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2214 break;
2215 case 0x4:
2216 Pred = ICmpInst::ICMP_EQ;
2217 break;
2218 case 0x5:
2219 Pred = ICmpInst::ICMP_NE;
2220 break;
2221 case 0x6:
2222 return Constant::getNullValue(Ty); // FALSE
2223 case 0x7:
2224 return Constant::getAllOnesValue(Ty); // TRUE
2225 default:
2226 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2227 }
2228
2229 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2230 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2231 return Ext;
2232}
2233
2235 bool IsShiftRight, bool ZeroMask) {
2236 Type *Ty = CI.getType();
2237 Value *Op0 = CI.getArgOperand(0);
2238 Value *Op1 = CI.getArgOperand(1);
2239 Value *Amt = CI.getArgOperand(2);
2240
2241 if (IsShiftRight)
2242 std::swap(Op0, Op1);
2243
2244 // Amount may be scalar immediate, in which case create a splat vector.
2245 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2246 // we only care about the lowest log2 bits anyway.
2247 if (Amt->getType() != Ty) {
2248 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2249 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2250 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2251 }
2252
2253 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2254 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2255
2256 unsigned NumArgs = CI.arg_size();
2257 if (NumArgs >= 4) { // For masked intrinsics.
2258 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2259 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2260 CI.getArgOperand(0);
2261 Value *Mask = CI.getOperand(NumArgs - 1);
2262 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2263 }
2264 return Res;
2265}
2266
2268 Value *Mask, bool Aligned) {
2269 const Align Alignment =
2270 Aligned
2271 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2272 : Align(1);
2273
2274 // If the mask is all ones just emit a regular store.
2275 if (const auto *C = dyn_cast<Constant>(Mask))
2276 if (C->isAllOnesValue())
2277 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2278
2279 // Convert the mask from an integer type to a vector of i1.
2280 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2281 Mask = getX86MaskVec(Builder, Mask, NumElts);
2282 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2283}
2284
2286 Value *Passthru, Value *Mask, bool Aligned) {
2287 Type *ValTy = Passthru->getType();
2288 const Align Alignment =
2289 Aligned
2290 ? Align(
2292 8)
2293 : Align(1);
2294
2295 // If the mask is all ones just emit a regular store.
2296 if (const auto *C = dyn_cast<Constant>(Mask))
2297 if (C->isAllOnesValue())
2298 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2299
2300 // Convert the mask from an integer type to a vector of i1.
2301 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2302 Mask = getX86MaskVec(Builder, Mask, NumElts);
2303 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2304}
2305
2306static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2307 Type *Ty = CI.getType();
2308 Value *Op0 = CI.getArgOperand(0);
2309 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2310 {Op0, Builder.getInt1(false)});
2311 if (CI.arg_size() == 3)
2312 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2313 return Res;
2314}
2315
2316static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2317 Type *Ty = CI.getType();
2318
2319 // Arguments have a vXi32 type so cast to vXi64.
2320 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2321 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2322
2323 if (IsSigned) {
2324 // Shift left then arithmetic shift right.
2325 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2326 LHS = Builder.CreateShl(LHS, ShiftAmt);
2327 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2328 RHS = Builder.CreateShl(RHS, ShiftAmt);
2329 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2330 } else {
2331 // Clear the upper bits.
2332 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2333 LHS = Builder.CreateAnd(LHS, Mask);
2334 RHS = Builder.CreateAnd(RHS, Mask);
2335 }
2336
2337 Value *Res = Builder.CreateMul(LHS, RHS);
2338
2339 if (CI.arg_size() == 4)
2340 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2341
2342 return Res;
2343}
2344
2345// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// ANDs an i1-vector comparison result with an optional x86 integer mask,
// widens the i1 vector to at least 8 lanes (padding via shuffle), and
// bitcasts the result to an iN integer (N >= 8). See the comment above.
// NOTE(review): the signature's first line (original 2346) and one interior
// line (2362 — presumably the zero second shuffle operand) were elided by
// the extraction; confirm against upstream.
2347 Value *Mask) {
2348 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2349 if (Mask) {
// Skip the AND entirely when the mask is a constant all-ones.
2350 const auto *C = dyn_cast<Constant>(Mask);
2351 if (!C || !C->isAllOnesValue())
2352 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2353 }
2354
2355 if (NumElts < 8) {
// Pad out to 8 lanes so the final bitcast produces at least an i8.
2356 int Indices[8];
2357 for (unsigned i = 0; i != NumElts; ++i)
2358 Indices[i] = i;
2359 for (unsigned i = NumElts; i != 8; ++i)
2360 Indices[i] = NumElts + i % NumElts;
2361 Vec = Builder.CreateShuffleVector(Vec,
2363 Indices);
2364 }
2365 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2366}
2367
// Upgrades AVX-512 masked integer compare intrinsics: maps the condition
// code CC to an icmp predicate (CC 3 is constant TRUE, CC 7 constant
// FALSE — the exact constants are on elided lines), then folds in the
// trailing mask operand via applyX86MaskOn1BitsVec.
// NOTE(review): the signature's first line (original 2368) and three
// interior lines (2375, 2378, 2381) were elided by the extraction;
// confirm against upstream.
2369 unsigned CC, bool Signed) {
2370 Value *Op0 = CI.getArgOperand(0);
2371 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2372
2373 Value *Cmp;
2374 if (CC == 3) {
2376 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2377 } else if (CC == 7) {
2379 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2380 } else {
2382 switch (CC) {
2383 default: llvm_unreachable("Unknown condition code");
2384 case 0: Pred = ICmpInst::ICMP_EQ; break;
2385 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2386 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2387 case 4: Pred = ICmpInst::ICMP_NE; break;
2388 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2389 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2390 }
2391 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2392 }
2393
// The mask is always the last call operand.
2394 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2395
2396 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2397}
2398
2399// Replace a masked intrinsic with an older unmasked intrinsic.
// Re-emits a masked intrinsic as the older unmasked intrinsic IID on the
// first two operands, then applies the mask (operand 3) selecting between
// the result and the passthru (operand 2).
// NOTE(review): the signature line (original 2400) was elided by the
// extraction; confirm the function name and parameters against upstream.
2401 Intrinsic::ID IID) {
2402 Value *Rep =
2403 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2404 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2405}
2406
// Upgrades a masked scalar move: if bit 0 of the i8 mask is set, lane 0 of
// the result comes from B, otherwise from Src; the remaining lanes come
// from A.
// NOTE(review): the signature line (original 2407) was elided by the
// extraction; confirm the function name against upstream.
2408 Value* A = CI.getArgOperand(0);
2409 Value* B = CI.getArgOperand(1);
2410 Value* Src = CI.getArgOperand(2);
2411 Value* Mask = CI.getArgOperand(3);
2412
// Test only the lowest mask bit.
2413 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2414 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2415 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2416 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2417 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2418 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2419}
2420
// Upgrades VPMOVM2* intrinsics: expands an integer mask into an i1 vector
// and sign-extends it to the call's full vector result type (all-ones /
// all-zeros lanes).
// NOTE(review): the signature line (original 2421) was elided by the
// extraction; confirm the function name against upstream.
2422 Value* Op = CI.getArgOperand(0);
2423 Type* ReturnOp = CI.getType();
2424 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2425 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2426 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2428
2429// Replace intrinsic with unmasked version and a select.
// Replaces an "avx512.mask.*" intrinsic call with the corresponding
// unmasked intrinsic followed by a select on the trailing mask operand.
// Name arrives with the "avx512.mask." prefix still attached (stripped on
// the first line); Rep receives the replacement value. Returns false when
// the name is not one of the handled families.
// NOTE(review): the signature's first line (original 2430) was elided —
// presumably `static bool upgradeAVX512MaskToSelect(StringRef Name,
// IRBuilder<> &Builder,` — confirm against upstream.
2431 CallBase &CI, Value *&Rep) {
2432 Name = Name.substr(12); // Remove avx512.mask.
2433
// IID is chosen from the name family plus the result's vector/element width.
2434 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2435 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2436 Intrinsic::ID IID;
2437 if (Name.starts_with("max.p")) {
2438 if (VecWidth == 128 && EltWidth == 32)
2439 IID = Intrinsic::x86_sse_max_ps;
2440 else if (VecWidth == 128 && EltWidth == 64)
2441 IID = Intrinsic::x86_sse2_max_pd;
2442 else if (VecWidth == 256 && EltWidth == 32)
2443 IID = Intrinsic::x86_avx_max_ps_256;
2444 else if (VecWidth == 256 && EltWidth == 64)
2445 IID = Intrinsic::x86_avx_max_pd_256;
2446 else
2447 llvm_unreachable("Unexpected intrinsic");
2448 } else if (Name.starts_with("min.p")) {
2449 if (VecWidth == 128 && EltWidth == 32)
2450 IID = Intrinsic::x86_sse_min_ps;
2451 else if (VecWidth == 128 && EltWidth == 64)
2452 IID = Intrinsic::x86_sse2_min_pd;
2453 else if (VecWidth == 256 && EltWidth == 32)
2454 IID = Intrinsic::x86_avx_min_ps_256;
2455 else if (VecWidth == 256 && EltWidth == 64)
2456 IID = Intrinsic::x86_avx_min_pd_256;
2457 else
2458 llvm_unreachable("Unexpected intrinsic");
2459 } else if (Name.starts_with("pshuf.b.")) {
2460 if (VecWidth == 128)
2461 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2462 else if (VecWidth == 256)
2463 IID = Intrinsic::x86_avx2_pshuf_b;
2464 else if (VecWidth == 512)
2465 IID = Intrinsic::x86_avx512_pshuf_b_512;
2466 else
2467 llvm_unreachable("Unexpected intrinsic");
2468 } else if (Name.starts_with("pmul.hr.sw.")) {
2469 if (VecWidth == 128)
2470 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2471 else if (VecWidth == 256)
2472 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2473 else if (VecWidth == 512)
2474 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2475 else
2476 llvm_unreachable("Unexpected intrinsic");
2477 } else if (Name.starts_with("pmulh.w.")) {
2478 if (VecWidth == 128)
2479 IID = Intrinsic::x86_sse2_pmulh_w;
2480 else if (VecWidth == 256)
2481 IID = Intrinsic::x86_avx2_pmulh_w;
2482 else if (VecWidth == 512)
2483 IID = Intrinsic::x86_avx512_pmulh_w_512;
2484 else
2485 llvm_unreachable("Unexpected intrinsic");
2486 } else if (Name.starts_with("pmulhu.w.")) {
2487 if (VecWidth == 128)
2488 IID = Intrinsic::x86_sse2_pmulhu_w;
2489 else if (VecWidth == 256)
2490 IID = Intrinsic::x86_avx2_pmulhu_w;
2491 else if (VecWidth == 512)
2492 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2493 else
2494 llvm_unreachable("Unexpected intrinsic");
2495 } else if (Name.starts_with("pmaddw.d.")) {
2496 if (VecWidth == 128)
2497 IID = Intrinsic::x86_sse2_pmadd_wd;
2498 else if (VecWidth == 256)
2499 IID = Intrinsic::x86_avx2_pmadd_wd;
2500 else if (VecWidth == 512)
2501 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2502 else
2503 llvm_unreachable("Unexpected intrinsic");
2504 } else if (Name.starts_with("pmaddubs.w.")) {
2505 if (VecWidth == 128)
2506 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2507 else if (VecWidth == 256)
2508 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2509 else if (VecWidth == 512)
2510 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2511 else
2512 llvm_unreachable("Unexpected intrinsic");
2513 } else if (Name.starts_with("packsswb.")) {
2514 if (VecWidth == 128)
2515 IID = Intrinsic::x86_sse2_packsswb_128;
2516 else if (VecWidth == 256)
2517 IID = Intrinsic::x86_avx2_packsswb;
2518 else if (VecWidth == 512)
2519 IID = Intrinsic::x86_avx512_packsswb_512;
2520 else
2521 llvm_unreachable("Unexpected intrinsic");
2522 } else if (Name.starts_with("packssdw.")) {
2523 if (VecWidth == 128)
2524 IID = Intrinsic::x86_sse2_packssdw_128;
2525 else if (VecWidth == 256)
2526 IID = Intrinsic::x86_avx2_packssdw;
2527 else if (VecWidth == 512)
2528 IID = Intrinsic::x86_avx512_packssdw_512;
2529 else
2530 llvm_unreachable("Unexpected intrinsic");
2531 } else if (Name.starts_with("packuswb.")) {
2532 if (VecWidth == 128)
2533 IID = Intrinsic::x86_sse2_packuswb_128;
2534 else if (VecWidth == 256)
2535 IID = Intrinsic::x86_avx2_packuswb;
2536 else if (VecWidth == 512)
2537 IID = Intrinsic::x86_avx512_packuswb_512;
2538 else
2539 llvm_unreachable("Unexpected intrinsic");
2540 } else if (Name.starts_with("packusdw.")) {
2541 if (VecWidth == 128)
2542 IID = Intrinsic::x86_sse41_packusdw;
2543 else if (VecWidth == 256)
2544 IID = Intrinsic::x86_avx2_packusdw;
2545 else if (VecWidth == 512)
2546 IID = Intrinsic::x86_avx512_packusdw_512;
2547 else
2548 llvm_unreachable("Unexpected intrinsic");
2549 } else if (Name.starts_with("vpermilvar.")) {
2550 if (VecWidth == 128 && EltWidth == 32)
2551 IID = Intrinsic::x86_avx_vpermilvar_ps;
2552 else if (VecWidth == 128 && EltWidth == 64)
2553 IID = Intrinsic::x86_avx_vpermilvar_pd;
2554 else if (VecWidth == 256 && EltWidth == 32)
2555 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2556 else if (VecWidth == 256 && EltWidth == 64)
2557 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2558 else if (VecWidth == 512 && EltWidth == 32)
2559 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2560 else if (VecWidth == 512 && EltWidth == 64)
2561 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2562 else
2563 llvm_unreachable("Unexpected intrinsic");
2564 } else if (Name == "cvtpd2dq.256") {
2565 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2566 } else if (Name == "cvtpd2ps.256") {
2567 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2568 } else if (Name == "cvttpd2dq.256") {
2569 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2570 } else if (Name == "cvttps2dq.128") {
2571 IID = Intrinsic::x86_sse2_cvttps2dq;
2572 } else if (Name == "cvttps2dq.256") {
2573 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2574 } else if (Name.starts_with("permvar.")) {
2575 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2576 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2577 IID = Intrinsic::x86_avx2_permps;
2578 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2579 IID = Intrinsic::x86_avx2_permd;
2580 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2581 IID = Intrinsic::x86_avx512_permvar_df_256;
2582 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2583 IID = Intrinsic::x86_avx512_permvar_di_256;
2584 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2585 IID = Intrinsic::x86_avx512_permvar_sf_512;
2586 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2587 IID = Intrinsic::x86_avx512_permvar_si_512;
2588 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2589 IID = Intrinsic::x86_avx512_permvar_df_512;
2590 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2591 IID = Intrinsic::x86_avx512_permvar_di_512;
2592 else if (VecWidth == 128 && EltWidth == 16)
2593 IID = Intrinsic::x86_avx512_permvar_hi_128;
2594 else if (VecWidth == 256 && EltWidth == 16)
2595 IID = Intrinsic::x86_avx512_permvar_hi_256;
2596 else if (VecWidth == 512 && EltWidth == 16)
2597 IID = Intrinsic::x86_avx512_permvar_hi_512;
2598 else if (VecWidth == 128 && EltWidth == 8)
2599 IID = Intrinsic::x86_avx512_permvar_qi_128;
2600 else if (VecWidth == 256 && EltWidth == 8)
2601 IID = Intrinsic::x86_avx512_permvar_qi_256;
2602 else if (VecWidth == 512 && EltWidth == 8)
2603 IID = Intrinsic::x86_avx512_permvar_qi_512;
2604 else
2605 llvm_unreachable("Unexpected intrinsic");
2606 } else if (Name.starts_with("dbpsadbw.")) {
2607 if (VecWidth == 128)
2608 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2609 else if (VecWidth == 256)
2610 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2611 else if (VecWidth == 512)
2612 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2613 else
2614 llvm_unreachable("Unexpected intrinsic");
2615 } else if (Name.starts_with("pmultishift.qb.")) {
2616 if (VecWidth == 128)
2617 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2618 else if (VecWidth == 256)
2619 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2620 else if (VecWidth == 512)
2621 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2622 else
2623 llvm_unreachable("Unexpected intrinsic");
2624 } else if (Name.starts_with("conflict.")) {
// Name[9] is the element-type letter ('d' or 'q') after "conflict.".
2625 if (Name[9] == 'd' && VecWidth == 128)
2626 IID = Intrinsic::x86_avx512_conflict_d_128;
2627 else if (Name[9] == 'd' && VecWidth == 256)
2628 IID = Intrinsic::x86_avx512_conflict_d_256;
2629 else if (Name[9] == 'd' && VecWidth == 512)
2630 IID = Intrinsic::x86_avx512_conflict_d_512;
2631 else if (Name[9] == 'q' && VecWidth == 128)
2632 IID = Intrinsic::x86_avx512_conflict_q_128;
2633 else if (Name[9] == 'q' && VecWidth == 256)
2634 IID = Intrinsic::x86_avx512_conflict_q_256;
2635 else if (Name[9] == 'q' && VecWidth == 512)
2636 IID = Intrinsic::x86_avx512_conflict_q_512;
2637 else
2638 llvm_unreachable("Unexpected intrinsic");
2639 } else if (Name.starts_with("pavg.")) {
// Name[5] is 'b' or 'w' after "pavg.".
2640 if (Name[5] == 'b' && VecWidth == 128)
2641 IID = Intrinsic::x86_sse2_pavg_b;
2642 else if (Name[5] == 'b' && VecWidth == 256)
2643 IID = Intrinsic::x86_avx2_pavg_b;
2644 else if (Name[5] == 'b' && VecWidth == 512)
2645 IID = Intrinsic::x86_avx512_pavg_b_512;
2646 else if (Name[5] == 'w' && VecWidth == 128)
2647 IID = Intrinsic::x86_sse2_pavg_w;
2648 else if (Name[5] == 'w' && VecWidth == 256)
2649 IID = Intrinsic::x86_avx2_pavg_w;
2650 else if (Name[5] == 'w' && VecWidth == 512)
2651 IID = Intrinsic::x86_avx512_pavg_w_512;
2652 else
2653 llvm_unreachable("Unexpected intrinsic");
2654 } else
2655 return false;
2656
// Call the unmasked intrinsic with the passthru and mask operands removed,
// then select between the result and the passthru using the mask.
2657 SmallVector<Value *, 4> Args(CI.args());
2658 Args.pop_back();
2659 Args.pop_back();
2660 Rep = Builder.CreateIntrinsic(IID, Args);
2661 unsigned NumArgs = CI.arg_size();
2662 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2663 CI.getArgOperand(NumArgs - 2));
2664 return true;
2665}
2666
2667/// Upgrade comment in call to inline asm that represents an objc retain release
2668/// marker.
2669void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2670 size_t Pos;
2671 if (AsmStr->find("mov\tfp") == 0 &&
2672 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2673 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2674 AsmStr->replace(Pos, 1, ";");
2675 }
2676}
2677
2679 Function *F, IRBuilder<> &Builder) {
  // Upgrades a call to a legacy NVVM intrinsic (name given with the
  // "llvm.nvvm." prefix already stripped) to equivalent generic LLVM IR or to
  // a current NVVM intrinsic.  Returns the replacement Value, or nullptr when
  // no upgrade applies.
  // NOTE(review): this listing is a lossy scrape of the upstream file — the
  // first line of the signature (upstream 2678) and several statement lines
  // below are missing; verify against upstream before editing.
2680 Value *Rep = nullptr;
2681
  // abs.i / abs.ll: integer absolute value as select(x >= 0, x, -x).
2682 if (Name == "abs.i" || Name == "abs.ll") {
2683 Value *Arg = CI->getArgOperand(0);
2684 Value *Neg = Builder.CreateNeg(Arg, "neg");
2685 Value *Cmp = Builder.CreateICmpSGE(
2686 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2687 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
  // abs.bf16 / abs.bf16x2: reinterpret the integer payload as bfloat (or a
  // 2 x bfloat vector), apply nvvm_fabs, and cast back to the call's type.
2688 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2689 Type *Ty = (Name == "abs.bf16")
2690 ? Builder.getBFloatTy()
2691 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2692 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2693 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2694 Rep = Builder.CreateBitCast(Abs, CI->getType());
  // fabs.{f,d} -> nvvm_fabs; fabs.ftz.f keeps flush-to-zero semantics.
2695 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2696 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2697 : Intrinsic::nvvm_fabs;
2698 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2699 } else if (Name.consume_front("ex2.approx.")) {
2700 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2701 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2702 : Intrinsic::nvvm_ex2_approx;
2703 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
  // atomic.load.add.f32/f64 on any address space -> native atomicrmw fadd.
  // NOTE(review): the AtomicOrdering argument line (upstream 2709) is missing
  // from this scrape.
2704 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2705 Name.starts_with("atomic.load.add.f64.p")) {
2706 Value *Ptr = CI->getArgOperand(0);
2707 Value *Val = CI->getArgOperand(1);
2708 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2710 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2711 Name.starts_with("atomic.load.dec.32.p")) {
  // atomic.load.inc/dec -> atomicrmw uinc_wrap (dec branch's UDecWrap line,
  // upstream 2715, and the ordering line, upstream 2717, are missing here).
2712 Value *Ptr = CI->getArgOperand(0);
2713 Value *Val = CI->getArgOperand(1);
2714 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2716 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2718 } else if (Name == "clz.ll") {
2719 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
  // Compute ctlz on the i64 operand (zero is defined: is_zero_poison=false)
  // then truncate to match the legacy i32 result type.
2720 Value *Arg = CI->getArgOperand(0);
2721 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2722 {Arg, Builder.getFalse()},
2723 /*FMFSource=*/nullptr, "ctlz");
2724 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2725 } else if (Name == "popc.ll") {
2726 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2727 // i64.
2728 Value *Arg = CI->getArgOperand(0);
2729 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2730 Arg, /*FMFSource=*/nullptr, "ctpop");
2731 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
  // h2f: reinterpret the i16 argument as half and extend to float.
2732 } else if (Name == "h2f") {
2733 Value *Cast =
2734 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
2735 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
  // bitcast.{f2i,i2f,ll2d,d2ll}: plain same-width bitcasts.
2736 } else if (Name.consume_front("bitcast.") &&
2737 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2738 Name == "d2ll")) {
2739 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
  // rotate.b32/b64 map to the funnel-shift intrinsics with both data
  // operands equal; the 64-bit forms zero-extend the i32 shift amount.
2740 } else if (Name == "rotate.b32") {
2741 Value *Arg = CI->getOperand(0);
2742 Value *ShiftAmt = CI->getOperand(1);
2743 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2744 {Arg, Arg, ShiftAmt});
2745 } else if (Name == "rotate.b64") {
2746 Type *Int64Ty = Builder.getInt64Ty();
2747 Value *Arg = CI->getOperand(0);
2748 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2749 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2750 {Arg, Arg, ZExtShiftAmt});
2751 } else if (Name == "rotate.right.b64") {
2752 Type *Int64Ty = Builder.getInt64Ty();
2753 Value *Arg = CI->getOperand(0);
2754 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2755 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2756 {Arg, Arg, ZExtShiftAmt});
  // swap.lo.hi.b64: a rotate by 32 swaps the two 32-bit halves.
2757 } else if (Name == "swap.lo.hi.b64") {
2758 Type *Int64Ty = Builder.getInt64Ty();
2759 Value *Arg = CI->getOperand(0);
2760 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2761 {Arg, Arg, Builder.getInt64(32)});
  // ptr.gen.to.<space> / ptr.<space>.to.gen: both directions become a plain
  // addrspacecast to the call's result type.
2762 } else if ((Name.consume_front("ptr.gen.to.") &&
2763 consumeNVVMPtrAddrSpace(Name)) ||
2764 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2765 Name.starts_with(".to.gen"))) {
2766 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
  // ldg.global.*: cast the pointer into the global address space, emit an
  // aligned load (alignment taken from the constant second operand), and tag
  // it !invariant.load.  Returns the load directly instead of going through
  // Rep.
2767 } else if (Name.consume_front("ldg.global")) {
2768 Value *Ptr = CI->getArgOperand(0);
2769 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2770 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2771 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2772 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2773 MDNode *MD = MDNode::get(Builder.getContext(), {});
2774 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2775 return LD;
2776 } else if (Name == "tanh.approx.f32") {
2777 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2778 FastMathFlags FMF;
2779 FMF.setApproxFunc();
2780 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2781 FMF);
  // Legacy CTA barriers: barrier0 synchronizes on barrier id 0; barrier.n
  // and bar.sync take the id as the first call operand.
2782 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2783 Value *Arg =
2784 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2785 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2786 {}, {Arg});
2787 } else if (Name == "barrier") {
2788 Rep = Builder.CreateIntrinsic(
2789 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2790 {CI->getArgOperand(0), CI->getArgOperand(1)});
2791 } else if (Name == "barrier.sync") {
2792 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2793 {CI->getArgOperand(0)});
2794 } else if (Name == "barrier.sync.cnt") {
2795 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2796 {CI->getArgOperand(0), CI->getArgOperand(1)});
  // barrier0.{popc,and,or}: the i32 predicate operand is first normalized to
  // an i1 (x != 0), the matching CTA-reduction intrinsic is emitted on
  // barrier id 0, and its result is zero-extended back to the legacy i32
  // return type.  NOTE(review): the StringSwitch head line (upstream 2803)
  // feeding the .Case chain is missing from this scrape.
2797 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2798 Name == "barrier0.or") {
2799 Value *C = CI->getArgOperand(0);
2800 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2801
2802 Intrinsic::ID IID =
2804 .Case("barrier0.popc",
2805 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2806 .Case("barrier0.and",
2807 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2808 .Case("barrier0.or",
2809 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2810 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2811 Rep = Builder.CreateZExt(Bar, CI->getType());
  // Fallback: remap to a new intrinsic id and rebuild the call, bitcasting
  // integer arguments to bfloat operands where the new signature requires
  // it.  NOTE(review): the line initializing IID (upstream 2813) and the
  // declaration of Args (upstream 2818) are missing from this scrape.
2812 } else {
2814 if (IID != Intrinsic::not_intrinsic &&
2815 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2816 rename(F);
2817 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2819 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2820 Value *Arg = CI->getArgOperand(I);
2821 Type *OldType = Arg->getType();
2822 Type *NewType = NewFn->getArg(I)->getType();
2823 Args.push_back(
2824 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2825 ? Builder.CreateBitCast(Arg, NewType)
2826 : Arg);
2827 }
2828 Rep = Builder.CreateCall(NewFn, Args);
  // If the legacy declaration returned an integer, cast the (possibly
  // bfloat) result back so existing users still type-check.
2829 if (F->getReturnType()->isIntegerTy())
2830 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2831 }
2832 }
2833
  // nullptr here means "no upgrade performed" to the caller.
2834 return Rep;
2835}
2836
2838 IRBuilder<> &Builder) {
2839 LLVMContext &C = F->getContext();
2840 Value *Rep = nullptr;
2841
2842 if (Name.starts_with("sse4a.movnt.")) {
2844 Elts.push_back(
2845 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2846 MDNode *Node = MDNode::get(C, Elts);
2847
2848 Value *Arg0 = CI->getArgOperand(0);
2849 Value *Arg1 = CI->getArgOperand(1);
2850
2851 // Nontemporal (unaligned) store of the 0'th element of the float/double
2852 // vector.
2853 Value *Extract =
2854 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2855
2856 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2857 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2858 } else if (Name.starts_with("avx.movnt.") ||
2859 Name.starts_with("avx512.storent.")) {
2861 Elts.push_back(
2862 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2863 MDNode *Node = MDNode::get(C, Elts);
2864
2865 Value *Arg0 = CI->getArgOperand(0);
2866 Value *Arg1 = CI->getArgOperand(1);
2867
2868 StoreInst *SI = Builder.CreateAlignedStore(
2869 Arg1, Arg0,
2871 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2872 } else if (Name == "sse2.storel.dq") {
2873 Value *Arg0 = CI->getArgOperand(0);
2874 Value *Arg1 = CI->getArgOperand(1);
2875
2876 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2877 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2878 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2879 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2880 } else if (Name.starts_with("sse.storeu.") ||
2881 Name.starts_with("sse2.storeu.") ||
2882 Name.starts_with("avx.storeu.")) {
2883 Value *Arg0 = CI->getArgOperand(0);
2884 Value *Arg1 = CI->getArgOperand(1);
2885 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2886 } else if (Name == "avx512.mask.store.ss") {
2887 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2888 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2889 Mask, false);
2890 } else if (Name.starts_with("avx512.mask.store")) {
2891 // "avx512.mask.storeu." or "avx512.mask.store."
2892 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2893 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2894 CI->getArgOperand(2), Aligned);
2895 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2896 // Upgrade packed integer vector compare intrinsics to compare instructions.
2897 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2898 bool CmpEq = Name[9] == 'e';
2899 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2900 CI->getArgOperand(0), CI->getArgOperand(1));
2901 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2902 } else if (Name.starts_with("avx512.broadcastm")) {
2903 Type *ExtTy = Type::getInt32Ty(C);
2904 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2905 ExtTy = Type::getInt64Ty(C);
2906 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2907 ExtTy->getPrimitiveSizeInBits();
2908 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2909 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2910 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2911 Value *Vec = CI->getArgOperand(0);
2912 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2913 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2914 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2915 } else if (Name.starts_with("avx.sqrt.p") ||
2916 Name.starts_with("sse2.sqrt.p") ||
2917 Name.starts_with("sse.sqrt.p")) {
2918 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2919 {CI->getArgOperand(0)});
2920 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2921 if (CI->arg_size() == 4 &&
2922 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2923 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2924 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2925 : Intrinsic::x86_avx512_sqrt_pd_512;
2926
2927 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2928 Rep = Builder.CreateIntrinsic(IID, Args);
2929 } else {
2930 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2931 {CI->getArgOperand(0)});
2932 }
2933 Rep =
2934 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2935 } else if (Name.starts_with("avx512.ptestm") ||
2936 Name.starts_with("avx512.ptestnm")) {
2937 Value *Op0 = CI->getArgOperand(0);
2938 Value *Op1 = CI->getArgOperand(1);
2939 Value *Mask = CI->getArgOperand(2);
2940 Rep = Builder.CreateAnd(Op0, Op1);
2941 llvm::Type *Ty = Op0->getType();
2943 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2946 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2947 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2948 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2949 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2950 ->getNumElements();
2951 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2952 Rep =
2953 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2954 } else if (Name.starts_with("avx512.kunpck")) {
2955 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2956 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2957 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2958 int Indices[64];
2959 for (unsigned i = 0; i != NumElts; ++i)
2960 Indices[i] = i;
2961
2962 // First extract half of each vector. This gives better codegen than
2963 // doing it in a single shuffle.
2964 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2965 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2966 // Concat the vectors.
2967 // NOTE: Operands have to be swapped to match intrinsic definition.
2968 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2969 Rep = Builder.CreateBitCast(Rep, CI->getType());
2970 } else if (Name == "avx512.kand.w") {
2971 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2972 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2973 Rep = Builder.CreateAnd(LHS, RHS);
2974 Rep = Builder.CreateBitCast(Rep, CI->getType());
2975 } else if (Name == "avx512.kandn.w") {
2976 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2977 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2978 LHS = Builder.CreateNot(LHS);
2979 Rep = Builder.CreateAnd(LHS, RHS);
2980 Rep = Builder.CreateBitCast(Rep, CI->getType());
2981 } else if (Name == "avx512.kor.w") {
2982 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2983 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2984 Rep = Builder.CreateOr(LHS, RHS);
2985 Rep = Builder.CreateBitCast(Rep, CI->getType());
2986 } else if (Name == "avx512.kxor.w") {
2987 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2988 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2989 Rep = Builder.CreateXor(LHS, RHS);
2990 Rep = Builder.CreateBitCast(Rep, CI->getType());
2991 } else if (Name == "avx512.kxnor.w") {
2992 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2993 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2994 LHS = Builder.CreateNot(LHS);
2995 Rep = Builder.CreateXor(LHS, RHS);
2996 Rep = Builder.CreateBitCast(Rep, CI->getType());
2997 } else if (Name == "avx512.knot.w") {
2998 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2999 Rep = Builder.CreateNot(Rep);
3000 Rep = Builder.CreateBitCast(Rep, CI->getType());
3001 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3002 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3003 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3004 Rep = Builder.CreateOr(LHS, RHS);
3005 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3006 Value *C;
3007 if (Name[14] == 'c')
3008 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
3009 else
3010 C = ConstantInt::getNullValue(Builder.getInt16Ty());
3011 Rep = Builder.CreateICmpEQ(Rep, C);
3012 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3013 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3014 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3015 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3016 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3017 Type *I32Ty = Type::getInt32Ty(C);
3018 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
3019 ConstantInt::get(I32Ty, 0));
3020 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
3021 ConstantInt::get(I32Ty, 0));
3022 Value *EltOp;
3023 if (Name.contains(".add."))
3024 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3025 else if (Name.contains(".sub."))
3026 EltOp = Builder.CreateFSub(Elt0, Elt1);
3027 else if (Name.contains(".mul."))
3028 EltOp = Builder.CreateFMul(Elt0, Elt1);
3029 else
3030 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3031 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3032 ConstantInt::get(I32Ty, 0));
3033 } else if (Name.starts_with("avx512.mask.pcmp")) {
3034 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3035 bool CmpEq = Name[16] == 'e';
3036 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3037 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3038 Type *OpTy = CI->getArgOperand(0)->getType();
3039 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3040 Intrinsic::ID IID;
3041 switch (VecWidth) {
3042 default:
3043 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3044 break;
3045 case 128:
3046 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3047 break;
3048 case 256:
3049 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3050 break;
3051 case 512:
3052 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3053 break;
3054 }
3055
3056 Rep =
3057 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3058 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3059 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3060 Type *OpTy = CI->getArgOperand(0)->getType();
3061 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3062 unsigned EltWidth = OpTy->getScalarSizeInBits();
3063 Intrinsic::ID IID;
3064 if (VecWidth == 128 && EltWidth == 32)
3065 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3066 else if (VecWidth == 256 && EltWidth == 32)
3067 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3068 else if (VecWidth == 512 && EltWidth == 32)
3069 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3070 else if (VecWidth == 128 && EltWidth == 64)
3071 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3072 else if (VecWidth == 256 && EltWidth == 64)
3073 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3074 else if (VecWidth == 512 && EltWidth == 64)
3075 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3076 else
3077 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3078
3079 Rep =
3080 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3081 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3082 } else if (Name.starts_with("avx512.cmp.p")) {
3083 SmallVector<Value *, 4> Args(CI->args());
3084 Type *OpTy = Args[0]->getType();
3085 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3086 unsigned EltWidth = OpTy->getScalarSizeInBits();
3087 Intrinsic::ID IID;
3088 if (VecWidth == 128 && EltWidth == 32)
3089 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3090 else if (VecWidth == 256 && EltWidth == 32)
3091 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3092 else if (VecWidth == 512 && EltWidth == 32)
3093 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3094 else if (VecWidth == 128 && EltWidth == 64)
3095 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3096 else if (VecWidth == 256 && EltWidth == 64)
3097 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3098 else if (VecWidth == 512 && EltWidth == 64)
3099 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3100 else
3101 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3102
3104 if (VecWidth == 512)
3105 std::swap(Mask, Args.back());
3106 Args.push_back(Mask);
3107
3108 Rep = Builder.CreateIntrinsic(IID, Args);
3109 } else if (Name.starts_with("avx512.mask.cmp.")) {
3110 // Integer compare intrinsics.
3111 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3112 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3113 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3114 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3115 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3116 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3117 Name.starts_with("avx512.cvtw2mask.") ||
3118 Name.starts_with("avx512.cvtd2mask.") ||
3119 Name.starts_with("avx512.cvtq2mask.")) {
3120 Value *Op = CI->getArgOperand(0);
3121 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3122 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3123 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3124 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3125 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3126 Name.starts_with("avx512.mask.pabs")) {
3127 Rep = upgradeAbs(Builder, *CI);
3128 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3129 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3130 Name.starts_with("avx512.mask.pmaxs")) {
3131 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3132 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3133 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3134 Name.starts_with("avx512.mask.pmaxu")) {
3135 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3136 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3137 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3138 Name.starts_with("avx512.mask.pmins")) {
3139 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3140 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3141 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3142 Name.starts_with("avx512.mask.pminu")) {
3143 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3144 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3145 Name == "avx512.pmulu.dq.512" ||
3146 Name.starts_with("avx512.mask.pmulu.dq.")) {
3147 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3148 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3149 Name == "avx512.pmul.dq.512" ||
3150 Name.starts_with("avx512.mask.pmul.dq.")) {
3151 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3152 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3153 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3154 Rep =
3155 Builder.CreateSIToFP(CI->getArgOperand(1),
3156 cast<VectorType>(CI->getType())->getElementType());
3157 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3158 } else if (Name == "avx512.cvtusi2sd") {
3159 Rep =
3160 Builder.CreateUIToFP(CI->getArgOperand(1),
3161 cast<VectorType>(CI->getType())->getElementType());
3162 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3163 } else if (Name == "sse2.cvtss2sd") {
3164 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3165 Rep = Builder.CreateFPExt(
3166 Rep, cast<VectorType>(CI->getType())->getElementType());
3167 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3168 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3169 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3170 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3171 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3172 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3173 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3174 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3175 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3176 Name == "avx512.mask.cvtqq2ps.256" ||
3177 Name == "avx512.mask.cvtqq2ps.512" ||
3178 Name == "avx512.mask.cvtuqq2ps.256" ||
3179 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3180 Name == "avx.cvt.ps2.pd.256" ||
3181 Name == "avx512.mask.cvtps2pd.128" ||
3182 Name == "avx512.mask.cvtps2pd.256") {
3183 auto *DstTy = cast<FixedVectorType>(CI->getType());
3184 Rep = CI->getArgOperand(0);
3185 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3186
3187 unsigned NumDstElts = DstTy->getNumElements();
3188 if (NumDstElts < SrcTy->getNumElements()) {
3189 assert(NumDstElts == 2 && "Unexpected vector size");
3190 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3191 }
3192
3193 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3194 bool IsUnsigned = Name.contains("cvtu");
3195 if (IsPS2PD)
3196 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3197 else if (CI->arg_size() == 4 &&
3198 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3199 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3200 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3201 : Intrinsic::x86_avx512_sitofp_round;
3202 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3203 {Rep, CI->getArgOperand(3)});
3204 } else {
3205 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3206 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3207 }
3208
3209 if (CI->arg_size() >= 3)
3210 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3211 CI->getArgOperand(1));
3212 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3213 Name.starts_with("vcvtph2ps.")) {
3214 auto *DstTy = cast<FixedVectorType>(CI->getType());
3215 Rep = CI->getArgOperand(0);
3216 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3217 unsigned NumDstElts = DstTy->getNumElements();
3218 if (NumDstElts != SrcTy->getNumElements()) {
3219 assert(NumDstElts == 4 && "Unexpected vector size");
3220 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3221 }
3222 Rep = Builder.CreateBitCast(
3223 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3224 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3225 if (CI->arg_size() >= 3)
3226 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3227 CI->getArgOperand(1));
3228 } else if (Name.starts_with("avx512.mask.load")) {
3229 // "avx512.mask.loadu." or "avx512.mask.load."
3230 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3231 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3232 CI->getArgOperand(2), Aligned);
3233 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3234 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3235 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3236 ResultTy->getNumElements());
3237
3238 Rep = Builder.CreateIntrinsic(
3239 Intrinsic::masked_expandload, ResultTy,
3240 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3241 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3242 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3243 Value *MaskVec =
3244 getX86MaskVec(Builder, CI->getArgOperand(2),
3245 cast<FixedVectorType>(ResultTy)->getNumElements());
3246
3247 Rep = Builder.CreateIntrinsic(
3248 Intrinsic::masked_compressstore, ResultTy,
3249 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3250 } else if (Name.starts_with("avx512.mask.compress.") ||
3251 Name.starts_with("avx512.mask.expand.")) {
3252 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3253
3254 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3255 ResultTy->getNumElements());
3256
3257 bool IsCompress = Name[12] == 'c';
3258 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3259 : Intrinsic::x86_avx512_mask_expand;
3260 Rep = Builder.CreateIntrinsic(
3261 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3262 } else if (Name.starts_with("xop.vpcom")) {
3263 bool IsSigned;
3264 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3265 Name.ends_with("uq"))
3266 IsSigned = false;
3267 else if (Name.ends_with("b") || Name.ends_with("w") ||
3268 Name.ends_with("d") || Name.ends_with("q"))
3269 IsSigned = true;
3270 else
3271 reportFatalUsageErrorWithCI("Intrinsic has unknown suffix", CI);
3272
3273 unsigned Imm;
3274 if (CI->arg_size() == 3) {
3275 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3276 } else {
3277 Name = Name.substr(9); // strip off "xop.vpcom"
3278 if (Name.starts_with("lt"))
3279 Imm = 0;
3280 else if (Name.starts_with("le"))
3281 Imm = 1;
3282 else if (Name.starts_with("gt"))
3283 Imm = 2;
3284 else if (Name.starts_with("ge"))
3285 Imm = 3;
3286 else if (Name.starts_with("eq"))
3287 Imm = 4;
3288 else if (Name.starts_with("ne"))
3289 Imm = 5;
3290 else if (Name.starts_with("false"))
3291 Imm = 6;
3292 else if (Name.starts_with("true"))
3293 Imm = 7;
3294 else
3295 llvm_unreachable("Unknown condition");
3296 }
3297
3298 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3299 } else if (Name.starts_with("xop.vpcmov")) {
3300 Value *Sel = CI->getArgOperand(2);
3301 Value *NotSel = Builder.CreateNot(Sel);
3302 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3303 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3304 Rep = Builder.CreateOr(Sel0, Sel1);
3305 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3306 Name.starts_with("avx512.mask.prol")) {
3307 Rep = upgradeX86Rotate(Builder, *CI, false);
3308 } else if (Name.starts_with("avx512.pror") ||
3309 Name.starts_with("avx512.mask.pror")) {
3310 Rep = upgradeX86Rotate(Builder, *CI, true);
3311 } else if (Name.starts_with("avx512.vpshld.") ||
3312 Name.starts_with("avx512.mask.vpshld") ||
3313 Name.starts_with("avx512.maskz.vpshld")) {
3314 bool ZeroMask = Name[11] == 'z';
3315 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3316 } else if (Name.starts_with("avx512.vpshrd.") ||
3317 Name.starts_with("avx512.mask.vpshrd") ||
3318 Name.starts_with("avx512.maskz.vpshrd")) {
3319 bool ZeroMask = Name[11] == 'z';
3320 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3321 } else if (Name == "sse42.crc32.64.8") {
3322 Value *Trunc0 =
3323 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3324 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3325 {Trunc0, CI->getArgOperand(1)});
3326 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3327 } else if (Name.starts_with("avx.vbroadcast.s") ||
3328 Name.starts_with("avx512.vbroadcast.s")) {
3329 // Replace broadcasts with a series of insertelements.
3330 auto *VecTy = cast<FixedVectorType>(CI->getType());
3331 Type *EltTy = VecTy->getElementType();
3332 unsigned EltNum = VecTy->getNumElements();
3333 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3334 Type *I32Ty = Type::getInt32Ty(C);
3335 Rep = PoisonValue::get(VecTy);
3336 for (unsigned I = 0; I < EltNum; ++I)
3337 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3338 } else if (Name.starts_with("sse41.pmovsx") ||
3339 Name.starts_with("sse41.pmovzx") ||
3340 Name.starts_with("avx2.pmovsx") ||
3341 Name.starts_with("avx2.pmovzx") ||
3342 Name.starts_with("avx512.mask.pmovsx") ||
3343 Name.starts_with("avx512.mask.pmovzx")) {
3344 auto *DstTy = cast<FixedVectorType>(CI->getType());
3345 unsigned NumDstElts = DstTy->getNumElements();
3346
3347 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3348 SmallVector<int, 8> ShuffleMask(NumDstElts);
3349 for (unsigned i = 0; i != NumDstElts; ++i)
3350 ShuffleMask[i] = i;
3351
3352 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3353
3354 bool DoSext = Name.contains("pmovsx");
3355 Rep =
3356 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3357 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3358 if (CI->arg_size() == 3)
3359 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3360 CI->getArgOperand(1));
3361 } else if (Name == "avx512.mask.pmov.qd.256" ||
3362 Name == "avx512.mask.pmov.qd.512" ||
3363 Name == "avx512.mask.pmov.wb.256" ||
3364 Name == "avx512.mask.pmov.wb.512") {
3365 Type *Ty = CI->getArgOperand(1)->getType();
3366 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3367 Rep =
3368 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3369 } else if (Name.starts_with("avx.vbroadcastf128") ||
3370 Name == "avx2.vbroadcasti128") {
3371 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3372 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3373 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3374 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3375 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3376 if (NumSrcElts == 2)
3377 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3378 else
3379 Rep = Builder.CreateShuffleVector(Load,
3380 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3381 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3382 Name.starts_with("avx512.mask.shuf.f")) {
3383 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3384 Type *VT = CI->getType();
3385 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3386 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3387 unsigned ControlBitsMask = NumLanes - 1;
3388 unsigned NumControlBits = NumLanes / 2;
3389 SmallVector<int, 8> ShuffleMask(0);
3390
3391 for (unsigned l = 0; l != NumLanes; ++l) {
3392 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3393 // We actually need the other source.
3394 if (l >= NumLanes / 2)
3395 LaneMask += NumLanes;
3396 for (unsigned i = 0; i != NumElementsInLane; ++i)
3397 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3398 }
3399 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3400 CI->getArgOperand(1), ShuffleMask);
3401 Rep =
3402 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3403 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3404 Name.starts_with("avx512.mask.broadcasti")) {
3405 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3406 ->getNumElements();
3407 unsigned NumDstElts =
3408 cast<FixedVectorType>(CI->getType())->getNumElements();
3409
3410 SmallVector<int, 8> ShuffleMask(NumDstElts);
3411 for (unsigned i = 0; i != NumDstElts; ++i)
3412 ShuffleMask[i] = i % NumSrcElts;
3413
3414 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3415 CI->getArgOperand(0), ShuffleMask);
3416 Rep =
3417 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3418 } else if (Name.starts_with("avx2.pbroadcast") ||
3419 Name.starts_with("avx2.vbroadcast") ||
3420 Name.starts_with("avx512.pbroadcast") ||
3421 Name.starts_with("avx512.mask.broadcast.s")) {
3422 // Replace vp?broadcasts with a vector shuffle.
3423 Value *Op = CI->getArgOperand(0);
3424 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3425 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3428 Rep = Builder.CreateShuffleVector(Op, M);
3429
3430 if (CI->arg_size() == 3)
3431 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3432 CI->getArgOperand(1));
3433 } else if (Name.starts_with("sse2.padds.") ||
3434 Name.starts_with("avx2.padds.") ||
3435 Name.starts_with("avx512.padds.") ||
3436 Name.starts_with("avx512.mask.padds.")) {
3437 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3438 } else if (Name.starts_with("sse2.psubs.") ||
3439 Name.starts_with("avx2.psubs.") ||
3440 Name.starts_with("avx512.psubs.") ||
3441 Name.starts_with("avx512.mask.psubs.")) {
3442 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3443 } else if (Name.starts_with("sse2.paddus.") ||
3444 Name.starts_with("avx2.paddus.") ||
3445 Name.starts_with("avx512.mask.paddus.")) {
3446 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3447 } else if (Name.starts_with("sse2.psubus.") ||
3448 Name.starts_with("avx2.psubus.") ||
3449 Name.starts_with("avx512.mask.psubus.")) {
3450 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3451 } else if (Name.starts_with("avx512.mask.palignr.")) {
3452 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3453 CI->getArgOperand(1), CI->getArgOperand(2),
3454 CI->getArgOperand(3), CI->getArgOperand(4),
3455 false);
3456 } else if (Name.starts_with("avx512.mask.valign.")) {
3458 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3459 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3460 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3461 // 128/256-bit shift left specified in bits.
3462 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3463 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3464 Shift / 8); // Shift is in bits.
3465 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3466 // 128/256-bit shift right specified in bits.
3467 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3468 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3469 Shift / 8); // Shift is in bits.
3470 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3471 Name == "avx512.psll.dq.512") {
3472 // 128/256/512-bit shift left specified in bytes.
3473 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3474 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3475 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3476 Name == "avx512.psrl.dq.512") {
3477 // 128/256/512-bit shift right specified in bytes.
3478 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3479 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3480 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3481 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3482 Name.starts_with("avx2.pblendd.")) {
3483 Value *Op0 = CI->getArgOperand(0);
3484 Value *Op1 = CI->getArgOperand(1);
3485 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3486 auto *VecTy = cast<FixedVectorType>(CI->getType());
3487 unsigned NumElts = VecTy->getNumElements();
3488
3489 SmallVector<int, 16> Idxs(NumElts);
3490 for (unsigned i = 0; i != NumElts; ++i)
3491 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3492
3493 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3494 } else if (Name.starts_with("avx.vinsertf128.") ||
3495 Name == "avx2.vinserti128" ||
3496 Name.starts_with("avx512.mask.insert")) {
3497 Value *Op0 = CI->getArgOperand(0);
3498 Value *Op1 = CI->getArgOperand(1);
3499 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3500 unsigned DstNumElts =
3501 cast<FixedVectorType>(CI->getType())->getNumElements();
3502 unsigned SrcNumElts =
3503 cast<FixedVectorType>(Op1->getType())->getNumElements();
3504 unsigned Scale = DstNumElts / SrcNumElts;
3505
3506 // Mask off the high bits of the immediate value; hardware ignores those.
3507 Imm = Imm % Scale;
3508
3509 // Extend the second operand into a vector the size of the destination.
3510 SmallVector<int, 8> Idxs(DstNumElts);
3511 for (unsigned i = 0; i != SrcNumElts; ++i)
3512 Idxs[i] = i;
3513 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3514 Idxs[i] = SrcNumElts;
3515 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3516
3517 // Insert the second operand into the first operand.
3518
3519 // Note that there is no guarantee that instruction lowering will actually
3520 // produce a vinsertf128 instruction for the created shuffles. In
3521 // particular, the 0 immediate case involves no lane changes, so it can
3522 // be handled as a blend.
3523
3524 // Example of shuffle mask for 32-bit elements:
3525 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3526 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3527
3528 // First fill with identify mask.
3529 for (unsigned i = 0; i != DstNumElts; ++i)
3530 Idxs[i] = i;
3531 // Then replace the elements where we need to insert.
3532 for (unsigned i = 0; i != SrcNumElts; ++i)
3533 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3534 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3535
3536 // If the intrinsic has a mask operand, handle that.
3537 if (CI->arg_size() == 5)
3538 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3539 CI->getArgOperand(3));
3540 } else if (Name.starts_with("avx.vextractf128.") ||
3541 Name == "avx2.vextracti128" ||
3542 Name.starts_with("avx512.mask.vextract")) {
3543 Value *Op0 = CI->getArgOperand(0);
3544 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3545 unsigned DstNumElts =
3546 cast<FixedVectorType>(CI->getType())->getNumElements();
3547 unsigned SrcNumElts =
3548 cast<FixedVectorType>(Op0->getType())->getNumElements();
3549 unsigned Scale = SrcNumElts / DstNumElts;
3550
3551 // Mask off the high bits of the immediate value; hardware ignores those.
3552 Imm = Imm % Scale;
3553
3554 // Get indexes for the subvector of the input vector.
3555 SmallVector<int, 8> Idxs(DstNumElts);
3556 for (unsigned i = 0; i != DstNumElts; ++i) {
3557 Idxs[i] = i + (Imm * DstNumElts);
3558 }
3559 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3560
3561 // If the intrinsic has a mask operand, handle that.
3562 if (CI->arg_size() == 4)
3563 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3564 CI->getArgOperand(2));
3565 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3566 Name.starts_with("avx512.mask.perm.di.")) {
3567 Value *Op0 = CI->getArgOperand(0);
3568 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3569 auto *VecTy = cast<FixedVectorType>(CI->getType());
3570 unsigned NumElts = VecTy->getNumElements();
3571
3572 SmallVector<int, 8> Idxs(NumElts);
3573 for (unsigned i = 0; i != NumElts; ++i)
3574 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3575
3576 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3577
3578 if (CI->arg_size() == 4)
3579 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3580 CI->getArgOperand(2));
3581 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3582 // The immediate permute control byte looks like this:
3583 // [1:0] - select 128 bits from sources for low half of destination
3584 // [2] - ignore
3585 // [3] - zero low half of destination
3586 // [5:4] - select 128 bits from sources for high half of destination
3587 // [6] - ignore
3588 // [7] - zero high half of destination
3589
3590 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3591
3592 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3593 unsigned HalfSize = NumElts / 2;
3594 SmallVector<int, 8> ShuffleMask(NumElts);
3595
3596 // Determine which operand(s) are actually in use for this instruction.
3597 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3598 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3599
3600 // If needed, replace operands based on zero mask.
3601 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3602 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3603
3604 // Permute low half of result.
3605 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3606 for (unsigned i = 0; i < HalfSize; ++i)
3607 ShuffleMask[i] = StartIndex + i;
3608
3609 // Permute high half of result.
3610 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3611 for (unsigned i = 0; i < HalfSize; ++i)
3612 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3613
3614 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3615
3616 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3617 Name.starts_with("avx512.mask.vpermil.p") ||
3618 Name.starts_with("avx512.mask.pshuf.d.")) {
3619 Value *Op0 = CI->getArgOperand(0);
3620 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3621 auto *VecTy = cast<FixedVectorType>(CI->getType());
3622 unsigned NumElts = VecTy->getNumElements();
3623 // Calculate the size of each index in the immediate.
3624 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3625 unsigned IdxMask = ((1 << IdxSize) - 1);
3626
3627 SmallVector<int, 8> Idxs(NumElts);
3628 // Lookup the bits for this element, wrapping around the immediate every
3629 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3630 // to offset by the first index of each group.
3631 for (unsigned i = 0; i != NumElts; ++i)
3632 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3633
3634 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3635
3636 if (CI->arg_size() == 4)
3637 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3638 CI->getArgOperand(2));
3639 } else if (Name == "sse2.pshufl.w" ||
3640 Name.starts_with("avx512.mask.pshufl.w.")) {
3641 Value *Op0 = CI->getArgOperand(0);
3642 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3643 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3644
3645 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3646 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3647
3648 SmallVector<int, 16> Idxs(NumElts);
3649 for (unsigned l = 0; l != NumElts; l += 8) {
3650 for (unsigned i = 0; i != 4; ++i)
3651 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3652 for (unsigned i = 4; i != 8; ++i)
3653 Idxs[i + l] = i + l;
3654 }
3655
3656 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3657
3658 if (CI->arg_size() == 4)
3659 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3660 CI->getArgOperand(2));
3661 } else if (Name == "sse2.pshufh.w" ||
3662 Name.starts_with("avx512.mask.pshufh.w.")) {
3663 Value *Op0 = CI->getArgOperand(0);
3664 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3665 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3666
3667 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3668 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3669
3670 SmallVector<int, 16> Idxs(NumElts);
3671 for (unsigned l = 0; l != NumElts; l += 8) {
3672 for (unsigned i = 0; i != 4; ++i)
3673 Idxs[i + l] = i + l;
3674 for (unsigned i = 0; i != 4; ++i)
3675 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3676 }
3677
3678 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3679
3680 if (CI->arg_size() == 4)
3681 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3682 CI->getArgOperand(2));
3683 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3684 Value *Op0 = CI->getArgOperand(0);
3685 Value *Op1 = CI->getArgOperand(1);
3686 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3687 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3688
3689 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3690 unsigned HalfLaneElts = NumLaneElts / 2;
3691
3692 SmallVector<int, 16> Idxs(NumElts);
3693 for (unsigned i = 0; i != NumElts; ++i) {
3694 // Base index is the starting element of the lane.
3695 Idxs[i] = i - (i % NumLaneElts);
3696 // If we are half way through the lane switch to the other source.
3697 if ((i % NumLaneElts) >= HalfLaneElts)
3698 Idxs[i] += NumElts;
3699 // Now select the specific element. By adding HalfLaneElts bits from
3700 // the immediate. Wrapping around the immediate every 8-bits.
3701 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3702 }
3703
3704 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3705
3706 Rep =
3707 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3708 } else if (Name.starts_with("avx512.mask.movddup") ||
3709 Name.starts_with("avx512.mask.movshdup") ||
3710 Name.starts_with("avx512.mask.movsldup")) {
3711 Value *Op0 = CI->getArgOperand(0);
3712 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3713 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3714
3715 unsigned Offset = 0;
3716 if (Name.starts_with("avx512.mask.movshdup."))
3717 Offset = 1;
3718
3719 SmallVector<int, 16> Idxs(NumElts);
3720 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3721 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3722 Idxs[i + l + 0] = i + l + Offset;
3723 Idxs[i + l + 1] = i + l + Offset;
3724 }
3725
3726 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3727
3728 Rep =
3729 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3730 } else if (Name.starts_with("avx512.mask.punpckl") ||
3731 Name.starts_with("avx512.mask.unpckl.")) {
3732 Value *Op0 = CI->getArgOperand(0);
3733 Value *Op1 = CI->getArgOperand(1);
3734 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3735 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3736
3737 SmallVector<int, 64> Idxs(NumElts);
3738 for (int l = 0; l != NumElts; l += NumLaneElts)
3739 for (int i = 0; i != NumLaneElts; ++i)
3740 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3741
3742 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3743
3744 Rep =
3745 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3746 } else if (Name.starts_with("avx512.mask.punpckh") ||
3747 Name.starts_with("avx512.mask.unpckh.")) {
3748 Value *Op0 = CI->getArgOperand(0);
3749 Value *Op1 = CI->getArgOperand(1);
3750 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3751 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3752
3753 SmallVector<int, 64> Idxs(NumElts);
3754 for (int l = 0; l != NumElts; l += NumLaneElts)
3755 for (int i = 0; i != NumLaneElts; ++i)
3756 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3757
3758 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3759
3760 Rep =
3761 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3762 } else if (Name.starts_with("avx512.mask.and.") ||
3763 Name.starts_with("avx512.mask.pand.")) {
3764 VectorType *FTy = cast<VectorType>(CI->getType());
3766 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3767 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3768 Rep = Builder.CreateBitCast(Rep, FTy);
3769 Rep =
3770 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3771 } else if (Name.starts_with("avx512.mask.andn.") ||
3772 Name.starts_with("avx512.mask.pandn.")) {
3773 VectorType *FTy = cast<VectorType>(CI->getType());
3775 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3776 Rep = Builder.CreateAnd(Rep,
3777 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3778 Rep = Builder.CreateBitCast(Rep, FTy);
3779 Rep =
3780 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3781 } else if (Name.starts_with("avx512.mask.or.") ||
3782 Name.starts_with("avx512.mask.por.")) {
3783 VectorType *FTy = cast<VectorType>(CI->getType());
3785 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3786 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3787 Rep = Builder.CreateBitCast(Rep, FTy);
3788 Rep =
3789 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3790 } else if (Name.starts_with("avx512.mask.xor.") ||
3791 Name.starts_with("avx512.mask.pxor.")) {
3792 VectorType *FTy = cast<VectorType>(CI->getType());
3794 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3795 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3796 Rep = Builder.CreateBitCast(Rep, FTy);
3797 Rep =
3798 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3799 } else if (Name.starts_with("avx512.mask.padd.")) {
3800 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3801 Rep =
3802 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3803 } else if (Name.starts_with("avx512.mask.psub.")) {
3804 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3805 Rep =
3806 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3807 } else if (Name.starts_with("avx512.mask.pmull.")) {
3808 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3809 Rep =
3810 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3811 } else if (Name.starts_with("avx512.mask.add.p")) {
3812 if (Name.ends_with(".512")) {
3813 Intrinsic::ID IID;
3814 if (Name[17] == 's')
3815 IID = Intrinsic::x86_avx512_add_ps_512;
3816 else
3817 IID = Intrinsic::x86_avx512_add_pd_512;
3818
3819 Rep = Builder.CreateIntrinsic(
3820 IID,
3821 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3822 } else {
3823 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3824 }
3825 Rep =
3826 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3827 } else if (Name.starts_with("avx512.mask.div.p")) {
3828 if (Name.ends_with(".512")) {
3829 Intrinsic::ID IID;
3830 if (Name[17] == 's')
3831 IID = Intrinsic::x86_avx512_div_ps_512;
3832 else
3833 IID = Intrinsic::x86_avx512_div_pd_512;
3834
3835 Rep = Builder.CreateIntrinsic(
3836 IID,
3837 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3838 } else {
3839 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3840 }
3841 Rep =
3842 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3843 } else if (Name.starts_with("avx512.mask.mul.p")) {
3844 if (Name.ends_with(".512")) {
3845 Intrinsic::ID IID;
3846 if (Name[17] == 's')
3847 IID = Intrinsic::x86_avx512_mul_ps_512;
3848 else
3849 IID = Intrinsic::x86_avx512_mul_pd_512;
3850
3851 Rep = Builder.CreateIntrinsic(
3852 IID,
3853 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3854 } else {
3855 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3856 }
3857 Rep =
3858 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3859 } else if (Name.starts_with("avx512.mask.sub.p")) {
3860 if (Name.ends_with(".512")) {
3861 Intrinsic::ID IID;
3862 if (Name[17] == 's')
3863 IID = Intrinsic::x86_avx512_sub_ps_512;
3864 else
3865 IID = Intrinsic::x86_avx512_sub_pd_512;
3866
3867 Rep = Builder.CreateIntrinsic(
3868 IID,
3869 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3870 } else {
3871 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3872 }
3873 Rep =
3874 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3875 } else if ((Name.starts_with("avx512.mask.max.p") ||
3876 Name.starts_with("avx512.mask.min.p")) &&
3877 Name.drop_front(18) == ".512") {
3878 bool IsDouble = Name[17] == 'd';
3879 bool IsMin = Name[13] == 'i';
3880 static const Intrinsic::ID MinMaxTbl[2][2] = {
3881 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3882 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3883 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3884
3885 Rep = Builder.CreateIntrinsic(
3886 IID,
3887 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3888 Rep =
3889 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3890 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3891 Rep =
3892 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3893 {CI->getArgOperand(0), Builder.getInt1(false)});
3894 Rep =
3895 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3896 } else if (Name.starts_with("avx512.mask.psll")) {
3897 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3898 bool IsVariable = Name[16] == 'v';
3899 char Size = Name[16] == '.' ? Name[17]
3900 : Name[17] == '.' ? Name[18]
3901 : Name[18] == '.' ? Name[19]
3902 : Name[20];
3903
3904 Intrinsic::ID IID;
3905 if (IsVariable && Name[17] != '.') {
3906 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3907 IID = Intrinsic::x86_avx2_psllv_q;
3908 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3909 IID = Intrinsic::x86_avx2_psllv_q_256;
3910 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3911 IID = Intrinsic::x86_avx2_psllv_d;
3912 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3913 IID = Intrinsic::x86_avx2_psllv_d_256;
3914 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3915 IID = Intrinsic::x86_avx512_psllv_w_128;
3916 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3917 IID = Intrinsic::x86_avx512_psllv_w_256;
3918 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3919 IID = Intrinsic::x86_avx512_psllv_w_512;
3920 else
3921 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3922 } else if (Name.ends_with(".128")) {
3923 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3924 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3925 : Intrinsic::x86_sse2_psll_d;
3926 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3927 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3928 : Intrinsic::x86_sse2_psll_q;
3929 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3930 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3931 : Intrinsic::x86_sse2_psll_w;
3932 else
3933 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3934 } else if (Name.ends_with(".256")) {
3935 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3936 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3937 : Intrinsic::x86_avx2_psll_d;
3938 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3939 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3940 : Intrinsic::x86_avx2_psll_q;
3941 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3942 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3943 : Intrinsic::x86_avx2_psll_w;
3944 else
3945 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3946 } else {
3947 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3948 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3949 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3950 : Intrinsic::x86_avx512_psll_d_512;
3951 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3952 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3953 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3954 : Intrinsic::x86_avx512_psll_q_512;
3955 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3956 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3957 : Intrinsic::x86_avx512_psll_w_512;
3958 else
3959 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3960 }
3961
3962 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3963 } else if (Name.starts_with("avx512.mask.psrl")) {
3964 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3965 bool IsVariable = Name[16] == 'v';
3966 char Size = Name[16] == '.' ? Name[17]
3967 : Name[17] == '.' ? Name[18]
3968 : Name[18] == '.' ? Name[19]
3969 : Name[20];
3970
3971 Intrinsic::ID IID;
3972 if (IsVariable && Name[17] != '.') {
3973 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3974 IID = Intrinsic::x86_avx2_psrlv_q;
3975 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3976 IID = Intrinsic::x86_avx2_psrlv_q_256;
3977 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3978 IID = Intrinsic::x86_avx2_psrlv_d;
3979 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3980 IID = Intrinsic::x86_avx2_psrlv_d_256;
3981 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3982 IID = Intrinsic::x86_avx512_psrlv_w_128;
3983 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3984 IID = Intrinsic::x86_avx512_psrlv_w_256;
3985 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3986 IID = Intrinsic::x86_avx512_psrlv_w_512;
3987 else
3988 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3989 } else if (Name.ends_with(".128")) {
3990 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3991 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3992 : Intrinsic::x86_sse2_psrl_d;
3993 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3994 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3995 : Intrinsic::x86_sse2_psrl_q;
3996 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3997 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3998 : Intrinsic::x86_sse2_psrl_w;
3999 else
4000 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4001 } else if (Name.ends_with(".256")) {
4002 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4003 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4004 : Intrinsic::x86_avx2_psrl_d;
4005 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4006 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4007 : Intrinsic::x86_avx2_psrl_q;
4008 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4009 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4010 : Intrinsic::x86_avx2_psrl_w;
4011 else
4012 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4013 } else {
4014 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4015 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4016 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4017 : Intrinsic::x86_avx512_psrl_d_512;
4018 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4019 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4020 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4021 : Intrinsic::x86_avx512_psrl_q_512;
4022 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
4023 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4024 : Intrinsic::x86_avx512_psrl_w_512;
4025 else
4026 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4027 }
4028
4029 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4030 } else if (Name.starts_with("avx512.mask.psra")) {
4031 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4032 bool IsVariable = Name[16] == 'v';
4033 char Size = Name[16] == '.' ? Name[17]
4034 : Name[17] == '.' ? Name[18]
4035 : Name[18] == '.' ? Name[19]
4036 : Name[20];
4037
4038 Intrinsic::ID IID;
4039 if (IsVariable && Name[17] != '.') {
4040 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4041 IID = Intrinsic::x86_avx2_psrav_d;
4042 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4043 IID = Intrinsic::x86_avx2_psrav_d_256;
4044 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4045 IID = Intrinsic::x86_avx512_psrav_w_128;
4046 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4047 IID = Intrinsic::x86_avx512_psrav_w_256;
4048 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4049 IID = Intrinsic::x86_avx512_psrav_w_512;
4050 else
4051 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4052 } else if (Name.ends_with(".128")) {
4053 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4054 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4055 : Intrinsic::x86_sse2_psra_d;
4056 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4057 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4058 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4059 : Intrinsic::x86_avx512_psra_q_128;
4060 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4061 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4062 : Intrinsic::x86_sse2_psra_w;
4063 else
4064 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4065 } else if (Name.ends_with(".256")) {
4066 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4067 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4068 : Intrinsic::x86_avx2_psra_d;
4069 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4070 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4071 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4072 : Intrinsic::x86_avx512_psra_q_256;
4073 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4074 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4075 : Intrinsic::x86_avx2_psra_w;
4076 else
4077 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4078 } else {
4079 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4080 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4081 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4082 : Intrinsic::x86_avx512_psra_d_512;
4083 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4084 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4085 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4086 : Intrinsic::x86_avx512_psra_q_512;
4087 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4088 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4089 : Intrinsic::x86_avx512_psra_w_512;
4090 else
4091 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4092 }
4093
4094 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4095 } else if (Name.starts_with("avx512.mask.move.s")) {
4096 Rep = upgradeMaskedMove(Builder, *CI);
4097 } else if (Name.starts_with("avx512.cvtmask2")) {
4098 Rep = upgradeMaskToInt(Builder, *CI);
4099 } else if (Name.ends_with(".movntdqa")) {
4101 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4102
4103 LoadInst *LI = Builder.CreateAlignedLoad(
4104 CI->getType(), CI->getArgOperand(0),
4106 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4107 Rep = LI;
4108 } else if (Name.starts_with("fma.vfmadd.") ||
4109 Name.starts_with("fma.vfmsub.") ||
4110 Name.starts_with("fma.vfnmadd.") ||
4111 Name.starts_with("fma.vfnmsub.")) {
4112 bool NegMul = Name[6] == 'n';
4113 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4114 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4115
4116 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4117 CI->getArgOperand(2)};
4118
4119 if (IsScalar) {
4120 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4121 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4122 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4123 }
4124
4125 if (NegMul && !IsScalar)
4126 Ops[0] = Builder.CreateFNeg(Ops[0]);
4127 if (NegMul && IsScalar)
4128 Ops[1] = Builder.CreateFNeg(Ops[1]);
4129 if (NegAcc)
4130 Ops[2] = Builder.CreateFNeg(Ops[2]);
4131
4132 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4133
4134 if (IsScalar)
4135 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4136 } else if (Name.starts_with("fma4.vfmadd.s")) {
4137 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4138 CI->getArgOperand(2)};
4139
4140 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4141 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4142 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4143
4144 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4145
4146 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4147 Rep, (uint64_t)0);
4148 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4149 Name.starts_with("avx512.maskz.vfmadd.s") ||
4150 Name.starts_with("avx512.mask3.vfmadd.s") ||
4151 Name.starts_with("avx512.mask3.vfmsub.s") ||
4152 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4153 bool IsMask3 = Name[11] == '3';
4154 bool IsMaskZ = Name[11] == 'z';
4155 // Drop the "avx512.mask." to make it easier.
4156 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4157 bool NegMul = Name[2] == 'n';
4158 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4159
4160 Value *A = CI->getArgOperand(0);
4161 Value *B = CI->getArgOperand(1);
4162 Value *C = CI->getArgOperand(2);
4163
4164 if (NegMul && (IsMask3 || IsMaskZ))
4165 A = Builder.CreateFNeg(A);
4166 if (NegMul && !(IsMask3 || IsMaskZ))
4167 B = Builder.CreateFNeg(B);
4168 if (NegAcc)
4169 C = Builder.CreateFNeg(C);
4170
4171 A = Builder.CreateExtractElement(A, (uint64_t)0);
4172 B = Builder.CreateExtractElement(B, (uint64_t)0);
4173 C = Builder.CreateExtractElement(C, (uint64_t)0);
4174
4175 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4176 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4177 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4178
4179 Intrinsic::ID IID;
4180 if (Name.back() == 'd')
4181 IID = Intrinsic::x86_avx512_vfmadd_f64;
4182 else
4183 IID = Intrinsic::x86_avx512_vfmadd_f32;
4184 Rep = Builder.CreateIntrinsic(IID, Ops);
4185 } else {
4186 Rep = Builder.CreateFMA(A, B, C);
4187 }
4188
4189 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4190 : IsMask3 ? C
4191 : A;
4192
4193 // For Mask3 with NegAcc, we need to create a new extractelement that
4194 // avoids the negation above.
4195 if (NegAcc && IsMask3)
4196 PassThru =
4197 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4198
4199 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4200 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4201 (uint64_t)0);
4202 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4203 Name.starts_with("avx512.mask.vfnmadd.p") ||
4204 Name.starts_with("avx512.mask.vfnmsub.p") ||
4205 Name.starts_with("avx512.mask3.vfmadd.p") ||
4206 Name.starts_with("avx512.mask3.vfmsub.p") ||
4207 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4208 Name.starts_with("avx512.maskz.vfmadd.p")) {
4209 bool IsMask3 = Name[11] == '3';
4210 bool IsMaskZ = Name[11] == 'z';
4211 // Drop the "avx512.mask." to make it easier.
4212 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4213 bool NegMul = Name[2] == 'n';
4214 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4215
4216 Value *A = CI->getArgOperand(0);
4217 Value *B = CI->getArgOperand(1);
4218 Value *C = CI->getArgOperand(2);
4219
4220 if (NegMul && (IsMask3 || IsMaskZ))
4221 A = Builder.CreateFNeg(A);
4222 if (NegMul && !(IsMask3 || IsMaskZ))
4223 B = Builder.CreateFNeg(B);
4224 if (NegAcc)
4225 C = Builder.CreateFNeg(C);
4226
4227 if (CI->arg_size() == 5 &&
4228 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4229 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4230 Intrinsic::ID IID;
4231 // Check the character before ".512" in string.
4232 if (Name[Name.size() - 5] == 's')
4233 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4234 else
4235 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4236
4237 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4238 } else {
4239 Rep = Builder.CreateFMA(A, B, C);
4240 }
4241
4242 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4243 : IsMask3 ? CI->getArgOperand(2)
4244 : CI->getArgOperand(0);
4245
4246 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4247 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4248 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4249 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4250 Intrinsic::ID IID;
4251 if (VecWidth == 128 && EltWidth == 32)
4252 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4253 else if (VecWidth == 256 && EltWidth == 32)
4254 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4255 else if (VecWidth == 128 && EltWidth == 64)
4256 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4257 else if (VecWidth == 256 && EltWidth == 64)
4258 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4259 else
4260 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4261
4262 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4263 CI->getArgOperand(2)};
4264 Ops[2] = Builder.CreateFNeg(Ops[2]);
4265 Rep = Builder.CreateIntrinsic(IID, Ops);
4266 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4267 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4268 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4269 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4270 bool IsMask3 = Name[11] == '3';
4271 bool IsMaskZ = Name[11] == 'z';
4272 // Drop the "avx512.mask." to make it easier.
4273 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4274 bool IsSubAdd = Name[3] == 's';
4275 if (CI->arg_size() == 5) {
4276 Intrinsic::ID IID;
4277 // Check the character before ".512" in string.
4278 if (Name[Name.size() - 5] == 's')
4279 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4280 else
4281 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4282
4283 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4284 CI->getArgOperand(2), CI->getArgOperand(4)};
4285 if (IsSubAdd)
4286 Ops[2] = Builder.CreateFNeg(Ops[2]);
4287
4288 Rep = Builder.CreateIntrinsic(IID, Ops);
4289 } else {
4290 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4291
4292 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4293 CI->getArgOperand(2)};
4294
4296 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4297 Value *Odd = Builder.CreateCall(FMA, Ops);
4298 Ops[2] = Builder.CreateFNeg(Ops[2]);
4299 Value *Even = Builder.CreateCall(FMA, Ops);
4300
4301 if (IsSubAdd)
4302 std::swap(Even, Odd);
4303
4304 SmallVector<int, 32> Idxs(NumElts);
4305 for (int i = 0; i != NumElts; ++i)
4306 Idxs[i] = i + (i % 2) * NumElts;
4307
4308 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4309 }
4310
4311 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4312 : IsMask3 ? CI->getArgOperand(2)
4313 : CI->getArgOperand(0);
4314
4315 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4316 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4317 Name.starts_with("avx512.maskz.pternlog.")) {
4318 bool ZeroMask = Name[11] == 'z';
4319 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4320 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4321 Intrinsic::ID IID;
4322 if (VecWidth == 128 && EltWidth == 32)
4323 IID = Intrinsic::x86_avx512_pternlog_d_128;
4324 else if (VecWidth == 256 && EltWidth == 32)
4325 IID = Intrinsic::x86_avx512_pternlog_d_256;
4326 else if (VecWidth == 512 && EltWidth == 32)
4327 IID = Intrinsic::x86_avx512_pternlog_d_512;
4328 else if (VecWidth == 128 && EltWidth == 64)
4329 IID = Intrinsic::x86_avx512_pternlog_q_128;
4330 else if (VecWidth == 256 && EltWidth == 64)
4331 IID = Intrinsic::x86_avx512_pternlog_q_256;
4332 else if (VecWidth == 512 && EltWidth == 64)
4333 IID = Intrinsic::x86_avx512_pternlog_q_512;
4334 else
4335 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4336
4337 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4338 CI->getArgOperand(2), CI->getArgOperand(3)};
4339 Rep = Builder.CreateIntrinsic(IID, Args);
4340 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4341 : CI->getArgOperand(0);
4342 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4343 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4344 Name.starts_with("avx512.maskz.vpmadd52")) {
4345 bool ZeroMask = Name[11] == 'z';
4346 bool High = Name[20] == 'h' || Name[21] == 'h';
4347 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4348 Intrinsic::ID IID;
4349 if (VecWidth == 128 && !High)
4350 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4351 else if (VecWidth == 256 && !High)
4352 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4353 else if (VecWidth == 512 && !High)
4354 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4355 else if (VecWidth == 128 && High)
4356 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4357 else if (VecWidth == 256 && High)
4358 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4359 else if (VecWidth == 512 && High)
4360 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4361 else
4362 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4363
4364 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4365 CI->getArgOperand(2)};
4366 Rep = Builder.CreateIntrinsic(IID, Args);
4367 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4368 : CI->getArgOperand(0);
4369 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4370 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4371 Name.starts_with("avx512.mask.vpermt2var.") ||
4372 Name.starts_with("avx512.maskz.vpermt2var.")) {
4373 bool ZeroMask = Name[11] == 'z';
4374 bool IndexForm = Name[17] == 'i';
4375 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4376 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4377 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4378 Name.starts_with("avx512.mask.vpdpbusds.") ||
4379 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4380 bool ZeroMask = Name[11] == 'z';
4381 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4382 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4383 Intrinsic::ID IID;
4384 if (VecWidth == 128 && !IsSaturating)
4385 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4386 else if (VecWidth == 256 && !IsSaturating)
4387 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4388 else if (VecWidth == 512 && !IsSaturating)
4389 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4390 else if (VecWidth == 128 && IsSaturating)
4391 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4392 else if (VecWidth == 256 && IsSaturating)
4393 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4394 else if (VecWidth == 512 && IsSaturating)
4395 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4396 else
4397 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4398
4399 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4400 CI->getArgOperand(2)};
4401
4402 // Input arguments types were incorrectly set to vectors of i32 before but
4403 // they should be vectors of i8. Insert bit cast when encountering the old
4404 // types
4405 if (Args[1]->getType()->isVectorTy() &&
4406 cast<VectorType>(Args[1]->getType())
4407 ->getElementType()
4408 ->isIntegerTy(32) &&
4409 Args[2]->getType()->isVectorTy() &&
4410 cast<VectorType>(Args[2]->getType())
4411 ->getElementType()
4412 ->isIntegerTy(32)) {
4413 Type *NewArgType = nullptr;
4414 if (VecWidth == 128)
4415 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4416 else if (VecWidth == 256)
4417 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4418 else if (VecWidth == 512)
4419 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4420 else
4421 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4422 CI);
4423
4424 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4425 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4426 }
4427
4428 Rep = Builder.CreateIntrinsic(IID, Args);
4429 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4430 : CI->getArgOperand(0);
4431 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4432 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4433 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4434 Name.starts_with("avx512.mask.vpdpwssds.") ||
4435 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4436 bool ZeroMask = Name[11] == 'z';
4437 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4438 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4439 Intrinsic::ID IID;
4440 if (VecWidth == 128 && !IsSaturating)
4441 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4442 else if (VecWidth == 256 && !IsSaturating)
4443 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4444 else if (VecWidth == 512 && !IsSaturating)
4445 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4446 else if (VecWidth == 128 && IsSaturating)
4447 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4448 else if (VecWidth == 256 && IsSaturating)
4449 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4450 else if (VecWidth == 512 && IsSaturating)
4451 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4452 else
4453 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4454
4455 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4456 CI->getArgOperand(2)};
4457
4458 // Input arguments types were incorrectly set to vectors of i32 before but
4459 // they should be vectors of i16. Insert bit cast when encountering the old
4460 // types
4461 if (Args[1]->getType()->isVectorTy() &&
4462 cast<VectorType>(Args[1]->getType())
4463 ->getElementType()
4464 ->isIntegerTy(32) &&
4465 Args[2]->getType()->isVectorTy() &&
4466 cast<VectorType>(Args[2]->getType())
4467 ->getElementType()
4468 ->isIntegerTy(32)) {
4469 Type *NewArgType = nullptr;
4470 if (VecWidth == 128)
4471 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4472 else if (VecWidth == 256)
4473 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4474 else if (VecWidth == 512)
4475 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4476 else
4477 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4478 CI);
4479
4480 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4481 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4482 }
4483
4484 Rep = Builder.CreateIntrinsic(IID, Args);
4485 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4486 : CI->getArgOperand(0);
4487 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4488 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4489 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4490 Name == "subborrow.u32" || Name == "subborrow.u64") {
4491 Intrinsic::ID IID;
4492 if (Name[0] == 'a' && Name.back() == '2')
4493 IID = Intrinsic::x86_addcarry_32;
4494 else if (Name[0] == 'a' && Name.back() == '4')
4495 IID = Intrinsic::x86_addcarry_64;
4496 else if (Name[0] == 's' && Name.back() == '2')
4497 IID = Intrinsic::x86_subborrow_32;
4498 else if (Name[0] == 's' && Name.back() == '4')
4499 IID = Intrinsic::x86_subborrow_64;
4500 else
4501 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4502
4503 // Make a call with 3 operands.
4504 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4505 CI->getArgOperand(2)};
4506 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4507
4508 // Extract the second result and store it.
4509 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4510 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4511 // Replace the original call result with the first result of the new call.
4512 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4513
4514 CI->replaceAllUsesWith(CF);
4515 Rep = nullptr;
4516 } else if (Name.starts_with("avx512.mask.") &&
4517 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4518 // Rep will be updated by the call in the condition.
4519 } else
4520 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4521
4522 return Rep;
4523}
4524
4526 Function *F, IRBuilder<> &Builder) {
4527 if (Name.starts_with("neon.bfcvt")) {
4528 if (Name.starts_with("neon.bfcvtn2")) {
4529 SmallVector<int, 32> LoMask(4);
4530 std::iota(LoMask.begin(), LoMask.end(), 0);
4531 SmallVector<int, 32> ConcatMask(8);
4532 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4533 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4534 Value *Trunc =
4535 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4536 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4537 } else if (Name.starts_with("neon.bfcvtn")) {
4538 SmallVector<int, 32> ConcatMask(8);
4539 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4540 Type *V4BF16 =
4541 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4542 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4543 dbgs() << "Trunc: " << *Trunc << "\n";
4544 return Builder.CreateShuffleVector(
4545 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4546 } else {
4547 return Builder.CreateFPTrunc(CI->getOperand(0),
4548 Type::getBFloatTy(F->getContext()));
4549 }
4550 } else if (Name.starts_with("sve.fcvt")) {
4551 Intrinsic::ID NewID =
4553 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4554 .Case("sve.fcvtnt.bf16f32",
4555 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4557 if (NewID == Intrinsic::not_intrinsic)
4558 llvm_unreachable("Unhandled Intrinsic!");
4559
4560 SmallVector<Value *, 3> Args(CI->args());
4561
4562 // The original intrinsics incorrectly used a predicate based on the
4563 // smallest element type rather than the largest.
4564 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4565 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4566
4567 if (Args[1]->getType() != BadPredTy)
4568 llvm_unreachable("Unexpected predicate type!");
4569
4570 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4571 BadPredTy, Args[1]);
4572 Args[1] = Builder.CreateIntrinsic(
4573 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4574
4575 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4576 CI->getName());
4577 }
4578
4579 llvm_unreachable("Unhandled Intrinsic!");
4580}
4581
4583 IRBuilder<> &Builder) {
4584 if (Name == "mve.vctp64.old") {
4585 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4586 // correct type.
4587 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4588 CI->getArgOperand(0),
4589 /*FMFSource=*/nullptr, CI->getName());
4590 Value *C1 = Builder.CreateIntrinsic(
4591 Intrinsic::arm_mve_pred_v2i,
4592 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4593 return Builder.CreateIntrinsic(
4594 Intrinsic::arm_mve_pred_i2v,
4595 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4596 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4597 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4598 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4599 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4600 Name ==
4601 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4602 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4603 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4604 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4605 Name ==
4606 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4607 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4608 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4609 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4610 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4611 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4612 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4613 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4614 std::vector<Type *> Tys;
4615 unsigned ID = CI->getIntrinsicID();
4616 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4617 switch (ID) {
4618 case Intrinsic::arm_mve_mull_int_predicated:
4619 case Intrinsic::arm_mve_vqdmull_predicated:
4620 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4621 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4622 break;
4623 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4624 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4625 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4626 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4627 V2I1Ty};
4628 break;
4629 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4630 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4631 CI->getOperand(1)->getType(), V2I1Ty};
4632 break;
4633 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4634 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4635 CI->getOperand(2)->getType(), V2I1Ty};
4636 break;
4637 case Intrinsic::arm_cde_vcx1q_predicated:
4638 case Intrinsic::arm_cde_vcx1qa_predicated:
4639 case Intrinsic::arm_cde_vcx2q_predicated:
4640 case Intrinsic::arm_cde_vcx2qa_predicated:
4641 case Intrinsic::arm_cde_vcx3q_predicated:
4642 case Intrinsic::arm_cde_vcx3qa_predicated:
4643 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4644 break;
4645 default:
4646 llvm_unreachable("Unhandled Intrinsic!");
4647 }
4648
4649 std::vector<Value *> Ops;
4650 for (Value *Op : CI->args()) {
4651 Type *Ty = Op->getType();
4652 if (Ty->getScalarSizeInBits() == 1) {
4653 Value *C1 = Builder.CreateIntrinsic(
4654 Intrinsic::arm_mve_pred_v2i,
4655 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4656 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4657 }
4658 Ops.push_back(Op);
4659 }
4660
4661 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4662 CI->getName());
4663 }
4664 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4665}
4666
// Upgrades legacy AMDGCN intrinsic calls. Legacy WMMA iu intrinsics are
// handled by appending the missing clamp operand; the remaining (atomic)
// intrinsics are expected to have the arguments:
// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
//
// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
//
4673 Function *F, IRBuilder<> &Builder) {
4674 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4675 // for compatibility.
4676 auto UpgradeLegacyWMMAIUIntrinsicCall =
4677 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4678 ArrayRef<Type *> OverloadTys) -> Value * {
4679 // Prepare arguments, append clamp=0 for compatibility
4680 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4681 Args.push_back(Builder.getFalse());
4682
4683 // Insert the declaration for the right overload types
4685 F->getParent(), F->getIntrinsicID(), OverloadTys);
4686
4687 // Copy operand bundles if any
4689 CI->getOperandBundlesAsDefs(Bundles);
4690
4691 // Create the new call and copy calling properties
4692 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4693 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4694 NewCall->setCallingConv(CI->getCallingConv());
4695 NewCall->setAttributes(CI->getAttributes());
4696 NewCall->setDebugLoc(CI->getDebugLoc());
4697 NewCall->copyMetadata(*CI);
4698 return NewCall;
4699 };
4700
4701 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4702 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4703 "intrinsic should have 7 arguments");
4704 Type *T1 = CI->getArgOperand(4)->getType();
4705 Type *T2 = CI->getArgOperand(1)->getType();
4706 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4707 }
4708 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4709 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4710 "intrinsic should have 8 arguments");
4711 Type *T1 = CI->getArgOperand(4)->getType();
4712 Type *T2 = CI->getArgOperand(1)->getType();
4713 Type *T3 = CI->getArgOperand(3)->getType();
4714 Type *T4 = CI->getArgOperand(5)->getType();
4715 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4716 }
4717
4718 AtomicRMWInst::BinOp RMWOp =
4720 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4721 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4722 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4723 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4724 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4725 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4726 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4727 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4728 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4729 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4730 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4731 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4732 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4733
4734 unsigned NumOperands = CI->getNumOperands();
4735 if (NumOperands < 3) // Malformed bitcode.
4736 return nullptr;
4737
4738 Value *Ptr = CI->getArgOperand(0);
4739 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4740 if (!PtrTy) // Malformed.
4741 return nullptr;
4742
4743 Value *Val = CI->getArgOperand(1);
4744 if (Val->getType() != CI->getType()) // Malformed.
4745 return nullptr;
4746
4747 ConstantInt *OrderArg = nullptr;
4748 bool IsVolatile = false;
4749
4750 // These should have 5 arguments (plus the callee). A separate version of the
4751 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4752 if (NumOperands > 3)
4753 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4754
4755 // Ignore scope argument at 3
4756
4757 if (NumOperands > 5) {
4758 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4759 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4760 }
4761
4763 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4764 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4767
4768 LLVMContext &Ctx = F->getContext();
4769
4770 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4771 Type *RetTy = CI->getType();
4772 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4773 if (VT->getElementType()->isIntegerTy(16)) {
4774 VectorType *AsBF16 =
4775 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4776 Val = Builder.CreateBitCast(Val, AsBF16);
4777 }
4778 }
4779
4780 // The scope argument never really worked correctly. Use agent as the most
4781 // conservative option which should still always produce the instruction.
4782 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4783 AtomicRMWInst *RMW =
4784 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4785
4786 unsigned AddrSpace = PtrTy->getAddressSpace();
4787 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4788 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4789 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4790 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4791 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4792 }
4793
4794 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4795 MDBuilder MDB(F->getContext());
4796 MDNode *RangeNotPrivate =
4799 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4800 }
4801
4802 if (IsVolatile)
4803 RMW->setVolatile(true);
4804
4805 return Builder.CreateBitCast(RMW, RetTy);
4806}
4807
4808/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4809/// plain MDNode, as it's the verifier's job to check these are the correct
4810/// types later.
4811static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4812 if (Op < CI->arg_size()) {
4813 if (MetadataAsValue *MAV =
4815 Metadata *MD = MAV->getMetadata();
4816 return dyn_cast_if_present<MDNode>(MD);
4817 }
4818 }
4819 return nullptr;
4820}
4821
4822/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4823static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4824 if (Op < CI->arg_size())
4826 return MAV->getMetadata();
4827 return nullptr;
4828}
4829
4831 // The MDNode attached to this instruction might not be the correct type,
4832 // as the verifier has not yet be run. Fetch it as a bare MDNode.
4833 return I->getDebugLoc().getAsMDNode();
4834}
4835
4836/// Convert debug intrinsic calls to non-instruction debug records.
4837/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4838/// \p CI - The debug intrinsic call.
4840 DbgRecord *DR = nullptr;
4841 if (Name == "label") {
4843 CI->getDebugLoc());
4844 } else if (Name == "assign") {
4847 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4848 unwrapMAVMetadataOp(CI, 4),
4849 /*The address is a Value ref, it will be stored as a Metadata */
4850 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4851 } else if (Name == "declare") {
4854 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4855 getDebugLocSafe(CI));
4856 } else if (Name == "addr") {
4857 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4858 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4859 // Don't try to add something to the expression if it's not an expression.
4860 // Instead, allow the verifier to fail later.
4861 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4862 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4863 }
4866 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4867 getDebugLocSafe(CI));
4868 } else if (Name == "value") {
4869 // An old version of dbg.value had an extra offset argument.
4870 unsigned VarOp = 1;
4871 unsigned ExprOp = 2;
4872 if (CI->arg_size() == 4) {
4874 // Nonzero offset dbg.values get dropped without a replacement.
4875 if (!Offset || !Offset->isNullValue())
4876 return;
4877 VarOp = 2;
4878 ExprOp = 3;
4879 }
4882 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4883 nullptr, getDebugLocSafe(CI));
4884 }
4885 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4886 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4887}
4888
4891 if (!Offset)
4892 reportFatalUsageError("Invalid llvm.vector.splice offset argument");
4893 int64_t OffsetVal = Offset->getSExtValue();
4894 return Builder.CreateIntrinsic(OffsetVal >= 0
4895 ? Intrinsic::vector_splice_left
4896 : Intrinsic::vector_splice_right,
4897 CI->getType(),
4898 {CI->getArgOperand(0), CI->getArgOperand(1),
4899 Builder.getInt32(std::abs(OffsetVal))});
4900}
4901
4903 Function *F, IRBuilder<> &Builder) {
4904 if (Name.starts_with("to.fp16")) {
4905 Value *Cast =
4906 Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
4907 return Builder.CreateBitCast(Cast, CI->getType());
4908 }
4909
4910 if (Name.starts_with("from.fp16")) {
4911 Value *Cast =
4912 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
4913 return Builder.CreateFPExt(Cast, CI->getType());
4914 }
4915
4916 return nullptr;
4917}
4918
4919/// Upgrade a call to an old intrinsic. All argument and return casting must be
4920/// provided to seamlessly integrate with existing context.
4922 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4923 // checks the callee's function type matches. It's likely we need to handle
4924 // type changes here.
4926 if (!F)
4927 return;
4928
4929 LLVMContext &C = CI->getContext();
4930 IRBuilder<> Builder(C);
4931 if (isa<FPMathOperator>(CI))
4932 Builder.setFastMathFlags(CI->getFastMathFlags());
4933 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4934
4935 if (!NewFn) {
4936 // Get the Function's name.
4937 StringRef Name = F->getName();
4938 if (!Name.consume_front("llvm."))
4939 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
4940
4941 bool IsX86 = Name.consume_front("x86.");
4942 bool IsNVVM = Name.consume_front("nvvm.");
4943 bool IsAArch64 = Name.consume_front("aarch64.");
4944 bool IsARM = Name.consume_front("arm.");
4945 bool IsAMDGCN = Name.consume_front("amdgcn.");
4946 bool IsDbg = Name.consume_front("dbg.");
4947 bool IsOldSplice =
4948 (Name.consume_front("experimental.vector.splice") ||
4949 Name.consume_front("vector.splice")) &&
4950 !(Name.starts_with(".left") || Name.starts_with(".right"));
4951 Value *Rep = nullptr;
4952
4953 if (!IsX86 && Name == "stackprotectorcheck") {
4954 Rep = nullptr;
4955 } else if (IsNVVM) {
4956 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4957 } else if (IsX86) {
4958 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4959 } else if (IsAArch64) {
4960 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4961 } else if (IsARM) {
4962 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4963 } else if (IsAMDGCN) {
4964 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4965 } else if (IsDbg) {
4967 } else if (IsOldSplice) {
4968 Rep = upgradeVectorSplice(CI, Builder);
4969 } else if (Name.consume_front("convert.")) {
4970 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
4971 } else {
4972 llvm_unreachable("Unknown function for CallBase upgrade.");
4973 }
4974
4975 if (Rep)
4976 CI->replaceAllUsesWith(Rep);
4977 CI->eraseFromParent();
4978 return;
4979 }
4980
4981 const auto &DefaultCase = [&]() -> void {
4982 if (F == NewFn)
4983 return;
4984
4985 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4986 // Handle generic mangling change.
4987 assert(
4988 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4989 "Unknown function for CallBase upgrade and isn't just a name change");
4990 CI->setCalledFunction(NewFn);
4991 return;
4992 }
4993
4994 // This must be an upgrade from a named to a literal struct.
4995 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4996 assert(OldST != NewFn->getReturnType() &&
4997 "Return type must have changed");
4998 assert(OldST->getNumElements() ==
4999 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
5000 "Must have same number of elements");
5001
5002 SmallVector<Value *> Args(CI->args());
5003 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5004 NewCI->setAttributes(CI->getAttributes());
5005 Value *Res = PoisonValue::get(OldST);
5006 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5007 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5008 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5009 }
5010 CI->replaceAllUsesWith(Res);
5011 CI->eraseFromParent();
5012 return;
5013 }
5014
5015 // We're probably about to produce something invalid. Let the verifier catch
5016 // it instead of dying here.
5017 CI->setCalledOperand(
5019 return;
5020 };
5021 CallInst *NewCall = nullptr;
5022 switch (NewFn->getIntrinsicID()) {
5023 default: {
5024 DefaultCase();
5025 return;
5026 }
5027 case Intrinsic::arm_neon_vst1:
5028 case Intrinsic::arm_neon_vst2:
5029 case Intrinsic::arm_neon_vst3:
5030 case Intrinsic::arm_neon_vst4:
5031 case Intrinsic::arm_neon_vst2lane:
5032 case Intrinsic::arm_neon_vst3lane:
5033 case Intrinsic::arm_neon_vst4lane: {
5034 SmallVector<Value *, 4> Args(CI->args());
5035 NewCall = Builder.CreateCall(NewFn, Args);
5036 break;
5037 }
5038 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5039 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5040 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5041 LLVMContext &Ctx = F->getParent()->getContext();
5042 SmallVector<Value *, 4> Args(CI->args());
5043 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
5044 cast<ConstantInt>(Args[3])->getZExtValue());
5045 NewCall = Builder.CreateCall(NewFn, Args);
5046 break;
5047 }
5048 case Intrinsic::aarch64_sve_ld3_sret:
5049 case Intrinsic::aarch64_sve_ld4_sret:
5050 case Intrinsic::aarch64_sve_ld2_sret: {
5051 StringRef Name = F->getName();
5052 Name = Name.substr(5);
5053 unsigned N = StringSwitch<unsigned>(Name)
5054 .StartsWith("aarch64.sve.ld2", 2)
5055 .StartsWith("aarch64.sve.ld3", 3)
5056 .StartsWith("aarch64.sve.ld4", 4)
5057 .Default(0);
5058 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5059 unsigned MinElts = RetTy->getMinNumElements() / N;
5060 SmallVector<Value *, 2> Args(CI->args());
5061 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5062 Value *Ret = llvm::PoisonValue::get(RetTy);
5063 for (unsigned I = 0; I < N; I++) {
5064 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5065 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5066 }
5067 NewCall = dyn_cast<CallInst>(Ret);
5068 break;
5069 }
5070
5071 case Intrinsic::coro_end: {
5072 SmallVector<Value *, 3> Args(CI->args());
5073 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5074 NewCall = Builder.CreateCall(NewFn, Args);
5075 break;
5076 }
5077
5078 case Intrinsic::vector_extract: {
5079 StringRef Name = F->getName();
5080 Name = Name.substr(5); // Strip llvm
5081 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5082 DefaultCase();
5083 return;
5084 }
5085 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5086 unsigned MinElts = RetTy->getMinNumElements();
5087 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5088 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5089 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5090 break;
5091 }
5092
5093 case Intrinsic::vector_insert: {
5094 StringRef Name = F->getName();
5095 Name = Name.substr(5);
5096 if (!Name.starts_with("aarch64.sve.tuple")) {
5097 DefaultCase();
5098 return;
5099 }
5100 if (Name.starts_with("aarch64.sve.tuple.set")) {
5101 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5102 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5103 Value *NewIdx =
5104 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5105 NewCall = Builder.CreateCall(
5106 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5107 break;
5108 }
5109 if (Name.starts_with("aarch64.sve.tuple.create")) {
5110 unsigned N = StringSwitch<unsigned>(Name)
5111 .StartsWith("aarch64.sve.tuple.create2", 2)
5112 .StartsWith("aarch64.sve.tuple.create3", 3)
5113 .StartsWith("aarch64.sve.tuple.create4", 4)
5114 .Default(0);
5115 assert(N > 1 && "Create is expected to be between 2-4");
5116 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5117 Value *Ret = llvm::PoisonValue::get(RetTy);
5118 unsigned MinElts = RetTy->getMinNumElements() / N;
5119 for (unsigned I = 0; I < N; I++) {
5120 Value *V = CI->getArgOperand(I);
5121 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5122 }
5123 NewCall = dyn_cast<CallInst>(Ret);
5124 }
5125 break;
5126 }
5127
5128 case Intrinsic::arm_neon_bfdot:
5129 case Intrinsic::arm_neon_bfmmla:
5130 case Intrinsic::arm_neon_bfmlalb:
5131 case Intrinsic::arm_neon_bfmlalt:
5132 case Intrinsic::aarch64_neon_bfdot:
5133 case Intrinsic::aarch64_neon_bfmmla:
5134 case Intrinsic::aarch64_neon_bfmlalb:
5135 case Intrinsic::aarch64_neon_bfmlalt: {
5137 assert(CI->arg_size() == 3 &&
5138 "Mismatch between function args and call args");
5139 size_t OperandWidth =
5141 assert((OperandWidth == 64 || OperandWidth == 128) &&
5142 "Unexpected operand width");
5143 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5144 auto Iter = CI->args().begin();
5145 Args.push_back(*Iter++);
5146 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5147 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5148 NewCall = Builder.CreateCall(NewFn, Args);
5149 break;
5150 }
5151
5152 case Intrinsic::bitreverse:
5153 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5154 break;
5155
5156 case Intrinsic::ctlz:
5157 case Intrinsic::cttz: {
5158 if (CI->arg_size() != 1) {
5159 DefaultCase();
5160 return;
5161 }
5162
5163 NewCall =
5164 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5165 break;
5166 }
5167
5168 case Intrinsic::objectsize: {
5169 Value *NullIsUnknownSize =
5170 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5171 Value *Dynamic =
5172 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5173 NewCall = Builder.CreateCall(
5174 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5175 break;
5176 }
5177
5178 case Intrinsic::ctpop:
5179 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5180 break;
5181 case Intrinsic::dbg_value: {
5182 StringRef Name = F->getName();
5183 Name = Name.substr(5); // Strip llvm.
5184 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5185 if (Name.starts_with("dbg.addr")) {
5187 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5188 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5189 NewCall =
5190 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5191 MetadataAsValue::get(C, Expr)});
5192 break;
5193 }
5194
5195 // Upgrade from the old version that had an extra offset argument.
5196 assert(CI->arg_size() == 4);
5197 // Drop nonzero offsets instead of attempting to upgrade them.
5199 if (Offset->isNullValue()) {
5200 NewCall = Builder.CreateCall(
5201 NewFn,
5202 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5203 break;
5204 }
5205 CI->eraseFromParent();
5206 return;
5207 }
5208
5209 case Intrinsic::ptr_annotation:
5210 // Upgrade from versions that lacked the annotation attribute argument.
5211 if (CI->arg_size() != 4) {
5212 DefaultCase();
5213 return;
5214 }
5215
5216 // Create a new call with an added null annotation attribute argument.
5217 NewCall = Builder.CreateCall(
5218 NewFn,
5219 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5220 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5221 NewCall->takeName(CI);
5222 CI->replaceAllUsesWith(NewCall);
5223 CI->eraseFromParent();
5224 return;
5225
5226 case Intrinsic::var_annotation:
5227 // Upgrade from versions that lacked the annotation attribute argument.
5228 if (CI->arg_size() != 4) {
5229 DefaultCase();
5230 return;
5231 }
5232 // Create a new call with an added null annotation attribute argument.
5233 NewCall = Builder.CreateCall(
5234 NewFn,
5235 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5236 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5237 NewCall->takeName(CI);
5238 CI->replaceAllUsesWith(NewCall);
5239 CI->eraseFromParent();
5240 return;
5241
5242 case Intrinsic::riscv_aes32dsi:
5243 case Intrinsic::riscv_aes32dsmi:
5244 case Intrinsic::riscv_aes32esi:
5245 case Intrinsic::riscv_aes32esmi:
5246 case Intrinsic::riscv_sm4ks:
5247 case Intrinsic::riscv_sm4ed: {
5248 // The last argument to these intrinsics used to be i8 and changed to i32.
5249 // The type overload for sm4ks and sm4ed was removed.
5250 Value *Arg2 = CI->getArgOperand(2);
5251 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5252 return;
5253
5254 Value *Arg0 = CI->getArgOperand(0);
5255 Value *Arg1 = CI->getArgOperand(1);
5256 if (CI->getType()->isIntegerTy(64)) {
5257 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5258 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5259 }
5260
5261 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5262 cast<ConstantInt>(Arg2)->getZExtValue());
5263
5264 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5265 Value *Res = NewCall;
5266 if (Res->getType() != CI->getType())
5267 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5268 NewCall->takeName(CI);
5269 CI->replaceAllUsesWith(Res);
5270 CI->eraseFromParent();
5271 return;
5272 }
5273 case Intrinsic::nvvm_mapa_shared_cluster: {
5274 // Create a new call with the correct address space.
5275 NewCall =
5276 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5277 Value *Res = NewCall;
5278 Res = Builder.CreateAddrSpaceCast(
5279 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5280 NewCall->takeName(CI);
5281 CI->replaceAllUsesWith(Res);
5282 CI->eraseFromParent();
5283 return;
5284 }
5285 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5286 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5287 // Create a new call with the correct address space.
5288 SmallVector<Value *, 4> Args(CI->args());
5289 Args[0] = Builder.CreateAddrSpaceCast(
5290 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5291
5292 NewCall = Builder.CreateCall(NewFn, Args);
5293 NewCall->takeName(CI);
5294 CI->replaceAllUsesWith(NewCall);
5295 CI->eraseFromParent();
5296 return;
5297 }
5298 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5299 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5300 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5301 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5302 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5303 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5304 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5305 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5306 SmallVector<Value *, 16> Args(CI->args());
5307
5308 // Create AddrSpaceCast to shared_cluster if needed.
5309 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5310 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5312 Args[0] = Builder.CreateAddrSpaceCast(
5313 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5314
5315 // Attach the flag argument for cta_group, with a
5316 // default value of 0. This handles case (2) in
5317 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5318 size_t NumArgs = CI->arg_size();
5319 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5320 if (!FlagArg->getType()->isIntegerTy(1))
5321 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5322
5323 NewCall = Builder.CreateCall(NewFn, Args);
5324 NewCall->takeName(CI);
5325 CI->replaceAllUsesWith(NewCall);
5326 CI->eraseFromParent();
5327 return;
5328 }
5329 case Intrinsic::riscv_sha256sig0:
5330 case Intrinsic::riscv_sha256sig1:
5331 case Intrinsic::riscv_sha256sum0:
5332 case Intrinsic::riscv_sha256sum1:
5333 case Intrinsic::riscv_sm3p0:
5334 case Intrinsic::riscv_sm3p1: {
5335 // The last argument to these intrinsics used to be i8 and changed to i32.
5336 // The type overload for sm4ks and sm4ed was removed.
5337 if (!CI->getType()->isIntegerTy(64))
5338 return;
5339
5340 Value *Arg =
5341 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5342
5343 NewCall = Builder.CreateCall(NewFn, Arg);
5344 Value *Res =
5345 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5346 NewCall->takeName(CI);
5347 CI->replaceAllUsesWith(Res);
5348 CI->eraseFromParent();
5349 return;
5350 }
5351
5352 case Intrinsic::x86_xop_vfrcz_ss:
5353 case Intrinsic::x86_xop_vfrcz_sd:
5354 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5355 break;
5356
5357 case Intrinsic::x86_xop_vpermil2pd:
5358 case Intrinsic::x86_xop_vpermil2ps:
5359 case Intrinsic::x86_xop_vpermil2pd_256:
5360 case Intrinsic::x86_xop_vpermil2ps_256: {
5361 SmallVector<Value *, 4> Args(CI->args());
5362 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5363 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5364 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5365 NewCall = Builder.CreateCall(NewFn, Args);
5366 break;
5367 }
5368
5369 case Intrinsic::x86_sse41_ptestc:
5370 case Intrinsic::x86_sse41_ptestz:
5371 case Intrinsic::x86_sse41_ptestnzc: {
5372 // The arguments for these intrinsics used to be v4f32, and changed
5373 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5374 // So, the only thing required is a bitcast for both arguments.
5375 // First, check the arguments have the old type.
5376 Value *Arg0 = CI->getArgOperand(0);
5377 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5378 return;
5379
5380 // Old intrinsic, add bitcasts
5381 Value *Arg1 = CI->getArgOperand(1);
5382
5383 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5384
5385 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5386 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5387
5388 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5389 break;
5390 }
5391
5392 case Intrinsic::x86_rdtscp: {
5393 // This used to take 1 arguments. If we have no arguments, it is already
5394 // upgraded.
5395 if (CI->getNumOperands() == 0)
5396 return;
5397
5398 NewCall = Builder.CreateCall(NewFn);
5399 // Extract the second result and store it.
5400 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5401 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5402 // Replace the original call result with the first result of the new call.
5403 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5404
5405 NewCall->takeName(CI);
5406 CI->replaceAllUsesWith(TSC);
5407 CI->eraseFromParent();
5408 return;
5409 }
5410
5411 case Intrinsic::x86_sse41_insertps:
5412 case Intrinsic::x86_sse41_dppd:
5413 case Intrinsic::x86_sse41_dpps:
5414 case Intrinsic::x86_sse41_mpsadbw:
5415 case Intrinsic::x86_avx_dp_ps_256:
5416 case Intrinsic::x86_avx2_mpsadbw: {
5417 // Need to truncate the last argument from i32 to i8 -- this argument models
5418 // an inherently 8-bit immediate operand to these x86 instructions.
5419 SmallVector<Value *, 4> Args(CI->args());
5420
5421 // Replace the last argument with a trunc.
5422 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5423 NewCall = Builder.CreateCall(NewFn, Args);
5424 break;
5425 }
5426
5427 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5428 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5429 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5430 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5431 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5432 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5433 SmallVector<Value *, 4> Args(CI->args());
5434 unsigned NumElts =
5435 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5436 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5437
5438 NewCall = Builder.CreateCall(NewFn, Args);
5439 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5440
5441 NewCall->takeName(CI);
5442 CI->replaceAllUsesWith(Res);
5443 CI->eraseFromParent();
5444 return;
5445 }
5446
5447 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5448 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5449 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5450 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5451 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5452 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5453 SmallVector<Value *, 4> Args(CI->args());
5454 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5455 if (NewFn->getIntrinsicID() ==
5456 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5457 Args[1] = Builder.CreateBitCast(
5458 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5459
5460 NewCall = Builder.CreateCall(NewFn, Args);
5461 Value *Res = Builder.CreateBitCast(
5462 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5463
5464 NewCall->takeName(CI);
5465 CI->replaceAllUsesWith(Res);
5466 CI->eraseFromParent();
5467 return;
5468 }
5469 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5470 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5471 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5472 SmallVector<Value *, 4> Args(CI->args());
5473 unsigned NumElts =
5474 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5475 Args[1] = Builder.CreateBitCast(
5476 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5477 Args[2] = Builder.CreateBitCast(
5478 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5479
5480 NewCall = Builder.CreateCall(NewFn, Args);
5481 break;
5482 }
5483
5484 case Intrinsic::thread_pointer: {
5485 NewCall = Builder.CreateCall(NewFn, {});
5486 break;
5487 }
5488
5489 case Intrinsic::memcpy:
5490 case Intrinsic::memmove:
5491 case Intrinsic::memset: {
5492 // We have to make sure that the call signature is what we're expecting.
5493 // We only want to change the old signatures by removing the alignment arg:
5494 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5495 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5496 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5497 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5498 // Note: i8*'s in the above can be any pointer type
5499 if (CI->arg_size() != 5) {
5500 DefaultCase();
5501 return;
5502 }
5503 // Remove alignment argument (3), and add alignment attributes to the
5504 // dest/src pointers.
5505 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5506 CI->getArgOperand(2), CI->getArgOperand(4)};
5507 NewCall = Builder.CreateCall(NewFn, Args);
5508 AttributeList OldAttrs = CI->getAttributes();
5509 AttributeList NewAttrs = AttributeList::get(
5510 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5511 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5512 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5513 NewCall->setAttributes(NewAttrs);
5514 auto *MemCI = cast<MemIntrinsic>(NewCall);
5515 // All mem intrinsics support dest alignment.
5517 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5518 // Memcpy/Memmove also support source alignment.
5519 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5520 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5521 break;
5522 }
5523
5524 case Intrinsic::masked_load:
5525 case Intrinsic::masked_gather:
5526 case Intrinsic::masked_store:
5527 case Intrinsic::masked_scatter: {
5528 if (CI->arg_size() != 4) {
5529 DefaultCase();
5530 return;
5531 }
5532
5533 auto GetMaybeAlign = [](Value *Op) {
5534 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5535 uint64_t Val = CI->getZExtValue();
5536 if (Val == 0)
5537 return MaybeAlign();
5538 if (isPowerOf2_64(Val))
5539 return MaybeAlign(Val);
5540 }
5541 reportFatalUsageError("Invalid alignment argument");
5542 };
5543 auto GetAlign = [&](Value *Op) {
5544 MaybeAlign Align = GetMaybeAlign(Op);
5545 if (Align)
5546 return *Align;
5547 reportFatalUsageError("Invalid zero alignment argument");
5548 };
5549
5550 const DataLayout &DL = CI->getDataLayout();
5551 switch (NewFn->getIntrinsicID()) {
5552 case Intrinsic::masked_load:
5553 NewCall = Builder.CreateMaskedLoad(
5554 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5555 CI->getArgOperand(2), CI->getArgOperand(3));
5556 break;
5557 case Intrinsic::masked_gather:
5558 NewCall = Builder.CreateMaskedGather(
5559 CI->getType(), CI->getArgOperand(0),
5560 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5561 CI->getType()->getScalarType()),
5562 CI->getArgOperand(2), CI->getArgOperand(3));
5563 break;
5564 case Intrinsic::masked_store:
5565 NewCall = Builder.CreateMaskedStore(
5566 CI->getArgOperand(0), CI->getArgOperand(1),
5567 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5568 break;
5569 case Intrinsic::masked_scatter:
5570 NewCall = Builder.CreateMaskedScatter(
5571 CI->getArgOperand(0), CI->getArgOperand(1),
5572 DL.getValueOrABITypeAlignment(
5573 GetMaybeAlign(CI->getArgOperand(2)),
5574 CI->getArgOperand(0)->getType()->getScalarType()),
5575 CI->getArgOperand(3));
5576 break;
5577 default:
5578 llvm_unreachable("Unexpected intrinsic ID");
5579 }
5580 // Previous metadata is still valid.
5581 NewCall->copyMetadata(*CI);
5582 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5583 break;
5584 }
5585
5586 case Intrinsic::lifetime_start:
5587 case Intrinsic::lifetime_end: {
5588 if (CI->arg_size() != 2) {
5589 DefaultCase();
5590 return;
5591 }
5592
5593 Value *Ptr = CI->getArgOperand(1);
5594 // Try to strip pointer casts, such that the lifetime works on an alloca.
5595 Ptr = Ptr->stripPointerCasts();
5596 if (isa<AllocaInst>(Ptr)) {
5597 // Don't use NewFn, as we might have looked through an addrspacecast.
5598 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5599 NewCall = Builder.CreateLifetimeStart(Ptr);
5600 else
5601 NewCall = Builder.CreateLifetimeEnd(Ptr);
5602 break;
5603 }
5604
5605 // Otherwise remove the lifetime marker.
5606 CI->eraseFromParent();
5607 return;
5608 }
5609
5610 case Intrinsic::x86_avx512_vpdpbusd_128:
5611 case Intrinsic::x86_avx512_vpdpbusd_256:
5612 case Intrinsic::x86_avx512_vpdpbusd_512:
5613 case Intrinsic::x86_avx512_vpdpbusds_128:
5614 case Intrinsic::x86_avx512_vpdpbusds_256:
5615 case Intrinsic::x86_avx512_vpdpbusds_512:
5616 case Intrinsic::x86_avx2_vpdpbssd_128:
5617 case Intrinsic::x86_avx2_vpdpbssd_256:
5618 case Intrinsic::x86_avx10_vpdpbssd_512:
5619 case Intrinsic::x86_avx2_vpdpbssds_128:
5620 case Intrinsic::x86_avx2_vpdpbssds_256:
5621 case Intrinsic::x86_avx10_vpdpbssds_512:
5622 case Intrinsic::x86_avx2_vpdpbsud_128:
5623 case Intrinsic::x86_avx2_vpdpbsud_256:
5624 case Intrinsic::x86_avx10_vpdpbsud_512:
5625 case Intrinsic::x86_avx2_vpdpbsuds_128:
5626 case Intrinsic::x86_avx2_vpdpbsuds_256:
5627 case Intrinsic::x86_avx10_vpdpbsuds_512:
5628 case Intrinsic::x86_avx2_vpdpbuud_128:
5629 case Intrinsic::x86_avx2_vpdpbuud_256:
5630 case Intrinsic::x86_avx10_vpdpbuud_512:
5631 case Intrinsic::x86_avx2_vpdpbuuds_128:
5632 case Intrinsic::x86_avx2_vpdpbuuds_256:
5633 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5634 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5635 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5636 CI->getArgOperand(2)};
5637 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5638 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5639 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5640
5641 NewCall = Builder.CreateCall(NewFn, Args);
5642 break;
5643 }
5644 case Intrinsic::x86_avx512_vpdpwssd_128:
5645 case Intrinsic::x86_avx512_vpdpwssd_256:
5646 case Intrinsic::x86_avx512_vpdpwssd_512:
5647 case Intrinsic::x86_avx512_vpdpwssds_128:
5648 case Intrinsic::x86_avx512_vpdpwssds_256:
5649 case Intrinsic::x86_avx512_vpdpwssds_512:
5650 case Intrinsic::x86_avx2_vpdpwsud_128:
5651 case Intrinsic::x86_avx2_vpdpwsud_256:
5652 case Intrinsic::x86_avx10_vpdpwsud_512:
5653 case Intrinsic::x86_avx2_vpdpwsuds_128:
5654 case Intrinsic::x86_avx2_vpdpwsuds_256:
5655 case Intrinsic::x86_avx10_vpdpwsuds_512:
5656 case Intrinsic::x86_avx2_vpdpwusd_128:
5657 case Intrinsic::x86_avx2_vpdpwusd_256:
5658 case Intrinsic::x86_avx10_vpdpwusd_512:
5659 case Intrinsic::x86_avx2_vpdpwusds_128:
5660 case Intrinsic::x86_avx2_vpdpwusds_256:
5661 case Intrinsic::x86_avx10_vpdpwusds_512:
5662 case Intrinsic::x86_avx2_vpdpwuud_128:
5663 case Intrinsic::x86_avx2_vpdpwuud_256:
5664 case Intrinsic::x86_avx10_vpdpwuud_512:
5665 case Intrinsic::x86_avx2_vpdpwuuds_128:
5666 case Intrinsic::x86_avx2_vpdpwuuds_256:
5667 case Intrinsic::x86_avx10_vpdpwuuds_512:
5668 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5669 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5670 CI->getArgOperand(2)};
5671 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5672 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5673 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5674
5675 NewCall = Builder.CreateCall(NewFn, Args);
5676 break;
5677 }
5678 assert(NewCall && "Should have either set this variable or returned through "
5679 "the default case");
5680 NewCall->takeName(CI);
5681 CI->replaceAllUsesWith(NewCall);
5682 CI->eraseFromParent();
5683}
5684
5686 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5687
5688 // Check if this function should be upgraded and get the replacement function
5689 // if there is one.
5690 Function *NewFn;
5691 if (UpgradeIntrinsicFunction(F, NewFn)) {
5692 // Replace all users of the old function with the new function or new
5693 // instructions. This is not a range loop because the call is deleted.
5694 for (User *U : make_early_inc_range(F->users()))
5695 if (CallBase *CB = dyn_cast<CallBase>(U))
5696 UpgradeIntrinsicCall(CB, NewFn);
5697
5698 // Remove old function, no longer used, from the module.
5699 if (F != NewFn)
5700 F->eraseFromParent();
5701 }
5702}
5703
5705 const unsigned NumOperands = MD.getNumOperands();
5706 if (NumOperands == 0)
5707 return &MD; // Invalid, punt to a verifier error.
5708
5709 // Check if the tag uses struct-path aware TBAA format.
5710 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5711 return &MD;
5712
5713 auto &Context = MD.getContext();
5714 if (NumOperands == 3) {
5715 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5716 MDNode *ScalarType = MDNode::get(Context, Elts);
5717 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5718 Metadata *Elts2[] = {ScalarType, ScalarType,
5721 MD.getOperand(2)};
5722 return MDNode::get(Context, Elts2);
5723 }
5724 // Create a MDNode <MD, MD, offset 0>
5726 Type::getInt64Ty(Context)))};
5727 return MDNode::get(Context, Elts);
5728}
5729
5731 Instruction *&Temp) {
5732 if (Opc != Instruction::BitCast)
5733 return nullptr;
5734
5735 Temp = nullptr;
5736 Type *SrcTy = V->getType();
5737 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5738 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5739 LLVMContext &Context = V->getContext();
5740
5741 // We have no information about target data layout, so we assume that
5742 // the maximum pointer size is 64bit.
5743 Type *MidTy = Type::getInt64Ty(Context);
5744 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5745
5746 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5747 }
5748
5749 return nullptr;
5750}
5751
5753 if (Opc != Instruction::BitCast)
5754 return nullptr;
5755
5756 Type *SrcTy = C->getType();
5757 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5758 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5759 LLVMContext &Context = C->getContext();
5760
5761 // We have no information about target data layout, so we assume that
5762 // the maximum pointer size is 64bit.
5763 Type *MidTy = Type::getInt64Ty(Context);
5764
5766 DestTy);
5767 }
5768
5769 return nullptr;
5770}
5771
5772/// Check the debug info version number, if it is out-dated, drop the debug
5773/// info. Return true if module is modified.
5776 return false;
5777
5778 llvm::TimeTraceScope timeScope("Upgrade debug info");
5779 // We need to get metadata before the module is verified (i.e., getModuleFlag
5780 // makes assumptions that we haven't verified yet). Carefully extract the flag
5781 // from the metadata.
5782 unsigned Version = 0;
5783 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5784 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5785 if (Flag->getNumOperands() < 3)
5786 return false;
5787 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5788 return K->getString() == "Debug Info Version";
5789 return false;
5790 });
5791 if (OpIt != ModFlags->op_end()) {
5792 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5793 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5794 Version = CI->getZExtValue();
5795 }
5796 }
5797
5799 bool BrokenDebugInfo = false;
5800 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5801 report_fatal_error("Broken module found, compilation aborted!");
5802 if (!BrokenDebugInfo)
5803 // Everything is ok.
5804 return false;
5805 else {
5806 // Diagnose malformed debug info.
5808 M.getContext().diagnose(Diag);
5809 }
5810 }
5811 bool Modified = StripDebugInfo(M);
5813 // Diagnose a version mismatch.
5815 M.getContext().diagnose(DiagVersion);
5816 }
5817 return Modified;
5818}
5819
5820static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5821 GlobalValue *GV, const Metadata *V) {
5822 Function *F = cast<Function>(GV);
5823
5824 constexpr StringLiteral DefaultValue = "1";
5825 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5826 unsigned Length = 0;
5827
5828 if (F->hasFnAttribute(Attr)) {
5829 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5830 // parse these elements placing them into Vect3
5831 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5832 for (; Length < 3 && !S.empty(); Length++) {
5833 auto [Part, Rest] = S.split(',');
5834 Vect3[Length] = Part.trim();
5835 S = Rest;
5836 }
5837 }
5838
5839 const unsigned Dim = DimC - 'x';
5840 assert(Dim < 3 && "Unexpected dim char");
5841
5842 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5843
5844 // local variable required for StringRef in Vect3 to point to.
5845 const std::string VStr = llvm::utostr(VInt);
5846 Vect3[Dim] = VStr;
5847 Length = std::max(Length, Dim + 1);
5848
5849 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5850 F->addFnAttr(Attr, NewAttr);
5851}
5852
5853static inline bool isXYZ(StringRef S) {
5854 return S == "x" || S == "y" || S == "z";
5855}
5856
// Upgrade one (key, value) pair from a legacy !nvvm.annotations entry into
// the corresponding calling convention or function/parameter attribute on GV.
// Returns true when the pair was consumed and must not be re-emitted.
// NOTE(review): the first line of the signature and the attribute-adding
// statements for kernel/maxclusterrank/maxntid/reqntid/cluster_dim were
// elided by extraction -- the blocks below are visibly incomplete.
5858                                       const Metadata *V) {
5859  if (K == "kernel") {
5861      cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5862    return true;
5863  }
5864  if (K == "align") {
5865    // V is a bitfeild specifying two 16-bit values. The alignment value is
5866    // specfied in low 16-bits, The index is specified in the high bits. For the
5867    // index, 0 indicates the return value while higher values correspond to
5868    // each parameter (idx = param + 1).
5869    const uint64_t AlignIdxValuePair =
5870        mdconst::extract<ConstantInt>(V)->getZExtValue();
5871    const unsigned Idx = (AlignIdxValuePair >> 16);
5872    const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5873    cast<Function>(GV)->addAttributeAtIndex(
5874        Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5875    return true;
5876  }
5877  if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5878    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5880    return true;
5881  }
5882  if (K == "minctasm") {
5883    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5884    cast<Function>(GV)->addFnAttr(NVVMAttr::MinCTASm, llvm::utostr(CV));
5885    return true;
5886  }
5887  if (K == "maxnreg") {
5888    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5889    cast<Function>(GV)->addFnAttr(NVVMAttr::MaxNReg, llvm::utostr(CV));
5890    return true;
5891  }
// The consume_front calls below leave K holding just the trailing dimension
// character, which isXYZ validates.
5892  if (K.consume_front("maxntid") && isXYZ(K)) {
5894    return true;
5895  }
5896  if (K.consume_front("reqntid") && isXYZ(K)) {
5898    return true;
5899  }
5900  if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5902    return true;
5903  }
5904  if (K == "grid_constant") {
5905    const auto Attr = Attribute::get(GV->getContext(), NVVMAttr::GridConstant);
5906    for (const auto &Op : cast<MDNode>(V)->operands()) {
5907      // For some reason, the index is 1-based in the metadata. Good thing we're
5908      // able to auto-upgrade it!
5909      const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5910      cast<Function>(GV)->addParamAttr(Index, Attr);
5911    }
5912    return true;
5913  }
5914
// Unrecognized key: keep it as metadata.
5915  return false;
5916}
5917
// Rewrite the named metadata !nvvm.annotations: pairs that upgrade to
// attributes are consumed; all remaining pairs are re-emitted, deduplicated.
// NOTE(review): the function signature and the SeenNodes set declaration
// (original lines 5918 and 5924) were elided by extraction.
5919  NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5920  if (!NamedMD)
5921    return;
5922
5923  SmallVector<MDNode *, 8> NewNodes;
5925  for (MDNode *MD : NamedMD->operands()) {
// Skip duplicate annotation nodes.
5926    if (!SeenNodes.insert(MD).second)
5927      continue;
5928
5929    auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5930    if (!GV)
5931      continue;
5932
5933    assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5934
5935    SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5936    // Each nvvm.annotations metadata entry will be of the following form:
5937    //   !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5938    // start index = 1, to skip the global variable key
5939    // increment = 2, to skip the value for each property-value pairs
5940    for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5941      MDString *K = cast<MDString>(MD->getOperand(j));
5942      const MDOperand &V = MD->getOperand(j + 1);
5943      bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5944      if (!Upgraded)
5945        NewOperands.append({K, V});
5946    }
5947
// Only keep nodes that still carry at least one un-upgraded pair.
5948    if (NewOperands.size() > 1)
5949      NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5950  }
5951
// Replace the named metadata's operand list wholesale.
5952  NamedMD->clearOperands();
5953  for (MDNode *N : NewNodes)
5954    NamedMD->addOperand(N);
5955}
5956
5957/// This checks for objc retain release marker which should be upgraded. It
5958/// returns true if module is modified.
// NOTE(review): the signature line (original 5959, static bool taking
// Module &M) was elided by extraction.
5960  bool Changed = false;
5961  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5962  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5963  if (ModRetainReleaseMarker) {
5964    MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5965    if (Op) {
5966      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5967      if (ID) {
// Old markers used '#' as the separator; rewrite "a#b" to "a;b" before
// re-emitting the marker as a module flag.
5968        SmallVector<StringRef, 4> ValueComp;
5969        ID->getString().split(ValueComp, "#");
5970        if (ValueComp.size() == 2) {
5971          std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5972          ID = MDString::get(M.getContext(), NewValue);
5973        }
// Move the marker from named metadata to a module flag.
5974        M.addModuleFlag(Module::Error, MarkerKey, ID);
5975        M.eraseNamedMetadata(ModRetainReleaseMarker);
5976        Changed = true;
5977      }
5978    }
5979  }
5980  return Changed;
5981}
5982
// Convert direct calls to ObjC ARC runtime functions (objc_retain & friends)
// into calls to the corresponding llvm.objc.* intrinsics.
// NOTE(review): the signature (original line 5983), the CallInst cast of U
// (5997), the Args vector declaration (6003) and the marker-upgrade guard
// (6059) were elided by extraction.
5984  // This lambda converts normal function calls to ARC runtime functions to
5985  // intrinsic calls.
5986  auto UpgradeToIntrinsic = [&](const char *OldFunc,
5987                                llvm::Intrinsic::ID IntrinsicFunc) {
5988    Function *Fn = M.getFunction(OldFunc);
5989
5990    if (!Fn)
5991      return;
5992
5993    Function *NewFn =
5994        llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5995
// Iterate with early-increment since calls are erased while walking users.
5996    for (User *U : make_early_inc_range(Fn->users())) {
5998      if (!CI || CI->getCalledFunction() != Fn)
5999        continue;
6000
6001      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
6002      FunctionType *NewFuncTy = NewFn->getFunctionType();
6004
6005      // Don't upgrade the intrinsic if it's not valid to bitcast the return
6006      // value to the return type of the old function.
6007      if (NewFuncTy->getReturnType() != CI->getType() &&
6008          !CastInst::castIsValid(Instruction::BitCast, CI,
6009                                 NewFuncTy->getReturnType()))
6010        continue;
6011
6012      bool InvalidCast = false;
6013
6014      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
6015        Value *Arg = CI->getArgOperand(I);
6016
6017        // Bitcast argument to the parameter type of the new function if it's
6018        // not a variadic argument.
6019        if (I < NewFuncTy->getNumParams()) {
6020          // Don't upgrade the intrinsic if it's not valid to bitcast the argument
6021          // to the parameter type of the new function.
6022          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
6023                                     NewFuncTy->getParamType(I))) {
6024            InvalidCast = true;
6025            break;
6026          }
6027          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
6028        }
6029        Args.push_back(Arg);
6030      }
6031
6032      if (InvalidCast)
6033        continue;
6034
6035      // Create a call instruction that calls the new function.
6036      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6037      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
6038      NewCall->takeName(CI);
6039
6040      // Bitcast the return value back to the type of the old call.
6041      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
6042
6043      if (!CI->use_empty())
6044        CI->replaceAllUsesWith(NewRetVal);
6045      CI->eraseFromParent();
6046    }
6047
// Drop the now-unused runtime function declaration.
6048    if (Fn->use_empty())
6049      Fn->eraseFromParent();
6050  };
6051
6052  // Unconditionally convert a call to "clang.arc.use" to a call to
6053  // "llvm.objc.clang.arc.use".
6054  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6055
6056  // Upgrade the retain release marker. If there is no need to upgrade
6057  // the marker, that means either the module is already new enough to contain
6058  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
6060    return;
6061
// Table mapping each legacy ARC runtime entry point to its intrinsic.
6062  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6063      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6064      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6065      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6066      {"objc_autoreleaseReturnValue",
6067       llvm::Intrinsic::objc_autoreleaseReturnValue},
6068      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6069      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6070      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6071      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6072      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6073      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6074      {"objc_release", llvm::Intrinsic::objc_release},
6075      {"objc_retain", llvm::Intrinsic::objc_retain},
6076      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6077      {"objc_retainAutoreleaseReturnValue",
6078       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6079      {"objc_retainAutoreleasedReturnValue",
6080       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6081      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6082      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6083      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6084      {"objc_unsafeClaimAutoreleasedReturnValue",
6085       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6086      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6087      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6088      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6089      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6090      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6091      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6092      {"objc_arc_annotation_topdown_bbstart",
6093       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6094      {"objc_arc_annotation_topdown_bbend",
6095       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6096      {"objc_arc_annotation_bottomup_bbstart",
6097       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6098      {"objc_arc_annotation_bottomup_bbend",
6099       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6100
6101  for (auto &I : RuntimeFuncs)
6102    UpgradeToIntrinsic(I.first, I.second);
6103}
6104
// Upgrade legacy module flags (behaviors, names and encodings) in place.
// Returns true if any flag was changed or added.
// NOTE(review): the signature (original line 6105) and the mdconst extracts
// of the behavior operands (6140, 6149, 6158, 6206) were elided by
// extraction -- several if-conditions below are visibly incomplete.
6106  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6107  if (!ModFlags)
6108    return false;
6109
6110  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6111  bool HasSwiftVersionFlag = false;
6112  uint8_t SwiftMajorVersion, SwiftMinorVersion;
6113  uint32_t SwiftABIVersion;
6114  auto Int8Ty = Type::getInt8Ty(M.getContext());
6115  auto Int32Ty = Type::getInt32Ty(M.getContext());
6116
6117  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6118    MDNode *Op = ModFlags->getOperand(I);
6119    if (Op->getNumOperands() != 3)
6120      continue;
6121    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6122    if (!ID)
6123      continue;
// Helper: rewrite the current flag node with behavior B, keeping key/value.
6124    auto SetBehavior = [&](Module::ModFlagBehavior B) {
6125      Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6126                              Type::getInt32Ty(M.getContext()), B)),
6127                          MDString::get(M.getContext(), ID->getString()),
6128                          Op->getOperand(2)};
6129      ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6130      Changed = true;
6131    };
6132
6133    if (ID->getString() == "Objective-C Image Info Version")
6134      HasObjCFlag = true;
6135    if (ID->getString() == "Objective-C Class Properties")
6136      HasClassProperties = true;
6137    // Upgrade PIC from Error/Max to Min.
6138    if (ID->getString() == "PIC Level") {
6139      if (auto *Behavior =
6141        uint64_t V = Behavior->getLimitedValue();
6142        if (V == Module::Error || V == Module::Max)
6143          SetBehavior(Module::Min);
6144      }
6145    }
6146    // Upgrade "PIE Level" from Error to Max.
6147    if (ID->getString() == "PIE Level")
6148      if (auto *Behavior =
6150        if (Behavior->getLimitedValue() == Module::Error)
6151          SetBehavior(Module::Max);
6152
6153    // Upgrade branch protection and return address signing module flags. The
6154    // module flag behavior for these fields were Error and now they are Min.
6155    if (ID->getString() == "branch-target-enforcement" ||
6156        ID->getString().starts_with("sign-return-address")) {
6157      if (auto *Behavior =
6159        if (Behavior->getLimitedValue() == Module::Error) {
6160          Type *Int32Ty = Type::getInt32Ty(M.getContext());
6161          Metadata *Ops[3] = {
6162              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6163              Op->getOperand(1), Op->getOperand(2)};
6164          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6165          Changed = true;
6166        }
6167      }
6168    }
6169
6170    // Upgrade Objective-C Image Info Section. Removed the whitespce in the
6171    // section name so that llvm-lto will not complain about mismatching
6172    // module flags that is functionally the same.
6173    if (ID->getString() == "Objective-C Image Info Section") {
6174      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6175        SmallVector<StringRef, 4> ValueComp;
6176        Value->getString().split(ValueComp, " ");
6177        if (ValueComp.size() != 1) {
6178          std::string NewValue;
6179          for (auto &S : ValueComp)
6180            NewValue += S.str();
6181          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6182                              MDString::get(M.getContext(), NewValue)};
6183          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6184          Changed = true;
6185        }
6186      }
6187    }
6188
6189    // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
6190    // If the higher bits are set, it adds new module flag for swift info.
6191    if (ID->getString() == "Objective-C Garbage Collection") {
6192      auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6193      if (Md) {
6194        assert(Md->getValue() && "Expected non-empty metadata");
6195        auto Type = Md->getValue()->getType();
6196        if (Type == Int8Ty)
6197          continue;
// Upper bytes encode Swift version info: [major][minor][abi][gc].
6198        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6199        if ((Val & 0xff) != Val) {
6200          HasSwiftVersionFlag = true;
6201          SwiftABIVersion = (Val & 0xff00) >> 8;
6202          SwiftMajorVersion = (Val & 0xff000000) >> 24;
6203          SwiftMinorVersion = (Val & 0xff0000) >> 16;
6204        }
6205        Metadata *Ops[3] = {
6207            Op->getOperand(1),
6208            ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
6209        ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6210        Changed = true;
6211      }
6212    }
6213
// Rename the old AMDGPU flag key to its current spelling.
6214    if (ID->getString() == "amdgpu_code_object_version") {
6215      Metadata *Ops[3] = {
6216          Op->getOperand(0),
6217          MDString::get(M.getContext(), "amdhsa_code_object_version"),
6218          Op->getOperand(2)};
6219      ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6220      Changed = true;
6221    }
6222  }
6223
6224  // "Objective-C Class Properties" is recently added for Objective-C. We
6225  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
6226  // flag of value 0, so we can correclty downgrade this flag when trying to
6227  // link an ObjC bitcode without this module flag with an ObjC bitcode with
6228  // this module flag.
6229  if (HasObjCFlag && !HasClassProperties) {
6230    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6231                    (uint32_t)0);
6232    Changed = true;
6233  }
6234
6235  if (HasSwiftVersionFlag) {
6236    M.addModuleFlag(Module::Error, "Swift ABI Version",
6237                    SwiftABIVersion);
6238    M.addModuleFlag(Module::Error, "Swift Major Version",
6239                    ConstantInt::get(Int8Ty, SwiftMajorVersion));
6240    M.addModuleFlag(Module::Error, "Swift Minor Version",
6241                    ConstantInt::get(Int8Ty, SwiftMinorVersion));
6242    Changed = true;
6243  }
6244
6245  return Changed;
6246}
6247
// Normalize __objc_catlist section strings on globals by stripping spaces
// around the comma-separated components, so differently-spelled but
// equivalent sections compare equal.
// NOTE(review): the signature line (original 6248) was elided by extraction.
6249  auto TrimSpaces = [](StringRef Section) -> std::string {
6250    SmallVector<StringRef, 5> Components;
6251    Section.split(Components, ',');
6252
6253    SmallString<32> Buffer;
6254    raw_svector_ostream OS(Buffer);
6255
// Emit ",<trimmed>" per component, then drop the leading comma below.
6256    for (auto Component : Components)
6257      OS << ',' << Component.trim();
6258
6259    return std::string(OS.str().substr(1));
6260  };
6261
6262  for (auto &GV : M.globals()) {
6263    if (!GV.hasSection())
6264      continue;
6265
6266    StringRef Section = GV.getSection();
6267
6268    if (!Section.starts_with("__DATA, __objc_catlist"))
6269      continue;
6270
6271    // __DATA, __objc_catlist, regular, no_dead_strip
6272    // __DATA,__objc_catlist,regular,no_dead_strip
6273    GV.setSection(TrimSpaces(Section));
6274  }
6275}
6276
6277namespace {
6278// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6279// callsites within a function that did not also have the strictfp attribute.
6280// Since 10.0, if strict FP semantics are needed within a function, the
6281// function must have the strictfp attribute and all calls within the function
6282// must also have the strictfp attribute. This latter restriction is
6283// necessary to prevent unwanted libcall simplification when a function is
6284// being cloned (such as for inlining).
6285//
6286// The "dangling" strictfp attribute usage was only used to prevent constant
6287// folding and other libcall simplification. The nobuiltin attribute on the
6288// callsite has the same effect.
// Visitor that downgrades callsite-only strictfp attributes (legal before
// LLVM 10) to nobuiltin, preserving the libcall-simplification blocking
// effect. NOTE(review): the check of the enclosing function's strictfp
// attribute (original line 6295) was elided by extraction.
6289struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6290  StrictFPUpgradeVisitor() = default;
6291
6292  void visitCallBase(CallBase &Call) {
6293    if (!Call.isStrictFP())
6294      return;
6296      return;
6297    // If we get here, the caller doesn't have the strictfp attribute
6298    // but this callsite does. Replace the strictfp attribute with nobuiltin.
6299    Call.removeFnAttr(Attribute::StrictFP);
6300    Call.addFnAttr(Attribute::NoBuiltin);
6301  }
6302};
6303
6304/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6305struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6306 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6307 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6308
6309 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6310 if (!RMW.isFloatingPointOperation())
6311 return;
6312
6313 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6314 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6315 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6316 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6317 }
6318};
6319} // namespace
6320
// Upgrade legacy function attributes: strictfp callsites, type-incompatible
// return/param attributes, string attributes that became enum or section
// properties, and denormal-fp-math string attributes.
// NOTE(review): the signature line (original 6321) was elided by extraction.
6322  // If a function definition doesn't have the strictfp attribute,
6323  // convert any callsite strictfp attributes to nobuiltin.
6324  if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6325    StrictFPUpgradeVisitor SFPV;
6326    SFPV.visit(F);
6327  }
6328
6329  // Remove all incompatibile attributes from function.
6330  F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6331      F.getReturnType(), F.getAttributes().getRetAttrs()));
6332  for (auto &Arg : F.args())
6333    Arg.removeAttrs(
6334        AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6335
// Batch attribute additions/removals and apply them once at the end.
6336  bool AddingAttrs = false, RemovingAttrs = false;
6337  AttrBuilder AttrsToAdd(F.getContext());
6338  AttributeMask AttrsToRemove;
6339
6340  // Older versions of LLVM treated an "implicit-section-name" attribute
6341  // similarly to directly setting the section on a Function.
6342  if (Attribute A = F.getFnAttribute("implicit-section-name");
6343      A.isValid() && A.isStringAttribute()) {
6344    F.setSection(A.getValueAsString());
6345    AttrsToRemove.addAttribute("implicit-section-name");
6346    RemovingAttrs = true;
6347  }
6348
// String "nooutline" becomes the enum NoOutline attribute.
6349  if (Attribute A = F.getFnAttribute("nooutline");
6350      A.isValid() && A.isStringAttribute()) {
6351    AttrsToRemove.addAttribute("nooutline");
6352    AttrsToAdd.addAttribute(Attribute::NoOutline);
6353    AddingAttrs = RemovingAttrs = true;
6354  }
6355
// "uniform-work-group-size"="true" becomes a valueless attribute; "false"
// is simply dropped.
6356  if (Attribute A = F.getFnAttribute("uniform-work-group-size");
6357      A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
6358    AttrsToRemove.addAttribute("uniform-work-group-size");
6359    RemovingAttrs = true;
6360    if (A.getValueAsString() == "true") {
6361      AttrsToAdd.addAttribute("uniform-work-group-size");
6362      AddingAttrs = true;
6363    }
6364  }
6365
6366  if (!F.empty()) {
6367    // For some reason this is called twice, and the first time is before any
6368    // instructions are loaded into the body.
6369
6370    if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6371        A.isValid()) {
6372
6373      if (A.getValueAsBool()) {
6374        AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6375        Visitor.visit(F);
6376      }
6377
6378      // We will leave behind dead attribute uses on external declarations, but
6379      // clang never added these to declarations anyway.
6380      AttrsToRemove.addAttribute("amdgpu-unsafe-fp-atomics");
6381      RemovingAttrs = true;
6382    }
6383  }
6384
// Fold the two string denormal attributes into a single denormal-fp-env
// attribute; unparsed/invalid values are left untouched.
6385  DenormalMode DenormalFPMath = DenormalMode::getIEEE();
6386  DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();
6387
6388  bool HandleDenormalMode = false;
6389
6390  if (Attribute Attr = F.getFnAttribute("denormal-fp-math"); Attr.isValid()) {
6391    DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6392    if (ParsedMode.isValid()) {
6393      DenormalFPMath = ParsedMode;
6394      AttrsToRemove.addAttribute("denormal-fp-math");
6395      AddingAttrs = RemovingAttrs = true;
6396      HandleDenormalMode = true;
6397    }
6398  }
6399
6400  if (Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
6401      Attr.isValid()) {
6402    DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6403    if (ParsedMode.isValid()) {
6404      DenormalFPMathF32 = ParsedMode;
6405      AttrsToRemove.addAttribute("denormal-fp-math-f32");
6406      AddingAttrs = RemovingAttrs = true;
6407      HandleDenormalMode = true;
6408    }
6409  }
6410
6411  if (HandleDenormalMode)
6412    AttrsToAdd.addDenormalFPEnvAttr(
6413        DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));
6414
6415  if (RemovingAttrs)
6416    F.removeFnAttrs(AttrsToRemove);
6417
6418  if (AddingAttrs)
6419    F.addFnAttrs(AttrsToAdd);
6420}
6421
6422// Check if the function attribute is not present and set it.
// NOTE(review): the first line of the signature (original 6423, taking
// Function &F and StringRef FnAttrName) was elided by extraction.
6424                                    StringRef Value) {
6425  if (!F.hasFnAttribute(FnAttrName))
6426    F.addFnAttr(FnAttrName, Value);
6427}
6428
6429// Check if the function attribute is not present and set it if needed.
6430// If the attribute is "false" then removes it.
6431// If the attribute is "true" resets it to a valueless attribute.
6432static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6433 if (!F.hasFnAttribute(FnAttrName)) {
6434 if (Set)
6435 F.addFnAttr(FnAttrName);
6436 } else {
6437 auto A = F.getFnAttribute(FnAttrName);
6438 if ("false" == A.getValueAsString())
6439 F.removeFnAttr(FnAttrName);
6440 else if ("true" == A.getValueAsString()) {
6441 F.removeFnAttr(FnAttrName);
6442 F.addFnAttr(FnAttrName);
6443 }
6444 }
6445}
6446
// Push legacy ARM/AArch64 branch-protection module flags down onto function
// attributes, then re-emit the flags with value 2 (already-upgraded marker);
// a flag already at 2 means the module was upgraded and we bail out early.
// NOTE(review): the signature line (original 6447) was elided by extraction.
6448  Triple T(M.getTargetTriple());
6449  if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6450    return;
6451
6452  uint64_t BTEValue = 0;
6453  uint64_t BPPLRValue = 0;
6454  uint64_t GCSValue = 0;
6455  uint64_t SRAValue = 0;
6456  uint64_t SRAALLValue = 0;
6457  uint64_t SRABKeyValue = 0;
6458
6459  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6460  if (ModFlags) {
6461    for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6462      MDNode *Op = ModFlags->getOperand(I);
6463      if (Op->getNumOperands() != 3)
6464        continue;
6465
6466      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6467      auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6468      if (!ID || !CI)
6469        continue;
6470
// Route each recognized flag's value into its tracking variable.
6471      StringRef IDStr = ID->getString();
6472      uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6473                         : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6474                         : IDStr == "guarded-control-stack" ? &GCSValue
6475                         : IDStr == "sign-return-address" ? &SRAValue
6476                         : IDStr == "sign-return-address-all" ? &SRAALLValue
6477                         : IDStr == "sign-return-address-with-bkey"
6478                             ? &SRABKeyValue
6479                             : nullptr;
6480      if (!ValPtr)
6481        continue;
6482
// Value 2 marks an already-upgraded module: nothing to do.
6483      *ValPtr = CI->getZExtValue();
6484      if (*ValPtr == 2)
6485        return;
6486    }
6487  }
6488
6489  bool BTE = BTEValue == 1;
6490  bool BPPLR = BPPLRValue == 1;
6491  bool GCS = GCSValue == 1;
6492  bool SRA = SRAValue == 1;
6493
6494  StringRef SignTypeValue = "non-leaf";
6495  if (SRA && SRAALLValue == 1)
6496    SignTypeValue = "all";
6497
6498  StringRef SignKeyValue = "a_key";
6499  if (SRA && SRABKeyValue == 1)
6500    SignKeyValue = "b_key";
6501
6502  for (Function &F : M.getFunctionList()) {
6503    if (F.isDeclaration())
6504      continue;
6505
6506    if (SRA) {
6507      setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6508      setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6509    } else {
// Without module-level SRA, an explicit "none" attribute is redundant.
6510      if (auto A = F.getFnAttribute("sign-return-address");
6511          A.isValid() && "none" == A.getValueAsString()) {
6512        F.removeFnAttr("sign-return-address");
6513        F.removeFnAttr("sign-return-address-key");
6514      }
6515    }
6516    ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6517    ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6518    ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6519  }
6520
// Stamp the module flags with 2 so this upgrade is not re-applied.
6521  if (BTE)
6522    M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6523  if (BPPLR)
6524    M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6525  if (GCS)
6526    M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6527  if (SRA) {
6528    M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6529    if (SRAALLValue == 1)
6530      M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6531    if (SRABKeyValue == 1)
6532      M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6533  }
6534}
6535
6536static bool isOldLoopArgument(Metadata *MD) {
6537 auto *T = dyn_cast_or_null<MDTuple>(MD);
6538 if (!T)
6539 return false;
6540 if (T->getNumOperands() < 1)
6541 return false;
6542 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6543 if (!S)
6544 return false;
6545 return S->getString().starts_with("llvm.vectorizer.");
6546}
6547
// Map a legacy "llvm.vectorizer.*" loop-metadata tag onto its modern
// "llvm.loop.*" spelling ("unroll" becomes interleave.count; everything else
// keeps its suffix under "llvm.loop.vectorize.").
// NOTE(review): the signature line (original 6548) was elided by extraction.
6549  StringRef OldPrefix = "llvm.vectorizer.";
6550  assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6551
6552  if (OldTag == "llvm.vectorizer.unroll")
6553    return MDString::get(C, "llvm.loop.interleave.count");
6554
6555  return MDString::get(
6556      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6557             .str());
6558}
6559
// If MD is a tuple carrying a legacy "llvm.vectorizer." tag, rebuild it with
// the upgraded tag; otherwise return MD unchanged.
// NOTE(review): the signature (original 6560) and the Ops vector declaration
// (6573) were elided by extraction.
6561  auto *T = dyn_cast_or_null<MDTuple>(MD);
6562  if (!T)
6563    return MD;
6564  if (T->getNumOperands() < 1)
6565    return MD;
6566  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6567  if (!OldTag)
6568    return MD;
6569  if (!OldTag->getString().starts_with("llvm.vectorizer."))
6570    return MD;
6571
6572  // This has an old tag.  Upgrade it.
6574  Ops.reserve(T->getNumOperands());
6575  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6576  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6577    Ops.push_back(T->getOperand(I));
6578
6579  return MDTuple::get(T->getContext(), Ops);
6580}
6581
// Upgrade a !llvm.loop attachment: if any operand still uses the legacy
// "llvm.vectorizer." tags, rebuild the tuple with each operand upgraded;
// otherwise return the node unmodified.
// NOTE(review): the signature (original 6582) and the Ops vector declaration
// (6590) were elided by extraction.
6583  auto *T = dyn_cast<MDTuple>(&N);
6584  if (!T)
6585    return &N;
6586
6587  if (none_of(T->operands(), isOldLoopArgument))
6588    return &N;
6589
6591  Ops.reserve(T->getNumOperands());
6592  for (Metadata *MD : T->operands())
6593    Ops.push_back(upgradeLoopArgument(MD));
6594
6595  return MDTuple::get(T->getContext(), Ops);
6596}
6597
// Upgrade a target data layout string to the current expectations for the
// given triple, returning the (possibly rewritten) layout string.
// NOTE(review): the signature (original 6598) and the Regex match-group
// SmallVector declarations (6668, 6718) were elided by extraction.
6599  Triple T(TT);
6600  // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6601  // the address space of globals to 1. This does not apply to SPIRV Logical.
6602  if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6603      !DL.contains("-G") && !DL.starts_with("G")) {
6604    return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6605  }
6606
6607  if (T.isLoongArch64() || T.isRISCV64()) {
6608    // Make i32 a native type for 64-bit LoongArch and RISC-V.
6609    auto I = DL.find("-n64-");
6610    if (I != StringRef::npos)
6611      return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6612    return DL.str();
6613  }
6614
6615  // AMDGPU data layout upgrades.
6616  std::string Res = DL.str();
6617  if (T.isAMDGPU()) {
6618    // Define address spaces for constants.
6619    if (!DL.contains("-G") && !DL.starts_with("G"))
6620      Res.append(Res.empty() ? "G1" : "-G1");
6621
6622    // AMDGCN data layout upgrades.
6623    if (T.isAMDGCN()) {
6624
6625      // Add missing non-integral declarations.
6626      // This goes before adding new address spaces to prevent incoherent string
6627      // values.
6628      if (!DL.contains("-ni") && !DL.starts_with("ni"))
6629        Res.append("-ni:7:8:9");
6630      // Update ni:7 to ni:7:8:9.
6631      if (DL.ends_with("ni:7"))
6632        Res.append(":8:9");
6633      if (DL.ends_with("ni:7:8"))
6634        Res.append(":9");
6635
6636      // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6637      // resources) An empty data layout has already been upgraded to G1 by now.
6638      if (!DL.contains("-p7") && !DL.starts_with("p7"))
6639        Res.append("-p7:160:256:256:32");
6640      if (!DL.contains("-p8") && !DL.starts_with("p8"))
6641        Res.append("-p8:128:128:128:48");
6642      constexpr StringRef OldP8("-p8:128:128-");
6643      if (DL.contains(OldP8))
6644        Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6645      if (!DL.contains("-p9") && !DL.starts_with("p9"))
6646        Res.append("-p9:192:256:256:32");
6647    }
6648
6649    // Upgrade the ELF mangling mode.
6650    if (!DL.contains("m:e"))
6651      Res = Res.empty() ? "m:e" : "m:e-" + Res;
6652
6653    return Res;
6654  }
6655
6656  if (T.isSystemZ() && !DL.empty()) {
6657    // Make sure the stack alignment is present.
6658    if (!DL.contains("-S64"))
6659      return "E-S64" + DL.drop_front(1).str();
6660    return DL.str();
6661  }
6662
// Shared helper (AArch64 and X86): splice the p270/p271/p272 pointer
// address spaces into a layout of the expected shape.
6663  auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6664    // If the datalayout matches the expected format, add pointer size address
6665    // spaces to the datalayout.
6666    StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6667    if (!DL.contains(AddrSpaces)) {
6669      Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6670      if (R.match(Res, &Groups))
6671        Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6672    }
6673  };
6674
6675  // AArch64 data layout upgrades.
6676  if (T.isAArch64()) {
6677    // Add "-Fn32"
6678    if (!DL.empty() && !DL.contains("-Fn32"))
6679      Res.append("-Fn32");
6680    AddPtr32Ptr64AddrSpaces();
6681    return Res;
6682  }
6683
6684  if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6685      T.isWasm()) {
6686    // Mips64 with o32 ABI did not add "-i128:128".
6687    // Add "-i128:128"
6688    std::string I64 = "-i64:64";
6689    std::string I128 = "-i128:128";
6690    if (!StringRef(Res).contains(I128)) {
6691      size_t Pos = Res.find(I64);
6692      if (Pos != size_t(-1))
6693        Res.insert(Pos + I64.size(), I128);
6694    }
6695  }
6696
// AIX PowerPC: insert f64 alignment before the stack-alignment component
// (or at the end when -S128 is absent).
6697  if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6698    size_t Pos = Res.find("-S128");
6699    if (Pos == StringRef::npos)
6700      Pos = Res.size();
6701    Res.insert(Pos, "-f64:32:64");
6702  }
6703
6704  if (!T.isX86())
6705    return Res;
6706
6707  AddPtr32Ptr64AddrSpaces();
6708
6709  // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6710  // for i128 operations prior to this being reflected in the data layout, and
6711  // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6712  // boundaries, so although this is a breaking change, the upgrade is expected
6713  // to fix more IR than it breaks.
6714  // Intel MCU is an exception and uses 4-byte-alignment.
6715  if (!T.isOSIAMCU()) {
6716    std::string I128 = "-i128:128";
6717    if (StringRef Ref = Res; !Ref.contains(I128)) {
6719      Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6720      if (R.match(Res, &Groups))
6721        Res = (Groups[1] + I128 + Groups[3]).str();
6722    }
6723  }
6724
6725  // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6726  // Raising the alignment is safe because Clang did not produce f80 values in
6727  // the MSVC environment before this upgrade was added.
6728  if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6729    StringRef Ref = Res;
6730    auto I = Ref.find("-f80:32-");
6731    if (I != StringRef::npos)
6732      Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6733  }
6734
6735  return Res;
6736}
6737
6738void llvm::UpgradeAttributes(AttrBuilder &B) {
6739 StringRef FramePointer;
6740 Attribute A = B.getAttribute("no-frame-pointer-elim");
6741 if (A.isValid()) {
6742 // The value can be "true" or "false".
6743 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6744 B.removeAttribute("no-frame-pointer-elim");
6745 }
6746 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6747 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6748 if (FramePointer != "all")
6749 FramePointer = "non-leaf";
6750 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6751 }
6752 if (!FramePointer.empty())
6753 B.addAttribute("frame-pointer", FramePointer);
6754
6755 A = B.getAttribute("null-pointer-is-valid");
6756 if (A.isValid()) {
6757 // The value can be "true" or "false".
6758 bool NullPointerIsValid = A.getValueAsString() == "true";
6759 B.removeAttribute("null-pointer-is-valid");
6760 if (NullPointerIsValid)
6761 B.addAttribute(Attribute::NullPointerIsValid);
6762 }
6763
6764 A = B.getAttribute("uniform-work-group-size");
6765 if (A.isValid()) {
6766 StringRef Val = A.getValueAsString();
6767 if (!Val.empty()) {
6768 bool IsTrue = Val == "true";
6769 B.removeAttribute("uniform-work-group-size");
6770 if (IsTrue)
6771 B.addAttribute("uniform-work-group-size");
6772 }
6773 }
6774}
6775
6776void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6777 // clang.arc.attachedcall bundles are now required to have an operand.
6778 // If they don't, it's okay to drop them entirely: when there is an operand,
6779 // the "attachedcall" is meaningful and required, but without an operand,
6780 // it's just a marker NOP. Dropping it merely prevents an optimization.
6781 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6782 return OBD.getTag() == "clang.arc.attachedcall" &&
6783 OBD.inputs().empty();
6784 });
6785}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define R2(n)
This file contains the declarations for metadata subclasses.
#define T
#define T1
NVPTX address space definition.
uint64_t High
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:487
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DWARF expression.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setApproxFunc(bool B=true)
Definition FMF.h:96
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:246
const Function & getFunction() const
Definition Function.h:166
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Function.cpp:449
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
Argument * getArg(unsigned i) const
Definition Function.h:886
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:622
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2811
Base class for instruction visitors.
Definition InstVisitor.h:78
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
LLVMContext & getContext() const
Definition Metadata.h:1244
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
A single uniqued string.
Definition Metadata.h:722
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1529
Metadata wrapper in the Value hierarchy.
Definition Metadata.h:184
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition Module.h:117
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition Module.h:138
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition Module.h:120
@ Min
Takes the min of the two values, which are required to be integers.
Definition Module.h:152
@ Max
Takes the max of the two values, which are required to be integers.
Definition Module.h:149
A tuple of MDNodes.
Definition Metadata.h:1760
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
Definition Metadata.h:1856
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
StringRef getTag() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition Type.cpp:895
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
static constexpr size_t npos
Definition StringRef.h:57
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition StringRef.h:844
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:483
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:311
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:287
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:288
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:397
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
iterator_range< user_iterator > users()
Definition Value.h:427
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:713
bool use_empty() const
Definition Value.h:347
bool hasName() const
Definition Value.h:262
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > Tys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &ArgTys)
Gets the type arguments of an intrinsic call by matching type contraints specified by the ....
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
Definition Metadata.h:709
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
@ Offset
Definition DWP.cpp:532
@ Length
Definition DWP.cpp:532
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:296
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
Op::Description Desc
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:302
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:189
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
@ Default
The result values are uniform if and only if all operands are uniform.
Definition Uniformity.h:20
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ DEBUG_METADATA_VERSION
Definition Metadata.h:54
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106