LLVM 23.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>
58
59using namespace llvm;
60
// Command-line escape hatch: when set, the debug-info autoupgrade path is
// skipped entirely (the flag's consumers are elsewhere in this file).
static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));
64
65static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
66
67// Report a fatal error along with the
68// Call Instruction which caused the error
69[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
70 CallBase *CI) {
71 CI->print(llvm::errs());
72 llvm::errs() << "\n";
74}
75
76// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
77// changed their type from v4f32 to v2i64.
79 Function *&NewFn) {
80 // Check whether this is an old version of the function, which received
81 // v4f32 arguments.
82 Type *Arg0Type = F->getFunctionType()->getParamType(0);
83 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
84 return false;
85
86 // Yes, it's old, replace it with new version.
87 rename(F);
88 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
89 return true;
90}
91
92// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
93// arguments have changed their type from i32 to i8.
95 Function *&NewFn) {
96 // Check that the last argument is an i32.
97 Type *LastArgType = F->getFunctionType()->getParamType(
98 F->getFunctionType()->getNumParams() - 1);
99 if (!LastArgType->isIntegerTy(32))
100 return false;
101
102 // Move this function aside and map down.
103 rename(F);
104 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
105 return true;
106}
107
108// Upgrade the declaration of fp compare intrinsics that change return type
109// from scalar to vXi1 mask.
111 Function *&NewFn) {
112 // Check if the return type is a vector.
113 if (F->getReturnType()->isVectorTy())
114 return false;
115
116 rename(F);
117 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
118 return true;
119}
120
121// Upgrade the declaration of multiply and add bytes intrinsics whose input
122// arguments' types have changed from vectors of i32 to vectors of i8
124 Function *&NewFn) {
125 // check if input argument type is a vector of i8
126 Type *Arg1Type = F->getFunctionType()->getParamType(1);
127 Type *Arg2Type = F->getFunctionType()->getParamType(2);
128 if (Arg1Type->isVectorTy() &&
129 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
130 Arg2Type->isVectorTy() &&
131 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
132 return false;
133
134 rename(F);
135 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
136 return true;
137}
138
139// Upgrade the declaration of multipy and add words intrinsics whose input
140// arguments' types have changed to vectors of i32 to vectors of i16
142 Function *&NewFn) {
143 // check if input argument type is a vector of i16
144 Type *Arg1Type = F->getFunctionType()->getParamType(1);
145 Type *Arg2Type = F->getFunctionType()->getParamType(2);
146 if (Arg1Type->isVectorTy() &&
147 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
148 Arg2Type->isVectorTy() &&
149 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
150 return false;
151
152 rename(F);
153 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
154 return true;
155}
156
158 Function *&NewFn) {
159 if (F->getReturnType()->getScalarType()->isBFloatTy())
160 return false;
161
162 rename(F);
163 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
164 return true;
165}
166
168 Function *&NewFn) {
169 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
170 return false;
171
172 rename(F);
173 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
174 return true;
175}
176
178 // All of the intrinsics matches below should be marked with which llvm
179 // version started autoupgrading them. At some point in the future we would
180 // like to use this information to remove upgrade code for some older
181 // intrinsics. It is currently undecided how we will determine that future
182 // point.
183 if (Name.consume_front("avx."))
184 return (Name.starts_with("blend.p") || // Added in 3.7
185 Name == "cvt.ps2.pd.256" || // Added in 3.9
186 Name == "cvtdq2.pd.256" || // Added in 3.9
187 Name == "cvtdq2.ps.256" || // Added in 7.0
188 Name.starts_with("movnt.") || // Added in 3.2
189 Name.starts_with("sqrt.p") || // Added in 7.0
190 Name.starts_with("storeu.") || // Added in 3.9
191 Name.starts_with("vbroadcast.s") || // Added in 3.5
192 Name.starts_with("vbroadcastf128") || // Added in 4.0
193 Name.starts_with("vextractf128.") || // Added in 3.7
194 Name.starts_with("vinsertf128.") || // Added in 3.7
195 Name.starts_with("vperm2f128.") || // Added in 6.0
196 Name.starts_with("vpermil.")); // Added in 3.1
197
198 if (Name.consume_front("avx2."))
199 return (Name == "movntdqa" || // Added in 5.0
200 Name.starts_with("pabs.") || // Added in 6.0
201 Name.starts_with("padds.") || // Added in 8.0
202 Name.starts_with("paddus.") || // Added in 8.0
203 Name.starts_with("pblendd.") || // Added in 3.7
204 Name == "pblendw" || // Added in 3.7
205 Name.starts_with("pbroadcast") || // Added in 3.8
206 Name.starts_with("pcmpeq.") || // Added in 3.1
207 Name.starts_with("pcmpgt.") || // Added in 3.1
208 Name.starts_with("pmax") || // Added in 3.9
209 Name.starts_with("pmin") || // Added in 3.9
210 Name.starts_with("pmovsx") || // Added in 3.9
211 Name.starts_with("pmovzx") || // Added in 3.9
212 Name == "pmul.dq" || // Added in 7.0
213 Name == "pmulu.dq" || // Added in 7.0
214 Name.starts_with("psll.dq") || // Added in 3.7
215 Name.starts_with("psrl.dq") || // Added in 3.7
216 Name.starts_with("psubs.") || // Added in 8.0
217 Name.starts_with("psubus.") || // Added in 8.0
218 Name.starts_with("vbroadcast") || // Added in 3.8
219 Name == "vbroadcasti128" || // Added in 3.7
220 Name == "vextracti128" || // Added in 3.7
221 Name == "vinserti128" || // Added in 3.7
222 Name == "vperm2i128"); // Added in 6.0
223
224 if (Name.consume_front("avx512.")) {
225 if (Name.consume_front("mask."))
226 // 'avx512.mask.*'
227 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
228 Name.starts_with("and.") || // Added in 3.9
229 Name.starts_with("andn.") || // Added in 3.9
230 Name.starts_with("broadcast.s") || // Added in 3.9
231 Name.starts_with("broadcastf32x4.") || // Added in 6.0
232 Name.starts_with("broadcastf32x8.") || // Added in 6.0
233 Name.starts_with("broadcastf64x2.") || // Added in 6.0
234 Name.starts_with("broadcastf64x4.") || // Added in 6.0
235 Name.starts_with("broadcasti32x4.") || // Added in 6.0
236 Name.starts_with("broadcasti32x8.") || // Added in 6.0
237 Name.starts_with("broadcasti64x2.") || // Added in 6.0
238 Name.starts_with("broadcasti64x4.") || // Added in 6.0
239 Name.starts_with("cmp.b") || // Added in 5.0
240 Name.starts_with("cmp.d") || // Added in 5.0
241 Name.starts_with("cmp.q") || // Added in 5.0
242 Name.starts_with("cmp.w") || // Added in 5.0
243 Name.starts_with("compress.b") || // Added in 9.0
244 Name.starts_with("compress.d") || // Added in 9.0
245 Name.starts_with("compress.p") || // Added in 9.0
246 Name.starts_with("compress.q") || // Added in 9.0
247 Name.starts_with("compress.store.") || // Added in 7.0
248 Name.starts_with("compress.w") || // Added in 9.0
249 Name.starts_with("conflict.") || // Added in 9.0
250 Name.starts_with("cvtdq2pd.") || // Added in 4.0
251 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
252 Name == "cvtpd2dq.256" || // Added in 7.0
253 Name == "cvtpd2ps.256" || // Added in 7.0
254 Name == "cvtps2pd.128" || // Added in 7.0
255 Name == "cvtps2pd.256" || // Added in 7.0
256 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
257 Name == "cvtqq2ps.256" || // Added in 9.0
258 Name == "cvtqq2ps.512" || // Added in 9.0
259 Name == "cvttpd2dq.256" || // Added in 7.0
260 Name == "cvttps2dq.128" || // Added in 7.0
261 Name == "cvttps2dq.256" || // Added in 7.0
262 Name.starts_with("cvtudq2pd.") || // Added in 4.0
263 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
264 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
265 Name == "cvtuqq2ps.256" || // Added in 9.0
266 Name == "cvtuqq2ps.512" || // Added in 9.0
267 Name.starts_with("dbpsadbw.") || // Added in 7.0
268 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
269 Name.starts_with("expand.b") || // Added in 9.0
270 Name.starts_with("expand.d") || // Added in 9.0
271 Name.starts_with("expand.load.") || // Added in 7.0
272 Name.starts_with("expand.p") || // Added in 9.0
273 Name.starts_with("expand.q") || // Added in 9.0
274 Name.starts_with("expand.w") || // Added in 9.0
275 Name.starts_with("fpclass.p") || // Added in 7.0
276 Name.starts_with("insert") || // Added in 4.0
277 Name.starts_with("load.") || // Added in 3.9
278 Name.starts_with("loadu.") || // Added in 3.9
279 Name.starts_with("lzcnt.") || // Added in 5.0
280 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
281 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
282 Name.starts_with("movddup") || // Added in 3.9
283 Name.starts_with("move.s") || // Added in 4.0
284 Name.starts_with("movshdup") || // Added in 3.9
285 Name.starts_with("movsldup") || // Added in 3.9
286 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
287 Name.starts_with("or.") || // Added in 3.9
288 Name.starts_with("pabs.") || // Added in 6.0
289 Name.starts_with("packssdw.") || // Added in 5.0
290 Name.starts_with("packsswb.") || // Added in 5.0
291 Name.starts_with("packusdw.") || // Added in 5.0
292 Name.starts_with("packuswb.") || // Added in 5.0
293 Name.starts_with("padd.") || // Added in 4.0
294 Name.starts_with("padds.") || // Added in 8.0
295 Name.starts_with("paddus.") || // Added in 8.0
296 Name.starts_with("palignr.") || // Added in 3.9
297 Name.starts_with("pand.") || // Added in 3.9
298 Name.starts_with("pandn.") || // Added in 3.9
299 Name.starts_with("pavg") || // Added in 6.0
300 Name.starts_with("pbroadcast") || // Added in 6.0
301 Name.starts_with("pcmpeq.") || // Added in 3.9
302 Name.starts_with("pcmpgt.") || // Added in 3.9
303 Name.starts_with("perm.df.") || // Added in 3.9
304 Name.starts_with("perm.di.") || // Added in 3.9
305 Name.starts_with("permvar.") || // Added in 7.0
306 Name.starts_with("pmaddubs.w.") || // Added in 7.0
307 Name.starts_with("pmaddw.d.") || // Added in 7.0
308 Name.starts_with("pmax") || // Added in 4.0
309 Name.starts_with("pmin") || // Added in 4.0
310 Name == "pmov.qd.256" || // Added in 9.0
311 Name == "pmov.qd.512" || // Added in 9.0
312 Name == "pmov.wb.256" || // Added in 9.0
313 Name == "pmov.wb.512" || // Added in 9.0
314 Name.starts_with("pmovsx") || // Added in 4.0
315 Name.starts_with("pmovzx") || // Added in 4.0
316 Name.starts_with("pmul.dq.") || // Added in 4.0
317 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
318 Name.starts_with("pmulh.w.") || // Added in 7.0
319 Name.starts_with("pmulhu.w.") || // Added in 7.0
320 Name.starts_with("pmull.") || // Added in 4.0
321 Name.starts_with("pmultishift.qb.") || // Added in 8.0
322 Name.starts_with("pmulu.dq.") || // Added in 4.0
323 Name.starts_with("por.") || // Added in 3.9
324 Name.starts_with("prol.") || // Added in 8.0
325 Name.starts_with("prolv.") || // Added in 8.0
326 Name.starts_with("pror.") || // Added in 8.0
327 Name.starts_with("prorv.") || // Added in 8.0
328 Name.starts_with("pshuf.b.") || // Added in 4.0
329 Name.starts_with("pshuf.d.") || // Added in 3.9
330 Name.starts_with("pshufh.w.") || // Added in 3.9
331 Name.starts_with("pshufl.w.") || // Added in 3.9
332 Name.starts_with("psll.d") || // Added in 4.0
333 Name.starts_with("psll.q") || // Added in 4.0
334 Name.starts_with("psll.w") || // Added in 4.0
335 Name.starts_with("pslli") || // Added in 4.0
336 Name.starts_with("psllv") || // Added in 4.0
337 Name.starts_with("psra.d") || // Added in 4.0
338 Name.starts_with("psra.q") || // Added in 4.0
339 Name.starts_with("psra.w") || // Added in 4.0
340 Name.starts_with("psrai") || // Added in 4.0
341 Name.starts_with("psrav") || // Added in 4.0
342 Name.starts_with("psrl.d") || // Added in 4.0
343 Name.starts_with("psrl.q") || // Added in 4.0
344 Name.starts_with("psrl.w") || // Added in 4.0
345 Name.starts_with("psrli") || // Added in 4.0
346 Name.starts_with("psrlv") || // Added in 4.0
347 Name.starts_with("psub.") || // Added in 4.0
348 Name.starts_with("psubs.") || // Added in 8.0
349 Name.starts_with("psubus.") || // Added in 8.0
350 Name.starts_with("pternlog.") || // Added in 7.0
351 Name.starts_with("punpckh") || // Added in 3.9
352 Name.starts_with("punpckl") || // Added in 3.9
353 Name.starts_with("pxor.") || // Added in 3.9
354 Name.starts_with("shuf.f") || // Added in 6.0
355 Name.starts_with("shuf.i") || // Added in 6.0
356 Name.starts_with("shuf.p") || // Added in 4.0
357 Name.starts_with("sqrt.p") || // Added in 7.0
358 Name.starts_with("store.b.") || // Added in 3.9
359 Name.starts_with("store.d.") || // Added in 3.9
360 Name.starts_with("store.p") || // Added in 3.9
361 Name.starts_with("store.q.") || // Added in 3.9
362 Name.starts_with("store.w.") || // Added in 3.9
363 Name == "store.ss" || // Added in 7.0
364 Name.starts_with("storeu.") || // Added in 3.9
365 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
366 Name.starts_with("ucmp.") || // Added in 5.0
367 Name.starts_with("unpckh.") || // Added in 3.9
368 Name.starts_with("unpckl.") || // Added in 3.9
369 Name.starts_with("valign.") || // Added in 4.0
370 Name == "vcvtph2ps.128" || // Added in 11.0
371 Name == "vcvtph2ps.256" || // Added in 11.0
372 Name.starts_with("vextract") || // Added in 4.0
373 Name.starts_with("vfmadd.") || // Added in 7.0
374 Name.starts_with("vfmaddsub.") || // Added in 7.0
375 Name.starts_with("vfnmadd.") || // Added in 7.0
376 Name.starts_with("vfnmsub.") || // Added in 7.0
377 Name.starts_with("vpdpbusd.") || // Added in 7.0
378 Name.starts_with("vpdpbusds.") || // Added in 7.0
379 Name.starts_with("vpdpwssd.") || // Added in 7.0
380 Name.starts_with("vpdpwssds.") || // Added in 7.0
381 Name.starts_with("vpermi2var.") || // Added in 7.0
382 Name.starts_with("vpermil.p") || // Added in 3.9
383 Name.starts_with("vpermilvar.") || // Added in 4.0
384 Name.starts_with("vpermt2var.") || // Added in 7.0
385 Name.starts_with("vpmadd52") || // Added in 7.0
386 Name.starts_with("vpshld.") || // Added in 7.0
387 Name.starts_with("vpshldv.") || // Added in 8.0
388 Name.starts_with("vpshrd.") || // Added in 7.0
389 Name.starts_with("vpshrdv.") || // Added in 8.0
390 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
391 Name.starts_with("xor.")); // Added in 3.9
392
393 if (Name.consume_front("mask3."))
394 // 'avx512.mask3.*'
395 return (Name.starts_with("vfmadd.") || // Added in 7.0
396 Name.starts_with("vfmaddsub.") || // Added in 7.0
397 Name.starts_with("vfmsub.") || // Added in 7.0
398 Name.starts_with("vfmsubadd.") || // Added in 7.0
399 Name.starts_with("vfnmsub.")); // Added in 7.0
400
401 if (Name.consume_front("maskz."))
402 // 'avx512.maskz.*'
403 return (Name.starts_with("pternlog.") || // Added in 7.0
404 Name.starts_with("vfmadd.") || // Added in 7.0
405 Name.starts_with("vfmaddsub.") || // Added in 7.0
406 Name.starts_with("vpdpbusd.") || // Added in 7.0
407 Name.starts_with("vpdpbusds.") || // Added in 7.0
408 Name.starts_with("vpdpwssd.") || // Added in 7.0
409 Name.starts_with("vpdpwssds.") || // Added in 7.0
410 Name.starts_with("vpermt2var.") || // Added in 7.0
411 Name.starts_with("vpmadd52") || // Added in 7.0
412 Name.starts_with("vpshldv.") || // Added in 8.0
413 Name.starts_with("vpshrdv.")); // Added in 8.0
414
415 // 'avx512.*'
416 return (Name == "movntdqa" || // Added in 5.0
417 Name == "pmul.dq.512" || // Added in 7.0
418 Name == "pmulu.dq.512" || // Added in 7.0
419 Name.starts_with("broadcastm") || // Added in 6.0
420 Name.starts_with("cmp.p") || // Added in 12.0
421 Name.starts_with("cvtb2mask.") || // Added in 7.0
422 Name.starts_with("cvtd2mask.") || // Added in 7.0
423 Name.starts_with("cvtmask2") || // Added in 5.0
424 Name.starts_with("cvtq2mask.") || // Added in 7.0
425 Name == "cvtusi2sd" || // Added in 7.0
426 Name.starts_with("cvtw2mask.") || // Added in 7.0
427 Name == "kand.w" || // Added in 7.0
428 Name == "kandn.w" || // Added in 7.0
429 Name == "knot.w" || // Added in 7.0
430 Name == "kor.w" || // Added in 7.0
431 Name == "kortestc.w" || // Added in 7.0
432 Name == "kortestz.w" || // Added in 7.0
433 Name.starts_with("kunpck") || // added in 6.0
434 Name == "kxnor.w" || // Added in 7.0
435 Name == "kxor.w" || // Added in 7.0
436 Name.starts_with("padds.") || // Added in 8.0
437 Name.starts_with("pbroadcast") || // Added in 3.9
438 Name.starts_with("prol") || // Added in 8.0
439 Name.starts_with("pror") || // Added in 8.0
440 Name.starts_with("psll.dq") || // Added in 3.9
441 Name.starts_with("psrl.dq") || // Added in 3.9
442 Name.starts_with("psubs.") || // Added in 8.0
443 Name.starts_with("ptestm") || // Added in 6.0
444 Name.starts_with("ptestnm") || // Added in 6.0
445 Name.starts_with("storent.") || // Added in 3.9
446 Name.starts_with("vbroadcast.s") || // Added in 7.0
447 Name.starts_with("vpshld.") || // Added in 8.0
448 Name.starts_with("vpshrd.")); // Added in 8.0
449 }
450
451 if (Name.consume_front("fma."))
452 return (Name.starts_with("vfmadd.") || // Added in 7.0
453 Name.starts_with("vfmsub.") || // Added in 7.0
454 Name.starts_with("vfmsubadd.") || // Added in 7.0
455 Name.starts_with("vfnmadd.") || // Added in 7.0
456 Name.starts_with("vfnmsub.")); // Added in 7.0
457
458 if (Name.consume_front("fma4."))
459 return Name.starts_with("vfmadd.s"); // Added in 7.0
460
461 if (Name.consume_front("sse."))
462 return (Name == "add.ss" || // Added in 4.0
463 Name == "cvtsi2ss" || // Added in 7.0
464 Name == "cvtsi642ss" || // Added in 7.0
465 Name == "div.ss" || // Added in 4.0
466 Name == "mul.ss" || // Added in 4.0
467 Name.starts_with("sqrt.p") || // Added in 7.0
468 Name == "sqrt.ss" || // Added in 7.0
469 Name.starts_with("storeu.") || // Added in 3.9
470 Name == "sub.ss"); // Added in 4.0
471
472 if (Name.consume_front("sse2."))
473 return (Name == "add.sd" || // Added in 4.0
474 Name == "cvtdq2pd" || // Added in 3.9
475 Name == "cvtdq2ps" || // Added in 7.0
476 Name == "cvtps2pd" || // Added in 3.9
477 Name == "cvtsi2sd" || // Added in 7.0
478 Name == "cvtsi642sd" || // Added in 7.0
479 Name == "cvtss2sd" || // Added in 7.0
480 Name == "div.sd" || // Added in 4.0
481 Name == "mul.sd" || // Added in 4.0
482 Name.starts_with("padds.") || // Added in 8.0
483 Name.starts_with("paddus.") || // Added in 8.0
484 Name.starts_with("pcmpeq.") || // Added in 3.1
485 Name.starts_with("pcmpgt.") || // Added in 3.1
486 Name == "pmaxs.w" || // Added in 3.9
487 Name == "pmaxu.b" || // Added in 3.9
488 Name == "pmins.w" || // Added in 3.9
489 Name == "pminu.b" || // Added in 3.9
490 Name == "pmulu.dq" || // Added in 7.0
491 Name.starts_with("pshuf") || // Added in 3.9
492 Name.starts_with("psll.dq") || // Added in 3.7
493 Name.starts_with("psrl.dq") || // Added in 3.7
494 Name.starts_with("psubs.") || // Added in 8.0
495 Name.starts_with("psubus.") || // Added in 8.0
496 Name.starts_with("sqrt.p") || // Added in 7.0
497 Name == "sqrt.sd" || // Added in 7.0
498 Name == "storel.dq" || // Added in 3.9
499 Name.starts_with("storeu.") || // Added in 3.9
500 Name == "sub.sd"); // Added in 4.0
501
502 if (Name.consume_front("sse41."))
503 return (Name.starts_with("blendp") || // Added in 3.7
504 Name == "movntdqa" || // Added in 5.0
505 Name == "pblendw" || // Added in 3.7
506 Name == "pmaxsb" || // Added in 3.9
507 Name == "pmaxsd" || // Added in 3.9
508 Name == "pmaxud" || // Added in 3.9
509 Name == "pmaxuw" || // Added in 3.9
510 Name == "pminsb" || // Added in 3.9
511 Name == "pminsd" || // Added in 3.9
512 Name == "pminud" || // Added in 3.9
513 Name == "pminuw" || // Added in 3.9
514 Name.starts_with("pmovsx") || // Added in 3.8
515 Name.starts_with("pmovzx") || // Added in 3.9
516 Name == "pmuldq"); // Added in 7.0
517
518 if (Name.consume_front("sse42."))
519 return Name == "crc32.64.8"; // Added in 3.4
520
521 if (Name.consume_front("sse4a."))
522 return Name.starts_with("movnt."); // Added in 3.9
523
524 if (Name.consume_front("ssse3."))
525 return (Name == "pabs.b.128" || // Added in 6.0
526 Name == "pabs.d.128" || // Added in 6.0
527 Name == "pabs.w.128"); // Added in 6.0
528
529 if (Name.consume_front("xop."))
530 return (Name == "vpcmov" || // Added in 3.8
531 Name == "vpcmov.256" || // Added in 5.0
532 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
533 Name.starts_with("vprot")); // Added in 8.0
534
535 return (Name == "addcarry.u32" || // Added in 8.0
536 Name == "addcarry.u64" || // Added in 8.0
537 Name == "addcarryx.u32" || // Added in 8.0
538 Name == "addcarryx.u64" || // Added in 8.0
539 Name == "subborrow.u32" || // Added in 8.0
540 Name == "subborrow.u64" || // Added in 8.0
541 Name.starts_with("vcvtph2ps.")); // Added in 11.0
542}
543
545 Function *&NewFn) {
546 // Only handle intrinsics that start with "x86.".
547 if (!Name.consume_front("x86."))
548 return false;
549
550 if (shouldUpgradeX86Intrinsic(F, Name)) {
551 NewFn = nullptr;
552 return true;
553 }
554
555 if (Name == "rdtscp") { // Added in 8.0
556 // If this intrinsic has 0 operands, it's the new version.
557 if (F->getFunctionType()->getNumParams() == 0)
558 return false;
559
560 rename(F);
561 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
562 Intrinsic::x86_rdtscp);
563 return true;
564 }
565
567
568 // SSE4.1 ptest functions may have an old signature.
569 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
571 .Case("c", Intrinsic::x86_sse41_ptestc)
572 .Case("z", Intrinsic::x86_sse41_ptestz)
573 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
576 return upgradePTESTIntrinsic(F, ID, NewFn);
577
578 return false;
579 }
580
581 // Several blend and other instructions with masks used the wrong number of
582 // bits.
583
584 // Added in 3.6
586 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
587 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
588 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
589 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
590 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
591 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
594 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
595
596 if (Name.consume_front("avx512.")) {
597 if (Name.consume_front("mask.cmp.")) {
598 // Added in 7.0
600 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
608 return upgradeX86MaskedFPCompare(F, ID, NewFn);
609 } else if (Name.starts_with("vpdpbusd.") ||
610 Name.starts_with("vpdpbusds.")) {
611 // Added in 21.1
613 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
614 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
615 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
616 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
617 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
618 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
621 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
622 } else if (Name.starts_with("vpdpwssd.") ||
623 Name.starts_with("vpdpwssds.")) {
624 // Added in 21.1
626 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
627 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
628 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
629 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
630 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
631 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
634 return upgradeX86MultiplyAddWords(F, ID, NewFn);
635 }
636 return false; // No other 'x86.avx512.*'.
637 }
638
639 if (Name.consume_front("avx2.")) {
640 if (Name.consume_front("vpdpb")) {
641 // Added in 21.1
643 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
644 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
645 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
646 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
647 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
648 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
649 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
650 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
651 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
652 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
653 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
654 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
657 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
658 } else if (Name.consume_front("vpdpw")) {
659 // Added in 21.1
661 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
662 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
663 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
664 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
665 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
666 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
667 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
668 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
669 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
670 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
671 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
672 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
675 return upgradeX86MultiplyAddWords(F, ID, NewFn);
676 }
677 return false; // No other 'x86.avx2.*'
678 }
679
680 if (Name.consume_front("avx10.")) {
681 if (Name.consume_front("vpdpb")) {
682 // Added in 21.1
684 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
685 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
686 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
687 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
688 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
689 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
692 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
693 } else if (Name.consume_front("vpdpw")) {
695 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
696 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
697 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
698 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
699 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
700 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
703 return upgradeX86MultiplyAddWords(F, ID, NewFn);
704 }
705 return false; // No other 'x86.avx10.*'
706 }
707
708 if (Name.consume_front("avx512bf16.")) {
709 // Added in 9.0
711 .Case("cvtne2ps2bf16.128",
712 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .Case("cvtne2ps2bf16.256",
714 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .Case("cvtne2ps2bf16.512",
716 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .Case("mask.cvtneps2bf16.128",
718 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .Case("cvtneps2bf16.256",
720 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .Case("cvtneps2bf16.512",
722 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
725 return upgradeX86BF16Intrinsic(F, ID, NewFn);
726
727 // Added in 9.0
729 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
734 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
735 return false; // No other 'x86.avx512bf16.*'.
736 }
737
738 if (Name.consume_front("xop.")) {
740 if (Name.starts_with("vpermil2")) { // Added in 3.9
741 // Upgrade any XOP PERMIL2 index operand still using a float/double
742 // vector.
743 auto Idx = F->getFunctionType()->getParamType(2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
753 else
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
755 }
756 } else if (F->arg_size() == 2)
757 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
759 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
760 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
762
764 rename(F);
765 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
766 return true;
767 }
768 return false; // No other 'x86.xop.*'
769 }
770
771 if (Name == "seh.recoverfp") {
772 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
773 Intrinsic::eh_recoverfp);
774 return true;
775 }
776
777 return false;
778}
779
780// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
781// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
// Maps deprecated '(arm|aarch64).*' intrinsic declarations onto their current
// replacements. On a match, either sets NewFn to the replacement declaration,
// or renames F / leaves NewFn untouched so the call itself is rewritten later
// in UpgradeIntrinsicCall (existing comments below mark which path applies).
// NOTE(review): this rendered listing omits several physical lines (the
// signature at 782, most StringSwitch heads and .Default() terminators), so
// statements that begin mid-expression are continuations of dropped lines —
// verify against upstream AutoUpgrade.cpp before editing.
 783 StringRef Name,
 784 Function *&NewFn) {
 785 if (Name.starts_with("rbit")) {
 786 // '(arm|aarch64).rbit'.
// rbit is the target-independent bitreverse, overloaded on the argument type.
 788 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
 789 return true;
 790 }
 791
 792 if (Name == "thread.pointer") {
 793 // '(arm|aarch64).thread.pointer'.
// Replaced by the generic llvm.thread.pointer, overloaded on the return type.
 795 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
 796 return true;
 797 }
 798
 799 bool Neon = Name.consume_front("neon.");
 800 if (Neon) {
 801 // '(arm|aarch64).neon.*'.
 802 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
 803 // v16i8 respectively.
 804 if (Name.consume_front("bfdot.")) {
 805 // (arm|aarch64).neon.bfdot.*'.
 808 .Cases({"v2f32.v8i8", "v4f32.v16i8"},
 809 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
 810 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
// Derive the new bf16 operand vector width from the return type: a 64-bit
// result pairs with v4bf16, a 128-bit result with v8bf16 (16 bits per lane).
 813 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
 814 assert((OperandWidth == 64 || OperandWidth == 128) &&
 815 "Unexpected operand width");
 816 LLVMContext &Ctx = F->getParent()->getContext();
 817 std::array<Type *, 2> Tys{
 818 {F->getReturnType(),
 819 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
 820 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
 821 return true;
 822 }
 823 return false; // No other '(arm|aarch64).neon.bfdot.*'.
 824 }
 825
 826 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
 827 // anymore and accept v8bf16 instead of v16i8.
 828 if (Name.consume_front("bfm")) {
 829 // (arm|aarch64).neon.bfm*'.
 830 if (Name.consume_back(".v4f32.v16i8")) {
 831 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
 834 .Case("mla",
 835 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
 836 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
 837 .Case("lalb",
 838 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
 839 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
 840 .Case("lalt",
 841 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
 842 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
 845 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
 846 return true;
 847 }
 848 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
 849 }
 850 return false; // No other '(arm|aarch64).neon.bfm*.
 851 }
 852 // Continue on to Aarch64 Neon or Arm Neon.
 853 }
 854 // Continue on to Arm or Aarch64.
 855
 856 if (IsArm) {
 857 // 'arm.*'.
 858 if (Neon) {
 859 // 'arm.neon.*'.
// Simple one-operand NEON ops that map 1:1 onto generic intrinsics,
// overloaded on the first argument's vector type.
 861 .StartsWith("vclz.", Intrinsic::ctlz)
 862 .StartsWith("vcnt.", Intrinsic::ctpop)
 863 .StartsWith("vqadds.", Intrinsic::sadd_sat)
 864 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
 865 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
 866 .StartsWith("vqsubu.", Intrinsic::usub_sat)
 867 .StartsWith("vrinta.", Intrinsic::round)
 868 .StartsWith("vrintn.", Intrinsic::roundeven)
 869 .StartsWith("vrintm.", Intrinsic::floor)
 870 .StartsWith("vrintp.", Intrinsic::ceil)
 871 .StartsWith("vrintx.", Intrinsic::rint)
 872 .StartsWith("vrintz.", Intrinsic::trunc)
 875 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
 876 F->arg_begin()->getType());
 877 return true;
 878 }
 879
 880 if (Name.consume_front("vst")) {
 881 // 'arm.neon.vst*'.
 882 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
 884 if (vstRegex.match(Name, &Groups)) {
 885 static const Intrinsic::ID StoreInts[] = {
 886 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
 887 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
 888
 889 static const Intrinsic::ID StoreLaneInts[] = {
 890 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
 891 Intrinsic::arm_neon_vst4lane};
 892
// Index into the tables from the argument count: vstN takes N+2 args
// (ptr, N vectors, align) and vstNlane takes N+3 (adds the lane index).
 893 auto fArgs = F->getFunctionType()->params();
 894 Type *Tys[] = {fArgs[0], fArgs[1]};
 895 if (Groups[1].size() == 1)
 897 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
 898 else
 900 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
 901 return true;
 902 }
 903 return false; // No other 'arm.neon.vst*'.
 904 }
 905
 906 return false; // No other 'arm.neon.*'.
 907 }
 908
 909 if (Name.consume_front("mve.")) {
 910 // 'arm.mve.*'.
 911 if (Name == "vctp64") {
 912 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
 913 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
 914 // the function and deal with it below in UpgradeIntrinsicCall.
 915 rename(F);
 916 return true;
 917 }
 918 return false; // Not 'arm.mve.vctp64'.
 919 }
 920
 921 if (Name.starts_with("vrintn.v")) {
// vrintn is round-to-nearest-even; maps to generic roundeven.
 923 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
 924 return true;
 925 }
 926
 927 // These too are changed to accept a v2i1 instead of the old v4i1.
// The branches below return true (upgrade needed) without setting NewFn;
// the call rewrite happens later for the matched names.
 928 if (Name.consume_back(".v4i1")) {
 929 // 'arm.mve.*.v4i1'.
 930 if (Name.consume_back(".predicated.v2i64.v4i32"))
 931 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
 932 return Name == "mull.int" || Name == "vqdmull";
 933
 934 if (Name.consume_back(".v2i64")) {
 935 // 'arm.mve.*.v2i64.v4i1'
 936 bool IsGather = Name.consume_front("vldr.gather.");
 937 if (IsGather || Name.consume_front("vstr.scatter.")) {
 938 if (Name.consume_front("base.")) {
 939 // Optional 'wb.' prefix.
 940 Name.consume_front("wb.");
 941 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
 942 // predicated.v2i64.v2i64.v4i1'.
 943 return Name == "predicated.v2i64";
 944 }
 945
 946 if (Name.consume_front("offset.predicated."))
 947 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
 948 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
 949
 950 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
 951 return false;
 952 }
 953
 954 return false; // No other 'arm.mve.*.v2i64.v4i1'.
 955 }
 956 return false; // No other 'arm.mve.*.v4i1'.
 957 }
 958 return false; // No other 'arm.mve.*'.
 959 }
 960
 961 if (Name.consume_front("cde.vcx")) {
 962 // 'arm.cde.vcx*'.
 963 if (Name.consume_back(".predicated.v2i64.v4i1"))
 964 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
 965 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
 966 Name == "3q" || Name == "3qa";
 967
 968 return false; // No other 'arm.cde.vcx*'.
 969 }
 970 } else {
 971 // 'aarch64.*'.
 972 if (Neon) {
 973 // 'aarch64.neon.*'.
 975 .StartsWith("frintn", Intrinsic::roundeven)
 976 .StartsWith("rbit", Intrinsic::bitreverse)
 979 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
 980 F->arg_begin()->getType())
 981 return true;
 982 }
 983
 984 if (Name.starts_with("addp")) {
 985 // 'aarch64.neon.addp*'.
 986 if (F->arg_size() != 2)
 987 return false; // Invalid IR.
 988 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
// Floating-point pairwise add moved to the dedicated faddp intrinsic.
 989 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
 991 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
 992 return true;
 993 }
 994 }
 995
 996 // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
 997 if (Name.starts_with("bfcvt")) {
 998 NewFn = nullptr;
 999 return true;
1000 }
1001
1002 return false; // No other 'aarch64.neon.*'.
1003 }
1004 if (Name.consume_front("sve.")) {
1005 // 'aarch64.sve.*'.
1006 if (Name.consume_front("bf")) {
1007 if (Name.consume_back(".lane")) {
1008 // 'aarch64.sve.bf*.lane'.
1011 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1012 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1013 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1016 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1017 return true;
1018 }
1019 return false; // No other 'aarch64.sve.bf*.lane'.
1020 }
1021 return false; // No other 'aarch64.sve.bf*'.
1022 }
1023
1024 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
1025 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1026 NewFn = nullptr;
1027 return true;
1028 }
1029
1030 if (Name.consume_front("addqv")) {
1031 // 'aarch64.sve.addqv'.
// Only the floating-point variant is retargeted (to faddqv); the integer
// form is left alone.
1032 if (!F->getReturnType()->isFPOrFPVectorTy())
1033 return false;
1034
1035 auto Args = F->getFunctionType()->params();
1036 Type *Tys[] = {F->getReturnType(), Args[1]};
1038 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1039 return true;
1040 }
1041
1042 if (Name.consume_front("ld")) {
1043 // 'aarch64.sve.ld*'.
1044 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1045 if (LdRegex.match(Name)) {
1046 Type *ScalarTy =
1047 cast<VectorType>(F->getReturnType())->getElementType();
1048 ElementCount EC =
1049 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
1050 assert(F->arg_size() == 2 &&
1051 "Expected 2 arguments for ld* intrinsic.");
1052 Type *PtrTy = F->getArg(1)->getType();
1053 Type *Ty = VectorType::get(ScalarTy, EC);
1054 static const Intrinsic::ID LoadIDs[] = {
1055 Intrinsic::aarch64_sve_ld2_sret,
1056 Intrinsic::aarch64_sve_ld3_sret,
1057 Intrinsic::aarch64_sve_ld4_sret,
1058 };
// Name[0] is the digit '2'..'4' matched by LdRegex; use it to index.
1060 F->getParent(), LoadIDs[Name[0] - '2'], {Ty, PtrTy});
1061 return true;
1062 }
1063 return false; // No other 'aarch64.sve.ld*'.
1064 }
1065
1066 if (Name.consume_front("tuple.")) {
1067 // 'aarch64.sve.tuple.*'.
// Tuple get/set/create became the generic vector.extract/vector.insert
// intrinsics, overloaded on the relevant vector types.
1068 if (Name.starts_with("get")) {
1069 // 'aarch64.sve.tuple.get*'.
1070 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1072 F->getParent(), Intrinsic::vector_extract, Tys);
1073 return true;
1074 }
1075
1076 if (Name.starts_with("set")) {
1077 // 'aarch64.sve.tuple.set*'.
1078 auto Args = F->getFunctionType()->params();
1079 Type *Tys[] = {Args[0], Args[2], Args[1]};
1081 F->getParent(), Intrinsic::vector_insert, Tys);
1082 return true;
1083 }
1084
1085 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1086 if (CreateTupleRegex.match(Name)) {
1087 // 'aarch64.sve.tuple.create*'.
1088 auto Args = F->getFunctionType()->params();
1089 Type *Tys[] = {F->getReturnType(), Args[1]};
1091 F->getParent(), Intrinsic::vector_insert, Tys);
1092 return true;
1093 }
1094 return false; // No other 'aarch64.sve.tuple.*'.
1095 }
1096
1097 if (Name.starts_with("rev.nxv")) {
1098 // 'aarch64.sve.rev.<Ty>'
1100 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
1101 return true;
1102 }
1103
1104 return false; // No other 'aarch64.sve.*'.
1105 }
1106 }
1107 return false; // No other 'arm.*', 'aarch64.*'.
1108}
1109
// Decide whether an 'nvvm.cp.async.bulk.tensor.g2s.*' declaration needs
// upgrading, returning the matched intrinsic ID when it does.
// NOTE(review): the signature line (1110) and the StringSwitch head are
// dropped by this rendered listing — verify against upstream before editing.
1111 StringRef Name) {
1112 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
1115 .Case("im2col.3d",
1116 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1117 .Case("im2col.4d",
1118 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1119 .Case("im2col.5d",
1120 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1121 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1122 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1123 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1124 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1125 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1127
1129 return ID;
1130
1131 // These intrinsics may need upgrade for two reasons:
1132 // (1) When the address-space of the first argument is shared[AS=3]
1133 // (and we upgrade it to use shared_cluster address-space[AS=7])
1134 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1136 return ID;
1137
1138 // (2) When there are only two boolean flag arguments at the end:
1139 //
1140 // The last three parameters of the older version of these
1141 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1142 //
1143 // The newer version reads as:
1144 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1145 //
1146 // So, when the type of the [N-3]rd argument is "not i1", then
1147 // it is the older version and we need to upgrade.
1148 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1149 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
1150 if (!ArgType->isIntegerTy(1))
1151 return ID;
1152 }
1153
1155}
1156
// Decide whether an 'nvvm.mapa.shared.cluster' or 'nvvm.cp.async.bulk.*'
// declaration needs upgrading (i.e. it still uses the plain shared address
// space where the current form uses shared_cluster); returns the matched
// intrinsic ID when an upgrade is needed.
// NOTE(review): the signature line (1157), StringSwitch head, and the final
// 'return not_intrinsic' are dropped by this rendered listing.
1158 StringRef Name) {
1159 if (Name.consume_front("mapa.shared.cluster"))
1160 if (F->getReturnType()->getPointerAddressSpace() ==
1162 return Intrinsic::nvvm_mapa_shared_cluster;
1163
1164 if (Name.consume_front("cp.async.bulk.")) {
1167 .Case("global.to.shared.cluster",
1168 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1169 .Case("shared.cta.to.cluster",
1170 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1172
1174 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1176 return ID;
1177 }
1178
1180}
1181
// Map legacy nvvm bf16 math intrinsic names (fma.rn.*, fmax.*, fmin.*,
// neg.*) to their current IDs; callers use the result to adjust the return
// type from i16/i16x2 to bfloat/bf16x2.
// NOTE(review): the signature line (1182) and the .Default(...) terminators
// of each StringSwitch are dropped by this rendered listing.
1183 if (Name.consume_front("fma.rn."))
1184 return StringSwitch<Intrinsic::ID>(Name)
1185 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1186 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1187 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1188 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1190
1191 if (Name.consume_front("fmax."))
1192 return StringSwitch<Intrinsic::ID>(Name)
1193 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1194 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1195 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1196 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1197 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1198 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1199 .Case("ftz.nan.xorsign.abs.bf16",
1200 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1201 .Case("ftz.nan.xorsign.abs.bf16x2",
1202 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1203 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1204 .Case("ftz.xorsign.abs.bf16x2",
1205 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1206 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1207 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1208 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1209 .Case("nan.xorsign.abs.bf16x2",
1210 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1211 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1212 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1214
1215 if (Name.consume_front("fmin."))
1216 return StringSwitch<Intrinsic::ID>(Name)
1217 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1218 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1219 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1220 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1221 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1222 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1223 .Case("ftz.nan.xorsign.abs.bf16",
1224 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1225 .Case("ftz.nan.xorsign.abs.bf16x2",
1226 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1227 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1228 .Case("ftz.xorsign.abs.bf16x2",
1229 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1230 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1231 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1232 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1233 .Case("nan.xorsign.abs.bf16x2",
1234 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1235 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1236 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1238
1239 if (Name.consume_front("neg."))
1240 return StringSwitch<Intrinsic::ID>(Name)
1241 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1242 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1244
1246}
1247
// Consume a leading NVVM address-space token ("local", "shared", "global",
// "constant" or "param") from Name; returns true iff one was present.
// Note: consume_front mutates Name, and the || chain stops at the first
// match, so at most one prefix is removed.
// NOTE(review): the signature line (1248) is dropped by this rendered
// listing.
1249 return Name.consume_front("local") || Name.consume_front("shared") ||
1250 Name.consume_front("global") || Name.consume_front("constant") ||
1251 Name.consume_front("param");
1252}
1253
// Check that a legacy 'llvm.convert.{to,from}.fp16*' declaration has types
// for which the replacement cast sequence is legal: to.fp16 becomes an
// FPTrunc-to-half followed by a BitCast to the declared return type, and
// from.fp16 becomes a BitCast-to-half followed by an FPExt.
// NOTE(review): the signature line (1254) is dropped by this rendered
// listing.
1255 const FunctionType *FuncTy) {
1256 Type *HalfTy = Type::getHalfTy(FuncTy->getContext());
1257 if (Name.starts_with("to.fp16")) {
1258 return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0),
1259 HalfTy) &&
1260 CastInst::castIsValid(Instruction::BitCast, HalfTy,
1261 FuncTy->getReturnType());
1262 }
1263
1264 if (Name.starts_with("from.fp16")) {
1265 return CastInst::castIsValid(Instruction::BitCast, FuncTy->getParamType(0),
1266 HalfTy) &&
1267 CastInst::castIsValid(Instruction::FPExt, HalfTy,
1268 FuncTy->getReturnType());
1269 }
1270
1271 return false;
1272}
1273
// Core dispatcher for intrinsic-declaration upgrades. Strips the "llvm."
// prefix, then switches on the first character of the remaining name and
// routes to the per-target / per-family upgrade logic. Returns true iff F
// needs upgrading; when a replacement declaration exists it is stored in
// NewFn, while NewFn == nullptr with a true return means the call sites are
// rewritten in UpgradeIntrinsicCall instead.
// NOTE(review): this rendered listing drops many physical lines (the
// signature at 1274, most StringSwitch heads and .Default() terminators, and
// several NewFn-assignment lead-ins) — verify against upstream
// AutoUpgrade.cpp before editing.
1275 bool CanUpgradeDebugIntrinsicsToRecords) {
1276 assert(F && "Illegal to upgrade a non-existent Function.");
1277
1278 StringRef Name = F->getName();
1279
1280 // Quickly eliminate it, if it's not a candidate.
1281 if (!Name.consume_front("llvm.") || Name.empty())
1282 return false;
1283
1284 switch (Name[0]) {
1285 default: break;
1286 case 'a': {
1287 bool IsArm = Name.consume_front("arm.");
1288 if (IsArm || Name.consume_front("aarch64.")) {
1289 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1290 return true;
1291 break;
1292 }
1293
1294 if (Name.consume_front("amdgcn.")) {
1295 if (Name == "alignbit") {
1296 // Target specific intrinsic became redundant
1298 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1299 return true;
1300 }
1301
1302 if (Name.consume_front("atomic.")) {
1303 if (Name.starts_with("inc") || Name.starts_with("dec") ||
1304 Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1305 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1306 // and usub_sat so there's no new declaration.
1307 NewFn = nullptr;
1308 return true;
1309 }
1310 break; // No other 'amdgcn.atomic.*'
1311 }
1312
// Legacy wmma/swmmac forms are detected by argument count: the current
// forms carry one extra operand, so a shorter arg list means "old".
1313 switch (F->getIntrinsicID()) {
1314 default:
1315 break;
1316 // Legacy wmma iu intrinsics without the optional clamp operand.
1317 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1318 if (F->arg_size() == 7) {
1319 NewFn = nullptr;
1320 return true;
1321 }
1322 break;
1323 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1324 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1325 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1326 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1327 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1328 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1329 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1330 if (F->arg_size() == 8) {
1331 NewFn = nullptr;
1332 return true;
1333 }
1334 break;
1335 }
1336
1337 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1338 Name.consume_front("flat.atomic.")) {
1339 if (Name.starts_with("fadd") ||
1340 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1341 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1342 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1343 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1344 // declaration.
1345 NewFn = nullptr;
1346 return true;
1347 }
1348 }
1349
1350 if (Name.starts_with("ldexp.")) {
1351 // Target specific intrinsic became redundant
1353 F->getParent(), Intrinsic::ldexp,
1354 {F->getReturnType(), F->getArg(1)->getType()});
1355 return true;
1356 }
1357 break; // No other 'amdgcn.*'
1358 }
1359
1360 break;
1361 }
1362 case 'c': {
1363 if (F->arg_size() == 1) {
1364 if (Name.consume_front("convert.")) {
1365 if (convertIntrinsicValidType(Name, F->getFunctionType())) {
1366 NewFn = nullptr;
1367 return true;
1368 }
1369 }
1370
1372 .StartsWith("ctlz.", Intrinsic::ctlz)
1373 .StartsWith("cttz.", Intrinsic::cttz)
1376 rename(F);
1377 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1378 F->arg_begin()->getType());
1379 return true;
1380 }
1381 }
1382
// coro.end gained an extra operand; the two-arg form is the legacy one.
1383 if (F->arg_size() == 2 && Name == "coro.end") {
1384 rename(F);
1385 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1386 Intrinsic::coro_end);
1387 return true;
1388 }
1389
1390 break;
1391 }
1392 case 'd':
1393 if (Name.consume_front("dbg.")) {
1394 // Mark debug intrinsics for upgrade to new debug format.
1395 if (CanUpgradeDebugIntrinsicsToRecords) {
1396 if (Name == "addr" || Name == "value" || Name == "assign" ||
1397 Name == "declare" || Name == "label") {
1398 // There's no function to replace these with.
1399 NewFn = nullptr;
1400 // But we do want these to get upgraded.
1401 return true;
1402 }
1403 }
1404 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1405 // converted to DbgVariableRecords later.
1406 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1407 rename(F);
1408 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1409 Intrinsic::dbg_value);
1410 return true;
1411 }
1412 break; // No other 'dbg.*'.
1413 }
1414 break;
1415 case 'e':
1416 if (Name.consume_front("experimental.vector.")) {
1419 // Skip over extract.last.active, otherwise it will be 'upgraded'
1420 // to a regular vector extract which is a different operation.
1421 .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1422 .StartsWith("extract.", Intrinsic::vector_extract)
1423 .StartsWith("insert.", Intrinsic::vector_insert)
1424 .StartsWith("reverse.", Intrinsic::vector_reverse)
1425 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1426 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1427 .StartsWith("partial.reduce.add",
1428 Intrinsic::vector_partial_reduce_add)
// Build the overload type list: which types are mangled into the name
// differs per intrinsic, hence the ID-specific pushes below.
1431 const auto *FT = F->getFunctionType();
1433 if (ID == Intrinsic::vector_extract ||
1434 ID == Intrinsic::vector_interleave2)
1435 // Extracting overloads the return type.
1436 Tys.push_back(FT->getReturnType());
1437 if (ID != Intrinsic::vector_interleave2)
1438 Tys.push_back(FT->getParamType(0));
1439 if (ID == Intrinsic::vector_insert ||
1440 ID == Intrinsic::vector_partial_reduce_add)
1441 // Inserting overloads the inserted type.
1442 Tys.push_back(FT->getParamType(1));
1443 rename(F);
1444 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1445 return true;
1446 }
1447
1448 if (Name.consume_front("reduce.")) {
1450 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1451 if (R.match(Name, &Groups))
1453 .Case("add", Intrinsic::vector_reduce_add)
1454 .Case("mul", Intrinsic::vector_reduce_mul)
1455 .Case("and", Intrinsic::vector_reduce_and)
1456 .Case("or", Intrinsic::vector_reduce_or)
1457 .Case("xor", Intrinsic::vector_reduce_xor)
1458 .Case("smax", Intrinsic::vector_reduce_smax)
1459 .Case("smin", Intrinsic::vector_reduce_smin)
1460 .Case("umax", Intrinsic::vector_reduce_umax)
1461 .Case("umin", Intrinsic::vector_reduce_umin)
1462 .Case("fmax", Intrinsic::vector_reduce_fmax)
1463 .Case("fmin", Intrinsic::vector_reduce_fmin)
1465
// The "v2." variants (fadd/fmul) carry a leading scalar accumulator, so
// the vector operand is parameter 1 rather than 0.
1466 bool V2 = false;
1468 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1469 Groups.clear();
1470 V2 = true;
1471 if (R2.match(Name, &Groups))
1473 .Case("fadd", Intrinsic::vector_reduce_fadd)
1474 .Case("fmul", Intrinsic::vector_reduce_fmul)
1476 }
1478 rename(F);
1479 auto Args = F->getFunctionType()->params();
1480 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1481 {Args[V2 ? 1 : 0]});
1482 return true;
1483 }
1484 break; // No other 'experimental.vector.reduce.*'.
1485 }
1486
1487 if (Name.consume_front("splice"))
1488 return true;
1489 break; // No other 'experimental.vector.*'.
1490 }
1491 if (Name.consume_front("experimental.stepvector.")) {
1492 Intrinsic::ID ID = Intrinsic::stepvector;
1493 rename(F);
1495 F->getParent(), ID, F->getFunctionType()->getReturnType());
1496 return true;
1497 }
1498 break; // No other 'e*'.
1499 case 'f':
1500 if (Name.starts_with("flt.rounds")) {
1501 rename(F);
1502 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1503 Intrinsic::get_rounding);
1504 return true;
1505 }
1506 break;
1507 case 'i':
1508 if (Name.starts_with("invariant.group.barrier")) {
1509 // Rename invariant.group.barrier to launder.invariant.group
1510 auto Args = F->getFunctionType()->params();
1511 Type* ObjectPtr[1] = {Args[0]};
1512 rename(F);
1514 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1515 return true;
1516 }
1517 break;
1518 case 'l':
// lifetime.start/end dropped the size operand; the two-arg form is legacy.
1519 if ((Name.starts_with("lifetime.start") ||
1520 Name.starts_with("lifetime.end")) &&
1521 F->arg_size() == 2) {
1522 Intrinsic::ID IID = Name.starts_with("lifetime.start")
1523 ? Intrinsic::lifetime_start
1524 : Intrinsic::lifetime_end;
1525 rename(F);
1526 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1527 F->getArg(0)->getType());
1528 return true;
1529 }
1530 break;
1531 case 'm': {
1532 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1533 // alignment parameter to embedding the alignment as an attribute of
1534 // the pointer args.
1535 if (unsigned ID = StringSwitch<unsigned>(Name)
1536 .StartsWith("memcpy.", Intrinsic::memcpy)
1537 .StartsWith("memmove.", Intrinsic::memmove)
1538 .Default(0)) {
1539 if (F->arg_size() == 5) {
1540 rename(F);
1541 // Get the types of dest, src, and len
1542 ArrayRef<Type *> ParamTypes =
1543 F->getFunctionType()->params().slice(0, 3);
1544 NewFn =
1545 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1546 return true;
1547 }
1548 }
1549 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1550 rename(F);
1551 // Get the types of dest, and len
1552 const auto *FT = F->getFunctionType();
1553 Type *ParamTypes[2] = {
1554 FT->getParamType(0), // Dest
1555 FT->getParamType(2) // len
1556 };
1557 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1558 Intrinsic::memset, ParamTypes);
1559 return true;
1560 }
1561
1562 unsigned MaskedID =
1564 .StartsWith("masked.load", Intrinsic::masked_load)
1565 .StartsWith("masked.gather", Intrinsic::masked_gather)
1566 .StartsWith("masked.store", Intrinsic::masked_store)
1567 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1568 .Default(0)
1569 if (MaskedID && F->arg_size() == 4) {
1570 rename(F);
// Loads/gathers overload on {result, pointer}; stores/scatters on
// {value, pointer}.
1571 if (MaskedID == Intrinsic::masked_load ||
1572 MaskedID == Intrinsic::masked_gather) {
1574 F->getParent(), MaskedID,
1575 {F->getReturnType(), F->getArg(0)->getType()});
1576 return true;
1577 }
1579 F->getParent(), MaskedID,
1580 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1581 return true;
1582 }
1583 break;
1584 }
1585 case 'n': {
1586 if (Name.consume_front("nvvm.")) {
1587 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1588 if (F->arg_size() == 1) {
1589 Intrinsic::ID IID =
1591 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1592 .Case("clz.i", Intrinsic::ctlz)
1593 .Case("popc.i", Intrinsic::ctpop)
1595 if (IID != Intrinsic::not_intrinsic) {
1596 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1597 {F->getReturnType()});
1598 return true;
1599 }
1600 } else if (F->arg_size() == 2) {
1601 Intrinsic::ID IID =
1603 .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
1604 .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
1605 .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
1606 .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
1608 if (IID != Intrinsic::not_intrinsic) {
1609 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1610 {F->getReturnType()})
1611 return true;
1612 }
1613 }
1614
1615 // Check for nvvm intrinsics that need a return type adjustment.
1616 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1618 if (IID != Intrinsic::not_intrinsic) {
1619 NewFn = nullptr;
1620 return true;
1621 }
1622 }
1623
1624 // Upgrade Distributed Shared Memory Intrinsics
1626 if (IID != Intrinsic::not_intrinsic) {
1627 rename(F);
1628 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1629 return true;
1630 }
1631
1632 // Upgrade TMA copy G2S Intrinsics
1634 if (IID != Intrinsic::not_intrinsic) {
1635 rename(F);
1636 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1637 return true;
1638 }
1639
1640 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1641 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1642 //
1643 // TODO: We could add lohi.i2d.
1644 bool Expand = false;
1645 if (Name.consume_front("abs."))
1646 // nvvm.abs.{i,ii}
1647 Expand =
1648 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1649 else if (Name.consume_front("fabs."))
1650 // nvvm.fabs.{f,ftz.f,d}
1651 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1652 else if (Name.consume_front("ex2.approx."))
1653 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1654 Expand =
1655 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1656 else if (Name.consume_front("atomic.load."))
1657 // nvvm.atomic.load.add.{f32,f64}.p
1658 // nvvm.atomic.load.{inc,dec}.32.p
1659 Expand = StringSwitch<bool>(Name)
1660 .StartsWith("add.f32.p", true)
1661 .StartsWith("add.f64.p", true)
1662 .StartsWith("inc.32.p", true)
1663 .StartsWith("dec.32.p", true)
1664 .Default(false);
1665 else if (Name.consume_front("bitcast."))
1666 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1667 Expand =
1668 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1669 else if (Name.consume_front("rotate."))
1670 // nvvm.rotate.{b32,b64,right.b64}
1671 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1672 else if (Name.consume_front("ptr.gen.to."))
1673 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1674 Expand = consumeNVVMPtrAddrSpace(Name);
1675 else if (Name.consume_front("ptr."))
1676 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1677 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1678 else if (Name.consume_front("ldg.global."))
1679 // nvvm.ldg.global.{i,p,f}
1680 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1681 Name.starts_with("p."));
1682 else
1683 Expand = StringSwitch<bool>(Name)
1684 .Case("barrier0", true)
1685 .Case("barrier.n", true)
1686 .Case("barrier.sync.cnt", true)
1687 .Case("barrier.sync", true)
1688 .Case("barrier", true)
1689 .Case("bar.sync", true)
1690 .Case("barrier0.popc", true)
1691 .Case("barrier0.and", true)
1692 .Case("barrier0.or", true)
1693 .Case("clz.ll", true)
1694 .Case("popc.ll", true)
1695 .Case("h2f", true)
1696 .Case("swap.lo.hi.b64", true)
1697 .Case("tanh.approx.f32", true)
1698 .Default(false);
1699
1700 if (Expand) {
1701 NewFn = nullptr;
1702 return true;
1703 }
1704 break; // No other 'nvvm.*'.
1705 }
1706 break;
1707 }
1708 case 'o':
1709 if (Name.starts_with("objectsize.")) {
1710 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1711 if (F->arg_size() == 2 || F->arg_size() == 3) {
1712 rename(F);
1713 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1714 Intrinsic::objectsize, Tys);
1715 return true;
1716 }
1717 }
1718 break;
1719
1720 case 'p':
1721 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1722 rename(F);
1724 F->getParent(), Intrinsic::ptr_annotation,
1725 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1726 return true;
1727 }
1728 break;
1729
1730 case 'r': {
1731 if (Name.consume_front("riscv.")) {
1734 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1735 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1736 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1737 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
// The aes32 intrinsics changed their third operand to i32; a non-i32
// param marks the legacy declaration.
1740 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1741 rename(F);
1742 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1743 return true;
1744 }
1745 break; // No other applicable upgrades.
1746 }
1747
1749 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1750 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1753 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1754 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1755 rename(F);
1756 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1757 return true;
1758 }
1759 break; // No other applicable upgrades.
1760 }
1761
1763 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1764 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1765 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1766 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1767 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1768 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1771 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1772 rename(F);
1773 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1774 return true;
1775 }
1776 break; // No other applicable upgrades.
1777 }
1778
1779 // Replace llvm.riscv.clmul with llvm.clmul.
1780 if (Name == "clmul.i32" || Name == "clmul.i64") {
1782 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1783 return true;
1784 }
1785
1786 break; // No other 'riscv.*' intrinsics
1787 }
1788 } break;
1789
1790 case 's':
1791 if (Name == "stackprotectorcheck") {
1792 NewFn = nullptr;
1793 return true;
1794 }
1795 break;
1796
1797 case 't':
1798 if (Name == "thread.pointer") {
1800 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1801 return true;
1802 }
1803 break;
1804
1805 case 'v': {
1806 if (Name == "var.annotation" && F->arg_size() == 4) {
1807 rename(F);
1809 F->getParent(), Intrinsic::var_annotation,
1810 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1811 return true;
1812 }
1813 if (Name.consume_front("vector.splice")) {
1814 if (Name.starts_with(".left") || Name.starts_with(".right"))
1815 break;
1816 return true;
1817 }
1818 break;
1819 }
1820
1821 case 'w':
1822 if (Name.consume_front("wasm.")) {
1825 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1826 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1827 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1830 rename(F);
1831 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1832 F->getReturnType());
1833 return true;
1834 }
1835
1836 if (Name.consume_front("dot.i8x16.i7x16.")) {
1838 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1839 .Case("add.signed",
1840 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1843 rename(F);
1844 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1845 return true;
1846 }
1847 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1848 }
1849 break; // No other 'wasm.*'.
1850 }
1851 break;
1852
1853 case 'x':
1854 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1855 return true;
1856 }
1857
// Fall-through handling for intrinsics whose name matched nothing above:
// fix up struct-returning intrinsics and stale name mangling.
1858 auto *ST = dyn_cast<StructType>(F->getReturnType());
1859 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1860 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1861 // Replace return type with literal non-packed struct. Only do this for
1862 // intrinsics declared to return a struct, not for intrinsics with
1863 // overloaded return type, in which case the exact struct type will be
1864 // mangled into the name.
1867 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1868 auto *FT = F->getFunctionType();
1869 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1870 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1871 std::string Name = F->getName().str();
1872 rename(F);
1873 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1874 Name, F->getParent());
1875
1876 // The new function may also need remangling.
1877 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1878 NewFn = *Result;
1879 return true;
1880 }
1881 }
1882
1883 // Remangle our intrinsic since we upgrade the mangling
1885 if (Result != std::nullopt) {
1886 NewFn = *Result;
1887 return true;
1888 }
1889
1890 // This may not belong here. This function is effectively being overloaded
1891 // to both detect an intrinsic which needs upgrading, and to provide the
1892 // upgraded form of the intrinsic. We should perhaps have two separate
1893 // functions for this.
1894 return false;
1895}
1896
// Public entry point for intrinsic auto-upgrading. Delegates the actual
// upgrade decision to upgradeIntrinsicFunction1 (which may set NewFn to a
// replacement declaration), then refreshes the attribute list of whichever
// function survives. Returns true when an upgrade is required.
// NOTE(review): the opening line of this signature was dropped by the
// extraction; presumably it reads
// "bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn," --
// confirm against the upstream source.
1898 bool CanUpgradeDebugIntrinsicsToRecords) {
1899 NewFn = nullptr;
1900 bool Upgraded =
1901 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1902
1903 // Upgrade intrinsic attributes. This does not change the function.
// If a replacement declaration was produced, attach the canonical
// attributes to it rather than the old function.
1904 if (NewFn)
1905 F = NewFn;
1906 if (Intrinsic::ID id = F->getIntrinsicID()) {
1907 // Only do this if the intrinsic signature is valid.
1908 SmallVector<Type *> OverloadTys;
1909 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1910 F->setAttributes(
1911 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1912 }
1913 return Upgraded;
1914}
1915
// Upgrade llvm.global_ctors / llvm.global_dtors from the legacy two-field
// element struct {priority, function} to the current three-field form that
// also carries an associated-data pointer. Returns nullptr when GV is not
// one of those arrays or already has the new shape; otherwise builds a new
// GlobalVariable with the widened initializer.
// NOTE(review): several hyperlinked lines are missing from this dump -- the
// function signature, the dyn_cast lines declaring ATy/STy, the third
// ConstantStruct field, and part of the GlobalVariable constructor call.
// Confirm against upstream before relying on the exact text here.
1917 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1918 GV->getName() == "llvm.global_dtors")) ||
1919 !GV->hasInitializer())
1920 return nullptr;
1922 if (!ATy)
1923 return nullptr;
// Elements with two members are the old form; three members need no upgrade.
1925 if (!STy || STy->getNumElements() != 2)
1926 return nullptr;
1927
1928 LLVMContext &C = GV->getContext();
1929 IRBuilder<> IRB(C);
// New element type: original two fields plus a pointer field.
1930 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1931 IRB.getPtrTy());
1932 Constant *Init = GV->getInitializer();
1933 unsigned N = Init->getNumOperands();
1934 std::vector<Constant *> NewCtors(N);
// Rebuild every entry, copying the priority and function fields.
1935 for (unsigned i = 0; i != N; ++i) {
1936 auto Ctor = cast<Constant>(Init->getOperand(i));
1937 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1938 Ctor->getAggregateElement(1),
1940 }
1941 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1942
1943 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1944 NewInit, GV->getName());
1945}
1946
1947// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1948// to byte shuffles.
// Op is the vector operand (with 64-bit elements); Shift is the byte shift
// count from the immediate. Returns the shifted vector, bitcast back to the
// original element type.
1950 unsigned Shift) {
1951 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// 8 bytes per 64-bit element.
1952 unsigned NumElts = ResultTy->getNumElements() * 8;
1953
1954 // Bitcast from a 64-bit element type to a byte element type.
1955 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1956 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1957
1958 // We'll be shuffling in zeroes.
1959 Value *Res = Constant::getNullValue(VecTy);
1960
1961 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1962 // we'll just return the zero vector.
1963 if (Shift < 16) {
1964 int Idxs[64];
1965 // 256/512-bit version is split into 2/4 16-byte lanes.
// Within each lane: indices that fall below NumElts select from the zero
// vector (first shuffle operand); the adjustment switches them to Op bytes
// from the same lane, producing a per-lane left byte shift.
1966 for (unsigned l = 0; l != NumElts; l += 16)
1967 for (unsigned i = 0; i != 16; ++i) {
1968 unsigned Idx = NumElts + i - Shift;
1969 if (Idx < NumElts)
1970 Idx -= NumElts - 16; // end of lane, switch operand.
1971 Idxs[l + i] = Idx + l;
1972 }
1973
1974 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1975 }
1976
1977 // Bitcast back to a 64-bit element type.
1978 return Builder.CreateBitCast(Res, ResultTy, "cast");
1979}
1980
1981// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1982// to byte shuffles.
// Mirror of the PSLLDQ upgrade above, but shifting bytes right within each
// 16-byte lane; vacated positions are filled with zeroes.
1984 unsigned Shift) {
1985 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// 8 bytes per 64-bit element.
1986 unsigned NumElts = ResultTy->getNumElements() * 8;
1987
1988 // Bitcast from a 64-bit element type to a byte element type.
1989 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1990 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1991
1992 // We'll be shuffling in zeroes.
1993 Value *Res = Constant::getNullValue(VecTy);
1994
1995 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1996 // we'll just return the zero vector.
1997 if (Shift < 16) {
1998 int Idxs[64];
1999 // 256/512-bit version is split into 2/4 16-byte lanes.
// Indices >= 16 run past the lane; bumping them by NumElts-16 redirects
// them into the zero vector (second shuffle operand).
2000 for (unsigned l = 0; l != NumElts; l += 16)
2001 for (unsigned i = 0; i != 16; ++i) {
2002 unsigned Idx = i + Shift;
2003 if (Idx >= 16)
2004 Idx += NumElts - 16; // end of lane, switch operand.
2005 Idxs[l + i] = Idx + l;
2006 }
2007
2008 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
2009 }
2010
2011 // Bitcast back to a 64-bit element type.
2012 return Builder.CreateBitCast(Res, ResultTy, "cast");
2013}
2014
// Convert a scalar x86 mask value (an iN integer, one bit per lane) into an
// <NumElts x i1> vector suitable for a vector select. The integer is bitcast
// to a vector of i1; when fewer than 8 lanes are wanted the mask was stored
// in an i8, so the low NumElts elements are extracted.
2015static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2016 unsigned NumElts) {
2017 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
2019 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
2020 Mask = Builder.CreateBitCast(Mask, MaskTy)
2021
2022 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2023 // i8 and we need to extract down to the right number of elements.
2024 if (NumElts <= 4) {
2025 int Indices[4];
2026 for (unsigned i = 0; i != NumElts; ++i)
2027 Indices[i] = i;
2028 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
2029 "extract");
2030 }
2031
2032 return Mask;
2033}
2034
2035static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2036 Value *Op1) {
2037 // If the mask is all ones just emit the first operation.
2038 if (const auto *C = dyn_cast<Constant>(Mask))
2039 if (C->isAllOnesValue())
2040 return Op0;
2041
2042 Mask = getX86MaskVec(Builder, Mask,
2043 cast<FixedVectorType>(Op0->getType())->getNumElements());
2044 return Builder.CreateSelect(Mask, Op0, Op1);
2045}
2046
2047static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2048 Value *Op1) {
2049 // If the mask is all ones just emit the first operation.
2050 if (const auto *C = dyn_cast<Constant>(Mask))
2051 if (C->isAllOnesValue())
2052 return Op0;
2053
2054 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2055 Mask->getType()->getIntegerBitWidth());
2056 Mask = Builder.CreateBitCast(Mask, MaskTy);
2057 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2058 return Builder.CreateSelect(Mask, Op0, Op1);
2059}
2060
2061// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2062// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2063// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// Shift is the (constant) immediate; Passthru/Mask implement the AVX-512
// masked blend via emitX86Select.
2065 Value *Op1, Value *Shift,
2066 Value *Passthru, Value *Mask,
2067 bool IsVALIGN) {
2068 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2069
2070 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2071 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2072 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2073 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2074
2075 // Mask the immediate for VALIGN.
2076 if (IsVALIGN)
2077 ShiftVal &= (NumElts - 1);
2078
2079 // If palignr is shifting the pair of vectors more than the size of two
2080 // lanes, emit zero.
2081 if (ShiftVal >= 32)
2083
2084 // If palignr is shifting the pair of input vectors more than one lane,
2085 // but less than two lanes, convert to shifting in zeroes.
2086 if (ShiftVal > 16) {
2087 ShiftVal -= 16;
2088 Op1 = Op0;
2090 }
2091
2092 int Indices[64];
2093 // 256-bit palignr operates on 128-bit lanes so we need to handle that
// For PALIGNR, indices past the end of a lane are redirected into the
// second shuffle operand (Op0); VALIGN has no lanes, so no wrap is applied.
2094 for (unsigned l = 0; l < NumElts; l += 16) {
2095 for (unsigned i = 0; i != 16; ++i) {
2096 unsigned Idx = ShiftVal + i;
2097 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2098 Idx += NumElts - 16; // End of lane, switch operand.
2099 Indices[l + i] = Idx + l;
2100 }
2101 }
2102
2103 Value *Align = Builder.CreateShuffleVector(
2104 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2105
// Blend the shuffle result with the passthru operand under the mask.
2106 return emitX86Select(Builder, Mask, Align, Passthru);
2107}
2108
// Upgrade the legacy masked vpermt2/vpermi2 two-source variable permutes to
// the unmasked x86_avx512_vpermi2var_* intrinsic (chosen by vector width,
// element width, and float-ness of the result) followed by an explicit
// select on the mask operand. ZeroMask selects zeroing- vs merging-masking;
// IndexForm distinguishes the vpermi2 (index first) operand order.
2110 bool ZeroMask, bool IndexForm) {
2111 Type *Ty = CI.getType();
2112 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2113 unsigned EltWidth = Ty->getScalarSizeInBits();
2114 bool IsFloat = Ty->isFPOrFPVectorTy();
2115 Intrinsic::ID IID;
2116 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2117 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2118 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2119 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2120 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2121 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2122 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2123 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2124 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2125 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2126 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2127 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2128 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2129 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2130 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2131 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2132 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2133 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2134 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2135 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2136 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2137 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2138 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2139 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2140 else if (VecWidth == 128 && EltWidth == 16)
2141 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2142 else if (VecWidth == 256 && EltWidth == 16)
2143 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2144 else if (VecWidth == 512 && EltWidth == 16)
2145 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2146 else if (VecWidth == 128 && EltWidth == 8)
2147 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2148 else if (VecWidth == 256 && EltWidth == 8)
2149 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2150 else if (VecWidth == 512 && EltWidth == 8)
2151 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2152 else
2153 llvm_unreachable("Unexpected intrinsic");
2154
2155 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2156 CI.getArgOperand(2) };
2157
2158 // If this isn't index form we need to swap operand 0 and 1.
2159 if (!IndexForm)
2160 std::swap(Args[0], Args[1]);
2161
2162 Value *V = Builder.CreateIntrinsic(IID, Args);
// Zeroing-masking blends with zero; merging-masking blends with arg 1
// (the index operand, bitcast to the result type).
2163 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2164 : Builder.CreateBitCast(CI.getArgOperand(1),
2165 Ty);
2166 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2167}
2168
// Upgrade a (possibly masked) x86 binary intrinsic to the generic intrinsic
// identified by IID, applying the mask/passthru select when the call has the
// 4-operand masked form.
2170 Intrinsic::ID IID) {
2171 Type *Ty = CI.getType();
2172 Value *Op0 = CI.getOperand(0);
2173 Value *Op1 = CI.getOperand(1);
2174 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2175
2176 if (CI.arg_size() == 4) { // For masked intrinsics.
2177 Value *VecSrc = CI.getOperand(2);
2178 Value *Mask = CI.getOperand(3);
2179 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2180 }
2181 return Res;
2182}
2183
// Upgrade x86 rotate intrinsics (vprol/vpror families) to the generic
// funnel-shift intrinsics llvm.fshl / llvm.fshr with both value operands
// equal to the source. Handles the masked 4-operand form via emitX86Select.
2185 bool IsRotateRight) {
2186 Type *Ty = CI.getType();
2187 Value *Src = CI.getArgOperand(0);
2188 Value *Amt = CI.getArgOperand(1);
2189
2190 // Amount may be scalar immediate, in which case create a splat vector.
2191 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2192 // we only care about the lowest log2 bits anyway.
2193 if (Amt->getType() != Ty) {
2194 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2195 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2196 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2197 }
2198
// fshX(Src, Src, Amt) is exactly a rotate of Src by Amt.
2199 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2200 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2201
2202 if (CI.arg_size() == 4) { // For masked intrinsics.
2203 Value *VecSrc = CI.getOperand(2);
2204 Value *Mask = CI.getOperand(3);
2205 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2206 }
2207 return Res;
2208}
2209
2210static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2211 bool IsSigned) {
2212 Type *Ty = CI.getType();
2213 Value *LHS = CI.getArgOperand(0);
2214 Value *RHS = CI.getArgOperand(1);
2215
2216 CmpInst::Predicate Pred;
2217 switch (Imm) {
2218 case 0x0:
2219 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2220 break;
2221 case 0x1:
2222 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2223 break;
2224 case 0x2:
2225 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2226 break;
2227 case 0x3:
2228 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2229 break;
2230 case 0x4:
2231 Pred = ICmpInst::ICMP_EQ;
2232 break;
2233 case 0x5:
2234 Pred = ICmpInst::ICMP_NE;
2235 break;
2236 case 0x6:
2237 return Constant::getNullValue(Ty); // FALSE
2238 case 0x7:
2239 return Constant::getAllOnesValue(Ty); // TRUE
2240 default:
2241 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2242 }
2243
2244 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2245 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2246 return Ext;
2247}
2248
// Upgrade x86 concat-shift (vpshld/vpshrd) intrinsics to the generic funnel
// shift intrinsics. For right shifts the operands are swapped to match the
// fshr operand order. Handles both the unmasked 3-operand and masked 4/5
// operand forms; ZeroMask selects zero vs. first-operand passthru when no
// explicit passthru operand is present.
2250 bool IsShiftRight, bool ZeroMask) {
2251 Type *Ty = CI.getType();
2252 Value *Op0 = CI.getArgOperand(0);
2253 Value *Op1 = CI.getArgOperand(1);
2254 Value *Amt = CI.getArgOperand(2);
2255
2256 if (IsShiftRight)
2257 std::swap(Op0, Op1);
2258
2259 // Amount may be scalar immediate, in which case create a splat vector.
2260 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2261 // we only care about the lowest log2 bits anyway.
2262 if (Amt->getType() != Ty) {
2263 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2264 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2265 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2266 }
2267
2268 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2269 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2270
2271 unsigned NumArgs = CI.arg_size();
2272 if (NumArgs >= 4) { // For masked intrinsics.
// 5-arg form carries an explicit passthru; otherwise zeroing-masking blends
// with zero and merging-masking blends with the first source operand.
2273 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2274 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2275 CI.getArgOperand(0);
2276 Value *Mask = CI.getOperand(NumArgs - 1);
2277 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2278 }
2279 return Res;
2280}
2281
// Upgrade a legacy x86 masked-store intrinsic to a plain aligned store (when
// the mask is constant all-ones) or to the generic llvm.masked.store. When
// Aligned, the natural vector alignment is used; otherwise alignment 1.
2283 Value *Mask, bool Aligned) {
2284 const Align Alignment =
2285 Aligned
2286 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2287 : Align(1);
2288
2289 // If the mask is all ones just emit a regular store.
2290 if (const auto *C = dyn_cast<Constant>(Mask))
2291 if (C->isAllOnesValue())
2292 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2293
2294 // Convert the mask from an integer type to a vector of i1.
2295 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2296 Mask = getX86MaskVec(Builder, Mask, NumElts);
2297 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2298}
2299
// Upgrade a legacy x86 masked-load intrinsic to a plain aligned load (when
// the mask is constant all-ones) or to the generic llvm.masked.load with the
// given passthru for disabled lanes. When Aligned, the natural vector
// alignment is used; otherwise alignment 1.
2301 Value *Passthru, Value *Mask, bool Aligned) {
2302 Type *ValTy = Passthru->getType();
2303 const Align Alignment =
2304 Aligned
2305 ? Align(
2307 8)
2308 : Align(1);
2309
2310 // If the mask is all ones just emit a regular load.
2311 if (const auto *C = dyn_cast<Constant>(Mask))
2312 if (C->isAllOnesValue())
2313 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2314
2315 // Convert the mask from an integer type to a vector of i1.
2316 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2317 Mask = getX86MaskVec(Builder, Mask, NumElts);
2318 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2319}
2320
2321static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2322 Type *Ty = CI.getType();
2323 Value *Op0 = CI.getArgOperand(0);
2324 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2325 {Op0, Builder.getInt1(false)});
2326 if (CI.arg_size() == 3)
2327 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2328 return Res;
2329}
2330
2331static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2332 Type *Ty = CI.getType();
2333
2334 // Arguments have a vXi32 type so cast to vXi64.
2335 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2336 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2337
2338 if (IsSigned) {
2339 // Shift left then arithmetic shift right.
2340 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2341 LHS = Builder.CreateShl(LHS, ShiftAmt);
2342 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2343 RHS = Builder.CreateShl(RHS, ShiftAmt);
2344 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2345 } else {
2346 // Clear the upper bits.
2347 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2348 LHS = Builder.CreateAnd(LHS, Mask);
2349 RHS = Builder.CreateAnd(RHS, Mask);
2350 }
2351
2352 Value *Res = Builder.CreateMul(LHS, RHS);
2353
2354 if (CI.arg_size() == 4)
2355 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2356
2357 return Res;
2358}
2359
2360// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// Vec is an <N x i1> comparison result; Mask (when non-null and not all
// ones) is ANDed in after conversion via getX86MaskVec. Vectors narrower
// than 8 lanes are widened by shuffling before the final bitcast to an
// integer of at least 8 bits.
2362 Value *Mask) {
2363 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2364 if (Mask) {
// Skip the AND entirely for a constant all-ones mask.
2365 const auto *C = dyn_cast<Constant>(Mask);
2366 if (!C || !C->isAllOnesValue())
2367 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2368 }
2369
2370 if (NumElts < 8) {
// Pad out to 8 lanes; the extra indices select from the second shuffle
// operand so the result is wide enough to bitcast to an i8.
2371 int Indices[8];
2372 for (unsigned i = 0; i != NumElts; ++i)
2373 Indices[i] = i;
2374 for (unsigned i = NumElts; i != 8; ++i)
2375 Indices[i] = NumElts + i % NumElts;
2376 Vec = Builder.CreateShuffleVector(Vec,
2378 Indices);
2379 }
2380 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2381}
2382
// Upgrade AVX-512 masked integer compare intrinsics: CC encodes the
// comparison (3 = always false, 7 = always true, others map onto icmp
// predicates with Signed selecting the signed/unsigned flavor). The i1
// result vector is combined with the trailing mask operand and widened via
// applyX86MaskOn1BitsVec.
2384 unsigned CC, bool Signed) {
2385 Value *Op0 = CI.getArgOperand(0);
2386 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2387
2388 Value *Cmp;
2389 if (CC == 3) {
2391 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2392 } else if (CC == 7) {
2394 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2395 } else {
2397 switch (CC) {
2398 default: llvm_unreachable("Unknown condition code");
2399 case 0: Pred = ICmpInst::ICMP_EQ; break;
2400 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2401 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2402 case 4: Pred = ICmpInst::ICMP_NE; break;
2403 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2404 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2405 }
2406 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2407 }
2408
// The mask is always the last call operand.
2409 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2410
2411 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2412}
2413
2414// Replace a masked intrinsic with an older unmasked intrinsic.
// Emits IID with the first two call operands, then blends the result with
// the passthru (arg 2) under the mask (arg 3) via emitX86Select.
2416 Intrinsic::ID IID) {
2417 Value *Rep =
2418 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2419 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2420}
2421
// Upgrade a masked scalar move: if bit 0 of the i8 mask is set, lane 0 of B
// is selected, otherwise lane 0 of Src; the chosen scalar is inserted into
// lane 0 of A.
2423 Value* A = CI.getArgOperand(0);
2424 Value* B = CI.getArgOperand(1);
2425 Value* Src = CI.getArgOperand(2);
2426 Value* Mask = CI.getArgOperand(3);
2427
// Test only the lowest mask bit -- it controls the single scalar lane.
2428 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2429 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2430 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2431 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2432 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2433 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2434}
2435
// Upgrade vpmovm2* style intrinsics: expand a scalar mask into a vector by
// converting it to <N x i1> and sign-extending each lane to the full element
// width of the return type.
2437 Value* Op = CI.getArgOperand(0);
2438 Type* ReturnOp = CI.getType();
2439 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2440 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2441 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2443
2444// Replace intrinsic with unmasked version and a select.
// Maps an "avx512.mask.<op>" name onto the corresponding unmasked x86
// intrinsic (chosen by vector width / element width / suffix), calls it with
// all operands except the trailing passthru and mask, and blends the result
// under the mask via emitX86Select. Stores the replacement into Rep and
// returns true; returns false when the name is not one of the handled
// families.
2446 CallBase &CI, Value *&Rep) {
2447 Name = Name.substr(12); // Remove avx512.mask.
2448
2449 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2450 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2451 Intrinsic::ID IID;
2452 if (Name.starts_with("max.p")) {
2453 if (VecWidth == 128 && EltWidth == 32)
2454 IID = Intrinsic::x86_sse_max_ps;
2455 else if (VecWidth == 128 && EltWidth == 64)
2456 IID = Intrinsic::x86_sse2_max_pd;
2457 else if (VecWidth == 256 && EltWidth == 32)
2458 IID = Intrinsic::x86_avx_max_ps_256;
2459 else if (VecWidth == 256 && EltWidth == 64)
2460 IID = Intrinsic::x86_avx_max_pd_256;
2461 else
2462 llvm_unreachable("Unexpected intrinsic");
2463 } else if (Name.starts_with("min.p")) {
2464 if (VecWidth == 128 && EltWidth == 32)
2465 IID = Intrinsic::x86_sse_min_ps;
2466 else if (VecWidth == 128 && EltWidth == 64)
2467 IID = Intrinsic::x86_sse2_min_pd;
2468 else if (VecWidth == 256 && EltWidth == 32)
2469 IID = Intrinsic::x86_avx_min_ps_256;
2470 else if (VecWidth == 256 && EltWidth == 64)
2471 IID = Intrinsic::x86_avx_min_pd_256;
2472 else
2473 llvm_unreachable("Unexpected intrinsic");
2474 } else if (Name.starts_with("pshuf.b.")) {
2475 if (VecWidth == 128)
2476 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2477 else if (VecWidth == 256)
2478 IID = Intrinsic::x86_avx2_pshuf_b;
2479 else if (VecWidth == 512)
2480 IID = Intrinsic::x86_avx512_pshuf_b_512;
2481 else
2482 llvm_unreachable("Unexpected intrinsic");
2483 } else if (Name.starts_with("pmul.hr.sw.")) {
2484 if (VecWidth == 128)
2485 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2486 else if (VecWidth == 256)
2487 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2488 else if (VecWidth == 512)
2489 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2490 else
2491 llvm_unreachable("Unexpected intrinsic");
2492 } else if (Name.starts_with("pmulh.w.")) {
2493 if (VecWidth == 128)
2494 IID = Intrinsic::x86_sse2_pmulh_w;
2495 else if (VecWidth == 256)
2496 IID = Intrinsic::x86_avx2_pmulh_w;
2497 else if (VecWidth == 512)
2498 IID = Intrinsic::x86_avx512_pmulh_w_512;
2499 else
2500 llvm_unreachable("Unexpected intrinsic");
2501 } else if (Name.starts_with("pmulhu.w.")) {
2502 if (VecWidth == 128)
2503 IID = Intrinsic::x86_sse2_pmulhu_w;
2504 else if (VecWidth == 256)
2505 IID = Intrinsic::x86_avx2_pmulhu_w;
2506 else if (VecWidth == 512)
2507 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2508 else
2509 llvm_unreachable("Unexpected intrinsic");
2510 } else if (Name.starts_with("pmaddw.d.")) {
2511 if (VecWidth == 128)
2512 IID = Intrinsic::x86_sse2_pmadd_wd;
2513 else if (VecWidth == 256)
2514 IID = Intrinsic::x86_avx2_pmadd_wd;
2515 else if (VecWidth == 512)
2516 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2517 else
2518 llvm_unreachable("Unexpected intrinsic");
2519 } else if (Name.starts_with("pmaddubs.w.")) {
2520 if (VecWidth == 128)
2521 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2522 else if (VecWidth == 256)
2523 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2524 else if (VecWidth == 512)
2525 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2526 else
2527 llvm_unreachable("Unexpected intrinsic");
2528 } else if (Name.starts_with("packsswb.")) {
2529 if (VecWidth == 128)
2530 IID = Intrinsic::x86_sse2_packsswb_128;
2531 else if (VecWidth == 256)
2532 IID = Intrinsic::x86_avx2_packsswb;
2533 else if (VecWidth == 512)
2534 IID = Intrinsic::x86_avx512_packsswb_512;
2535 else
2536 llvm_unreachable("Unexpected intrinsic");
2537 } else if (Name.starts_with("packssdw.")) {
2538 if (VecWidth == 128)
2539 IID = Intrinsic::x86_sse2_packssdw_128;
2540 else if (VecWidth == 256)
2541 IID = Intrinsic::x86_avx2_packssdw;
2542 else if (VecWidth == 512)
2543 IID = Intrinsic::x86_avx512_packssdw_512;
2544 else
2545 llvm_unreachable("Unexpected intrinsic");
2546 } else if (Name.starts_with("packuswb.")) {
2547 if (VecWidth == 128)
2548 IID = Intrinsic::x86_sse2_packuswb_128;
2549 else if (VecWidth == 256)
2550 IID = Intrinsic::x86_avx2_packuswb;
2551 else if (VecWidth == 512)
2552 IID = Intrinsic::x86_avx512_packuswb_512;
2553 else
2554 llvm_unreachable("Unexpected intrinsic");
2555 } else if (Name.starts_with("packusdw.")) {
2556 if (VecWidth == 128)
2557 IID = Intrinsic::x86_sse41_packusdw;
2558 else if (VecWidth == 256)
2559 IID = Intrinsic::x86_avx2_packusdw;
2560 else if (VecWidth == 512)
2561 IID = Intrinsic::x86_avx512_packusdw_512;
2562 else
2563 llvm_unreachable("Unexpected intrinsic");
2564 } else if (Name.starts_with("vpermilvar.")) {
2565 if (VecWidth == 128 && EltWidth == 32)
2566 IID = Intrinsic::x86_avx_vpermilvar_ps;
2567 else if (VecWidth == 128 && EltWidth == 64)
2568 IID = Intrinsic::x86_avx_vpermilvar_pd;
2569 else if (VecWidth == 256 && EltWidth == 32)
2570 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2571 else if (VecWidth == 256 && EltWidth == 64)
2572 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2573 else if (VecWidth == 512 && EltWidth == 32)
2574 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2575 else if (VecWidth == 512 && EltWidth == 64)
2576 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2577 else
2578 llvm_unreachable("Unexpected intrinsic");
2579 } else if (Name == "cvtpd2dq.256") {
2580 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2581 } else if (Name == "cvtpd2ps.256") {
2582 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2583 } else if (Name == "cvttpd2dq.256") {
2584 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2585 } else if (Name == "cvttps2dq.128") {
2586 IID = Intrinsic::x86_sse2_cvttps2dq;
2587 } else if (Name == "cvttps2dq.256") {
2588 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2589 } else if (Name.starts_with("permvar.")) {
2590 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2591 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2592 IID = Intrinsic::x86_avx2_permps;
2593 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2594 IID = Intrinsic::x86_avx2_permd;
2595 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2596 IID = Intrinsic::x86_avx512_permvar_df_256;
2597 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2598 IID = Intrinsic::x86_avx512_permvar_di_256;
2599 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2600 IID = Intrinsic::x86_avx512_permvar_sf_512;
2601 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2602 IID = Intrinsic::x86_avx512_permvar_si_512;
2603 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2604 IID = Intrinsic::x86_avx512_permvar_df_512;
2605 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2606 IID = Intrinsic::x86_avx512_permvar_di_512;
2607 else if (VecWidth == 128 && EltWidth == 16)
2608 IID = Intrinsic::x86_avx512_permvar_hi_128;
2609 else if (VecWidth == 256 && EltWidth == 16)
2610 IID = Intrinsic::x86_avx512_permvar_hi_256;
2611 else if (VecWidth == 512 && EltWidth == 16)
2612 IID = Intrinsic::x86_avx512_permvar_hi_512;
2613 else if (VecWidth == 128 && EltWidth == 8)
2614 IID = Intrinsic::x86_avx512_permvar_qi_128;
2615 else if (VecWidth == 256 && EltWidth == 8)
2616 IID = Intrinsic::x86_avx512_permvar_qi_256;
2617 else if (VecWidth == 512 && EltWidth == 8)
2618 IID = Intrinsic::x86_avx512_permvar_qi_512;
2619 else
2620 llvm_unreachable("Unexpected intrinsic");
2621 } else if (Name.starts_with("dbpsadbw.")) {
2622 if (VecWidth == 128)
2623 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2624 else if (VecWidth == 256)
2625 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2626 else if (VecWidth == 512)
2627 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2628 else
2629 llvm_unreachable("Unexpected intrinsic");
2630 } else if (Name.starts_with("pmultishift.qb.")) {
2631 if (VecWidth == 128)
2632 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2633 else if (VecWidth == 256)
2634 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2635 else if (VecWidth == 512)
2636 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2637 else
2638 llvm_unreachable("Unexpected intrinsic");
2639 } else if (Name.starts_with("conflict.")) {
2640 if (Name[9] == 'd' && VecWidth == 128)
2641 IID = Intrinsic::x86_avx512_conflict_d_128;
2642 else if (Name[9] == 'd' && VecWidth == 256)
2643 IID = Intrinsic::x86_avx512_conflict_d_256;
2644 else if (Name[9] == 'd' && VecWidth == 512)
2645 IID = Intrinsic::x86_avx512_conflict_d_512;
2646 else if (Name[9] == 'q' && VecWidth == 128)
2647 IID = Intrinsic::x86_avx512_conflict_q_128;
2648 else if (Name[9] == 'q' && VecWidth == 256)
2649 IID = Intrinsic::x86_avx512_conflict_q_256;
2650 else if (Name[9] == 'q' && VecWidth == 512)
2651 IID = Intrinsic::x86_avx512_conflict_q_512;
2652 else
2653 llvm_unreachable("Unexpected intrinsic");
2654 } else if (Name.starts_with("pavg.")) {
2655 if (Name[5] == 'b' && VecWidth == 128)
2656 IID = Intrinsic::x86_sse2_pavg_b;
2657 else if (Name[5] == 'b' && VecWidth == 256)
2658 IID = Intrinsic::x86_avx2_pavg_b;
2659 else if (Name[5] == 'b' && VecWidth == 512)
2660 IID = Intrinsic::x86_avx512_pavg_b_512;
2661 else if (Name[5] == 'w' && VecWidth == 128)
2662 IID = Intrinsic::x86_sse2_pavg_w;
2663 else if (Name[5] == 'w' && VecWidth == 256)
2664 IID = Intrinsic::x86_avx2_pavg_w;
2665 else if (Name[5] == 'w' && VecWidth == 512)
2666 IID = Intrinsic::x86_avx512_pavg_w_512;
2667 else
2668 llvm_unreachable("Unexpected intrinsic");
2669 } else
2670 return false;
2671
// Call the unmasked intrinsic with every operand except the trailing
// passthru and mask, then blend the result with the passthru under the mask.
2672 SmallVector<Value *, 4> Args(CI.args());
2673 Args.pop_back();
2674 Args.pop_back();
2675 Rep = Builder.CreateIntrinsic(IID, Args);
2676 unsigned NumArgs = CI.arg_size();
2677 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2678 CI.getArgOperand(NumArgs - 2));
2679 return true;
2680}
2681
2682/// Upgrade comment in call to inline asm that represents an objc retain release
2683/// marker.
2684void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2685 size_t Pos;
2686 if (AsmStr->find("mov\tfp") == 0 &&
2687 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2688 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2689 AsmStr->replace(Pos, 1, ";");
2690 }
2691}
2692
2694 Function *F, IRBuilder<> &Builder) {
2695 Value *Rep = nullptr;
2696
2697 if (Name == "abs.i" || Name == "abs.ll") {
2698 Value *Arg = CI->getArgOperand(0);
2699 Value *Neg = Builder.CreateNeg(Arg, "neg");
2700 Value *Cmp = Builder.CreateICmpSGE(
2701 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2702 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2703 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2704 Type *Ty = (Name == "abs.bf16")
2705 ? Builder.getBFloatTy()
2706 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2707 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2708 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2709 Rep = Builder.CreateBitCast(Abs, CI->getType());
2710 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2711 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2712 : Intrinsic::nvvm_fabs;
2713 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2714 } else if (Name.consume_front("ex2.approx.")) {
2715 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2716 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2717 : Intrinsic::nvvm_ex2_approx;
2718 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2719 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2720 Name.starts_with("atomic.load.add.f64.p")) {
2721 Value *Ptr = CI->getArgOperand(0);
2722 Value *Val = CI->getArgOperand(1);
2723 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2725 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2726 Name.starts_with("atomic.load.dec.32.p")) {
2727 Value *Ptr = CI->getArgOperand(0);
2728 Value *Val = CI->getArgOperand(1);
2729 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2731 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2733 } else if (Name == "clz.ll") {
2734 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2735 Value *Arg = CI->getArgOperand(0);
2736 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2737 {Arg, Builder.getFalse()},
2738 /*FMFSource=*/nullptr, "ctlz");
2739 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2740 } else if (Name == "popc.ll") {
2741 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2742 // i64.
2743 Value *Arg = CI->getArgOperand(0);
2744 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2745 Arg, /*FMFSource=*/nullptr, "ctpop");
2746 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2747 } else if (Name == "h2f") {
2748 Value *Cast =
2749 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
2750 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2751 } else if (Name.consume_front("bitcast.") &&
2752 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2753 Name == "d2ll")) {
2754 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2755 } else if (Name == "rotate.b32") {
2756 Value *Arg = CI->getOperand(0);
2757 Value *ShiftAmt = CI->getOperand(1);
2758 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2759 {Arg, Arg, ShiftAmt});
2760 } else if (Name == "rotate.b64") {
2761 Type *Int64Ty = Builder.getInt64Ty();
2762 Value *Arg = CI->getOperand(0);
2763 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2764 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2765 {Arg, Arg, ZExtShiftAmt});
2766 } else if (Name == "rotate.right.b64") {
2767 Type *Int64Ty = Builder.getInt64Ty();
2768 Value *Arg = CI->getOperand(0);
2769 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2770 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2771 {Arg, Arg, ZExtShiftAmt});
2772 } else if (Name == "swap.lo.hi.b64") {
2773 Type *Int64Ty = Builder.getInt64Ty();
2774 Value *Arg = CI->getOperand(0);
2775 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2776 {Arg, Arg, Builder.getInt64(32)});
2777 } else if ((Name.consume_front("ptr.gen.to.") &&
2778 consumeNVVMPtrAddrSpace(Name)) ||
2779 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2780 Name.starts_with(".to.gen"))) {
2781 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2782 } else if (Name.consume_front("ldg.global")) {
2783 Value *Ptr = CI->getArgOperand(0);
2784 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2785 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2786 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2787 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2788 MDNode *MD = MDNode::get(Builder.getContext(), {});
2789 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2790 return LD;
2791 } else if (Name == "tanh.approx.f32") {
2792 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2793 FastMathFlags FMF;
2794 FMF.setApproxFunc();
2795 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2796 FMF);
2797 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2798 Value *Arg =
2799 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2800 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2801 {}, {Arg});
2802 } else if (Name == "barrier") {
2803 Rep = Builder.CreateIntrinsic(
2804 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2805 {CI->getArgOperand(0), CI->getArgOperand(1)});
2806 } else if (Name == "barrier.sync") {
2807 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2808 {CI->getArgOperand(0)});
2809 } else if (Name == "barrier.sync.cnt") {
2810 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2811 {CI->getArgOperand(0), CI->getArgOperand(1)});
2812 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2813 Name == "barrier0.or") {
2814 Value *C = CI->getArgOperand(0);
2815 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2816
2817 Intrinsic::ID IID =
2819 .Case("barrier0.popc",
2820 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2821 .Case("barrier0.and",
2822 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2823 .Case("barrier0.or",
2824 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2825 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2826 Rep = Builder.CreateZExt(Bar, CI->getType());
2827 } else {
2829 if (IID != Intrinsic::not_intrinsic &&
2830 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2831 rename(F);
2832 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2834 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2835 Value *Arg = CI->getArgOperand(I);
2836 Type *OldType = Arg->getType();
2837 Type *NewType = NewFn->getArg(I)->getType();
2838 Args.push_back(
2839 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2840 ? Builder.CreateBitCast(Arg, NewType)
2841 : Arg);
2842 }
2843 Rep = Builder.CreateCall(NewFn, Args);
2844 if (F->getReturnType()->isIntegerTy())
2845 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2846 }
2847 }
2848
2849 return Rep;
2850}
2851
2853 IRBuilder<> &Builder) {
2854 LLVMContext &C = F->getContext();
2855 Value *Rep = nullptr;
2856
2857 if (Name.starts_with("sse4a.movnt.")) {
2859 Elts.push_back(
2860 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2861 MDNode *Node = MDNode::get(C, Elts);
2862
2863 Value *Arg0 = CI->getArgOperand(0);
2864 Value *Arg1 = CI->getArgOperand(1);
2865
2866 // Nontemporal (unaligned) store of the 0'th element of the float/double
2867 // vector.
2868 Value *Extract =
2869 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2870
2871 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2872 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2873 } else if (Name.starts_with("avx.movnt.") ||
2874 Name.starts_with("avx512.storent.")) {
2876 Elts.push_back(
2877 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2878 MDNode *Node = MDNode::get(C, Elts);
2879
2880 Value *Arg0 = CI->getArgOperand(0);
2881 Value *Arg1 = CI->getArgOperand(1);
2882
2883 StoreInst *SI = Builder.CreateAlignedStore(
2884 Arg1, Arg0,
2886 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2887 } else if (Name == "sse2.storel.dq") {
2888 Value *Arg0 = CI->getArgOperand(0);
2889 Value *Arg1 = CI->getArgOperand(1);
2890
2891 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2892 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2893 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2894 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2895 } else if (Name.starts_with("sse.storeu.") ||
2896 Name.starts_with("sse2.storeu.") ||
2897 Name.starts_with("avx.storeu.")) {
2898 Value *Arg0 = CI->getArgOperand(0);
2899 Value *Arg1 = CI->getArgOperand(1);
2900 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2901 } else if (Name == "avx512.mask.store.ss") {
2902 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2903 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2904 Mask, false);
2905 } else if (Name.starts_with("avx512.mask.store")) {
2906 // "avx512.mask.storeu." or "avx512.mask.store."
2907 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2908 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2909 CI->getArgOperand(2), Aligned);
2910 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2911 // Upgrade packed integer vector compare intrinsics to compare instructions.
2912 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2913 bool CmpEq = Name[9] == 'e';
2914 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2915 CI->getArgOperand(0), CI->getArgOperand(1));
2916 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2917 } else if (Name.starts_with("avx512.broadcastm")) {
2918 Type *ExtTy = Type::getInt32Ty(C);
2919 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2920 ExtTy = Type::getInt64Ty(C);
2921 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2922 ExtTy->getPrimitiveSizeInBits();
2923 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2924 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2925 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2926 Value *Vec = CI->getArgOperand(0);
2927 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2928 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2929 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2930 } else if (Name.starts_with("avx.sqrt.p") ||
2931 Name.starts_with("sse2.sqrt.p") ||
2932 Name.starts_with("sse.sqrt.p")) {
2933 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2934 {CI->getArgOperand(0)});
2935 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2936 if (CI->arg_size() == 4 &&
2937 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2938 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2939 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2940 : Intrinsic::x86_avx512_sqrt_pd_512;
2941
2942 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2943 Rep = Builder.CreateIntrinsic(IID, Args);
2944 } else {
2945 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2946 {CI->getArgOperand(0)});
2947 }
2948 Rep =
2949 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2950 } else if (Name.starts_with("avx512.ptestm") ||
2951 Name.starts_with("avx512.ptestnm")) {
2952 Value *Op0 = CI->getArgOperand(0);
2953 Value *Op1 = CI->getArgOperand(1);
2954 Value *Mask = CI->getArgOperand(2);
2955 Rep = Builder.CreateAnd(Op0, Op1);
2956 llvm::Type *Ty = Op0->getType();
2958 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2961 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2962 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2963 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2964 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2965 ->getNumElements();
2966 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2967 Rep =
2968 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2969 } else if (Name.starts_with("avx512.kunpck")) {
2970 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2971 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2972 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2973 int Indices[64];
2974 for (unsigned i = 0; i != NumElts; ++i)
2975 Indices[i] = i;
2976
2977 // First extract half of each vector. This gives better codegen than
2978 // doing it in a single shuffle.
2979 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2980 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2981 // Concat the vectors.
2982 // NOTE: Operands have to be swapped to match intrinsic definition.
2983 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2984 Rep = Builder.CreateBitCast(Rep, CI->getType());
2985 } else if (Name == "avx512.kand.w") {
2986 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2987 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2988 Rep = Builder.CreateAnd(LHS, RHS);
2989 Rep = Builder.CreateBitCast(Rep, CI->getType());
2990 } else if (Name == "avx512.kandn.w") {
2991 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2992 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2993 LHS = Builder.CreateNot(LHS);
2994 Rep = Builder.CreateAnd(LHS, RHS);
2995 Rep = Builder.CreateBitCast(Rep, CI->getType());
2996 } else if (Name == "avx512.kor.w") {
2997 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2998 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2999 Rep = Builder.CreateOr(LHS, RHS);
3000 Rep = Builder.CreateBitCast(Rep, CI->getType());
3001 } else if (Name == "avx512.kxor.w") {
3002 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3003 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3004 Rep = Builder.CreateXor(LHS, RHS);
3005 Rep = Builder.CreateBitCast(Rep, CI->getType());
3006 } else if (Name == "avx512.kxnor.w") {
3007 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3008 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3009 LHS = Builder.CreateNot(LHS);
3010 Rep = Builder.CreateXor(LHS, RHS);
3011 Rep = Builder.CreateBitCast(Rep, CI->getType());
3012 } else if (Name == "avx512.knot.w") {
3013 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3014 Rep = Builder.CreateNot(Rep);
3015 Rep = Builder.CreateBitCast(Rep, CI->getType());
3016 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3017 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3018 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3019 Rep = Builder.CreateOr(LHS, RHS);
3020 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3021 Value *C;
3022 if (Name[14] == 'c')
3023 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
3024 else
3025 C = ConstantInt::getNullValue(Builder.getInt16Ty());
3026 Rep = Builder.CreateICmpEQ(Rep, C);
3027 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3028 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3029 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3030 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3031 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3032 Type *I32Ty = Type::getInt32Ty(C);
3033 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
3034 ConstantInt::get(I32Ty, 0));
3035 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
3036 ConstantInt::get(I32Ty, 0));
3037 Value *EltOp;
3038 if (Name.contains(".add."))
3039 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3040 else if (Name.contains(".sub."))
3041 EltOp = Builder.CreateFSub(Elt0, Elt1);
3042 else if (Name.contains(".mul."))
3043 EltOp = Builder.CreateFMul(Elt0, Elt1);
3044 else
3045 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3046 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3047 ConstantInt::get(I32Ty, 0));
3048 } else if (Name.starts_with("avx512.mask.pcmp")) {
3049 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3050 bool CmpEq = Name[16] == 'e';
3051 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3052 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3053 Type *OpTy = CI->getArgOperand(0)->getType();
3054 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3055 Intrinsic::ID IID;
3056 switch (VecWidth) {
3057 default:
3058 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3059 break;
3060 case 128:
3061 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3062 break;
3063 case 256:
3064 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3065 break;
3066 case 512:
3067 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3068 break;
3069 }
3070
3071 Rep =
3072 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3073 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3074 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3075 Type *OpTy = CI->getArgOperand(0)->getType();
3076 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3077 unsigned EltWidth = OpTy->getScalarSizeInBits();
3078 Intrinsic::ID IID;
3079 if (VecWidth == 128 && EltWidth == 32)
3080 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3081 else if (VecWidth == 256 && EltWidth == 32)
3082 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3083 else if (VecWidth == 512 && EltWidth == 32)
3084 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3085 else if (VecWidth == 128 && EltWidth == 64)
3086 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3087 else if (VecWidth == 256 && EltWidth == 64)
3088 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3089 else if (VecWidth == 512 && EltWidth == 64)
3090 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3091 else
3092 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3093
3094 Rep =
3095 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3096 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3097 } else if (Name.starts_with("avx512.cmp.p")) {
3098 SmallVector<Value *, 4> Args(CI->args());
3099 Type *OpTy = Args[0]->getType();
3100 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3101 unsigned EltWidth = OpTy->getScalarSizeInBits();
3102 Intrinsic::ID IID;
3103 if (VecWidth == 128 && EltWidth == 32)
3104 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3105 else if (VecWidth == 256 && EltWidth == 32)
3106 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3107 else if (VecWidth == 512 && EltWidth == 32)
3108 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3109 else if (VecWidth == 128 && EltWidth == 64)
3110 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3111 else if (VecWidth == 256 && EltWidth == 64)
3112 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3113 else if (VecWidth == 512 && EltWidth == 64)
3114 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3115 else
3116 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3117
3119 if (VecWidth == 512)
3120 std::swap(Mask, Args.back());
3121 Args.push_back(Mask);
3122
3123 Rep = Builder.CreateIntrinsic(IID, Args);
3124 } else if (Name.starts_with("avx512.mask.cmp.")) {
3125 // Integer compare intrinsics.
3126 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3127 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3128 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3129 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3130 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3131 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3132 Name.starts_with("avx512.cvtw2mask.") ||
3133 Name.starts_with("avx512.cvtd2mask.") ||
3134 Name.starts_with("avx512.cvtq2mask.")) {
3135 Value *Op = CI->getArgOperand(0);
3136 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3137 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3138 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3139 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3140 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3141 Name.starts_with("avx512.mask.pabs")) {
3142 Rep = upgradeAbs(Builder, *CI);
3143 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3144 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3145 Name.starts_with("avx512.mask.pmaxs")) {
3146 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3147 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3148 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3149 Name.starts_with("avx512.mask.pmaxu")) {
3150 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3151 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3152 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3153 Name.starts_with("avx512.mask.pmins")) {
3154 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3155 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3156 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3157 Name.starts_with("avx512.mask.pminu")) {
3158 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3159 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3160 Name == "avx512.pmulu.dq.512" ||
3161 Name.starts_with("avx512.mask.pmulu.dq.")) {
3162 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3163 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3164 Name == "avx512.pmul.dq.512" ||
3165 Name.starts_with("avx512.mask.pmul.dq.")) {
3166 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3167 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3168 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3169 Rep =
3170 Builder.CreateSIToFP(CI->getArgOperand(1),
3171 cast<VectorType>(CI->getType())->getElementType());
3172 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3173 } else if (Name == "avx512.cvtusi2sd") {
3174 Rep =
3175 Builder.CreateUIToFP(CI->getArgOperand(1),
3176 cast<VectorType>(CI->getType())->getElementType());
3177 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3178 } else if (Name == "sse2.cvtss2sd") {
3179 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3180 Rep = Builder.CreateFPExt(
3181 Rep, cast<VectorType>(CI->getType())->getElementType());
3182 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3183 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3184 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3185 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3186 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3187 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3188 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3189 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3190 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3191 Name == "avx512.mask.cvtqq2ps.256" ||
3192 Name == "avx512.mask.cvtqq2ps.512" ||
3193 Name == "avx512.mask.cvtuqq2ps.256" ||
3194 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3195 Name == "avx.cvt.ps2.pd.256" ||
3196 Name == "avx512.mask.cvtps2pd.128" ||
3197 Name == "avx512.mask.cvtps2pd.256") {
3198 auto *DstTy = cast<FixedVectorType>(CI->getType());
3199 Rep = CI->getArgOperand(0);
3200 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3201
3202 unsigned NumDstElts = DstTy->getNumElements();
3203 if (NumDstElts < SrcTy->getNumElements()) {
3204 assert(NumDstElts == 2 && "Unexpected vector size");
3205 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3206 }
3207
3208 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3209 bool IsUnsigned = Name.contains("cvtu");
3210 if (IsPS2PD)
3211 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3212 else if (CI->arg_size() == 4 &&
3213 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3214 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3215 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3216 : Intrinsic::x86_avx512_sitofp_round;
3217 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3218 {Rep, CI->getArgOperand(3)});
3219 } else {
3220 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3221 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3222 }
3223
3224 if (CI->arg_size() >= 3)
3225 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3226 CI->getArgOperand(1));
3227 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3228 Name.starts_with("vcvtph2ps.")) {
3229 auto *DstTy = cast<FixedVectorType>(CI->getType());
3230 Rep = CI->getArgOperand(0);
3231 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3232 unsigned NumDstElts = DstTy->getNumElements();
3233 if (NumDstElts != SrcTy->getNumElements()) {
3234 assert(NumDstElts == 4 && "Unexpected vector size");
3235 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3236 }
3237 Rep = Builder.CreateBitCast(
3238 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3239 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3240 if (CI->arg_size() >= 3)
3241 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3242 CI->getArgOperand(1));
3243 } else if (Name.starts_with("avx512.mask.load")) {
3244 // "avx512.mask.loadu." or "avx512.mask.load."
3245 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3246 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3247 CI->getArgOperand(2), Aligned);
3248 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3249 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3250 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3251 ResultTy->getNumElements());
3252
3253 Rep = Builder.CreateIntrinsic(
3254 Intrinsic::masked_expandload, ResultTy,
3255 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3256 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3257 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3258 Value *MaskVec =
3259 getX86MaskVec(Builder, CI->getArgOperand(2),
3260 cast<FixedVectorType>(ResultTy)->getNumElements());
3261
3262 Rep = Builder.CreateIntrinsic(
3263 Intrinsic::masked_compressstore, ResultTy,
3264 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3265 } else if (Name.starts_with("avx512.mask.compress.") ||
3266 Name.starts_with("avx512.mask.expand.")) {
3267 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3268
3269 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3270 ResultTy->getNumElements());
3271
3272 bool IsCompress = Name[12] == 'c';
3273 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3274 : Intrinsic::x86_avx512_mask_expand;
3275 Rep = Builder.CreateIntrinsic(
3276 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3277 } else if (Name.starts_with("xop.vpcom")) {
3278 bool IsSigned;
3279 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3280 Name.ends_with("uq"))
3281 IsSigned = false;
3282 else if (Name.ends_with("b") || Name.ends_with("w") ||
3283 Name.ends_with("d") || Name.ends_with("q"))
3284 IsSigned = true;
3285 else
3286 reportFatalUsageErrorWithCI("Intrinsic has unknown suffix", CI);
3287
3288 unsigned Imm;
3289 if (CI->arg_size() == 3) {
3290 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3291 } else {
3292 Name = Name.substr(9); // strip off "xop.vpcom"
3293 if (Name.starts_with("lt"))
3294 Imm = 0;
3295 else if (Name.starts_with("le"))
3296 Imm = 1;
3297 else if (Name.starts_with("gt"))
3298 Imm = 2;
3299 else if (Name.starts_with("ge"))
3300 Imm = 3;
3301 else if (Name.starts_with("eq"))
3302 Imm = 4;
3303 else if (Name.starts_with("ne"))
3304 Imm = 5;
3305 else if (Name.starts_with("false"))
3306 Imm = 6;
3307 else if (Name.starts_with("true"))
3308 Imm = 7;
3309 else
3310 llvm_unreachable("Unknown condition");
3311 }
3312
3313 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3314 } else if (Name.starts_with("xop.vpcmov")) {
3315 Value *Sel = CI->getArgOperand(2);
3316 Value *NotSel = Builder.CreateNot(Sel);
3317 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3318 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3319 Rep = Builder.CreateOr(Sel0, Sel1);
3320 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3321 Name.starts_with("avx512.mask.prol")) {
3322 Rep = upgradeX86Rotate(Builder, *CI, false);
3323 } else if (Name.starts_with("avx512.pror") ||
3324 Name.starts_with("avx512.mask.pror")) {
3325 Rep = upgradeX86Rotate(Builder, *CI, true);
3326 } else if (Name.starts_with("avx512.vpshld.") ||
3327 Name.starts_with("avx512.mask.vpshld") ||
3328 Name.starts_with("avx512.maskz.vpshld")) {
3329 bool ZeroMask = Name[11] == 'z';
3330 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3331 } else if (Name.starts_with("avx512.vpshrd.") ||
3332 Name.starts_with("avx512.mask.vpshrd") ||
3333 Name.starts_with("avx512.maskz.vpshrd")) {
3334 bool ZeroMask = Name[11] == 'z';
3335 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3336 } else if (Name == "sse42.crc32.64.8") {
3337 Value *Trunc0 =
3338 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3339 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3340 {Trunc0, CI->getArgOperand(1)});
3341 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3342 } else if (Name.starts_with("avx.vbroadcast.s") ||
3343 Name.starts_with("avx512.vbroadcast.s")) {
3344 // Replace broadcasts with a series of insertelements.
3345 auto *VecTy = cast<FixedVectorType>(CI->getType());
3346 Type *EltTy = VecTy->getElementType();
3347 unsigned EltNum = VecTy->getNumElements();
3348 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3349 Type *I32Ty = Type::getInt32Ty(C);
3350 Rep = PoisonValue::get(VecTy);
3351 for (unsigned I = 0; I < EltNum; ++I)
3352 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3353 } else if (Name.starts_with("sse41.pmovsx") ||
3354 Name.starts_with("sse41.pmovzx") ||
3355 Name.starts_with("avx2.pmovsx") ||
3356 Name.starts_with("avx2.pmovzx") ||
3357 Name.starts_with("avx512.mask.pmovsx") ||
3358 Name.starts_with("avx512.mask.pmovzx")) {
3359 auto *DstTy = cast<FixedVectorType>(CI->getType());
3360 unsigned NumDstElts = DstTy->getNumElements();
3361
3362 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3363 SmallVector<int, 8> ShuffleMask(NumDstElts);
3364 for (unsigned i = 0; i != NumDstElts; ++i)
3365 ShuffleMask[i] = i;
3366
3367 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3368
3369 bool DoSext = Name.contains("pmovsx");
3370 Rep =
3371 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3372 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3373 if (CI->arg_size() == 3)
3374 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3375 CI->getArgOperand(1));
3376 } else if (Name == "avx512.mask.pmov.qd.256" ||
3377 Name == "avx512.mask.pmov.qd.512" ||
3378 Name == "avx512.mask.pmov.wb.256" ||
3379 Name == "avx512.mask.pmov.wb.512") {
3380 Type *Ty = CI->getArgOperand(1)->getType();
3381 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3382 Rep =
3383 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3384 } else if (Name.starts_with("avx.vbroadcastf128") ||
3385 Name == "avx2.vbroadcasti128") {
3386 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3387 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3388 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3389 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3390 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3391 if (NumSrcElts == 2)
3392 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3393 else
3394 Rep = Builder.CreateShuffleVector(Load,
3395 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3396 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3397 Name.starts_with("avx512.mask.shuf.f")) {
3398 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3399 Type *VT = CI->getType();
3400 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3401 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3402 unsigned ControlBitsMask = NumLanes - 1;
3403 unsigned NumControlBits = NumLanes / 2;
3404 SmallVector<int, 8> ShuffleMask(0);
3405
3406 for (unsigned l = 0; l != NumLanes; ++l) {
3407 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3408 // We actually need the other source.
3409 if (l >= NumLanes / 2)
3410 LaneMask += NumLanes;
3411 for (unsigned i = 0; i != NumElementsInLane; ++i)
3412 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3413 }
3414 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3415 CI->getArgOperand(1), ShuffleMask);
3416 Rep =
3417 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3418 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3419 Name.starts_with("avx512.mask.broadcasti")) {
3420 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3421 ->getNumElements();
3422 unsigned NumDstElts =
3423 cast<FixedVectorType>(CI->getType())->getNumElements();
3424
3425 SmallVector<int, 8> ShuffleMask(NumDstElts);
3426 for (unsigned i = 0; i != NumDstElts; ++i)
3427 ShuffleMask[i] = i % NumSrcElts;
3428
3429 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3430 CI->getArgOperand(0), ShuffleMask);
3431 Rep =
3432 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3433 } else if (Name.starts_with("avx2.pbroadcast") ||
3434 Name.starts_with("avx2.vbroadcast") ||
3435 Name.starts_with("avx512.pbroadcast") ||
3436 Name.starts_with("avx512.mask.broadcast.s")) {
3437 // Replace vp?broadcasts with a vector shuffle.
3438 Value *Op = CI->getArgOperand(0);
3439 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3440 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3443 Rep = Builder.CreateShuffleVector(Op, M);
3444
3445 if (CI->arg_size() == 3)
3446 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3447 CI->getArgOperand(1));
3448 } else if (Name.starts_with("sse2.padds.") ||
3449 Name.starts_with("avx2.padds.") ||
3450 Name.starts_with("avx512.padds.") ||
3451 Name.starts_with("avx512.mask.padds.")) {
3452 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3453 } else if (Name.starts_with("sse2.psubs.") ||
3454 Name.starts_with("avx2.psubs.") ||
3455 Name.starts_with("avx512.psubs.") ||
3456 Name.starts_with("avx512.mask.psubs.")) {
3457 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3458 } else if (Name.starts_with("sse2.paddus.") ||
3459 Name.starts_with("avx2.paddus.") ||
3460 Name.starts_with("avx512.mask.paddus.")) {
3461 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3462 } else if (Name.starts_with("sse2.psubus.") ||
3463 Name.starts_with("avx2.psubus.") ||
3464 Name.starts_with("avx512.mask.psubus.")) {
3465 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3466 } else if (Name.starts_with("avx512.mask.palignr.")) {
3467 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3468 CI->getArgOperand(1), CI->getArgOperand(2),
3469 CI->getArgOperand(3), CI->getArgOperand(4),
3470 false);
3471 } else if (Name.starts_with("avx512.mask.valign.")) {
3473 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3474 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3475 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3476 // 128/256-bit shift left specified in bits.
3477 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3478 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3479 Shift / 8); // Shift is in bits.
3480 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3481 // 128/256-bit shift right specified in bits.
3482 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3483 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3484 Shift / 8); // Shift is in bits.
3485 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3486 Name == "avx512.psll.dq.512") {
3487 // 128/256/512-bit shift left specified in bytes.
3488 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3489 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3490 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3491 Name == "avx512.psrl.dq.512") {
3492 // 128/256/512-bit shift right specified in bytes.
3493 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3494 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3495 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3496 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3497 Name.starts_with("avx2.pblendd.")) {
3498 Value *Op0 = CI->getArgOperand(0);
3499 Value *Op1 = CI->getArgOperand(1);
3500 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3501 auto *VecTy = cast<FixedVectorType>(CI->getType());
3502 unsigned NumElts = VecTy->getNumElements();
3503
3504 SmallVector<int, 16> Idxs(NumElts);
3505 for (unsigned i = 0; i != NumElts; ++i)
3506 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3507
3508 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3509 } else if (Name.starts_with("avx.vinsertf128.") ||
3510 Name == "avx2.vinserti128" ||
3511 Name.starts_with("avx512.mask.insert")) {
3512 Value *Op0 = CI->getArgOperand(0);
3513 Value *Op1 = CI->getArgOperand(1);
3514 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3515 unsigned DstNumElts =
3516 cast<FixedVectorType>(CI->getType())->getNumElements();
3517 unsigned SrcNumElts =
3518 cast<FixedVectorType>(Op1->getType())->getNumElements();
3519 unsigned Scale = DstNumElts / SrcNumElts;
3520
3521 // Mask off the high bits of the immediate value; hardware ignores those.
3522 Imm = Imm % Scale;
3523
3524 // Extend the second operand into a vector the size of the destination.
3525 SmallVector<int, 8> Idxs(DstNumElts);
3526 for (unsigned i = 0; i != SrcNumElts; ++i)
3527 Idxs[i] = i;
3528 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3529 Idxs[i] = SrcNumElts;
3530 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3531
3532 // Insert the second operand into the first operand.
3533
3534 // Note that there is no guarantee that instruction lowering will actually
3535 // produce a vinsertf128 instruction for the created shuffles. In
3536 // particular, the 0 immediate case involves no lane changes, so it can
3537 // be handled as a blend.
3538
3539 // Example of shuffle mask for 32-bit elements:
3540 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3541 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3542
3543 // First fill with identity mask.
3544 for (unsigned i = 0; i != DstNumElts; ++i)
3545 Idxs[i] = i;
3546 // Then replace the elements where we need to insert.
3547 for (unsigned i = 0; i != SrcNumElts; ++i)
3548 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3549 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3550
3551 // If the intrinsic has a mask operand, handle that.
3552 if (CI->arg_size() == 5)
3553 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3554 CI->getArgOperand(3));
3555 } else if (Name.starts_with("avx.vextractf128.") ||
3556 Name == "avx2.vextracti128" ||
3557 Name.starts_with("avx512.mask.vextract")) {
3558 Value *Op0 = CI->getArgOperand(0);
3559 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3560 unsigned DstNumElts =
3561 cast<FixedVectorType>(CI->getType())->getNumElements();
3562 unsigned SrcNumElts =
3563 cast<FixedVectorType>(Op0->getType())->getNumElements();
3564 unsigned Scale = SrcNumElts / DstNumElts;
3565
3566 // Mask off the high bits of the immediate value; hardware ignores those.
3567 Imm = Imm % Scale;
3568
3569 // Get indexes for the subvector of the input vector.
3570 SmallVector<int, 8> Idxs(DstNumElts);
3571 for (unsigned i = 0; i != DstNumElts; ++i) {
3572 Idxs[i] = i + (Imm * DstNumElts);
3573 }
3574 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3575
3576 // If the intrinsic has a mask operand, handle that.
3577 if (CI->arg_size() == 4)
3578 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3579 CI->getArgOperand(2));
3580 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3581 Name.starts_with("avx512.mask.perm.di.")) {
3582 Value *Op0 = CI->getArgOperand(0);
3583 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3584 auto *VecTy = cast<FixedVectorType>(CI->getType());
3585 unsigned NumElts = VecTy->getNumElements();
3586
3587 SmallVector<int, 8> Idxs(NumElts);
3588 for (unsigned i = 0; i != NumElts; ++i)
3589 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3590
3591 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3592
3593 if (CI->arg_size() == 4)
3594 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3595 CI->getArgOperand(2));
3596 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3597 // The immediate permute control byte looks like this:
3598 // [1:0] - select 128 bits from sources for low half of destination
3599 // [2] - ignore
3600 // [3] - zero low half of destination
3601 // [5:4] - select 128 bits from sources for high half of destination
3602 // [6] - ignore
3603 // [7] - zero high half of destination
3604
3605 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3606
3607 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3608 unsigned HalfSize = NumElts / 2;
3609 SmallVector<int, 8> ShuffleMask(NumElts);
3610
3611 // Determine which operand(s) are actually in use for this instruction.
3612 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3613 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3614
3615 // If needed, replace operands based on zero mask.
3616 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3617 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3618
3619 // Permute low half of result.
3620 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3621 for (unsigned i = 0; i < HalfSize; ++i)
3622 ShuffleMask[i] = StartIndex + i;
3623
3624 // Permute high half of result.
3625 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3626 for (unsigned i = 0; i < HalfSize; ++i)
3627 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3628
3629 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3630
3631 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3632 Name.starts_with("avx512.mask.vpermil.p") ||
3633 Name.starts_with("avx512.mask.pshuf.d.")) {
3634 Value *Op0 = CI->getArgOperand(0);
3635 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3636 auto *VecTy = cast<FixedVectorType>(CI->getType());
3637 unsigned NumElts = VecTy->getNumElements();
3638 // Calculate the size of each index in the immediate.
3639 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3640 unsigned IdxMask = ((1 << IdxSize) - 1);
3641
3642 SmallVector<int, 8> Idxs(NumElts);
3643 // Lookup the bits for this element, wrapping around the immediate every
3644 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3645 // to offset by the first index of each group.
3646 for (unsigned i = 0; i != NumElts; ++i)
3647 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3648
3649 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3650
3651 if (CI->arg_size() == 4)
3652 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3653 CI->getArgOperand(2));
3654 } else if (Name == "sse2.pshufl.w" ||
3655 Name.starts_with("avx512.mask.pshufl.w.")) {
3656 Value *Op0 = CI->getArgOperand(0);
3657 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3658 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3659
3660 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3661 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3662
3663 SmallVector<int, 16> Idxs(NumElts);
3664 for (unsigned l = 0; l != NumElts; l += 8) {
3665 for (unsigned i = 0; i != 4; ++i)
3666 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3667 for (unsigned i = 4; i != 8; ++i)
3668 Idxs[i + l] = i + l;
3669 }
3670
3671 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3672
3673 if (CI->arg_size() == 4)
3674 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3675 CI->getArgOperand(2));
3676 } else if (Name == "sse2.pshufh.w" ||
3677 Name.starts_with("avx512.mask.pshufh.w.")) {
3678 Value *Op0 = CI->getArgOperand(0);
3679 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3680 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3681
3682 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3683 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3684
3685 SmallVector<int, 16> Idxs(NumElts);
3686 for (unsigned l = 0; l != NumElts; l += 8) {
3687 for (unsigned i = 0; i != 4; ++i)
3688 Idxs[i + l] = i + l;
3689 for (unsigned i = 0; i != 4; ++i)
3690 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3691 }
3692
3693 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3694
3695 if (CI->arg_size() == 4)
3696 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3697 CI->getArgOperand(2));
3698 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3699 Value *Op0 = CI->getArgOperand(0);
3700 Value *Op1 = CI->getArgOperand(1);
3701 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3702 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3703
3704 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3705 unsigned HalfLaneElts = NumLaneElts / 2;
3706
3707 SmallVector<int, 16> Idxs(NumElts);
3708 for (unsigned i = 0; i != NumElts; ++i) {
3709 // Base index is the starting element of the lane.
3710 Idxs[i] = i - (i % NumLaneElts);
3711 // If we are half way through the lane switch to the other source.
3712 if ((i % NumLaneElts) >= HalfLaneElts)
3713 Idxs[i] += NumElts;
3714 // Now select the specific element by adding HalfLaneElts bits from
3715 // the immediate, wrapping around the immediate every 8 bits.
3716 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3717 }
3718
3719 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3720
3721 Rep =
3722 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3723 } else if (Name.starts_with("avx512.mask.movddup") ||
3724 Name.starts_with("avx512.mask.movshdup") ||
3725 Name.starts_with("avx512.mask.movsldup")) {
3726 Value *Op0 = CI->getArgOperand(0);
3727 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3728 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3729
3730 unsigned Offset = 0;
3731 if (Name.starts_with("avx512.mask.movshdup."))
3732 Offset = 1;
3733
3734 SmallVector<int, 16> Idxs(NumElts);
3735 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3736 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3737 Idxs[i + l + 0] = i + l + Offset;
3738 Idxs[i + l + 1] = i + l + Offset;
3739 }
3740
3741 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3742
3743 Rep =
3744 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3745 } else if (Name.starts_with("avx512.mask.punpckl") ||
3746 Name.starts_with("avx512.mask.unpckl.")) {
3747 Value *Op0 = CI->getArgOperand(0);
3748 Value *Op1 = CI->getArgOperand(1);
3749 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3750 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3751
3752 SmallVector<int, 64> Idxs(NumElts);
3753 for (int l = 0; l != NumElts; l += NumLaneElts)
3754 for (int i = 0; i != NumLaneElts; ++i)
3755 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3756
3757 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3758
3759 Rep =
3760 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3761 } else if (Name.starts_with("avx512.mask.punpckh") ||
3762 Name.starts_with("avx512.mask.unpckh.")) {
3763 Value *Op0 = CI->getArgOperand(0);
3764 Value *Op1 = CI->getArgOperand(1);
3765 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3766 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3767
3768 SmallVector<int, 64> Idxs(NumElts);
3769 for (int l = 0; l != NumElts; l += NumLaneElts)
3770 for (int i = 0; i != NumLaneElts; ++i)
3771 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3772
3773 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3774
3775 Rep =
3776 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3777 } else if (Name.starts_with("avx512.mask.and.") ||
3778 Name.starts_with("avx512.mask.pand.")) {
3779 VectorType *FTy = cast<VectorType>(CI->getType());
3781 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3782 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3783 Rep = Builder.CreateBitCast(Rep, FTy);
3784 Rep =
3785 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3786 } else if (Name.starts_with("avx512.mask.andn.") ||
3787 Name.starts_with("avx512.mask.pandn.")) {
3788 VectorType *FTy = cast<VectorType>(CI->getType());
3790 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3791 Rep = Builder.CreateAnd(Rep,
3792 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3793 Rep = Builder.CreateBitCast(Rep, FTy);
3794 Rep =
3795 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3796 } else if (Name.starts_with("avx512.mask.or.") ||
3797 Name.starts_with("avx512.mask.por.")) {
3798 VectorType *FTy = cast<VectorType>(CI->getType());
3800 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3801 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3802 Rep = Builder.CreateBitCast(Rep, FTy);
3803 Rep =
3804 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3805 } else if (Name.starts_with("avx512.mask.xor.") ||
3806 Name.starts_with("avx512.mask.pxor.")) {
3807 VectorType *FTy = cast<VectorType>(CI->getType());
3809 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3810 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3811 Rep = Builder.CreateBitCast(Rep, FTy);
3812 Rep =
3813 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3814 } else if (Name.starts_with("avx512.mask.padd.")) {
3815 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3816 Rep =
3817 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3818 } else if (Name.starts_with("avx512.mask.psub.")) {
3819 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3820 Rep =
3821 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3822 } else if (Name.starts_with("avx512.mask.pmull.")) {
3823 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3824 Rep =
3825 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3826 } else if (Name.starts_with("avx512.mask.add.p")) {
3827 if (Name.ends_with(".512")) {
3828 Intrinsic::ID IID;
3829 if (Name[17] == 's')
3830 IID = Intrinsic::x86_avx512_add_ps_512;
3831 else
3832 IID = Intrinsic::x86_avx512_add_pd_512;
3833
3834 Rep = Builder.CreateIntrinsic(
3835 IID,
3836 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3837 } else {
3838 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3839 }
3840 Rep =
3841 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3842 } else if (Name.starts_with("avx512.mask.div.p")) {
3843 if (Name.ends_with(".512")) {
3844 Intrinsic::ID IID;
3845 if (Name[17] == 's')
3846 IID = Intrinsic::x86_avx512_div_ps_512;
3847 else
3848 IID = Intrinsic::x86_avx512_div_pd_512;
3849
3850 Rep = Builder.CreateIntrinsic(
3851 IID,
3852 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3853 } else {
3854 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3855 }
3856 Rep =
3857 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3858 } else if (Name.starts_with("avx512.mask.mul.p")) {
3859 if (Name.ends_with(".512")) {
3860 Intrinsic::ID IID;
3861 if (Name[17] == 's')
3862 IID = Intrinsic::x86_avx512_mul_ps_512;
3863 else
3864 IID = Intrinsic::x86_avx512_mul_pd_512;
3865
3866 Rep = Builder.CreateIntrinsic(
3867 IID,
3868 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3869 } else {
3870 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3871 }
3872 Rep =
3873 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3874 } else if (Name.starts_with("avx512.mask.sub.p")) {
3875 if (Name.ends_with(".512")) {
3876 Intrinsic::ID IID;
3877 if (Name[17] == 's')
3878 IID = Intrinsic::x86_avx512_sub_ps_512;
3879 else
3880 IID = Intrinsic::x86_avx512_sub_pd_512;
3881
3882 Rep = Builder.CreateIntrinsic(
3883 IID,
3884 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3885 } else {
3886 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3887 }
3888 Rep =
3889 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3890 } else if ((Name.starts_with("avx512.mask.max.p") ||
3891 Name.starts_with("avx512.mask.min.p")) &&
3892 Name.drop_front(18) == ".512") {
3893 bool IsDouble = Name[17] == 'd';
3894 bool IsMin = Name[13] == 'i';
3895 static const Intrinsic::ID MinMaxTbl[2][2] = {
3896 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3897 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3898 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3899
3900 Rep = Builder.CreateIntrinsic(
3901 IID,
3902 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3903 Rep =
3904 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3905 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3906 Rep =
3907 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3908 {CI->getArgOperand(0), Builder.getInt1(false)});
3909 Rep =
3910 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3911 } else if (Name.starts_with("avx512.mask.psll")) {
3912 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3913 bool IsVariable = Name[16] == 'v';
3914 char Size = Name[16] == '.' ? Name[17]
3915 : Name[17] == '.' ? Name[18]
3916 : Name[18] == '.' ? Name[19]
3917 : Name[20];
3918
3919 Intrinsic::ID IID;
3920 if (IsVariable && Name[17] != '.') {
3921 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3922 IID = Intrinsic::x86_avx2_psllv_q;
3923 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3924 IID = Intrinsic::x86_avx2_psllv_q_256;
3925 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3926 IID = Intrinsic::x86_avx2_psllv_d;
3927 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3928 IID = Intrinsic::x86_avx2_psllv_d_256;
3929 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3930 IID = Intrinsic::x86_avx512_psllv_w_128;
3931 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3932 IID = Intrinsic::x86_avx512_psllv_w_256;
3933 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3934 IID = Intrinsic::x86_avx512_psllv_w_512;
3935 else
3936 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3937 } else if (Name.ends_with(".128")) {
3938 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3939 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3940 : Intrinsic::x86_sse2_psll_d;
3941 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3942 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3943 : Intrinsic::x86_sse2_psll_q;
3944 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3945 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3946 : Intrinsic::x86_sse2_psll_w;
3947 else
3948 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3949 } else if (Name.ends_with(".256")) {
3950 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3951 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3952 : Intrinsic::x86_avx2_psll_d;
3953 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3954 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3955 : Intrinsic::x86_avx2_psll_q;
3956 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3957 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3958 : Intrinsic::x86_avx2_psll_w;
3959 else
3960 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3961 } else {
3962 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3963 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3964 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3965 : Intrinsic::x86_avx512_psll_d_512;
3966 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3967 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3968 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3969 : Intrinsic::x86_avx512_psll_q_512;
3970 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3971 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3972 : Intrinsic::x86_avx512_psll_w_512;
3973 else
3974 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3975 }
3976
3977 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3978 } else if (Name.starts_with("avx512.mask.psrl")) {
3979 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3980 bool IsVariable = Name[16] == 'v';
3981 char Size = Name[16] == '.' ? Name[17]
3982 : Name[17] == '.' ? Name[18]
3983 : Name[18] == '.' ? Name[19]
3984 : Name[20];
3985
3986 Intrinsic::ID IID;
3987 if (IsVariable && Name[17] != '.') {
3988 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3989 IID = Intrinsic::x86_avx2_psrlv_q;
3990 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3991 IID = Intrinsic::x86_avx2_psrlv_q_256;
3992 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3993 IID = Intrinsic::x86_avx2_psrlv_d;
3994 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3995 IID = Intrinsic::x86_avx2_psrlv_d_256;
3996 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3997 IID = Intrinsic::x86_avx512_psrlv_w_128;
3998 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3999 IID = Intrinsic::x86_avx512_psrlv_w_256;
4000 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
4001 IID = Intrinsic::x86_avx512_psrlv_w_512;
4002 else
4003 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4004 } else if (Name.ends_with(".128")) {
4005 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
4006 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4007 : Intrinsic::x86_sse2_psrl_d;
4008 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
4009 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4010 : Intrinsic::x86_sse2_psrl_q;
4011 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
4012 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4013 : Intrinsic::x86_sse2_psrl_w;
4014 else
4015 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4016 } else if (Name.ends_with(".256")) {
4017 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4018 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4019 : Intrinsic::x86_avx2_psrl_d;
4020 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4021 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4022 : Intrinsic::x86_avx2_psrl_q;
4023 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4024 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4025 : Intrinsic::x86_avx2_psrl_w;
4026 else
4027 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4028 } else {
4029 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4030 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4031 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4032 : Intrinsic::x86_avx512_psrl_d_512;
4033 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4034 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4035 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4036 : Intrinsic::x86_avx512_psrl_q_512;
4037 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
4038 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4039 : Intrinsic::x86_avx512_psrl_w_512;
4040 else
4041 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4042 }
4043
4044 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4045 } else if (Name.starts_with("avx512.mask.psra")) {
4046 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4047 bool IsVariable = Name[16] == 'v';
4048 char Size = Name[16] == '.' ? Name[17]
4049 : Name[17] == '.' ? Name[18]
4050 : Name[18] == '.' ? Name[19]
4051 : Name[20];
4052
4053 Intrinsic::ID IID;
4054 if (IsVariable && Name[17] != '.') {
4055 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4056 IID = Intrinsic::x86_avx2_psrav_d;
4057 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4058 IID = Intrinsic::x86_avx2_psrav_d_256;
4059 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4060 IID = Intrinsic::x86_avx512_psrav_w_128;
4061 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4062 IID = Intrinsic::x86_avx512_psrav_w_256;
4063 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4064 IID = Intrinsic::x86_avx512_psrav_w_512;
4065 else
4066 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4067 } else if (Name.ends_with(".128")) {
4068 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4069 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4070 : Intrinsic::x86_sse2_psra_d;
4071 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4072 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4073 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4074 : Intrinsic::x86_avx512_psra_q_128;
4075 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4076 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4077 : Intrinsic::x86_sse2_psra_w;
4078 else
4079 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4080 } else if (Name.ends_with(".256")) {
4081 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4082 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4083 : Intrinsic::x86_avx2_psra_d;
4084 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4085 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4086 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4087 : Intrinsic::x86_avx512_psra_q_256;
4088 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4089 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4090 : Intrinsic::x86_avx2_psra_w;
4091 else
4092 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4093 } else {
4094 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4095 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4096 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4097 : Intrinsic::x86_avx512_psra_d_512;
4098 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4099 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4100 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4101 : Intrinsic::x86_avx512_psra_q_512;
4102 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4103 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4104 : Intrinsic::x86_avx512_psra_w_512;
4105 else
4106 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4107 }
4108
4109 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4110 } else if (Name.starts_with("avx512.mask.move.s")) {
4111 Rep = upgradeMaskedMove(Builder, *CI);
4112 } else if (Name.starts_with("avx512.cvtmask2")) {
4113 Rep = upgradeMaskToInt(Builder, *CI);
4114 } else if (Name.ends_with(".movntdqa")) {
4116 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4117
4118 LoadInst *LI = Builder.CreateAlignedLoad(
4119 CI->getType(), CI->getArgOperand(0),
4121 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4122 Rep = LI;
4123 } else if (Name.starts_with("fma.vfmadd.") ||
4124 Name.starts_with("fma.vfmsub.") ||
4125 Name.starts_with("fma.vfnmadd.") ||
4126 Name.starts_with("fma.vfnmsub.")) {
4127 bool NegMul = Name[6] == 'n';
4128 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4129 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4130
4131 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4132 CI->getArgOperand(2)};
4133
4134 if (IsScalar) {
4135 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4136 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4137 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4138 }
4139
4140 if (NegMul && !IsScalar)
4141 Ops[0] = Builder.CreateFNeg(Ops[0]);
4142 if (NegMul && IsScalar)
4143 Ops[1] = Builder.CreateFNeg(Ops[1]);
4144 if (NegAcc)
4145 Ops[2] = Builder.CreateFNeg(Ops[2]);
4146
4147 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4148
4149 if (IsScalar)
4150 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4151 } else if (Name.starts_with("fma4.vfmadd.s")) {
4152 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4153 CI->getArgOperand(2)};
4154
4155 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4156 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4157 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4158
4159 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4160
4161 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4162 Rep, (uint64_t)0);
4163 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4164 Name.starts_with("avx512.maskz.vfmadd.s") ||
4165 Name.starts_with("avx512.mask3.vfmadd.s") ||
4166 Name.starts_with("avx512.mask3.vfmsub.s") ||
4167 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4168 bool IsMask3 = Name[11] == '3';
4169 bool IsMaskZ = Name[11] == 'z';
4170 // Drop the "avx512.mask." to make it easier.
4171 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4172 bool NegMul = Name[2] == 'n';
4173 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4174
4175 Value *A = CI->getArgOperand(0);
4176 Value *B = CI->getArgOperand(1);
4177 Value *C = CI->getArgOperand(2);
4178
4179 if (NegMul && (IsMask3 || IsMaskZ))
4180 A = Builder.CreateFNeg(A);
4181 if (NegMul && !(IsMask3 || IsMaskZ))
4182 B = Builder.CreateFNeg(B);
4183 if (NegAcc)
4184 C = Builder.CreateFNeg(C);
4185
4186 A = Builder.CreateExtractElement(A, (uint64_t)0);
4187 B = Builder.CreateExtractElement(B, (uint64_t)0);
4188 C = Builder.CreateExtractElement(C, (uint64_t)0);
4189
4190 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4191 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4192 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4193
4194 Intrinsic::ID IID;
4195 if (Name.back() == 'd')
4196 IID = Intrinsic::x86_avx512_vfmadd_f64;
4197 else
4198 IID = Intrinsic::x86_avx512_vfmadd_f32;
4199 Rep = Builder.CreateIntrinsic(IID, Ops);
4200 } else {
4201 Rep = Builder.CreateFMA(A, B, C);
4202 }
4203
4204 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4205 : IsMask3 ? C
4206 : A;
4207
4208 // For Mask3 with NegAcc, we need to create a new extractelement that
4209 // avoids the negation above.
4210 if (NegAcc && IsMask3)
4211 PassThru =
4212 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4213
4214 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4215 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4216 (uint64_t)0);
4217 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4218 Name.starts_with("avx512.mask.vfnmadd.p") ||
4219 Name.starts_with("avx512.mask.vfnmsub.p") ||
4220 Name.starts_with("avx512.mask3.vfmadd.p") ||
4221 Name.starts_with("avx512.mask3.vfmsub.p") ||
4222 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4223 Name.starts_with("avx512.maskz.vfmadd.p")) {
4224 bool IsMask3 = Name[11] == '3';
4225 bool IsMaskZ = Name[11] == 'z';
4226 // Drop the "avx512.mask." to make it easier.
4227 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4228 bool NegMul = Name[2] == 'n';
4229 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4230
4231 Value *A = CI->getArgOperand(0);
4232 Value *B = CI->getArgOperand(1);
4233 Value *C = CI->getArgOperand(2);
4234
4235 if (NegMul && (IsMask3 || IsMaskZ))
4236 A = Builder.CreateFNeg(A);
4237 if (NegMul && !(IsMask3 || IsMaskZ))
4238 B = Builder.CreateFNeg(B);
4239 if (NegAcc)
4240 C = Builder.CreateFNeg(C);
4241
4242 if (CI->arg_size() == 5 &&
4243 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4244 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4245 Intrinsic::ID IID;
4246 // Check the character before ".512" in string.
4247 if (Name[Name.size() - 5] == 's')
4248 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4249 else
4250 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4251
4252 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4253 } else {
4254 Rep = Builder.CreateFMA(A, B, C);
4255 }
4256
4257 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4258 : IsMask3 ? CI->getArgOperand(2)
4259 : CI->getArgOperand(0);
4260
4261 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4262 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4263 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4264 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4265 Intrinsic::ID IID;
4266 if (VecWidth == 128 && EltWidth == 32)
4267 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4268 else if (VecWidth == 256 && EltWidth == 32)
4269 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4270 else if (VecWidth == 128 && EltWidth == 64)
4271 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4272 else if (VecWidth == 256 && EltWidth == 64)
4273 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4274 else
4275 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4276
4277 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4278 CI->getArgOperand(2)};
4279 Ops[2] = Builder.CreateFNeg(Ops[2]);
4280 Rep = Builder.CreateIntrinsic(IID, Ops);
4281 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4282 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4283 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4284 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4285 bool IsMask3 = Name[11] == '3';
4286 bool IsMaskZ = Name[11] == 'z';
4287 // Drop the "avx512.mask." to make it easier.
4288 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4289 bool IsSubAdd = Name[3] == 's';
4290 if (CI->arg_size() == 5) {
4291 Intrinsic::ID IID;
4292 // Check the character before ".512" in string.
4293 if (Name[Name.size() - 5] == 's')
4294 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4295 else
4296 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4297
4298 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4299 CI->getArgOperand(2), CI->getArgOperand(4)};
4300 if (IsSubAdd)
4301 Ops[2] = Builder.CreateFNeg(Ops[2]);
4302
4303 Rep = Builder.CreateIntrinsic(IID, Ops);
4304 } else {
4305 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4306
4307 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4308 CI->getArgOperand(2)};
4309
4311 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4312 Value *Odd = Builder.CreateCall(FMA, Ops);
4313 Ops[2] = Builder.CreateFNeg(Ops[2]);
4314 Value *Even = Builder.CreateCall(FMA, Ops);
4315
4316 if (IsSubAdd)
4317 std::swap(Even, Odd);
4318
4319 SmallVector<int, 32> Idxs(NumElts);
4320 for (int i = 0; i != NumElts; ++i)
4321 Idxs[i] = i + (i % 2) * NumElts;
4322
4323 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4324 }
4325
4326 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4327 : IsMask3 ? CI->getArgOperand(2)
4328 : CI->getArgOperand(0);
4329
4330 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4331 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4332 Name.starts_with("avx512.maskz.pternlog.")) {
4333 bool ZeroMask = Name[11] == 'z';
4334 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4335 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4336 Intrinsic::ID IID;
4337 if (VecWidth == 128 && EltWidth == 32)
4338 IID = Intrinsic::x86_avx512_pternlog_d_128;
4339 else if (VecWidth == 256 && EltWidth == 32)
4340 IID = Intrinsic::x86_avx512_pternlog_d_256;
4341 else if (VecWidth == 512 && EltWidth == 32)
4342 IID = Intrinsic::x86_avx512_pternlog_d_512;
4343 else if (VecWidth == 128 && EltWidth == 64)
4344 IID = Intrinsic::x86_avx512_pternlog_q_128;
4345 else if (VecWidth == 256 && EltWidth == 64)
4346 IID = Intrinsic::x86_avx512_pternlog_q_256;
4347 else if (VecWidth == 512 && EltWidth == 64)
4348 IID = Intrinsic::x86_avx512_pternlog_q_512;
4349 else
4350 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4351
4352 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4353 CI->getArgOperand(2), CI->getArgOperand(3)};
4354 Rep = Builder.CreateIntrinsic(IID, Args);
4355 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4356 : CI->getArgOperand(0);
4357 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4358 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4359 Name.starts_with("avx512.maskz.vpmadd52")) {
4360 bool ZeroMask = Name[11] == 'z';
4361 bool High = Name[20] == 'h' || Name[21] == 'h';
4362 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4363 Intrinsic::ID IID;
4364 if (VecWidth == 128 && !High)
4365 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4366 else if (VecWidth == 256 && !High)
4367 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4368 else if (VecWidth == 512 && !High)
4369 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4370 else if (VecWidth == 128 && High)
4371 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4372 else if (VecWidth == 256 && High)
4373 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4374 else if (VecWidth == 512 && High)
4375 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4376 else
4377 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4378
4379 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4380 CI->getArgOperand(2)};
4381 Rep = Builder.CreateIntrinsic(IID, Args);
4382 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4383 : CI->getArgOperand(0);
4384 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4385 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4386 Name.starts_with("avx512.mask.vpermt2var.") ||
4387 Name.starts_with("avx512.maskz.vpermt2var.")) {
4388 bool ZeroMask = Name[11] == 'z';
4389 bool IndexForm = Name[17] == 'i';
4390 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4391 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4392 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4393 Name.starts_with("avx512.mask.vpdpbusds.") ||
4394 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4395 bool ZeroMask = Name[11] == 'z';
4396 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4397 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4398 Intrinsic::ID IID;
4399 if (VecWidth == 128 && !IsSaturating)
4400 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4401 else if (VecWidth == 256 && !IsSaturating)
4402 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4403 else if (VecWidth == 512 && !IsSaturating)
4404 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4405 else if (VecWidth == 128 && IsSaturating)
4406 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4407 else if (VecWidth == 256 && IsSaturating)
4408 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4409 else if (VecWidth == 512 && IsSaturating)
4410 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4411 else
4412 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4413
4414 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4415 CI->getArgOperand(2)};
4416
4417 // Input arguments types were incorrectly set to vectors of i32 before but
4418 // they should be vectors of i8. Insert bit cast when encountering the old
4419 // types
4420 if (Args[1]->getType()->isVectorTy() &&
4421 cast<VectorType>(Args[1]->getType())
4422 ->getElementType()
4423 ->isIntegerTy(32) &&
4424 Args[2]->getType()->isVectorTy() &&
4425 cast<VectorType>(Args[2]->getType())
4426 ->getElementType()
4427 ->isIntegerTy(32)) {
4428 Type *NewArgType = nullptr;
4429 if (VecWidth == 128)
4430 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4431 else if (VecWidth == 256)
4432 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4433 else if (VecWidth == 512)
4434 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4435 else
4436 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4437 CI);
4438
4439 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4440 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4441 }
4442
4443 Rep = Builder.CreateIntrinsic(IID, Args);
4444 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4445 : CI->getArgOperand(0);
4446 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4447 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4448 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4449 Name.starts_with("avx512.mask.vpdpwssds.") ||
4450 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4451 bool ZeroMask = Name[11] == 'z';
4452 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4453 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4454 Intrinsic::ID IID;
4455 if (VecWidth == 128 && !IsSaturating)
4456 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4457 else if (VecWidth == 256 && !IsSaturating)
4458 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4459 else if (VecWidth == 512 && !IsSaturating)
4460 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4461 else if (VecWidth == 128 && IsSaturating)
4462 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4463 else if (VecWidth == 256 && IsSaturating)
4464 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4465 else if (VecWidth == 512 && IsSaturating)
4466 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4467 else
4468 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4469
4470 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4471 CI->getArgOperand(2)};
4472
4473 // Input arguments types were incorrectly set to vectors of i32 before but
4474 // they should be vectors of i16. Insert bit cast when encountering the old
4475 // types
4476 if (Args[1]->getType()->isVectorTy() &&
4477 cast<VectorType>(Args[1]->getType())
4478 ->getElementType()
4479 ->isIntegerTy(32) &&
4480 Args[2]->getType()->isVectorTy() &&
4481 cast<VectorType>(Args[2]->getType())
4482 ->getElementType()
4483 ->isIntegerTy(32)) {
4484 Type *NewArgType = nullptr;
4485 if (VecWidth == 128)
4486 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4487 else if (VecWidth == 256)
4488 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4489 else if (VecWidth == 512)
4490 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4491 else
4492 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4493 CI);
4494
4495 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4496 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4497 }
4498
4499 Rep = Builder.CreateIntrinsic(IID, Args);
4500 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4501 : CI->getArgOperand(0);
4502 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4503 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4504 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4505 Name == "subborrow.u32" || Name == "subborrow.u64") {
4506 Intrinsic::ID IID;
4507 if (Name[0] == 'a' && Name.back() == '2')
4508 IID = Intrinsic::x86_addcarry_32;
4509 else if (Name[0] == 'a' && Name.back() == '4')
4510 IID = Intrinsic::x86_addcarry_64;
4511 else if (Name[0] == 's' && Name.back() == '2')
4512 IID = Intrinsic::x86_subborrow_32;
4513 else if (Name[0] == 's' && Name.back() == '4')
4514 IID = Intrinsic::x86_subborrow_64;
4515 else
4516 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4517
4518 // Make a call with 3 operands.
4519 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4520 CI->getArgOperand(2)};
4521 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4522
4523 // Extract the second result and store it.
4524 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4525 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4526 // Replace the original call result with the first result of the new call.
4527 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4528
4529 CI->replaceAllUsesWith(CF);
4530 Rep = nullptr;
4531 } else if (Name.starts_with("avx512.mask.") &&
4532 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4533 // Rep will be updated by the call in the condition.
4534 } else
4535 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4536
4537 return Rep;
4538}
4539
4541 Function *F, IRBuilder<> &Builder) {
4542 if (Name.starts_with("neon.bfcvt")) {
4543 if (Name.starts_with("neon.bfcvtn2")) {
4544 SmallVector<int, 32> LoMask(4);
4545 std::iota(LoMask.begin(), LoMask.end(), 0);
4546 SmallVector<int, 32> ConcatMask(8);
4547 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4548 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4549 Value *Trunc =
4550 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4551 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4552 } else if (Name.starts_with("neon.bfcvtn")) {
4553 SmallVector<int, 32> ConcatMask(8);
4554 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4555 Type *V4BF16 =
4556 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4557 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4558 dbgs() << "Trunc: " << *Trunc << "\n";
4559 return Builder.CreateShuffleVector(
4560 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4561 } else {
4562 return Builder.CreateFPTrunc(CI->getOperand(0),
4563 Type::getBFloatTy(F->getContext()));
4564 }
4565 } else if (Name.starts_with("sve.fcvt")) {
4566 Intrinsic::ID NewID =
4568 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4569 .Case("sve.fcvtnt.bf16f32",
4570 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4572 if (NewID == Intrinsic::not_intrinsic)
4573 llvm_unreachable("Unhandled Intrinsic!");
4574
4575 SmallVector<Value *, 3> Args(CI->args());
4576
4577 // The original intrinsics incorrectly used a predicate based on the
4578 // smallest element type rather than the largest.
4579 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4580 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4581
4582 if (Args[1]->getType() != BadPredTy)
4583 llvm_unreachable("Unexpected predicate type!");
4584
4585 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4586 BadPredTy, Args[1]);
4587 Args[1] = Builder.CreateIntrinsic(
4588 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4589
4590 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4591 CI->getName());
4592 }
4593
4594 llvm_unreachable("Unhandled Intrinsic!");
4595}
4596
4598 IRBuilder<> &Builder) {
4599 if (Name == "mve.vctp64.old") {
4600 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4601 // correct type.
4602 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4603 CI->getArgOperand(0),
4604 /*FMFSource=*/nullptr, CI->getName());
4605 Value *C1 = Builder.CreateIntrinsic(
4606 Intrinsic::arm_mve_pred_v2i,
4607 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4608 return Builder.CreateIntrinsic(
4609 Intrinsic::arm_mve_pred_i2v,
4610 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4611 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4612 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4613 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4614 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4615 Name ==
4616 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4617 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4618 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4619 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4620 Name ==
4621 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4622 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4623 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4624 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4625 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4626 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4627 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4628 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4629 std::vector<Type *> Tys;
4630 unsigned ID = CI->getIntrinsicID();
4631 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4632 switch (ID) {
4633 case Intrinsic::arm_mve_mull_int_predicated:
4634 case Intrinsic::arm_mve_vqdmull_predicated:
4635 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4636 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4637 break;
4638 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4639 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4640 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4641 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4642 V2I1Ty};
4643 break;
4644 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4645 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4646 CI->getOperand(1)->getType(), V2I1Ty};
4647 break;
4648 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4649 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4650 CI->getOperand(2)->getType(), V2I1Ty};
4651 break;
4652 case Intrinsic::arm_cde_vcx1q_predicated:
4653 case Intrinsic::arm_cde_vcx1qa_predicated:
4654 case Intrinsic::arm_cde_vcx2q_predicated:
4655 case Intrinsic::arm_cde_vcx2qa_predicated:
4656 case Intrinsic::arm_cde_vcx3q_predicated:
4657 case Intrinsic::arm_cde_vcx3qa_predicated:
4658 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4659 break;
4660 default:
4661 llvm_unreachable("Unhandled Intrinsic!");
4662 }
4663
4664 std::vector<Value *> Ops;
4665 for (Value *Op : CI->args()) {
4666 Type *Ty = Op->getType();
4667 if (Ty->getScalarSizeInBits() == 1) {
4668 Value *C1 = Builder.CreateIntrinsic(
4669 Intrinsic::arm_mve_pred_v2i,
4670 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4671 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4672 }
4673 Ops.push_back(Op);
4674 }
4675
4676 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4677 CI->getName());
4678 }
4679 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4680}
4681
// These are expected to have the arguments:
// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
//
// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
//
// NOTE(review): the opening line of this function's signature (the
// "static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,"
// part) was lost in extraction; the parameter list continues below.
                                          Function *F, IRBuilder<> &Builder) {
  // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
  // for compatibility.
  auto UpgradeLegacyWMMAIUIntrinsicCall =
      [](Function *F, CallBase *CI, IRBuilder<> &Builder,
         ArrayRef<Type *> OverloadTys) -> Value * {
    // Prepare arguments, append clamp=0 for compatibility
    SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
    Args.push_back(Builder.getFalse());

    // Insert the declaration for the right overload types
    // NOTE(review): the line declaring NewDecl (a call to
    // Intrinsic::getOrInsertDeclaration) was lost in extraction.
        F->getParent(), F->getIntrinsicID(), OverloadTys);

    // Copy operand bundles if any
    // NOTE(review): the declaration of Bundles was lost in extraction.
    CI->getOperandBundlesAsDefs(Bundles);

    // Create the new call and copy calling properties
    auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
    NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
    NewCall->setCallingConv(CI->getCallingConv());
    NewCall->setAttributes(CI->getAttributes());
    NewCall->setDebugLoc(CI->getDebugLoc());
    NewCall->copyMetadata(*CI);
    return NewCall;
  };

  // Legacy iu8 WMMA/SWMMAC variants: same call minus the trailing clamp bit.
  if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
    assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
                                  "intrinsic should have 7 arguments");
    Type *T1 = CI->getArgOperand(4)->getType();
    Type *T2 = CI->getArgOperand(1)->getType();
    return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
  }
  if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
    assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
                                  "intrinsic should have 8 arguments");
    Type *T1 = CI->getArgOperand(4)->getType();
    Type *T2 = CI->getArgOperand(1)->getType();
    Type *T3 = CI->getArgOperand(3)->getType();
    Type *T4 = CI->getArgOperand(5)->getType();
    return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
  }

  switch (F->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
  case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
  case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
  case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
  case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
    // Drop src0 and src1 modifiers.
    const Value *Op0 = CI->getArgOperand(0);
    const Value *Op2 = CI->getArgOperand(2);
    assert(Op0->getType()->isIntegerTy() && Op2->getType()->isIntegerTy());
    const ConstantInt *ModA = dyn_cast<ConstantInt>(Op0);
    const ConstantInt *ModB = dyn_cast<ConstantInt>(Op2);
    // FIXME(review): dyn_cast returns null when the modifier operands are not
    // constant integers, so ModA/ModB may be null when dereferenced below.
    // A "!ModA || !ModB ||" guard looks necessary — confirm against upstream.
    if (!ModA->isZero() || !ModB->isZero())
      reportFatalUsageError(Name + " matrix A and B modifiers shall be zero");

    // Rebuild the argument list without the two modifier operands.
    // NOTE(review): the declaration/initialization of Args was lost in
    // extraction.
    for (int I = 4, E = CI->arg_size(); I < E; ++I)
      Args.push_back(CI->getArgOperand(I));

    SmallVector<Type *, 3> Overloads{F->getReturnType(), Args[0]->getType()};
    if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
      Overloads.push_back(Args[3]->getType());
    // NOTE(review): the NewDecl declaration line (a call to
    // Intrinsic::getOrInsertDeclaration) was lost in extraction.
        F->getParent(), F->getIntrinsicID(), Overloads);

    // NOTE(review): the Bundles declaration line was lost in extraction.
    CI->getOperandBundlesAsDefs(Bundles);

    // Re-create the call with the trimmed operand list and preserve all
    // call-site properties.
    auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
    NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
    NewCall->setCallingConv(CI->getCallingConv());
    NewCall->setAttributes(CI->getAttributes());
    NewCall->setDebugLoc(CI->getDebugLoc());
    NewCall->copyMetadata(*CI);
    NewCall->takeName(CI);
    return NewCall;
  }
  }

  // Map the legacy atomic intrinsic name prefix to an atomicrmw opcode.
  AtomicRMWInst::BinOp RMWOp =
  // NOTE(review): the "StringSwitch<AtomicRMWInst::BinOp>(Name)" line was
  // lost in extraction.
          .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
          .StartsWith("ds.fmin", AtomicRMWInst::FMin)
          .StartsWith("ds.fmax", AtomicRMWInst::FMax)
          .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
          .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
          .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
          .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
          .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
          .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
          .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
          .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
          .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
          .StartsWith("atomic.csub", AtomicRMWInst::USubSat);

  unsigned NumOperands = CI->getNumOperands();
  if (NumOperands < 3) // Malformed bitcode.
    return nullptr;

  Value *Ptr = CI->getArgOperand(0);
  PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy) // Malformed.
    return nullptr;

  Value *Val = CI->getArgOperand(1);
  if (Val->getType() != CI->getType()) // Malformed.
    return nullptr;

  ConstantInt *OrderArg = nullptr;
  bool IsVolatile = false;

  // These should have 5 arguments (plus the callee). A separate version of the
  // ds_fadd intrinsic was defined for bf16 which was missing arguments.
  if (NumOperands > 3)
    OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));

  // Ignore scope argument at 3

  if (NumOperands > 5) {
    ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
    IsVolatile = !VolatileArg || !VolatileArg->isZero();
  }

  // NOTE(review): the default-initialization of Order was lost in extraction.
  if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
    Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
  // NOTE(review): the fallback lines normalizing a not-atomic/unordered value
  // were lost in extraction.

  LLVMContext &Ctx = F->getContext();

  // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
  Type *RetTy = CI->getType();
  if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
    if (VT->getElementType()->isIntegerTy(16)) {
      VectorType *AsBF16 =
          VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
      Val = Builder.CreateBitCast(Val, AsBF16);
    }
  }

  // The scope argument never really worked correctly. Use agent as the most
  // conservative option which should still always produce the instruction.
  SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);

  unsigned AddrSpace = PtrTy->getAddressSpace();
  if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
    MDNode *EmptyMD = MDNode::get(F->getContext(), {});
    RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
    if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
      RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
  }

  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
    MDBuilder MDB(F->getContext());
    MDNode *RangeNotPrivate =
    // NOTE(review): the MDB.createRange(...) argument lines were lost in
    // extraction.
    RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
  }

  if (IsVolatile)
    RMW->setVolatile(true);

  // Cast back in case the legacy return type was <2 x i16> rather than
  // <2 x bfloat>.
  return Builder.CreateBitCast(RMW, RetTy);
}
4864
4865/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4866/// plain MDNode, as it's the verifier's job to check these are the correct
4867/// types later.
4868static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4869 if (Op < CI->arg_size()) {
4870 if (MetadataAsValue *MAV =
4872 Metadata *MD = MAV->getMetadata();
4873 return dyn_cast_if_present<MDNode>(MD);
4874 }
4875 }
4876 return nullptr;
4877}
4878
4879/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4880static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4881 if (Op < CI->arg_size())
4883 return MAV->getMetadata();
4884 return nullptr;
4885}
4886
4888 // The MDNode attached to this instruction might not be the correct type,
4889 // as the verifier has not yet be run. Fetch it as a bare MDNode.
4890 return I->getDebugLoc().getAsMDNode();
4891}
4892
/// Convert debug intrinsic calls to non-instruction debug records.
/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
/// \p CI - The debug intrinsic call.
/// NOTE(review): the function signature line was lost in extraction; the
/// parameters used below are Name and CI as documented above — confirm
/// against upstream before relying on this.
  DbgRecord *DR = nullptr;
  if (Name == "label") {
    // llvm.dbg.label -> label record; operand 0 is the DILabel.
    // NOTE(review): the record-creation line was lost in extraction.
                                                      CI->getDebugLoc());
  } else if (Name == "assign") {
    // llvm.dbg.assign operands: value, variable, expression, assign-id,
    // address, address-expression.
    // NOTE(review): the DbgVariableRecord creation line(s) were lost in
    // extraction.
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
        unwrapMAVMetadataOp(CI, 4),
        /*The address is a Value ref, it will be stored as a Metadata */
        unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
  } else if (Name == "declare") {
    // llvm.dbg.declare: only value/variable/expression are populated.
    // NOTE(review): the DbgVariableRecord creation line(s) were lost in
    // extraction.
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
        getDebugLocSafe(CI));
  } else if (Name == "addr") {
    // Upgrade dbg.addr to dbg.value with DW_OP_deref.
    MDNode *ExprNode = unwrapMAVOp(CI, 2);
    // Don't try to add something to the expression if it's not an expression.
    // Instead, allow the verifier to fail later.
    if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
      ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
    }
    // NOTE(review): the DbgVariableRecord creation line(s) were lost in
    // extraction.
        unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
        getDebugLocSafe(CI));
  } else if (Name == "value") {
    // An old version of dbg.value had an extra offset argument.
    unsigned VarOp = 1;
    unsigned ExprOp = 2;
    if (CI->arg_size() == 4) {
      // NOTE(review): the line fetching the Offset constant (operand 1) was
      // lost in extraction.
      // Nonzero offset dbg.values get dropped without a replacement.
      if (!Offset || !Offset->isNullValue())
        return;
      VarOp = 2;
      ExprOp = 3;
    }
    // NOTE(review): the DbgVariableRecord creation line(s) were lost in
    // extraction.
        unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
        nullptr, getDebugLocSafe(CI));
  }
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
  // Splice the new record into the block at the intrinsic's position.
  CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
}
4945
4948 if (!Offset)
4949 reportFatalUsageError("Invalid llvm.vector.splice offset argument");
4950 int64_t OffsetVal = Offset->getSExtValue();
4951 return Builder.CreateIntrinsic(OffsetVal >= 0
4952 ? Intrinsic::vector_splice_left
4953 : Intrinsic::vector_splice_right,
4954 CI->getType(),
4955 {CI->getArgOperand(0), CI->getArgOperand(1),
4956 Builder.getInt32(std::abs(OffsetVal))});
4957}
4958
4960 Function *F, IRBuilder<> &Builder) {
4961 if (Name.starts_with("to.fp16")) {
4962 Value *Cast =
4963 Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
4964 return Builder.CreateBitCast(Cast, CI->getType());
4965 }
4966
4967 if (Name.starts_with("from.fp16")) {
4968 Value *Cast =
4969 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
4970 return Builder.CreateFPExt(Cast, CI->getType());
4971 }
4972
4973 return nullptr;
4974}
4975
4976/// Upgrade a call to an old intrinsic. All argument and return casting must be
4977/// provided to seamlessly integrate with existing context.
4979 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4980 // checks the callee's function type matches. It's likely we need to handle
4981 // type changes here.
4983 if (!F)
4984 return;
4985
4986 LLVMContext &C = CI->getContext();
4987 IRBuilder<> Builder(C);
4988 if (isa<FPMathOperator>(CI))
4989 Builder.setFastMathFlags(CI->getFastMathFlags());
4990 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4991
4992 if (!NewFn) {
4993 // Get the Function's name.
4994 StringRef Name = F->getName();
4995 if (!Name.consume_front("llvm."))
4996 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
4997
4998 bool IsX86 = Name.consume_front("x86.");
4999 bool IsNVVM = Name.consume_front("nvvm.");
5000 bool IsAArch64 = Name.consume_front("aarch64.");
5001 bool IsARM = Name.consume_front("arm.");
5002 bool IsAMDGCN = Name.consume_front("amdgcn.");
5003 bool IsDbg = Name.consume_front("dbg.");
5004 bool IsOldSplice =
5005 (Name.consume_front("experimental.vector.splice") ||
5006 Name.consume_front("vector.splice")) &&
5007 !(Name.starts_with(".left") || Name.starts_with(".right"));
5008 Value *Rep = nullptr;
5009
5010 if (!IsX86 && Name == "stackprotectorcheck") {
5011 Rep = nullptr;
5012 } else if (IsNVVM) {
5013 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
5014 } else if (IsX86) {
5015 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
5016 } else if (IsAArch64) {
5017 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
5018 } else if (IsARM) {
5019 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
5020 } else if (IsAMDGCN) {
5021 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
5022 } else if (IsDbg) {
5024 } else if (IsOldSplice) {
5025 Rep = upgradeVectorSplice(CI, Builder);
5026 } else if (Name.consume_front("convert.")) {
5027 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
5028 } else {
5029 llvm_unreachable("Unknown function for CallBase upgrade.");
5030 }
5031
5032 if (Rep)
5033 CI->replaceAllUsesWith(Rep);
5034 CI->eraseFromParent();
5035 return;
5036 }
5037
5038 const auto &DefaultCase = [&]() -> void {
5039 if (F == NewFn)
5040 return;
5041
5042 if (CI->getFunctionType() == NewFn->getFunctionType()) {
5043 // Handle generic mangling change.
5044 assert(
5045 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
5046 "Unknown function for CallBase upgrade and isn't just a name change");
5047 CI->setCalledFunction(NewFn);
5048 return;
5049 }
5050
5051 // This must be an upgrade from a named to a literal struct.
5052 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
5053 assert(OldST != NewFn->getReturnType() &&
5054 "Return type must have changed");
5055 assert(OldST->getNumElements() ==
5056 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
5057 "Must have same number of elements");
5058
5059 SmallVector<Value *> Args(CI->args());
5060 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5061 NewCI->setAttributes(CI->getAttributes());
5062 Value *Res = PoisonValue::get(OldST);
5063 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5064 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5065 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5066 }
5067 CI->replaceAllUsesWith(Res);
5068 CI->eraseFromParent();
5069 return;
5070 }
5071
5072 // We're probably about to produce something invalid. Let the verifier catch
5073 // it instead of dying here.
5074 CI->setCalledOperand(
5076 return;
5077 };
5078 CallInst *NewCall = nullptr;
5079 switch (NewFn->getIntrinsicID()) {
5080 default: {
5081 DefaultCase();
5082 return;
5083 }
5084 case Intrinsic::arm_neon_vst1:
5085 case Intrinsic::arm_neon_vst2:
5086 case Intrinsic::arm_neon_vst3:
5087 case Intrinsic::arm_neon_vst4:
5088 case Intrinsic::arm_neon_vst2lane:
5089 case Intrinsic::arm_neon_vst3lane:
5090 case Intrinsic::arm_neon_vst4lane: {
5091 SmallVector<Value *, 4> Args(CI->args());
5092 NewCall = Builder.CreateCall(NewFn, Args);
5093 break;
5094 }
5095 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5096 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5097 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5098 LLVMContext &Ctx = F->getParent()->getContext();
5099 SmallVector<Value *, 4> Args(CI->args());
5100 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
5101 cast<ConstantInt>(Args[3])->getZExtValue());
5102 NewCall = Builder.CreateCall(NewFn, Args);
5103 break;
5104 }
5105 case Intrinsic::aarch64_sve_ld3_sret:
5106 case Intrinsic::aarch64_sve_ld4_sret:
5107 case Intrinsic::aarch64_sve_ld2_sret: {
5108 // Is this a trivial remangle of the name to support ptr address spaces?
5109 if (isa<StructType>(F->getReturnType())) {
5110 DefaultCase();
5111 return;
5112 }
5113
5114 StringRef Name = F->getName();
5115 Name = Name.substr(5);
5116 unsigned N = StringSwitch<unsigned>(Name)
5117 .StartsWith("aarch64.sve.ld2", 2)
5118 .StartsWith("aarch64.sve.ld3", 3)
5119 .StartsWith("aarch64.sve.ld4", 4)
5120 .Default(0);
5121 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5122 unsigned MinElts = RetTy->getMinNumElements() / N;
5123 SmallVector<Value *, 2> Args(CI->args());
5124 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5125 Value *Ret = llvm::PoisonValue::get(RetTy);
5126 for (unsigned I = 0; I < N; I++) {
5127 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5128 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5129 }
5130 NewCall = dyn_cast<CallInst>(Ret);
5131 break;
5132 }
5133
5134 case Intrinsic::coro_end: {
5135 SmallVector<Value *, 3> Args(CI->args());
5136 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5137 NewCall = Builder.CreateCall(NewFn, Args);
5138 break;
5139 }
5140
5141 case Intrinsic::vector_extract: {
5142 StringRef Name = F->getName();
5143 Name = Name.substr(5); // Strip llvm
5144 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5145 DefaultCase();
5146 return;
5147 }
5148 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5149 unsigned MinElts = RetTy->getMinNumElements();
5150 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5151 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5152 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5153 break;
5154 }
5155
5156 case Intrinsic::vector_insert: {
5157 StringRef Name = F->getName();
5158 Name = Name.substr(5);
5159 if (!Name.starts_with("aarch64.sve.tuple")) {
5160 DefaultCase();
5161 return;
5162 }
5163 if (Name.starts_with("aarch64.sve.tuple.set")) {
5164 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5165 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5166 Value *NewIdx =
5167 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5168 NewCall = Builder.CreateCall(
5169 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5170 break;
5171 }
5172 if (Name.starts_with("aarch64.sve.tuple.create")) {
5173 unsigned N = StringSwitch<unsigned>(Name)
5174 .StartsWith("aarch64.sve.tuple.create2", 2)
5175 .StartsWith("aarch64.sve.tuple.create3", 3)
5176 .StartsWith("aarch64.sve.tuple.create4", 4)
5177 .Default(0);
5178 assert(N > 1 && "Create is expected to be between 2-4");
5179 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5180 Value *Ret = llvm::PoisonValue::get(RetTy);
5181 unsigned MinElts = RetTy->getMinNumElements() / N;
5182 for (unsigned I = 0; I < N; I++) {
5183 Value *V = CI->getArgOperand(I);
5184 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5185 }
5186 NewCall = dyn_cast<CallInst>(Ret);
5187 }
5188 break;
5189 }
5190
5191 case Intrinsic::arm_neon_bfdot:
5192 case Intrinsic::arm_neon_bfmmla:
5193 case Intrinsic::arm_neon_bfmlalb:
5194 case Intrinsic::arm_neon_bfmlalt:
5195 case Intrinsic::aarch64_neon_bfdot:
5196 case Intrinsic::aarch64_neon_bfmmla:
5197 case Intrinsic::aarch64_neon_bfmlalb:
5198 case Intrinsic::aarch64_neon_bfmlalt: {
5200 assert(CI->arg_size() == 3 &&
5201 "Mismatch between function args and call args");
5202 size_t OperandWidth =
5204 assert((OperandWidth == 64 || OperandWidth == 128) &&
5205 "Unexpected operand width");
5206 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5207 auto Iter = CI->args().begin();
5208 Args.push_back(*Iter++);
5209 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5210 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5211 NewCall = Builder.CreateCall(NewFn, Args);
5212 break;
5213 }
5214
5215 case Intrinsic::bitreverse:
5216 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5217 break;
5218
5219 case Intrinsic::ctlz:
5220 case Intrinsic::cttz: {
5221 if (CI->arg_size() != 1) {
5222 DefaultCase();
5223 return;
5224 }
5225
5226 NewCall =
5227 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5228 break;
5229 }
5230
5231 case Intrinsic::objectsize: {
5232 Value *NullIsUnknownSize =
5233 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5234 Value *Dynamic =
5235 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5236 NewCall = Builder.CreateCall(
5237 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5238 break;
5239 }
5240
5241 case Intrinsic::ctpop:
5242 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5243 break;
5244 case Intrinsic::dbg_value: {
5245 StringRef Name = F->getName();
5246 Name = Name.substr(5); // Strip llvm.
5247 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5248 if (Name.starts_with("dbg.addr")) {
5250 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5251 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5252 NewCall =
5253 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5254 MetadataAsValue::get(C, Expr)});
5255 break;
5256 }
5257
5258 // Upgrade from the old version that had an extra offset argument.
5259 assert(CI->arg_size() == 4);
5260 // Drop nonzero offsets instead of attempting to upgrade them.
5262 if (Offset->isNullValue()) {
5263 NewCall = Builder.CreateCall(
5264 NewFn,
5265 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5266 break;
5267 }
5268 CI->eraseFromParent();
5269 return;
5270 }
5271
5272 case Intrinsic::ptr_annotation:
5273 // Upgrade from versions that lacked the annotation attribute argument.
5274 if (CI->arg_size() != 4) {
5275 DefaultCase();
5276 return;
5277 }
5278
5279 // Create a new call with an added null annotation attribute argument.
5280 NewCall = Builder.CreateCall(
5281 NewFn,
5282 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5283 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5284 NewCall->takeName(CI);
5285 CI->replaceAllUsesWith(NewCall);
5286 CI->eraseFromParent();
5287 return;
5288
5289 case Intrinsic::var_annotation:
5290 // Upgrade from versions that lacked the annotation attribute argument.
5291 if (CI->arg_size() != 4) {
5292 DefaultCase();
5293 return;
5294 }
5295 // Create a new call with an added null annotation attribute argument.
5296 NewCall = Builder.CreateCall(
5297 NewFn,
5298 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5299 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5300 NewCall->takeName(CI);
5301 CI->replaceAllUsesWith(NewCall);
5302 CI->eraseFromParent();
5303 return;
5304
5305 case Intrinsic::riscv_aes32dsi:
5306 case Intrinsic::riscv_aes32dsmi:
5307 case Intrinsic::riscv_aes32esi:
5308 case Intrinsic::riscv_aes32esmi:
5309 case Intrinsic::riscv_sm4ks:
5310 case Intrinsic::riscv_sm4ed: {
5311 // The last argument to these intrinsics used to be i8 and changed to i32.
5312 // The type overload for sm4ks and sm4ed was removed.
5313 Value *Arg2 = CI->getArgOperand(2);
5314 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5315 return;
5316
5317 Value *Arg0 = CI->getArgOperand(0);
5318 Value *Arg1 = CI->getArgOperand(1);
5319 if (CI->getType()->isIntegerTy(64)) {
5320 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5321 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5322 }
5323
5324 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5325 cast<ConstantInt>(Arg2)->getZExtValue());
5326
5327 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5328 Value *Res = NewCall;
5329 if (Res->getType() != CI->getType())
5330 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5331 NewCall->takeName(CI);
5332 CI->replaceAllUsesWith(Res);
5333 CI->eraseFromParent();
5334 return;
5335 }
5336 case Intrinsic::nvvm_mapa_shared_cluster: {
5337 // Create a new call with the correct address space.
5338 NewCall =
5339 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5340 Value *Res = NewCall;
5341 Res = Builder.CreateAddrSpaceCast(
5342 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5343 NewCall->takeName(CI);
5344 CI->replaceAllUsesWith(Res);
5345 CI->eraseFromParent();
5346 return;
5347 }
5348 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5349 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5350 // Create a new call with the correct address space.
5351 SmallVector<Value *, 4> Args(CI->args());
5352 Args[0] = Builder.CreateAddrSpaceCast(
5353 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5354
5355 NewCall = Builder.CreateCall(NewFn, Args);
5356 NewCall->takeName(CI);
5357 CI->replaceAllUsesWith(NewCall);
5358 CI->eraseFromParent();
5359 return;
5360 }
5361 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5362 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5363 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5364 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5365 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5366 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5367 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5368 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5369 SmallVector<Value *, 16> Args(CI->args());
5370
5371 // Create AddrSpaceCast to shared_cluster if needed.
5372 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5373 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5375 Args[0] = Builder.CreateAddrSpaceCast(
5376 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5377
5378 // Attach the flag argument for cta_group, with a
5379 // default value of 0. This handles case (2) in
5380 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5381 size_t NumArgs = CI->arg_size();
5382 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5383 if (!FlagArg->getType()->isIntegerTy(1))
5384 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5385
5386 NewCall = Builder.CreateCall(NewFn, Args);
5387 NewCall->takeName(CI);
5388 CI->replaceAllUsesWith(NewCall);
5389 CI->eraseFromParent();
5390 return;
5391 }
5392 case Intrinsic::riscv_sha256sig0:
5393 case Intrinsic::riscv_sha256sig1:
5394 case Intrinsic::riscv_sha256sum0:
5395 case Intrinsic::riscv_sha256sum1:
5396 case Intrinsic::riscv_sm3p0:
5397 case Intrinsic::riscv_sm3p1: {
5398 // The last argument to these intrinsics used to be i8 and changed to i32.
5399 // The type overload for sm4ks and sm4ed was removed.
5400 if (!CI->getType()->isIntegerTy(64))
5401 return;
5402
5403 Value *Arg =
5404 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5405
5406 NewCall = Builder.CreateCall(NewFn, Arg);
5407 Value *Res =
5408 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5409 NewCall->takeName(CI);
5410 CI->replaceAllUsesWith(Res);
5411 CI->eraseFromParent();
5412 return;
5413 }
5414
5415 case Intrinsic::x86_xop_vfrcz_ss:
5416 case Intrinsic::x86_xop_vfrcz_sd:
5417 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5418 break;
5419
5420 case Intrinsic::x86_xop_vpermil2pd:
5421 case Intrinsic::x86_xop_vpermil2ps:
5422 case Intrinsic::x86_xop_vpermil2pd_256:
5423 case Intrinsic::x86_xop_vpermil2ps_256: {
5424 SmallVector<Value *, 4> Args(CI->args());
5425 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5426 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5427 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5428 NewCall = Builder.CreateCall(NewFn, Args);
5429 break;
5430 }
5431
5432 case Intrinsic::x86_sse41_ptestc:
5433 case Intrinsic::x86_sse41_ptestz:
5434 case Intrinsic::x86_sse41_ptestnzc: {
5435 // The arguments for these intrinsics used to be v4f32, and changed
5436 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5437 // So, the only thing required is a bitcast for both arguments.
5438 // First, check the arguments have the old type.
5439 Value *Arg0 = CI->getArgOperand(0);
5440 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5441 return;
5442
5443 // Old intrinsic, add bitcasts
5444 Value *Arg1 = CI->getArgOperand(1);
5445
5446 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5447
5448 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5449 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5450
5451 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5452 break;
5453 }
5454
5455 case Intrinsic::x86_rdtscp: {
5456 // This used to take 1 arguments. If we have no arguments, it is already
5457 // upgraded.
5458 if (CI->getNumOperands() == 0)
5459 return;
5460
5461 NewCall = Builder.CreateCall(NewFn);
5462 // Extract the second result and store it.
5463 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5464 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5465 // Replace the original call result with the first result of the new call.
5466 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5467
5468 NewCall->takeName(CI);
5469 CI->replaceAllUsesWith(TSC);
5470 CI->eraseFromParent();
5471 return;
5472 }
5473
5474 case Intrinsic::x86_sse41_insertps:
5475 case Intrinsic::x86_sse41_dppd:
5476 case Intrinsic::x86_sse41_dpps:
5477 case Intrinsic::x86_sse41_mpsadbw:
5478 case Intrinsic::x86_avx_dp_ps_256:
5479 case Intrinsic::x86_avx2_mpsadbw: {
5480 // Need to truncate the last argument from i32 to i8 -- this argument models
5481 // an inherently 8-bit immediate operand to these x86 instructions.
5482 SmallVector<Value *, 4> Args(CI->args());
5483
5484 // Replace the last argument with a trunc.
5485 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5486 NewCall = Builder.CreateCall(NewFn, Args);
5487 break;
5488 }
5489
5490 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5491 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5492 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5493 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5494 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5495 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5496 SmallVector<Value *, 4> Args(CI->args());
5497 unsigned NumElts =
5498 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5499 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5500
5501 NewCall = Builder.CreateCall(NewFn, Args);
5502 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5503
5504 NewCall->takeName(CI);
5505 CI->replaceAllUsesWith(Res);
5506 CI->eraseFromParent();
5507 return;
5508 }
5509
5510 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5511 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5512 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5513 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5514 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5515 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5516 SmallVector<Value *, 4> Args(CI->args());
5517 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5518 if (NewFn->getIntrinsicID() ==
5519 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5520 Args[1] = Builder.CreateBitCast(
5521 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5522
5523 NewCall = Builder.CreateCall(NewFn, Args);
5524 Value *Res = Builder.CreateBitCast(
5525 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5526
5527 NewCall->takeName(CI);
5528 CI->replaceAllUsesWith(Res);
5529 CI->eraseFromParent();
5530 return;
5531 }
5532 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5533 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5534 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5535 SmallVector<Value *, 4> Args(CI->args());
5536 unsigned NumElts =
5537 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5538 Args[1] = Builder.CreateBitCast(
5539 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5540 Args[2] = Builder.CreateBitCast(
5541 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5542
5543 NewCall = Builder.CreateCall(NewFn, Args);
5544 break;
5545 }
5546
5547 case Intrinsic::thread_pointer: {
5548 NewCall = Builder.CreateCall(NewFn, {});
5549 break;
5550 }
5551
5552 case Intrinsic::memcpy:
5553 case Intrinsic::memmove:
5554 case Intrinsic::memset: {
5555 // We have to make sure that the call signature is what we're expecting.
5556 // We only want to change the old signatures by removing the alignment arg:
5557 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5558 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5559 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5560 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5561 // Note: i8*'s in the above can be any pointer type
5562 if (CI->arg_size() != 5) {
5563 DefaultCase();
5564 return;
5565 }
5566 // Remove alignment argument (3), and add alignment attributes to the
5567 // dest/src pointers.
5568 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5569 CI->getArgOperand(2), CI->getArgOperand(4)};
5570 NewCall = Builder.CreateCall(NewFn, Args);
5571 AttributeList OldAttrs = CI->getAttributes();
5572 AttributeList NewAttrs = AttributeList::get(
5573 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5574 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5575 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5576 NewCall->setAttributes(NewAttrs);
5577 auto *MemCI = cast<MemIntrinsic>(NewCall);
5578 // All mem intrinsics support dest alignment.
5580 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5581 // Memcpy/Memmove also support source alignment.
5582 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5583 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5584 break;
5585 }
5586
5587 case Intrinsic::masked_load:
5588 case Intrinsic::masked_gather:
5589 case Intrinsic::masked_store:
5590 case Intrinsic::masked_scatter: {
5591 if (CI->arg_size() != 4) {
5592 DefaultCase();
5593 return;
5594 }
5595
5596 auto GetMaybeAlign = [](Value *Op) {
5597 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5598 uint64_t Val = CI->getZExtValue();
5599 if (Val == 0)
5600 return MaybeAlign();
5601 if (isPowerOf2_64(Val))
5602 return MaybeAlign(Val);
5603 }
5604 reportFatalUsageError("Invalid alignment argument");
5605 };
5606 auto GetAlign = [&](Value *Op) {
5607 MaybeAlign Align = GetMaybeAlign(Op);
5608 if (Align)
5609 return *Align;
5610 reportFatalUsageError("Invalid zero alignment argument");
5611 };
5612
5613 const DataLayout &DL = CI->getDataLayout();
5614 switch (NewFn->getIntrinsicID()) {
5615 case Intrinsic::masked_load:
5616 NewCall = Builder.CreateMaskedLoad(
5617 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5618 CI->getArgOperand(2), CI->getArgOperand(3));
5619 break;
5620 case Intrinsic::masked_gather:
5621 NewCall = Builder.CreateMaskedGather(
5622 CI->getType(), CI->getArgOperand(0),
5623 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5624 CI->getType()->getScalarType()),
5625 CI->getArgOperand(2), CI->getArgOperand(3));
5626 break;
5627 case Intrinsic::masked_store:
5628 NewCall = Builder.CreateMaskedStore(
5629 CI->getArgOperand(0), CI->getArgOperand(1),
5630 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5631 break;
5632 case Intrinsic::masked_scatter:
5633 NewCall = Builder.CreateMaskedScatter(
5634 CI->getArgOperand(0), CI->getArgOperand(1),
5635 DL.getValueOrABITypeAlignment(
5636 GetMaybeAlign(CI->getArgOperand(2)),
5637 CI->getArgOperand(0)->getType()->getScalarType()),
5638 CI->getArgOperand(3));
5639 break;
5640 default:
5641 llvm_unreachable("Unexpected intrinsic ID");
5642 }
5643 // Previous metadata is still valid.
5644 NewCall->copyMetadata(*CI);
5645 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5646 break;
5647 }
5648
5649 case Intrinsic::lifetime_start:
5650 case Intrinsic::lifetime_end: {
5651 if (CI->arg_size() != 2) {
5652 DefaultCase();
5653 return;
5654 }
5655
5656 Value *Ptr = CI->getArgOperand(1);
5657 // Try to strip pointer casts, such that the lifetime works on an alloca.
5658 Ptr = Ptr->stripPointerCasts();
5659 if (isa<AllocaInst>(Ptr)) {
5660 // Don't use NewFn, as we might have looked through an addrspacecast.
5661 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5662 NewCall = Builder.CreateLifetimeStart(Ptr);
5663 else
5664 NewCall = Builder.CreateLifetimeEnd(Ptr);
5665 break;
5666 }
5667
5668 // Otherwise remove the lifetime marker.
5669 CI->eraseFromParent();
5670 return;
5671 }
5672
5673 case Intrinsic::x86_avx512_vpdpbusd_128:
5674 case Intrinsic::x86_avx512_vpdpbusd_256:
5675 case Intrinsic::x86_avx512_vpdpbusd_512:
5676 case Intrinsic::x86_avx512_vpdpbusds_128:
5677 case Intrinsic::x86_avx512_vpdpbusds_256:
5678 case Intrinsic::x86_avx512_vpdpbusds_512:
5679 case Intrinsic::x86_avx2_vpdpbssd_128:
5680 case Intrinsic::x86_avx2_vpdpbssd_256:
5681 case Intrinsic::x86_avx10_vpdpbssd_512:
5682 case Intrinsic::x86_avx2_vpdpbssds_128:
5683 case Intrinsic::x86_avx2_vpdpbssds_256:
5684 case Intrinsic::x86_avx10_vpdpbssds_512:
5685 case Intrinsic::x86_avx2_vpdpbsud_128:
5686 case Intrinsic::x86_avx2_vpdpbsud_256:
5687 case Intrinsic::x86_avx10_vpdpbsud_512:
5688 case Intrinsic::x86_avx2_vpdpbsuds_128:
5689 case Intrinsic::x86_avx2_vpdpbsuds_256:
5690 case Intrinsic::x86_avx10_vpdpbsuds_512:
5691 case Intrinsic::x86_avx2_vpdpbuud_128:
5692 case Intrinsic::x86_avx2_vpdpbuud_256:
5693 case Intrinsic::x86_avx10_vpdpbuud_512:
5694 case Intrinsic::x86_avx2_vpdpbuuds_128:
5695 case Intrinsic::x86_avx2_vpdpbuuds_256:
5696 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5697 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5698 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5699 CI->getArgOperand(2)};
5700 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5701 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5702 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5703
5704 NewCall = Builder.CreateCall(NewFn, Args);
5705 break;
5706 }
5707 case Intrinsic::x86_avx512_vpdpwssd_128:
5708 case Intrinsic::x86_avx512_vpdpwssd_256:
5709 case Intrinsic::x86_avx512_vpdpwssd_512:
5710 case Intrinsic::x86_avx512_vpdpwssds_128:
5711 case Intrinsic::x86_avx512_vpdpwssds_256:
5712 case Intrinsic::x86_avx512_vpdpwssds_512:
5713 case Intrinsic::x86_avx2_vpdpwsud_128:
5714 case Intrinsic::x86_avx2_vpdpwsud_256:
5715 case Intrinsic::x86_avx10_vpdpwsud_512:
5716 case Intrinsic::x86_avx2_vpdpwsuds_128:
5717 case Intrinsic::x86_avx2_vpdpwsuds_256:
5718 case Intrinsic::x86_avx10_vpdpwsuds_512:
5719 case Intrinsic::x86_avx2_vpdpwusd_128:
5720 case Intrinsic::x86_avx2_vpdpwusd_256:
5721 case Intrinsic::x86_avx10_vpdpwusd_512:
5722 case Intrinsic::x86_avx2_vpdpwusds_128:
5723 case Intrinsic::x86_avx2_vpdpwusds_256:
5724 case Intrinsic::x86_avx10_vpdpwusds_512:
5725 case Intrinsic::x86_avx2_vpdpwuud_128:
5726 case Intrinsic::x86_avx2_vpdpwuud_256:
5727 case Intrinsic::x86_avx10_vpdpwuud_512:
5728 case Intrinsic::x86_avx2_vpdpwuuds_128:
5729 case Intrinsic::x86_avx2_vpdpwuuds_256:
5730 case Intrinsic::x86_avx10_vpdpwuuds_512:
5731 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5732 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5733 CI->getArgOperand(2)};
5734 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5735 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5736 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5737
5738 NewCall = Builder.CreateCall(NewFn, Args);
5739 break;
5740 }
5741 assert(NewCall && "Should have either set this variable or returned through "
5742 "the default case");
5743 NewCall->takeName(CI);
5744 CI->replaceAllUsesWith(NewCall);
5745 CI->eraseFromParent();
5746}
5747
5749 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5750
5751 // Check if this function should be upgraded and get the replacement function
5752 // if there is one.
5753 Function *NewFn;
5754 if (UpgradeIntrinsicFunction(F, NewFn)) {
5755 // Replace all users of the old function with the new function or new
5756 // instructions. This is not a range loop because the call is deleted.
5757 for (User *U : make_early_inc_range(F->users()))
5758 if (CallBase *CB = dyn_cast<CallBase>(U))
5759 UpgradeIntrinsicCall(CB, NewFn);
5760
5761 // Remove old function, no longer used, from the module.
5762 if (F != NewFn)
5763 F->eraseFromParent();
5764 }
5765}
5766
5768 const unsigned NumOperands = MD.getNumOperands();
5769 if (NumOperands == 0)
5770 return &MD; // Invalid, punt to a verifier error.
5771
5772 // Check if the tag uses struct-path aware TBAA format.
5773 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5774 return &MD;
5775
5776 auto &Context = MD.getContext();
5777 if (NumOperands == 3) {
5778 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5779 MDNode *ScalarType = MDNode::get(Context, Elts);
5780 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5781 Metadata *Elts2[] = {ScalarType, ScalarType,
5784 MD.getOperand(2)};
5785 return MDNode::get(Context, Elts2);
5786 }
5787 // Create a MDNode <MD, MD, offset 0>
5789 Type::getInt64Ty(Context)))};
5790 return MDNode::get(Context, Elts);
5791}
5792
5794 Instruction *&Temp) {
5795 if (Opc != Instruction::BitCast)
5796 return nullptr;
5797
5798 Temp = nullptr;
5799 Type *SrcTy = V->getType();
5800 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5801 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5802 LLVMContext &Context = V->getContext();
5803
5804 // We have no information about target data layout, so we assume that
5805 // the maximum pointer size is 64bit.
5806 Type *MidTy = Type::getInt64Ty(Context);
5807 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5808
5809 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5810 }
5811
5812 return nullptr;
5813}
5814
5816 if (Opc != Instruction::BitCast)
5817 return nullptr;
5818
5819 Type *SrcTy = C->getType();
5820 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5821 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5822 LLVMContext &Context = C->getContext();
5823
5824 // We have no information about target data layout, so we assume that
5825 // the maximum pointer size is 64bit.
5826 Type *MidTy = Type::getInt64Ty(Context);
5827
5829 DestTy);
5830 }
5831
5832 return nullptr;
5833}
5834
5835/// Check the debug info version number, if it is out-dated, drop the debug
5836/// info. Return true if module is modified.
5839 return false;
5840
5841 llvm::TimeTraceScope timeScope("Upgrade debug info");
5842 // We need to get metadata before the module is verified (i.e., getModuleFlag
5843 // makes assumptions that we haven't verified yet). Carefully extract the flag
5844 // from the metadata.
5845 unsigned Version = 0;
5846 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5847 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5848 if (Flag->getNumOperands() < 3)
5849 return false;
5850 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5851 return K->getString() == "Debug Info Version";
5852 return false;
5853 });
5854 if (OpIt != ModFlags->op_end()) {
5855 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5856 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5857 Version = CI->getZExtValue();
5858 }
5859 }
5860
5862 bool BrokenDebugInfo = false;
5863 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5864 report_fatal_error("Broken module found, compilation aborted!");
5865 if (!BrokenDebugInfo)
5866 // Everything is ok.
5867 return false;
5868 else {
5869 // Diagnose malformed debug info.
5871 M.getContext().diagnose(Diag);
5872 }
5873 }
5874 bool Modified = StripDebugInfo(M);
5876 // Diagnose a version mismatch.
5878 M.getContext().diagnose(DiagVersion);
5879 }
5880 return Modified;
5881}
5882
5883static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5884 GlobalValue *GV, const Metadata *V) {
5885 Function *F = cast<Function>(GV);
5886
5887 constexpr StringLiteral DefaultValue = "1";
5888 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5889 unsigned Length = 0;
5890
5891 if (F->hasFnAttribute(Attr)) {
5892 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5893 // parse these elements placing them into Vect3
5894 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5895 for (; Length < 3 && !S.empty(); Length++) {
5896 auto [Part, Rest] = S.split(',');
5897 Vect3[Length] = Part.trim();
5898 S = Rest;
5899 }
5900 }
5901
5902 const unsigned Dim = DimC - 'x';
5903 assert(Dim < 3 && "Unexpected dim char");
5904
5905 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5906
5907 // local variable required for StringRef in Vect3 to point to.
5908 const std::string VStr = llvm::utostr(VInt);
5909 Vect3[Dim] = VStr;
5910 Length = std::max(Length, Dim + 1);
5911
5912 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5913 F->addFnAttr(Attr, NewAttr);
5914}
5915
5916static inline bool isXYZ(StringRef S) {
5917 return S == "x" || S == "y" || S == "z";
5918}
5919
5921 const Metadata *V) {
5922 if (K == "kernel") {
5924 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5925 return true;
5926 }
5927 if (K == "align") {
    // V is a bitfield specifying two 16-bit values. The alignment value is
    // specified in the low 16 bits, and the index is specified in the high
    // bits. For the index, 0 indicates the return value while higher values
    // correspond to each parameter (idx = param + 1).
5932 const uint64_t AlignIdxValuePair =
5933 mdconst::extract<ConstantInt>(V)->getZExtValue();
5934 const unsigned Idx = (AlignIdxValuePair >> 16);
5935 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5936 cast<Function>(GV)->addAttributeAtIndex(
5937 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5938 return true;
5939 }
5940 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5941 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5943 return true;
5944 }
5945 if (K == "minctasm") {
5946 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5947 cast<Function>(GV)->addFnAttr(NVVMAttr::MinCTASm, llvm::utostr(CV));
5948 return true;
5949 }
5950 if (K == "maxnreg") {
5951 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5952 cast<Function>(GV)->addFnAttr(NVVMAttr::MaxNReg, llvm::utostr(CV));
5953 return true;
5954 }
5955 if (K.consume_front("maxntid") && isXYZ(K)) {
5957 return true;
5958 }
5959 if (K.consume_front("reqntid") && isXYZ(K)) {
5961 return true;
5962 }
5963 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5965 return true;
5966 }
5967 if (K == "grid_constant") {
5968 const auto Attr = Attribute::get(GV->getContext(), NVVMAttr::GridConstant);
5969 for (const auto &Op : cast<MDNode>(V)->operands()) {
5970 // For some reason, the index is 1-based in the metadata. Good thing we're
5971 // able to auto-upgrade it!
5972 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5973 cast<Function>(GV)->addParamAttr(Index, Attr);
5974 }
5975 return true;
5976 }
5977
5978 return false;
5979}
5980
5982 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5983 if (!NamedMD)
5984 return;
5985
5986 SmallVector<MDNode *, 8> NewNodes;
5988 for (MDNode *MD : NamedMD->operands()) {
5989 if (!SeenNodes.insert(MD).second)
5990 continue;
5991
5992 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5993 if (!GV)
5994 continue;
5995
5996 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5997
5998 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5999 // Each nvvm.annotations metadata entry will be of the following form:
6000 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
6001 // start index = 1, to skip the global variable key
6002 // increment = 2, to skip the value for each property-value pairs
6003 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6004 MDString *K = cast<MDString>(MD->getOperand(j));
6005 const MDOperand &V = MD->getOperand(j + 1);
6006 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
6007 if (!Upgraded)
6008 NewOperands.append({K, V});
6009 }
6010
6011 if (NewOperands.size() > 1)
6012 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
6013 }
6014
6015 NamedMD->clearOperands();
6016 for (MDNode *N : NewNodes)
6017 NamedMD->addOperand(N);
6018}
6019
6020/// This checks for objc retain release marker which should be upgraded. It
6021/// returns true if module is modified.
6023 bool Changed = false;
6024 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
6025 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6026 if (ModRetainReleaseMarker) {
6027 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
6028 if (Op) {
6029 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
6030 if (ID) {
6031 SmallVector<StringRef, 4> ValueComp;
6032 ID->getString().split(ValueComp, "#");
6033 if (ValueComp.size() == 2) {
6034 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
6035 ID = MDString::get(M.getContext(), NewValue);
6036 }
6037 M.addModuleFlag(Module::Error, MarkerKey, ID);
6038 M.eraseNamedMetadata(ModRetainReleaseMarker);
6039 Changed = true;
6040 }
6041 }
6042 }
6043 return Changed;
6044}
6045
6047 // This lambda converts normal function calls to ARC runtime functions to
6048 // intrinsic calls.
6049 auto UpgradeToIntrinsic = [&](const char *OldFunc,
6050 llvm::Intrinsic::ID IntrinsicFunc) {
6051 Function *Fn = M.getFunction(OldFunc);
6052
6053 if (!Fn)
6054 return;
6055
6056 Function *NewFn =
6057 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
6058
6059 for (User *U : make_early_inc_range(Fn->users())) {
6061 if (!CI || CI->getCalledFunction() != Fn)
6062 continue;
6063
6064 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
6065 FunctionType *NewFuncTy = NewFn->getFunctionType();
6067
6068 // Don't upgrade the intrinsic if it's not valid to bitcast the return
6069 // value to the return type of the old function.
6070 if (NewFuncTy->getReturnType() != CI->getType() &&
6071 !CastInst::castIsValid(Instruction::BitCast, CI,
6072 NewFuncTy->getReturnType()))
6073 continue;
6074
6075 bool InvalidCast = false;
6076
6077 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
6078 Value *Arg = CI->getArgOperand(I);
6079
6080 // Bitcast argument to the parameter type of the new function if it's
6081 // not a variadic argument.
6082 if (I < NewFuncTy->getNumParams()) {
6083 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
6084 // to the parameter type of the new function.
6085 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
6086 NewFuncTy->getParamType(I))) {
6087 InvalidCast = true;
6088 break;
6089 }
6090 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
6091 }
6092 Args.push_back(Arg);
6093 }
6094
6095 if (InvalidCast)
6096 continue;
6097
6098 // Create a call instruction that calls the new function.
6099 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6100 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
6101 NewCall->takeName(CI);
6102
6103 // Bitcast the return value back to the type of the old call.
6104 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
6105
6106 if (!CI->use_empty())
6107 CI->replaceAllUsesWith(NewRetVal);
6108 CI->eraseFromParent();
6109 }
6110
6111 if (Fn->use_empty())
6112 Fn->eraseFromParent();
6113 };
6114
6115 // Unconditionally convert a call to "clang.arc.use" to a call to
6116 // "llvm.objc.clang.arc.use".
6117 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6118
6119 // Upgrade the retain release marker. If there is no need to upgrade
6120 // the marker, that means either the module is already new enough to contain
6121 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
6123 return;
6124
6125 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6126 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6127 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6128 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6129 {"objc_autoreleaseReturnValue",
6130 llvm::Intrinsic::objc_autoreleaseReturnValue},
6131 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6132 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6133 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6134 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6135 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6136 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6137 {"objc_release", llvm::Intrinsic::objc_release},
6138 {"objc_retain", llvm::Intrinsic::objc_retain},
6139 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6140 {"objc_retainAutoreleaseReturnValue",
6141 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6142 {"objc_retainAutoreleasedReturnValue",
6143 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6144 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6145 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6146 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6147 {"objc_unsafeClaimAutoreleasedReturnValue",
6148 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6149 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6150 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6151 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6152 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6153 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6154 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6155 {"objc_arc_annotation_topdown_bbstart",
6156 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6157 {"objc_arc_annotation_topdown_bbend",
6158 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6159 {"objc_arc_annotation_bottomup_bbstart",
6160 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6161 {"objc_arc_annotation_bottomup_bbend",
6162 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6163
6164 for (auto &I : RuntimeFuncs)
6165 UpgradeToIntrinsic(I.first, I.second);
6166}
6167
6169 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6170 if (!ModFlags)
6171 return false;
6172
6173 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6174 bool HasSwiftVersionFlag = false;
6175 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6176 uint32_t SwiftABIVersion;
6177 auto Int8Ty = Type::getInt8Ty(M.getContext());
6178 auto Int32Ty = Type::getInt32Ty(M.getContext());
6179
6180 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6181 MDNode *Op = ModFlags->getOperand(I);
6182 if (Op->getNumOperands() != 3)
6183 continue;
6184 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6185 if (!ID)
6186 continue;
6187 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6188 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6189 Type::getInt32Ty(M.getContext()), B)),
6190 MDString::get(M.getContext(), ID->getString()),
6191 Op->getOperand(2)};
6192 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6193 Changed = true;
6194 };
6195
6196 if (ID->getString() == "Objective-C Image Info Version")
6197 HasObjCFlag = true;
6198 if (ID->getString() == "Objective-C Class Properties")
6199 HasClassProperties = true;
6200 // Upgrade PIC from Error/Max to Min.
6201 if (ID->getString() == "PIC Level") {
6202 if (auto *Behavior =
6204 uint64_t V = Behavior->getLimitedValue();
6205 if (V == Module::Error || V == Module::Max)
6206 SetBehavior(Module::Min);
6207 }
6208 }
6209 // Upgrade "PIE Level" from Error to Max.
6210 if (ID->getString() == "PIE Level")
6211 if (auto *Behavior =
6213 if (Behavior->getLimitedValue() == Module::Error)
6214 SetBehavior(Module::Max);
6215
6216 // Upgrade branch protection and return address signing module flags. The
6217 // module flag behavior for these fields were Error and now they are Min.
6218 if (ID->getString() == "branch-target-enforcement" ||
6219 ID->getString().starts_with("sign-return-address")) {
6220 if (auto *Behavior =
6222 if (Behavior->getLimitedValue() == Module::Error) {
6223 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6224 Metadata *Ops[3] = {
6225 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6226 Op->getOperand(1), Op->getOperand(2)};
6227 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6228 Changed = true;
6229 }
6230 }
6231 }
6232
    // Upgrade Objective-C Image Info Section. Remove the whitespace in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that are functionally the same.
6236 if (ID->getString() == "Objective-C Image Info Section") {
6237 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6238 SmallVector<StringRef, 4> ValueComp;
6239 Value->getString().split(ValueComp, " ");
6240 if (ValueComp.size() != 1) {
6241 std::string NewValue;
6242 for (auto &S : ValueComp)
6243 NewValue += S.str();
6244 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6245 MDString::get(M.getContext(), NewValue)};
6246 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6247 Changed = true;
6248 }
6249 }
6250 }
6251
6252 // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
6253 // If the higher bits are set, it adds new module flag for swift info.
6254 if (ID->getString() == "Objective-C Garbage Collection") {
6255 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6256 if (Md) {
6257 assert(Md->getValue() && "Expected non-empty metadata");
6258 auto Type = Md->getValue()->getType();
6259 if (Type == Int8Ty)
6260 continue;
6261 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6262 if ((Val & 0xff) != Val) {
6263 HasSwiftVersionFlag = true;
6264 SwiftABIVersion = (Val & 0xff00) >> 8;
6265 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6266 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6267 }
6268 Metadata *Ops[3] = {
6270 Op->getOperand(1),
6271 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
6272 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6273 Changed = true;
6274 }
6275 }
6276
6277 if (ID->getString() == "amdgpu_code_object_version") {
6278 Metadata *Ops[3] = {
6279 Op->getOperand(0),
6280 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6281 Op->getOperand(2)};
6282 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6283 Changed = true;
6284 }
6285 }
6286
  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
6292 if (HasObjCFlag && !HasClassProperties) {
6293 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6294 (uint32_t)0);
6295 Changed = true;
6296 }
6297
6298 if (HasSwiftVersionFlag) {
6299 M.addModuleFlag(Module::Error, "Swift ABI Version",
6300 SwiftABIVersion);
6301 M.addModuleFlag(Module::Error, "Swift Major Version",
6302 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6303 M.addModuleFlag(Module::Error, "Swift Minor Version",
6304 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6305 Changed = true;
6306 }
6307
6308 return Changed;
6309}
6310
6312 auto TrimSpaces = [](StringRef Section) -> std::string {
6313 SmallVector<StringRef, 5> Components;
6314 Section.split(Components, ',');
6315
6316 SmallString<32> Buffer;
6317 raw_svector_ostream OS(Buffer);
6318
6319 for (auto Component : Components)
6320 OS << ',' << Component.trim();
6321
6322 return std::string(OS.str().substr(1));
6323 };
6324
6325 for (auto &GV : M.globals()) {
6326 if (!GV.hasSection())
6327 continue;
6328
6329 StringRef Section = GV.getSection();
6330
6331 if (!Section.starts_with("__DATA, __objc_catlist"))
6332 continue;
6333
6334 // __DATA, __objc_catlist, regular, no_dead_strip
6335 // __DATA,__objc_catlist,regular,no_dead_strip
6336 GV.setSection(TrimSpaces(Section));
6337 }
6338}
6339
6340namespace {
6341// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6342// callsites within a function that did not also have the strictfp attribute.
6343// Since 10.0, if strict FP semantics are needed within a function, the
6344// function must have the strictfp attribute and all calls within the function
6345// must also have the strictfp attribute. This latter restriction is
6346// necessary to prevent unwanted libcall simplification when a function is
6347// being cloned (such as for inlining).
6348//
6349// The "dangling" strictfp attribute usage was only used to prevent constant
6350// folding and other libcall simplification. The nobuiltin attribute on the
6351// callsite has the same effect.
6352struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6353 StrictFPUpgradeVisitor() = default;
6354
6355 void visitCallBase(CallBase &Call) {
6356 if (!Call.isStrictFP())
6357 return;
6359 return;
6360 // If we get here, the caller doesn't have the strictfp attribute
6361 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6362 Call.removeFnAttr(Attribute::StrictFP);
6363 Call.addFnAttr(Attribute::NoBuiltin);
6364 }
6365};
6366
6367/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6368struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6369 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6370 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6371
6372 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6373 if (!RMW.isFloatingPointOperation())
6374 return;
6375
6376 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6377 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6378 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6379 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6380 }
6381};
6382} // namespace
6383
6385 // If a function definition doesn't have the strictfp attribute,
6386 // convert any callsite strictfp attributes to nobuiltin.
6387 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6388 StrictFPUpgradeVisitor SFPV;
6389 SFPV.visit(F);
6390 }
6391
  // Remove all incompatible attributes from the function.
6393 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6394 F.getReturnType(), F.getAttributes().getRetAttrs()));
6395 for (auto &Arg : F.args())
6396 Arg.removeAttrs(
6397 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6398
6399 bool AddingAttrs = false, RemovingAttrs = false;
6400 AttrBuilder AttrsToAdd(F.getContext());
6401 AttributeMask AttrsToRemove;
6402
6403 // Older versions of LLVM treated an "implicit-section-name" attribute
6404 // similarly to directly setting the section on a Function.
6405 if (Attribute A = F.getFnAttribute("implicit-section-name");
6406 A.isValid() && A.isStringAttribute()) {
6407 F.setSection(A.getValueAsString());
6408 AttrsToRemove.addAttribute("implicit-section-name");
6409 RemovingAttrs = true;
6410 }
6411
6412 if (Attribute A = F.getFnAttribute("nooutline");
6413 A.isValid() && A.isStringAttribute()) {
6414 AttrsToRemove.addAttribute("nooutline");
6415 AttrsToAdd.addAttribute(Attribute::NoOutline);
6416 AddingAttrs = RemovingAttrs = true;
6417 }
6418
6419 if (Attribute A = F.getFnAttribute("uniform-work-group-size");
6420 A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
6421 AttrsToRemove.addAttribute("uniform-work-group-size");
6422 RemovingAttrs = true;
6423 if (A.getValueAsString() == "true") {
6424 AttrsToAdd.addAttribute("uniform-work-group-size");
6425 AddingAttrs = true;
6426 }
6427 }
6428
6429 if (!F.empty()) {
6430 // For some reason this is called twice, and the first time is before any
6431 // instructions are loaded into the body.
6432
6433 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6434 A.isValid()) {
6435
6436 if (A.getValueAsBool()) {
6437 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6438 Visitor.visit(F);
6439 }
6440
6441 // We will leave behind dead attribute uses on external declarations, but
6442 // clang never added these to declarations anyway.
6443 AttrsToRemove.addAttribute("amdgpu-unsafe-fp-atomics");
6444 RemovingAttrs = true;
6445 }
6446 }
6447
6448 DenormalMode DenormalFPMath = DenormalMode::getIEEE();
6449 DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();
6450
6451 bool HandleDenormalMode = false;
6452
6453 if (Attribute Attr = F.getFnAttribute("denormal-fp-math"); Attr.isValid()) {
6454 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6455 if (ParsedMode.isValid()) {
6456 DenormalFPMath = ParsedMode;
6457 AttrsToRemove.addAttribute("denormal-fp-math");
6458 AddingAttrs = RemovingAttrs = true;
6459 HandleDenormalMode = true;
6460 }
6461 }
6462
6463 if (Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
6464 Attr.isValid()) {
6465 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6466 if (ParsedMode.isValid()) {
6467 DenormalFPMathF32 = ParsedMode;
6468 AttrsToRemove.addAttribute("denormal-fp-math-f32");
6469 AddingAttrs = RemovingAttrs = true;
6470 HandleDenormalMode = true;
6471 }
6472 }
6473
6474 if (HandleDenormalMode)
6475 AttrsToAdd.addDenormalFPEnvAttr(
6476 DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));
6477
6478 if (RemovingAttrs)
6479 F.removeFnAttrs(AttrsToRemove);
6480
6481 if (AddingAttrs)
6482 F.addFnAttrs(AttrsToAdd);
6483}
6484
6485// Check if the function attribute is not present and set it.
6487 StringRef Value) {
6488 if (!F.hasFnAttribute(FnAttrName))
6489 F.addFnAttr(FnAttrName, Value);
6490}
6491
6492// Check if the function attribute is not present and set it if needed.
6493// If the attribute is "false" then removes it.
6494// If the attribute is "true" resets it to a valueless attribute.
6495static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6496 if (!F.hasFnAttribute(FnAttrName)) {
6497 if (Set)
6498 F.addFnAttr(FnAttrName);
6499 } else {
6500 auto A = F.getFnAttribute(FnAttrName);
6501 if ("false" == A.getValueAsString())
6502 F.removeFnAttr(FnAttrName);
6503 else if ("true" == A.getValueAsString()) {
6504 F.removeFnAttr(FnAttrName);
6505 F.addFnAttr(FnAttrName);
6506 }
6507 }
6508}
6509
6511 Triple T(M.getTargetTriple());
6512 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6513 return;
6514
6515 uint64_t BTEValue = 0;
6516 uint64_t BPPLRValue = 0;
6517 uint64_t GCSValue = 0;
6518 uint64_t SRAValue = 0;
6519 uint64_t SRAALLValue = 0;
6520 uint64_t SRABKeyValue = 0;
6521
6522 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6523 if (ModFlags) {
6524 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6525 MDNode *Op = ModFlags->getOperand(I);
6526 if (Op->getNumOperands() != 3)
6527 continue;
6528
6529 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6530 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6531 if (!ID || !CI)
6532 continue;
6533
6534 StringRef IDStr = ID->getString();
6535 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6536 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6537 : IDStr == "guarded-control-stack" ? &GCSValue
6538 : IDStr == "sign-return-address" ? &SRAValue
6539 : IDStr == "sign-return-address-all" ? &SRAALLValue
6540 : IDStr == "sign-return-address-with-bkey"
6541 ? &SRABKeyValue
6542 : nullptr;
6543 if (!ValPtr)
6544 continue;
6545
6546 *ValPtr = CI->getZExtValue();
6547 if (*ValPtr == 2)
6548 return;
6549 }
6550 }
6551
6552 bool BTE = BTEValue == 1;
6553 bool BPPLR = BPPLRValue == 1;
6554 bool GCS = GCSValue == 1;
6555 bool SRA = SRAValue == 1;
6556
6557 StringRef SignTypeValue = "non-leaf";
6558 if (SRA && SRAALLValue == 1)
6559 SignTypeValue = "all";
6560
6561 StringRef SignKeyValue = "a_key";
6562 if (SRA && SRABKeyValue == 1)
6563 SignKeyValue = "b_key";
6564
6565 for (Function &F : M.getFunctionList()) {
6566 if (F.isDeclaration())
6567 continue;
6568
6569 if (SRA) {
6570 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6571 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6572 } else {
6573 if (auto A = F.getFnAttribute("sign-return-address");
6574 A.isValid() && "none" == A.getValueAsString()) {
6575 F.removeFnAttr("sign-return-address");
6576 F.removeFnAttr("sign-return-address-key");
6577 }
6578 }
6579 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6580 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6581 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6582 }
6583
6584 if (BTE)
6585 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6586 if (BPPLR)
6587 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6588 if (GCS)
6589 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6590 if (SRA) {
6591 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6592 if (SRAALLValue == 1)
6593 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6594 if (SRABKeyValue == 1)
6595 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6596 }
6597}
6598
6599static bool isOldLoopArgument(Metadata *MD) {
6600 auto *T = dyn_cast_or_null<MDTuple>(MD);
6601 if (!T)
6602 return false;
6603 if (T->getNumOperands() < 1)
6604 return false;
6605 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6606 if (!S)
6607 return false;
6608 return S->getString().starts_with("llvm.vectorizer.");
6609}
6610
6612 StringRef OldPrefix = "llvm.vectorizer.";
6613 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6614
6615 if (OldTag == "llvm.vectorizer.unroll")
6616 return MDString::get(C, "llvm.loop.interleave.count");
6617
6618 return MDString::get(
6619 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6620 .str());
6621}
6622
6624 auto *T = dyn_cast_or_null<MDTuple>(MD);
6625 if (!T)
6626 return MD;
6627 if (T->getNumOperands() < 1)
6628 return MD;
6629 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6630 if (!OldTag)
6631 return MD;
6632 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6633 return MD;
6634
6635 // This has an old tag. Upgrade it.
6637 Ops.reserve(T->getNumOperands());
6638 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6639 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6640 Ops.push_back(T->getOperand(I));
6641
6642 return MDTuple::get(T->getContext(), Ops);
6643}
6644
6646 auto *T = dyn_cast<MDTuple>(&N);
6647 if (!T)
6648 return &N;
6649
6650 if (none_of(T->operands(), isOldLoopArgument))
6651 return &N;
6652
6654 Ops.reserve(T->getNumOperands());
6655 for (Metadata *MD : T->operands())
6656 Ops.push_back(upgradeLoopArgument(MD));
6657
6658 return MDTuple::get(T->getContext(), Ops);
6659}
6660
6662 Triple T(TT);
6663 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6664 // the address space of globals to 1. This does not apply to SPIRV Logical.
6665 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6666 !DL.contains("-G") && !DL.starts_with("G")) {
6667 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6668 }
6669
6670 if (T.isLoongArch64() || T.isRISCV64()) {
6671 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6672 auto I = DL.find("-n64-");
6673 if (I != StringRef::npos)
6674 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6675 return DL.str();
6676 }
6677
6678 // AMDGPU data layout upgrades.
6679 std::string Res = DL.str();
6680 if (T.isAMDGPU()) {
6681 // Define address spaces for constants.
6682 if (!DL.contains("-G") && !DL.starts_with("G"))
6683 Res.append(Res.empty() ? "G1" : "-G1");
6684
6685 // AMDGCN data layout upgrades.
6686 if (T.isAMDGCN()) {
6687
6688 // Add missing non-integral declarations.
6689 // This goes before adding new address spaces to prevent incoherent string
6690 // values.
6691 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6692 Res.append("-ni:7:8:9");
6693 // Update ni:7 to ni:7:8:9.
6694 if (DL.ends_with("ni:7"))
6695 Res.append(":8:9");
6696 if (DL.ends_with("ni:7:8"))
6697 Res.append(":9");
6698
6699 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6700 // resources) An empty data layout has already been upgraded to G1 by now.
6701 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6702 Res.append("-p7:160:256:256:32");
6703 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6704 Res.append("-p8:128:128:128:48");
6705 constexpr StringRef OldP8("-p8:128:128-");
6706 if (DL.contains(OldP8))
6707 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6708 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6709 Res.append("-p9:192:256:256:32");
6710 }
6711
6712 // Upgrade the ELF mangling mode.
6713 if (!DL.contains("m:e"))
6714 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6715
6716 return Res;
6717 }
6718
6719 if (T.isSystemZ() && !DL.empty()) {
6720 // Make sure the stack alignment is present.
6721 if (!DL.contains("-S64"))
6722 return "E-S64" + DL.drop_front(1).str();
6723 return DL.str();
6724 }
6725
6726 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6727 // If the datalayout matches the expected format, add pointer size address
6728 // spaces to the datalayout.
6729 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6730 if (!DL.contains(AddrSpaces)) {
6732 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6733 if (R.match(Res, &Groups))
6734 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6735 }
6736 };
6737
6738 // AArch64 data layout upgrades.
6739 if (T.isAArch64()) {
6740 // Add "-Fn32"
6741 if (!DL.empty() && !DL.contains("-Fn32"))
6742 Res.append("-Fn32");
6743 AddPtr32Ptr64AddrSpaces();
6744 return Res;
6745 }
6746
6747 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6748 T.isWasm()) {
6749 // Mips64 with o32 ABI did not add "-i128:128".
6750 // Add "-i128:128"
6751 std::string I64 = "-i64:64";
6752 std::string I128 = "-i128:128";
6753 if (!StringRef(Res).contains(I128)) {
6754 size_t Pos = Res.find(I64);
6755 if (Pos != size_t(-1))
6756 Res.insert(Pos + I64.size(), I128);
6757 }
6758 }
6759
6760 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6761 size_t Pos = Res.find("-S128");
6762 if (Pos == StringRef::npos)
6763 Pos = Res.size();
6764 Res.insert(Pos, "-f64:32:64");
6765 }
6766
6767 if (!T.isX86())
6768 return Res;
6769
6770 AddPtr32Ptr64AddrSpaces();
6771
6772 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6773 // for i128 operations prior to this being reflected in the data layout, and
6774 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6775 // boundaries, so although this is a breaking change, the upgrade is expected
6776 // to fix more IR than it breaks.
6777 // Intel MCU is an exception and uses 4-byte-alignment.
6778 if (!T.isOSIAMCU()) {
6779 std::string I128 = "-i128:128";
6780 if (StringRef Ref = Res; !Ref.contains(I128)) {
6782 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6783 if (R.match(Res, &Groups))
6784 Res = (Groups[1] + I128 + Groups[3]).str();
6785 }
6786 }
6787
6788 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6789 // Raising the alignment is safe because Clang did not produce f80 values in
6790 // the MSVC environment before this upgrade was added.
6791 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6792 StringRef Ref = Res;
6793 auto I = Ref.find("-f80:32-");
6794 if (I != StringRef::npos)
6795 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6796 }
6797
6798 return Res;
6799}
6800
6801void llvm::UpgradeAttributes(AttrBuilder &B) {
6802 StringRef FramePointer;
6803 Attribute A = B.getAttribute("no-frame-pointer-elim");
6804 if (A.isValid()) {
6805 // The value can be "true" or "false".
6806 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6807 B.removeAttribute("no-frame-pointer-elim");
6808 }
6809 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6810 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6811 if (FramePointer != "all")
6812 FramePointer = "non-leaf";
6813 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6814 }
6815 if (!FramePointer.empty())
6816 B.addAttribute("frame-pointer", FramePointer);
6817
6818 A = B.getAttribute("null-pointer-is-valid");
6819 if (A.isValid()) {
6820 // The value can be "true" or "false".
6821 bool NullPointerIsValid = A.getValueAsString() == "true";
6822 B.removeAttribute("null-pointer-is-valid");
6823 if (NullPointerIsValid)
6824 B.addAttribute(Attribute::NullPointerIsValid);
6825 }
6826
6827 A = B.getAttribute("uniform-work-group-size");
6828 if (A.isValid()) {
6829 StringRef Val = A.getValueAsString();
6830 if (!Val.empty()) {
6831 bool IsTrue = Val == "true";
6832 B.removeAttribute("uniform-work-group-size");
6833 if (IsTrue)
6834 B.addAttribute("uniform-work-group-size");
6835 }
6836 }
6837}
6838
6839void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6840 // clang.arc.attachedcall bundles are now required to have an operand.
6841 // If they don't, it's okay to drop them entirely: when there is an operand,
6842 // the "attachedcall" is meaningful and required, but without an operand,
6843 // it's just a marker NOP. Dropping it merely prevents an optimization.
6844 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6845 return OBD.getTag() == "clang.arc.attachedcall" &&
6846 OBD.inputs().empty();
6847 });
6848}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define R2(n)
This file contains the declarations for metadata subclasses.
#define T
#define T1
NVPTX address space definition.
uint64_t High
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DWARF expression.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setApproxFunc(bool B=true)
Definition FMF.h:96
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:246
const Function & getFunction() const
Definition Function.h:166
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Function.cpp:449
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
Argument * getArg(unsigned i) const
Definition Function.h:886
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:622
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2811
Base class for instruction visitors.
Definition InstVisitor.h:78
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
LLVMContext & getContext() const
Definition Metadata.h:1244
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
A single uniqued string.
Definition Metadata.h:722
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1529
Metadata wrapper in the Value hierarchy.
Definition Metadata.h:184
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition Module.h:117
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition Module.h:138
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition Module.h:120
@ Min
Takes the min of the two values, which are required to be integers.
Definition Module.h:152
@ Max
Takes the max of the two values, which are required to be integers.
Definition Module.h:149
A tuple of MDNodes.
Definition Metadata.h:1760
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
Definition Metadata.h:1856
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
StringRef getTag() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition Type.cpp:895
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
static constexpr size_t npos
Definition StringRef.h:57
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition StringRef.h:844
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:483
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:311
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:287
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:288
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:393
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
bool use_empty() const
Definition Value.h:346
bool hasName() const
Definition Value.h:261
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys)
Gets the type arguments of an intrinsic call by matching type contraints specified by the ....
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
Definition Metadata.h:709
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:532
@ Length
Definition DWP.cpp:532
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1669
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:328
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
Op::Description Desc
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1753
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1772
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2192
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ DEBUG_METADATA_VERSION
Definition Metadata.h:54
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106