LLVM 23.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/IR/AutoUpgrade.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/StringRef.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/CallingConv.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DebugInfo.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/InstVisitor.h"
31#include "llvm/IR/Instruction.h"
33#include "llvm/IR/Intrinsics.h"
34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
41#include "llvm/IR/LLVMContext.h"
42#include "llvm/IR/MDBuilder.h"
43#include "llvm/IR/Metadata.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/Value.h"
46#include "llvm/IR/Verifier.h"
52#include "llvm/Support/Regex.h"
55#include <cstdint>
56#include <cstring>
57#include <numeric>
58
59using namespace llvm;
60
// Boolean command-line option registered as "disable-auto-upgrade-debug-info".
// Per its description it disables the auto-upgrade of debug info; the code that
// reads the flag is not visible in this section.
61static cl::opt<bool>
62 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
63 cl::desc("Disable autoupgrade of debug info"));
64
// Appends ".old" to the name of GV. The upgrade helpers in this file call this
// on the old declaration right before requesting the replacement declaration.
65static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
66
67// Report a fatal error, first printing the call instruction that caused it.
68// CI is printed to llvm::errs(), followed by a newline.
// The function is declared [[noreturn]].
// NOTE(review): the statement that actually raises the error with `reason`
// (listing line 73) is not visible in this section - confirm against the file.
69[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
70 CallBase *CI) {
71 CI->print(llvm::errs());
72 llvm::errs() << "\n";
74}
75
76// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
77// changed their type from v4f32 to v2i64.
// Returns false, leaving NewFn untouched, unless parameter 0 of F is a fixed
// vector of 4 floats. Otherwise F is renamed with a ".old" suffix, NewFn
// receives the declaration for IID in F's module, and true is returned.
// NOTE(review): the first line of the function header (listing line 78) is not
// visible here; the call site passes (F, ID, NewFn) to upgradePTESTIntrinsic.
79 Function *&NewFn) {
80 // Check whether this is an old version of the function, which received
81 // v4f32 arguments.
82 Type *Arg0Type = F->getFunctionType()->getParamType(0);
83 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
84 return false;
85
86 // Yes, it's old, replace it with new version.
87 rename(F);
88 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
89 return true;
90}
91
92// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
93// arguments have changed their type from i32 to i8.
// Returns false, leaving NewFn untouched, unless the last parameter of F is an
// i32. Otherwise F is renamed with a ".old" suffix, NewFn receives the
// declaration for IID in F's module, and true is returned.
// NOTE(review): the first line of the function header (listing line 94) is not
// visible here; the call site names it upgradeX86IntrinsicsWith8BitMask.
95 Function *&NewFn) {
96 // Check that the last argument is an i32.
97 Type *LastArgType = F->getFunctionType()->getParamType(
98 F->getFunctionType()->getNumParams() - 1);
99 if (!LastArgType->isIntegerTy(32))
100 return false;
101
102 // Move this function aside and map down.
103 rename(F);
104 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
105 return true;
106}
107
108// Upgrade the declaration of fp compare intrinsics that change return type
109// from scalar to vXi1 mask.
// Returns false when F already returns a vector type. Otherwise F is renamed
// with a ".old" suffix, NewFn receives the declaration for IID in F's module,
// and true is returned.
// NOTE(review): the first line of the function header (listing line 110) is
// not visible here; the call site names it upgradeX86MaskedFPCompare.
111 Function *&NewFn) {
112 // A vector return type means this is already the new form: nothing to do.
113 if (F->getReturnType()->isVectorTy())
114 return false;
115
116 rename(F);
117 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
118 return true;
119}
120
121// Upgrade the declaration of multiply and add bytes intrinsics whose input
122// arguments' types have changed from vectors of i32 to vectors of i8
// Returns false when parameters 1 and 2 of F are both vectors with i8
// elements. Otherwise F is renamed with a ".old" suffix, NewFn receives the
// declaration for IID in F's module, and true is returned.
// NOTE(review): the first line of the function header (listing line 123) is
// not visible here; the call sites name it upgradeX86MultiplyAddBytes.
124 Function *&NewFn) {
125 // Both inputs already being vectors of i8 means this is the new form.
126 Type *Arg1Type = F->getFunctionType()->getParamType(1);
127 Type *Arg2Type = F->getFunctionType()->getParamType(2);
128 if (Arg1Type->isVectorTy() &&
129 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
130 Arg2Type->isVectorTy() &&
131 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
132 return false;
133
134 rename(F);
135 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
136 return true;
137}
138
139// Upgrade the declaration of multiply and add words intrinsics whose input
140// arguments' types have changed from vectors of i32 to vectors of i16
// Returns false when parameters 1 and 2 of F are both vectors with i16
// elements. Otherwise F is renamed with a ".old" suffix, NewFn receives the
// declaration for IID in F's module, and true is returned.
// NOTE(review): the first line of the function header (listing line 141) is
// not visible here; the call sites name it upgradeX86MultiplyAddWords.
142 Function *&NewFn) {
143 // Both inputs already being vectors of i16 means this is the new form.
144 Type *Arg1Type = F->getFunctionType()->getParamType(1);
145 Type *Arg2Type = F->getFunctionType()->getParamType(2);
146 if (Arg1Type->isVectorTy() &&
147 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
148 Arg2Type->isVectorTy() &&
149 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
150 return false;
151
152 rename(F);
153 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
154 return true;
155}
156
158 Function *&NewFn) {
    // Returns false when the scalar type of F's return type is already bfloat.
    // Otherwise F is renamed with a ".old" suffix, NewFn receives the
    // declaration for IID in F's module, and true is returned.
    // NOTE(review): the first line of the function header (listing line 157)
    // is not visible here; the call site names it upgradeX86BF16Intrinsic.
159 if (F->getReturnType()->getScalarType()->isBFloatTy())
160 return false;
161
162 rename(F);
163 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
164 return true;
165}
166
168 Function *&NewFn) {
    // Returns false when the scalar type of F's parameter 1 is already bfloat.
    // Otherwise F is renamed with a ".old" suffix, NewFn receives the
    // declaration for IID in F's module, and true is returned.
    // NOTE(review): the first line of the function header (listing line 167)
    // is not visible here; the call site names it upgradeX86BF16DPIntrinsic.
169 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
170 return false;
171
172 rename(F);
173 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
174 return true;
175}
176
    // Name-based predicate: returns true when Name matches one of the old x86
    // intrinsic names listed below. The caller strips the leading "x86." before
    // calling and, on a true result, sets NewFn to nullptr and reports that an
    // upgrade is needed. Each group first consumes its feature prefix ("avx.",
    // "avx2.", "avx512.", ...) and then compares the remainder.
    // NOTE(review): the function header (listing line 177) is not visible here;
    // the call site is shouldUpgradeX86Intrinsic(F, Name). Name is presumably
    // taken by value, since the caller keeps matching on it afterwards - confirm.
178 // Every intrinsic matched below should be annotated with the LLVM version
179 // that started auto-upgrading it. At some point in the future we would
180 // like to use this information to remove upgrade code for some older
181 // intrinsics. It is currently undecided how we will determine that future
182 // point.
183 if (Name.consume_front("avx."))
184 return (Name.starts_with("blend.p") || // Added in 3.7
185 Name == "cvt.ps2.pd.256" || // Added in 3.9
186 Name == "cvtdq2.pd.256" || // Added in 3.9
187 Name == "cvtdq2.ps.256" || // Added in 7.0
188 Name.starts_with("movnt.") || // Added in 3.2
189 Name.starts_with("sqrt.p") || // Added in 7.0
190 Name.starts_with("storeu.") || // Added in 3.9
191 Name.starts_with("vbroadcast.s") || // Added in 3.5
192 Name.starts_with("vbroadcastf128") || // Added in 4.0
193 Name.starts_with("vextractf128.") || // Added in 3.7
194 Name.starts_with("vinsertf128.") || // Added in 3.7
195 Name.starts_with("vperm2f128.") || // Added in 6.0
196 Name.starts_with("vpermil.")); // Added in 3.1
197
198 if (Name.consume_front("avx2."))
199 return (Name == "movntdqa" || // Added in 5.0
200 Name.starts_with("pabs.") || // Added in 6.0
201 Name.starts_with("padds.") || // Added in 8.0
202 Name.starts_with("paddus.") || // Added in 8.0
203 Name.starts_with("pblendd.") || // Added in 3.7
204 Name == "pblendw" || // Added in 3.7
205 Name.starts_with("pbroadcast") || // Added in 3.8
206 Name.starts_with("pcmpeq.") || // Added in 3.1
207 Name.starts_with("pcmpgt.") || // Added in 3.1
208 Name.starts_with("pmax") || // Added in 3.9
209 Name.starts_with("pmin") || // Added in 3.9
210 Name.starts_with("pmovsx") || // Added in 3.9
211 Name.starts_with("pmovzx") || // Added in 3.9
212 Name == "pmul.dq" || // Added in 7.0
213 Name == "pmulu.dq" || // Added in 7.0
214 Name.starts_with("psll.dq") || // Added in 3.7
215 Name.starts_with("psrl.dq") || // Added in 3.7
216 Name.starts_with("psubs.") || // Added in 8.0
217 Name.starts_with("psubus.") || // Added in 8.0
218 Name.starts_with("vbroadcast") || // Added in 3.8
219 Name == "vbroadcasti128" || // Added in 3.7
220 Name == "vextracti128" || // Added in 3.7
221 Name == "vinserti128" || // Added in 3.7
222 Name == "vperm2i128"); // Added in 6.0
223
224 if (Name.consume_front("avx512.")) {
225 if (Name.consume_front("mask."))
226 // 'avx512.mask.*'
227 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
228 Name.starts_with("and.") || // Added in 3.9
229 Name.starts_with("andn.") || // Added in 3.9
230 Name.starts_with("broadcast.s") || // Added in 3.9
231 Name.starts_with("broadcastf32x4.") || // Added in 6.0
232 Name.starts_with("broadcastf32x8.") || // Added in 6.0
233 Name.starts_with("broadcastf64x2.") || // Added in 6.0
234 Name.starts_with("broadcastf64x4.") || // Added in 6.0
235 Name.starts_with("broadcasti32x4.") || // Added in 6.0
236 Name.starts_with("broadcasti32x8.") || // Added in 6.0
237 Name.starts_with("broadcasti64x2.") || // Added in 6.0
238 Name.starts_with("broadcasti64x4.") || // Added in 6.0
239 Name.starts_with("cmp.b") || // Added in 5.0
240 Name.starts_with("cmp.d") || // Added in 5.0
241 Name.starts_with("cmp.q") || // Added in 5.0
242 Name.starts_with("cmp.w") || // Added in 5.0
243 Name.starts_with("compress.b") || // Added in 9.0
244 Name.starts_with("compress.d") || // Added in 9.0
245 Name.starts_with("compress.p") || // Added in 9.0
246 Name.starts_with("compress.q") || // Added in 9.0
247 Name.starts_with("compress.store.") || // Added in 7.0
248 Name.starts_with("compress.w") || // Added in 9.0
249 Name.starts_with("conflict.") || // Added in 9.0
250 Name.starts_with("cvtdq2pd.") || // Added in 4.0
251 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
252 Name == "cvtpd2dq.256" || // Added in 7.0
253 Name == "cvtpd2ps.256" || // Added in 7.0
254 Name == "cvtps2pd.128" || // Added in 7.0
255 Name == "cvtps2pd.256" || // Added in 7.0
256 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
257 Name == "cvtqq2ps.256" || // Added in 9.0
258 Name == "cvtqq2ps.512" || // Added in 9.0
259 Name == "cvttpd2dq.256" || // Added in 7.0
260 Name == "cvttps2dq.128" || // Added in 7.0
261 Name == "cvttps2dq.256" || // Added in 7.0
262 Name.starts_with("cvtudq2pd.") || // Added in 4.0
263 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
264 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
265 Name == "cvtuqq2ps.256" || // Added in 9.0
266 Name == "cvtuqq2ps.512" || // Added in 9.0
267 Name.starts_with("dbpsadbw.") || // Added in 7.0
268 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
269 Name.starts_with("expand.b") || // Added in 9.0
270 Name.starts_with("expand.d") || // Added in 9.0
271 Name.starts_with("expand.load.") || // Added in 7.0
272 Name.starts_with("expand.p") || // Added in 9.0
273 Name.starts_with("expand.q") || // Added in 9.0
274 Name.starts_with("expand.w") || // Added in 9.0
275 Name.starts_with("fpclass.p") || // Added in 7.0
276 Name.starts_with("insert") || // Added in 4.0
277 Name.starts_with("load.") || // Added in 3.9
278 Name.starts_with("loadu.") || // Added in 3.9
279 Name.starts_with("lzcnt.") || // Added in 5.0
280 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
281 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
282 Name.starts_with("movddup") || // Added in 3.9
283 Name.starts_with("move.s") || // Added in 4.0
284 Name.starts_with("movshdup") || // Added in 3.9
285 Name.starts_with("movsldup") || // Added in 3.9
286 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
287 Name.starts_with("or.") || // Added in 3.9
288 Name.starts_with("pabs.") || // Added in 6.0
289 Name.starts_with("packssdw.") || // Added in 5.0
290 Name.starts_with("packsswb.") || // Added in 5.0
291 Name.starts_with("packusdw.") || // Added in 5.0
292 Name.starts_with("packuswb.") || // Added in 5.0
293 Name.starts_with("padd.") || // Added in 4.0
294 Name.starts_with("padds.") || // Added in 8.0
295 Name.starts_with("paddus.") || // Added in 8.0
296 Name.starts_with("palignr.") || // Added in 3.9
297 Name.starts_with("pand.") || // Added in 3.9
298 Name.starts_with("pandn.") || // Added in 3.9
299 Name.starts_with("pavg") || // Added in 6.0
300 Name.starts_with("pbroadcast") || // Added in 6.0
301 Name.starts_with("pcmpeq.") || // Added in 3.9
302 Name.starts_with("pcmpgt.") || // Added in 3.9
303 Name.starts_with("perm.df.") || // Added in 3.9
304 Name.starts_with("perm.di.") || // Added in 3.9
305 Name.starts_with("permvar.") || // Added in 7.0
306 Name.starts_with("pmaddubs.w.") || // Added in 7.0
307 Name.starts_with("pmaddw.d.") || // Added in 7.0
308 Name.starts_with("pmax") || // Added in 4.0
309 Name.starts_with("pmin") || // Added in 4.0
310 Name == "pmov.qd.256" || // Added in 9.0
311 Name == "pmov.qd.512" || // Added in 9.0
312 Name == "pmov.wb.256" || // Added in 9.0
313 Name == "pmov.wb.512" || // Added in 9.0
314 Name.starts_with("pmovsx") || // Added in 4.0
315 Name.starts_with("pmovzx") || // Added in 4.0
316 Name.starts_with("pmul.dq.") || // Added in 4.0
317 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
318 Name.starts_with("pmulh.w.") || // Added in 7.0
319 Name.starts_with("pmulhu.w.") || // Added in 7.0
320 Name.starts_with("pmull.") || // Added in 4.0
321 Name.starts_with("pmultishift.qb.") || // Added in 8.0
322 Name.starts_with("pmulu.dq.") || // Added in 4.0
323 Name.starts_with("por.") || // Added in 3.9
324 Name.starts_with("prol.") || // Added in 8.0
325 Name.starts_with("prolv.") || // Added in 8.0
326 Name.starts_with("pror.") || // Added in 8.0
327 Name.starts_with("prorv.") || // Added in 8.0
328 Name.starts_with("pshuf.b.") || // Added in 4.0
329 Name.starts_with("pshuf.d.") || // Added in 3.9
330 Name.starts_with("pshufh.w.") || // Added in 3.9
331 Name.starts_with("pshufl.w.") || // Added in 3.9
332 Name.starts_with("psll.d") || // Added in 4.0
333 Name.starts_with("psll.q") || // Added in 4.0
334 Name.starts_with("psll.w") || // Added in 4.0
335 Name.starts_with("pslli") || // Added in 4.0
336 Name.starts_with("psllv") || // Added in 4.0
337 Name.starts_with("psra.d") || // Added in 4.0
338 Name.starts_with("psra.q") || // Added in 4.0
339 Name.starts_with("psra.w") || // Added in 4.0
340 Name.starts_with("psrai") || // Added in 4.0
341 Name.starts_with("psrav") || // Added in 4.0
342 Name.starts_with("psrl.d") || // Added in 4.0
343 Name.starts_with("psrl.q") || // Added in 4.0
344 Name.starts_with("psrl.w") || // Added in 4.0
345 Name.starts_with("psrli") || // Added in 4.0
346 Name.starts_with("psrlv") || // Added in 4.0
347 Name.starts_with("psub.") || // Added in 4.0
348 Name.starts_with("psubs.") || // Added in 8.0
349 Name.starts_with("psubus.") || // Added in 8.0
350 Name.starts_with("pternlog.") || // Added in 7.0
351 Name.starts_with("punpckh") || // Added in 3.9
352 Name.starts_with("punpckl") || // Added in 3.9
353 Name.starts_with("pxor.") || // Added in 3.9
354 Name.starts_with("shuf.f") || // Added in 6.0
355 Name.starts_with("shuf.i") || // Added in 6.0
356 Name.starts_with("shuf.p") || // Added in 4.0
357 Name.starts_with("sqrt.p") || // Added in 7.0
358 Name.starts_with("store.b.") || // Added in 3.9
359 Name.starts_with("store.d.") || // Added in 3.9
360 Name.starts_with("store.p") || // Added in 3.9
361 Name.starts_with("store.q.") || // Added in 3.9
362 Name.starts_with("store.w.") || // Added in 3.9
363 Name == "store.ss" || // Added in 7.0
364 Name.starts_with("storeu.") || // Added in 3.9
365 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
366 Name.starts_with("ucmp.") || // Added in 5.0
367 Name.starts_with("unpckh.") || // Added in 3.9
368 Name.starts_with("unpckl.") || // Added in 3.9
369 Name.starts_with("valign.") || // Added in 4.0
370 Name == "vcvtph2ps.128" || // Added in 11.0
371 Name == "vcvtph2ps.256" || // Added in 11.0
372 Name.starts_with("vextract") || // Added in 4.0
373 Name.starts_with("vfmadd.") || // Added in 7.0
374 Name.starts_with("vfmaddsub.") || // Added in 7.0
375 Name.starts_with("vfnmadd.") || // Added in 7.0
376 Name.starts_with("vfnmsub.") || // Added in 7.0
377 Name.starts_with("vpdpbusd.") || // Added in 7.0
378 Name.starts_with("vpdpbusds.") || // Added in 7.0
379 Name.starts_with("vpdpwssd.") || // Added in 7.0
380 Name.starts_with("vpdpwssds.") || // Added in 7.0
381 Name.starts_with("vpermi2var.") || // Added in 7.0
382 Name.starts_with("vpermil.p") || // Added in 3.9
383 Name.starts_with("vpermilvar.") || // Added in 4.0
384 Name.starts_with("vpermt2var.") || // Added in 7.0
385 Name.starts_with("vpmadd52") || // Added in 7.0
386 Name.starts_with("vpshld.") || // Added in 7.0
387 Name.starts_with("vpshldv.") || // Added in 8.0
388 Name.starts_with("vpshrd.") || // Added in 7.0
389 Name.starts_with("vpshrdv.") || // Added in 8.0
390 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
391 Name.starts_with("xor.")); // Added in 3.9
392
393 if (Name.consume_front("mask3."))
394 // 'avx512.mask3.*'
395 return (Name.starts_with("vfmadd.") || // Added in 7.0
396 Name.starts_with("vfmaddsub.") || // Added in 7.0
397 Name.starts_with("vfmsub.") || // Added in 7.0
398 Name.starts_with("vfmsubadd.") || // Added in 7.0
399 Name.starts_with("vfnmsub.")); // Added in 7.0
400
401 if (Name.consume_front("maskz."))
402 // 'avx512.maskz.*'
403 return (Name.starts_with("pternlog.") || // Added in 7.0
404 Name.starts_with("vfmadd.") || // Added in 7.0
405 Name.starts_with("vfmaddsub.") || // Added in 7.0
406 Name.starts_with("vpdpbusd.") || // Added in 7.0
407 Name.starts_with("vpdpbusds.") || // Added in 7.0
408 Name.starts_with("vpdpwssd.") || // Added in 7.0
409 Name.starts_with("vpdpwssds.") || // Added in 7.0
410 Name.starts_with("vpermt2var.") || // Added in 7.0
411 Name.starts_with("vpmadd52") || // Added in 7.0
412 Name.starts_with("vpshldv.") || // Added in 8.0
413 Name.starts_with("vpshrdv.")); // Added in 8.0
414
415 // 'avx512.*'
416 return (Name == "movntdqa" || // Added in 5.0
417 Name == "pmul.dq.512" || // Added in 7.0
418 Name == "pmulu.dq.512" || // Added in 7.0
419 Name.starts_with("broadcastm") || // Added in 6.0
420 Name.starts_with("cmp.p") || // Added in 12.0
421 Name.starts_with("cvtb2mask.") || // Added in 7.0
422 Name.starts_with("cvtd2mask.") || // Added in 7.0
423 Name.starts_with("cvtmask2") || // Added in 5.0
424 Name.starts_with("cvtq2mask.") || // Added in 7.0
425 Name == "cvtusi2sd" || // Added in 7.0
426 Name.starts_with("cvtw2mask.") || // Added in 7.0
427 Name == "kand.w" || // Added in 7.0
428 Name == "kandn.w" || // Added in 7.0
429 Name == "knot.w" || // Added in 7.0
430 Name == "kor.w" || // Added in 7.0
431 Name == "kortestc.w" || // Added in 7.0
432 Name == "kortestz.w" || // Added in 7.0
433 Name.starts_with("kunpck") || // Added in 6.0
434 Name == "kxnor.w" || // Added in 7.0
435 Name == "kxor.w" || // Added in 7.0
436 Name.starts_with("padds.") || // Added in 8.0
437 Name.starts_with("pbroadcast") || // Added in 3.9
438 Name.starts_with("prol") || // Added in 8.0
439 Name.starts_with("pror") || // Added in 8.0
440 Name.starts_with("psll.dq") || // Added in 3.9
441 Name.starts_with("psrl.dq") || // Added in 3.9
442 Name.starts_with("psubs.") || // Added in 8.0
443 Name.starts_with("ptestm") || // Added in 6.0
444 Name.starts_with("ptestnm") || // Added in 6.0
445 Name.starts_with("storent.") || // Added in 3.9
446 Name.starts_with("vbroadcast.s") || // Added in 7.0
447 Name.starts_with("vpshld.") || // Added in 8.0
448 Name.starts_with("vpshrd.")); // Added in 8.0
449 }
450
451 if (Name.consume_front("fma."))
452 return (Name.starts_with("vfmadd.") || // Added in 7.0
453 Name.starts_with("vfmsub.") || // Added in 7.0
454 Name.starts_with("vfmsubadd.") || // Added in 7.0
455 Name.starts_with("vfnmadd.") || // Added in 7.0
456 Name.starts_with("vfnmsub.")); // Added in 7.0
457
458 if (Name.consume_front("fma4."))
459 return Name.starts_with("vfmadd.s"); // Added in 7.0
460
461 if (Name.consume_front("sse."))
462 return (Name == "add.ss" || // Added in 4.0
463 Name == "cvtsi2ss" || // Added in 7.0
464 Name == "cvtsi642ss" || // Added in 7.0
465 Name == "div.ss" || // Added in 4.0
466 Name == "mul.ss" || // Added in 4.0
467 Name.starts_with("sqrt.p") || // Added in 7.0
468 Name == "sqrt.ss" || // Added in 7.0
469 Name.starts_with("storeu.") || // Added in 3.9
470 Name == "sub.ss"); // Added in 4.0
471
472 if (Name.consume_front("sse2."))
473 return (Name == "add.sd" || // Added in 4.0
474 Name == "cvtdq2pd" || // Added in 3.9
475 Name == "cvtdq2ps" || // Added in 7.0
476 Name == "cvtps2pd" || // Added in 3.9
477 Name == "cvtsi2sd" || // Added in 7.0
478 Name == "cvtsi642sd" || // Added in 7.0
479 Name == "cvtss2sd" || // Added in 7.0
480 Name == "div.sd" || // Added in 4.0
481 Name == "mul.sd" || // Added in 4.0
482 Name.starts_with("padds.") || // Added in 8.0
483 Name.starts_with("paddus.") || // Added in 8.0
484 Name.starts_with("pcmpeq.") || // Added in 3.1
485 Name.starts_with("pcmpgt.") || // Added in 3.1
486 Name == "pmaxs.w" || // Added in 3.9
487 Name == "pmaxu.b" || // Added in 3.9
488 Name == "pmins.w" || // Added in 3.9
489 Name == "pminu.b" || // Added in 3.9
490 Name == "pmulu.dq" || // Added in 7.0
491 Name.starts_with("pshuf") || // Added in 3.9
492 Name.starts_with("psll.dq") || // Added in 3.7
493 Name.starts_with("psrl.dq") || // Added in 3.7
494 Name.starts_with("psubs.") || // Added in 8.0
495 Name.starts_with("psubus.") || // Added in 8.0
496 Name.starts_with("sqrt.p") || // Added in 7.0
497 Name == "sqrt.sd" || // Added in 7.0
498 Name == "storel.dq" || // Added in 3.9
499 Name.starts_with("storeu.") || // Added in 3.9
500 Name == "sub.sd"); // Added in 4.0
501
502 if (Name.consume_front("sse41."))
503 return (Name.starts_with("blendp") || // Added in 3.7
504 Name == "movntdqa" || // Added in 5.0
505 Name == "pblendw" || // Added in 3.7
506 Name == "pmaxsb" || // Added in 3.9
507 Name == "pmaxsd" || // Added in 3.9
508 Name == "pmaxud" || // Added in 3.9
509 Name == "pmaxuw" || // Added in 3.9
510 Name == "pminsb" || // Added in 3.9
511 Name == "pminsd" || // Added in 3.9
512 Name == "pminud" || // Added in 3.9
513 Name == "pminuw" || // Added in 3.9
514 Name.starts_with("pmovsx") || // Added in 3.8
515 Name.starts_with("pmovzx") || // Added in 3.9
516 Name == "pmuldq"); // Added in 7.0
517
518 if (Name.consume_front("sse42."))
519 return Name == "crc32.64.8"; // Added in 3.4
520
521 if (Name.consume_front("sse4a."))
522 return Name.starts_with("movnt."); // Added in 3.9
523
524 if (Name.consume_front("ssse3."))
525 return (Name == "pabs.b.128" || // Added in 6.0
526 Name == "pabs.d.128" || // Added in 6.0
527 Name == "pabs.w.128"); // Added in 6.0
528
529 if (Name.consume_front("xop."))
530 return (Name == "vpcmov" || // Added in 3.8
531 Name == "vpcmov.256" || // Added in 5.0
532 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
533 Name.starts_with("vprot")); // Added in 8.0
534
    // Names with none of the feature prefixes handled above.
535 return (Name == "addcarry.u32" || // Added in 8.0
536 Name == "addcarry.u64" || // Added in 8.0
537 Name == "addcarryx.u32" || // Added in 8.0
538 Name == "addcarryx.u64" || // Added in 8.0
539 Name == "subborrow.u32" || // Added in 8.0
540 Name == "subborrow.u64" || // Added in 8.0
541 Name.starts_with("vcvtph2ps.")); // Added in 11.0
542}
543
545 Function *&NewFn) {
546 // Only handle intrinsics that start with "x86.".
547 if (!Name.consume_front("x86."))
548 return false;
549
550 if (shouldUpgradeX86Intrinsic(F, Name)) {
551 NewFn = nullptr;
552 return true;
553 }
554
555 if (Name == "rdtscp") { // Added in 8.0
556 // If this intrinsic has 0 operands, it's the new version.
557 if (F->getFunctionType()->getNumParams() == 0)
558 return false;
559
560 rename(F);
561 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
562 Intrinsic::x86_rdtscp);
563 return true;
564 }
565
567
568 // SSE4.1 ptest functions may have an old signature.
569 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
571 .Case("c", Intrinsic::x86_sse41_ptestc)
572 .Case("z", Intrinsic::x86_sse41_ptestz)
573 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
576 return upgradePTESTIntrinsic(F, ID, NewFn);
577
578 return false;
579 }
580
581 // Several blend and other instructions with masks used the wrong number of
582 // bits.
583
584 // Added in 3.6
586 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
587 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
588 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
589 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
590 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
591 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
594 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
595
596 if (Name.consume_front("avx512.")) {
597 if (Name.consume_front("mask.cmp.")) {
598 // Added in 7.0
600 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
608 return upgradeX86MaskedFPCompare(F, ID, NewFn);
609 } else if (Name.starts_with("vpdpbusd.") ||
610 Name.starts_with("vpdpbusds.")) {
611 // Added in 21.1
613 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
614 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
615 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
616 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
617 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
618 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
621 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
622 } else if (Name.starts_with("vpdpwssd.") ||
623 Name.starts_with("vpdpwssds.")) {
624 // Added in 21.1
626 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
627 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
628 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
629 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
630 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
631 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
634 return upgradeX86MultiplyAddWords(F, ID, NewFn);
635 }
636 return false; // No other 'x86.avx512.*'.
637 }
638
639 if (Name.consume_front("avx2.")) {
640 if (Name.consume_front("vpdpb")) {
641 // Added in 21.1
643 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
644 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
645 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
646 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
647 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
648 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
649 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
650 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
651 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
652 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
653 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
654 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
657 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
658 } else if (Name.consume_front("vpdpw")) {
659 // Added in 21.1
661 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
662 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
663 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
664 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
665 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
666 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
667 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
668 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
669 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
670 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
671 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
672 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
675 return upgradeX86MultiplyAddWords(F, ID, NewFn);
676 }
677 return false; // No other 'x86.avx2.*'
678 }
679
680 if (Name.consume_front("avx10.")) {
681 if (Name.consume_front("vpdpb")) {
682 // Added in 21.1
684 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
685 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
686 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
687 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
688 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
689 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
692 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
693 } else if (Name.consume_front("vpdpw")) {
695 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
696 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
697 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
698 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
699 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
700 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
703 return upgradeX86MultiplyAddWords(F, ID, NewFn);
704 }
705 return false; // No other 'x86.avx10.*'
706 }
707
708 if (Name.consume_front("avx512bf16.")) {
709 // Added in 9.0
711 .Case("cvtne2ps2bf16.128",
712 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .Case("cvtne2ps2bf16.256",
714 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .Case("cvtne2ps2bf16.512",
716 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .Case("mask.cvtneps2bf16.128",
718 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .Case("cvtneps2bf16.256",
720 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .Case("cvtneps2bf16.512",
722 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
725 return upgradeX86BF16Intrinsic(F, ID, NewFn);
726
727 // Added in 9.0
729 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
734 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
735 return false; // No other 'x86.avx512bf16.*'.
736 }
737
738 if (Name.consume_front("xop.")) {
740 if (Name.starts_with("vpermil2")) { // Added in 3.9
741 // Upgrade any XOP PERMIL2 index operand still using a float/double
742 // vector.
743 auto Idx = F->getFunctionType()->getParamType(2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
753 else
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
755 }
756 } else if (F->arg_size() == 2)
757 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
759 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
760 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
762
764 rename(F);
765 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
766 return true;
767 }
768 return false; // No other 'x86.xop.*'
769 }
770
771 if (Name == "seh.recoverfp") {
772 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
773 Intrinsic::eh_recoverfp);
774 return true;
775 }
776
777 return false;
778}
779
780// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
781// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
783 StringRef Name,
784 Function *&NewFn) {
785 if (Name.starts_with("rbit")) {
786 // '(arm|aarch64).rbit'.
788 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
789 return true;
790 }
791
792 if (Name == "thread.pointer") {
793 // '(arm|aarch64).thread.pointer'.
795 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
796 return true;
797 }
798
799 bool Neon = Name.consume_front("neon.");
800 if (Neon) {
801 // '(arm|aarch64).neon.*'.
802 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
803 // v16i8 respectively.
804 if (Name.consume_front("bfdot.")) {
805 // (arm|aarch64).neon.bfdot.*'.
808 .Cases({"v2f32.v8i8", "v4f32.v16i8"},
809 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
810 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
813 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
814 assert((OperandWidth == 64 || OperandWidth == 128) &&
815 "Unexpected operand width");
816 LLVMContext &Ctx = F->getParent()->getContext();
817 std::array<Type *, 2> Tys{
818 {F->getReturnType(),
819 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
820 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
821 return true;
822 }
823 return false; // No other '(arm|aarch64).neon.bfdot.*'.
824 }
825
826 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
827 // anymore and accept v8bf16 instead of v16i8.
828 if (Name.consume_front("bfm")) {
829 // (arm|aarch64).neon.bfm*'.
830 if (Name.consume_back(".v4f32.v16i8")) {
831 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
834 .Case("mla",
835 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
836 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
837 .Case("lalb",
838 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
839 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
840 .Case("lalt",
841 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
842 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
845 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
846 return true;
847 }
848 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
849 }
850 return false; // No other '(arm|aarch64).neon.bfm*.
851 }
852 // Continue on to Aarch64 Neon or Arm Neon.
853 }
854 // Continue on to Arm or Aarch64.
855
856 if (IsArm) {
857 // 'arm.*'.
858 if (Neon) {
859 // 'arm.neon.*'.
861 .StartsWith("vclz.", Intrinsic::ctlz)
862 .StartsWith("vcnt.", Intrinsic::ctpop)
863 .StartsWith("vqadds.", Intrinsic::sadd_sat)
864 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
865 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
866 .StartsWith("vqsubu.", Intrinsic::usub_sat)
867 .StartsWith("vrinta.", Intrinsic::round)
868 .StartsWith("vrintn.", Intrinsic::roundeven)
869 .StartsWith("vrintm.", Intrinsic::floor)
870 .StartsWith("vrintp.", Intrinsic::ceil)
871 .StartsWith("vrintx.", Intrinsic::rint)
872 .StartsWith("vrintz.", Intrinsic::trunc)
875 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
876 F->arg_begin()->getType());
877 return true;
878 }
879
880 if (Name.consume_front("vst")) {
881 // 'arm.neon.vst*'.
882 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
884 if (vstRegex.match(Name, &Groups)) {
885 static const Intrinsic::ID StoreInts[] = {
886 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
887 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
888
889 static const Intrinsic::ID StoreLaneInts[] = {
890 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
891 Intrinsic::arm_neon_vst4lane};
892
893 auto fArgs = F->getFunctionType()->params();
894 Type *Tys[] = {fArgs[0], fArgs[1]};
895 if (Groups[1].size() == 1)
897 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
898 else
900 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
901 return true;
902 }
903 return false; // No other 'arm.neon.vst*'.
904 }
905
906 return false; // No other 'arm.neon.*'.
907 }
908
909 if (Name.consume_front("mve.")) {
910 // 'arm.mve.*'.
911 if (Name == "vctp64") {
912 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
913 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
914 // the function and deal with it below in UpgradeIntrinsicCall.
915 rename(F);
916 return true;
917 }
918 return false; // Not 'arm.mve.vctp64'.
919 }
920
921 if (Name.starts_with("vrintn.v")) {
923 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
924 return true;
925 }
926
927 // These too are changed to accept a v2i1 instead of the old v4i1.
928 if (Name.consume_back(".v4i1")) {
929 // 'arm.mve.*.v4i1'.
930 if (Name.consume_back(".predicated.v2i64.v4i32"))
931 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
932 return Name == "mull.int" || Name == "vqdmull";
933
934 if (Name.consume_back(".v2i64")) {
935 // 'arm.mve.*.v2i64.v4i1'
936 bool IsGather = Name.consume_front("vldr.gather.");
937 if (IsGather || Name.consume_front("vstr.scatter.")) {
938 if (Name.consume_front("base.")) {
939 // Optional 'wb.' prefix.
940 Name.consume_front("wb.");
941 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
942 // predicated.v2i64.v2i64.v4i1'.
943 return Name == "predicated.v2i64";
944 }
945
946 if (Name.consume_front("offset.predicated."))
947 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
948 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
949
950 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
951 return false;
952 }
953
954 return false; // No other 'arm.mve.*.v2i64.v4i1'.
955 }
956 return false; // No other 'arm.mve.*.v4i1'.
957 }
958 return false; // No other 'arm.mve.*'.
959 }
960
961 if (Name.consume_front("cde.vcx")) {
962 // 'arm.cde.vcx*'.
963 if (Name.consume_back(".predicated.v2i64.v4i1"))
964 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
965 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
966 Name == "3q" || Name == "3qa";
967
968 return false; // No other 'arm.cde.vcx*'.
969 }
970 } else {
971 // 'aarch64.*'.
972 if (Neon) {
973 // 'aarch64.neon.*'.
975 .StartsWith("frintn", Intrinsic::roundeven)
976 .StartsWith("rbit", Intrinsic::bitreverse)
979 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
980 F->arg_begin()->getType());
981 return true;
982 }
983
984 if (Name.starts_with("addp")) {
985 // 'aarch64.neon.addp*'.
986 if (F->arg_size() != 2)
987 return false; // Invalid IR.
988 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
989 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
991 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
992 return true;
993 }
994 }
995
996 // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
997 if (Name.starts_with("bfcvt")) {
998 NewFn = nullptr;
999 return true;
1000 }
1001
1002 return false; // No other 'aarch64.neon.*'.
1003 }
1004 if (Name.consume_front("sve.")) {
1005 // 'aarch64.sve.*'.
1006 if (Name.consume_front("bf")) {
1007 if (Name == "mmla") {
1008 Type *Tys[] = {F->getReturnType(),
1009 std::next(F->arg_begin())->getType()};
1011 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1012 return true;
1013 }
1014 if (Name.consume_back(".lane")) {
1015 // 'aarch64.sve.bf*.lane'.
1018 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1019 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1020 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1023 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1024 return true;
1025 }
1026 return false; // No other 'aarch64.sve.bf*.lane'.
1027 }
1028 return false; // No other 'aarch64.sve.bf*'.
1029 }
1030
1031 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
1032 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1033 NewFn = nullptr;
1034 return true;
1035 }
1036
1037 if (Name.consume_front("addqv")) {
1038 // 'aarch64.sve.addqv'.
1039 if (!F->getReturnType()->isFPOrFPVectorTy())
1040 return false;
1041
1042 auto Args = F->getFunctionType()->params();
1043 Type *Tys[] = {F->getReturnType(), Args[1]};
1045 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1046 return true;
1047 }
1048
1049 if (Name.consume_front("ld")) {
1050 // 'aarch64.sve.ld*'.
1051 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1052 if (LdRegex.match(Name)) {
1053 Type *ScalarTy =
1054 cast<VectorType>(F->getReturnType())->getElementType();
1055 ElementCount EC =
1056 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
1057 assert(F->arg_size() == 2 &&
1058 "Expected 2 arguments for ld* intrinsic.");
1059 Type *PtrTy = F->getArg(1)->getType();
1060 Type *Ty = VectorType::get(ScalarTy, EC);
1061 static const Intrinsic::ID LoadIDs[] = {
1062 Intrinsic::aarch64_sve_ld2_sret,
1063 Intrinsic::aarch64_sve_ld3_sret,
1064 Intrinsic::aarch64_sve_ld4_sret,
1065 };
1067 F->getParent(), LoadIDs[Name[0] - '2'], {Ty, PtrTy});
1068 return true;
1069 }
1070 return false; // No other 'aarch64.sve.ld*'.
1071 }
1072
1073 if (Name.consume_front("tuple.")) {
1074 // 'aarch64.sve.tuple.*'.
1075 if (Name.starts_with("get")) {
1076 // 'aarch64.sve.tuple.get*'.
1077 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1079 F->getParent(), Intrinsic::vector_extract, Tys);
1080 return true;
1081 }
1082
1083 if (Name.starts_with("set")) {
1084 // 'aarch64.sve.tuple.set*'.
1085 auto Args = F->getFunctionType()->params();
1086 Type *Tys[] = {Args[0], Args[2], Args[1]};
1088 F->getParent(), Intrinsic::vector_insert, Tys);
1089 return true;
1090 }
1091
1092 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1093 if (CreateTupleRegex.match(Name)) {
1094 // 'aarch64.sve.tuple.create*'.
1095 auto Args = F->getFunctionType()->params();
1096 Type *Tys[] = {F->getReturnType(), Args[1]};
1098 F->getParent(), Intrinsic::vector_insert, Tys);
1099 return true;
1100 }
1101 return false; // No other 'aarch64.sve.tuple.*'.
1102 }
1103
1104 if (Name.starts_with("rev.nxv")) {
1105 // 'aarch64.sve.rev.<Ty>'
1107 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
1108 return true;
1109 }
1110
1111 return false; // No other 'aarch64.sve.*'.
1112 }
1113 }
1114 return false; // No other 'arm.*', 'aarch64.*'.
1115}
1116
1118 StringRef Name) {
1119 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
1122 .Case("im2col.3d",
1123 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1124 .Case("im2col.4d",
1125 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1126 .Case("im2col.5d",
1127 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1128 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1129 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1130 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1131 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1132 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1134
1136 return ID;
1137
1138 // These intrinsics may need upgrade for two reasons:
1139 // (1) When the address-space of the first argument is shared[AS=3]
1140 // (and we upgrade it to use shared_cluster address-space[AS=7])
1141 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1143 return ID;
1144
1145 // (2) When there are only two boolean flag arguments at the end:
1146 //
1147 // The last three parameters of the older version of these
1148 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1149 //
1150 // The newer version reads as:
1151 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1152 //
1153 // So, when the type of the [N-3]rd argument is "not i1", then
1154 // it is the older version and we need to upgrade.
1155 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1156 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
1157 if (!ArgType->isIntegerTy(1))
1158 return ID;
1159 }
1160
1162}
1163
1165 StringRef Name) {
1166 if (Name.consume_front("mapa.shared.cluster"))
1167 if (F->getReturnType()->getPointerAddressSpace() ==
1169 return Intrinsic::nvvm_mapa_shared_cluster;
1170
1171 if (Name.consume_front("cp.async.bulk.")) {
1174 .Case("global.to.shared.cluster",
1175 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1176 .Case("shared.cta.to.cluster",
1177 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1179
1181 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1183 return ID;
1184 }
1185
1187}
1188
1190 if (Name.consume_front("fma.rn."))
1191 return StringSwitch<Intrinsic::ID>(Name)
1192 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1193 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1194 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1195 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1197
1198 if (Name.consume_front("fmax."))
1199 return StringSwitch<Intrinsic::ID>(Name)
1200 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1201 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1202 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1203 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1204 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1205 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1206 .Case("ftz.nan.xorsign.abs.bf16",
1207 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1208 .Case("ftz.nan.xorsign.abs.bf16x2",
1209 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1210 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1211 .Case("ftz.xorsign.abs.bf16x2",
1212 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1213 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1214 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1215 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1216 .Case("nan.xorsign.abs.bf16x2",
1217 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1218 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1219 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1221
1222 if (Name.consume_front("fmin."))
1223 return StringSwitch<Intrinsic::ID>(Name)
1224 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1225 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1226 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1227 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1228 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1229 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1230 .Case("ftz.nan.xorsign.abs.bf16",
1231 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1232 .Case("ftz.nan.xorsign.abs.bf16x2",
1233 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1234 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1235 .Case("ftz.xorsign.abs.bf16x2",
1236 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1237 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1238 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1239 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1240 .Case("nan.xorsign.abs.bf16x2",
1241 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1242 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1243 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1245
1246 if (Name.consume_front("neg."))
1247 return StringSwitch<Intrinsic::ID>(Name)
1248 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1249 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1251
1253}
1254
1256 return Name.consume_front("local") || Name.consume_front("shared") ||
1257 Name.consume_front("global") || Name.consume_front("constant") ||
1258 Name.consume_front("param");
1259}
1260
1262 const FunctionType *FuncTy) {
1263 Type *HalfTy = Type::getHalfTy(FuncTy->getContext());
1264 if (Name.starts_with("to.fp16")) {
1265 return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0),
1266 HalfTy) &&
1267 CastInst::castIsValid(Instruction::BitCast, HalfTy,
1268 FuncTy->getReturnType());
1269 }
1270
1271 if (Name.starts_with("from.fp16")) {
1272 return CastInst::castIsValid(Instruction::BitCast, FuncTy->getParamType(0),
1273 HalfTy) &&
1274 CastInst::castIsValid(Instruction::FPExt, HalfTy,
1275 FuncTy->getReturnType());
1276 }
1277
1278 return false;
1279}
1280
1282 bool CanUpgradeDebugIntrinsicsToRecords) {
1283 assert(F && "Illegal to upgrade a non-existent Function.");
1284
1285 StringRef Name = F->getName();
1286
1287 // Quickly eliminate it, if it's not a candidate.
1288 if (!Name.consume_front("llvm.") || Name.empty())
1289 return false;
1290
1291 switch (Name[0]) {
1292 default: break;
1293 case 'a': {
1294 bool IsArm = Name.consume_front("arm.");
1295 if (IsArm || Name.consume_front("aarch64.")) {
1296 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1297 return true;
1298 break;
1299 }
1300
1301 if (Name.consume_front("amdgcn.")) {
1302 if (Name == "alignbit") {
1303 // Target specific intrinsic became redundant
1305 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1306 return true;
1307 }
1308
1309 if (Name.consume_front("atomic.")) {
1310 if (Name.starts_with("inc") || Name.starts_with("dec") ||
1311 Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1312 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1313 // and usub_sat so there's no new declaration.
1314 NewFn = nullptr;
1315 return true;
1316 }
1317 break; // No other 'amdgcn.atomic.*'
1318 }
1319
1320 switch (F->getIntrinsicID()) {
1321 default:
1322 break;
1323 // Legacy wmma iu intrinsics without the optional clamp operand.
1324 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1325 if (F->arg_size() == 7) {
1326 NewFn = nullptr;
1327 return true;
1328 }
1329 break;
1330 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1331 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1332 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1333 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1334 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1335 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1336 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1337 if (F->arg_size() == 8) {
1338 NewFn = nullptr;
1339 return true;
1340 }
1341 break;
1342 }
1343
1344 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1345 Name.consume_front("flat.atomic.")) {
1346 if (Name.starts_with("fadd") ||
1347 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1348 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1349 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1350 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1351 // declaration.
1352 NewFn = nullptr;
1353 return true;
1354 }
1355 }
1356
1357 if (Name.starts_with("ldexp.")) {
1358 // Target specific intrinsic became redundant
1360 F->getParent(), Intrinsic::ldexp,
1361 {F->getReturnType(), F->getArg(1)->getType()});
1362 return true;
1363 }
1364 break; // No other 'amdgcn.*'
1365 }
1366
1367 break;
1368 }
1369 case 'c': {
1370 if (F->arg_size() == 1) {
1371 if (Name.consume_front("convert.")) {
1372 if (convertIntrinsicValidType(Name, F->getFunctionType())) {
1373 NewFn = nullptr;
1374 return true;
1375 }
1376 }
1377
1379 .StartsWith("ctlz.", Intrinsic::ctlz)
1380 .StartsWith("cttz.", Intrinsic::cttz)
1383 rename(F);
1384 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1385 F->arg_begin()->getType());
1386 return true;
1387 }
1388 }
1389
1390 if (F->arg_size() == 2 && Name == "coro.end") {
1391 rename(F);
1392 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1393 Intrinsic::coro_end);
1394 return true;
1395 }
1396
1397 break;
1398 }
1399 case 'd':
1400 if (Name.consume_front("dbg.")) {
1401 // Mark debug intrinsics for upgrade to new debug format.
1402 if (CanUpgradeDebugIntrinsicsToRecords) {
1403 if (Name == "addr" || Name == "value" || Name == "assign" ||
1404 Name == "declare" || Name == "label") {
1405 // There's no function to replace these with.
1406 NewFn = nullptr;
1407 // But we do want these to get upgraded.
1408 return true;
1409 }
1410 }
1411 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1412 // converted to DbgVariableRecords later.
1413 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1414 rename(F);
1415 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1416 Intrinsic::dbg_value);
1417 return true;
1418 }
1419 break; // No other 'dbg.*'.
1420 }
1421 break;
1422 case 'e':
1423 if (Name.consume_front("experimental.vector.")) {
1426 // Skip over extract.last.active, otherwise it will be 'upgraded'
1427 // to a regular vector extract which is a different operation.
1428 .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1429 .StartsWith("extract.", Intrinsic::vector_extract)
1430 .StartsWith("insert.", Intrinsic::vector_insert)
1431 .StartsWith("reverse.", Intrinsic::vector_reverse)
1432 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1433 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1434 .StartsWith("partial.reduce.add",
1435 Intrinsic::vector_partial_reduce_add)
1438 const auto *FT = F->getFunctionType();
1440 if (ID == Intrinsic::vector_extract ||
1441 ID == Intrinsic::vector_interleave2)
1442 // Extracting overloads the return type.
1443 Tys.push_back(FT->getReturnType());
1444 if (ID != Intrinsic::vector_interleave2)
1445 Tys.push_back(FT->getParamType(0));
1446 if (ID == Intrinsic::vector_insert ||
1447 ID == Intrinsic::vector_partial_reduce_add)
1448 // Inserting overloads the inserted type.
1449 Tys.push_back(FT->getParamType(1));
1450 rename(F);
1451 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1452 return true;
1453 }
1454
1455 if (Name.consume_front("reduce.")) {
1457 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1458 if (R.match(Name, &Groups))
1460 .Case("add", Intrinsic::vector_reduce_add)
1461 .Case("mul", Intrinsic::vector_reduce_mul)
1462 .Case("and", Intrinsic::vector_reduce_and)
1463 .Case("or", Intrinsic::vector_reduce_or)
1464 .Case("xor", Intrinsic::vector_reduce_xor)
1465 .Case("smax", Intrinsic::vector_reduce_smax)
1466 .Case("smin", Intrinsic::vector_reduce_smin)
1467 .Case("umax", Intrinsic::vector_reduce_umax)
1468 .Case("umin", Intrinsic::vector_reduce_umin)
1469 .Case("fmax", Intrinsic::vector_reduce_fmax)
1470 .Case("fmin", Intrinsic::vector_reduce_fmin)
1472
1473 bool V2 = false;
1475 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1476 Groups.clear();
1477 V2 = true;
1478 if (R2.match(Name, &Groups))
1480 .Case("fadd", Intrinsic::vector_reduce_fadd)
1481 .Case("fmul", Intrinsic::vector_reduce_fmul)
1483 }
1485 rename(F);
1486 auto Args = F->getFunctionType()->params();
1487 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1488 {Args[V2 ? 1 : 0]});
1489 return true;
1490 }
1491 break; // No other 'expermental.vector.reduce.*'.
1492 }
1493
1494 if (Name.consume_front("splice"))
1495 return true;
1496 break; // No other 'experimental.vector.*'.
1497 }
1498 if (Name.consume_front("experimental.stepvector.")) {
1499 Intrinsic::ID ID = Intrinsic::stepvector;
1500 rename(F);
1502 F->getParent(), ID, F->getFunctionType()->getReturnType());
1503 return true;
1504 }
1505 break; // No other 'e*'.
1506 case 'f':
1507 if (Name.starts_with("flt.rounds")) {
1508 rename(F);
1509 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1510 Intrinsic::get_rounding);
1511 return true;
1512 }
1513 break;
1514 case 'i':
1515 if (Name.starts_with("invariant.group.barrier")) {
1516 // Rename invariant.group.barrier to launder.invariant.group
1517 auto Args = F->getFunctionType()->params();
1518 Type* ObjectPtr[1] = {Args[0]};
1519 rename(F);
1521 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1522 return true;
1523 }
1524 break;
1525 case 'l':
1526 if ((Name.starts_with("lifetime.start") ||
1527 Name.starts_with("lifetime.end")) &&
1528 F->arg_size() == 2) {
1529 Intrinsic::ID IID = Name.starts_with("lifetime.start")
1530 ? Intrinsic::lifetime_start
1531 : Intrinsic::lifetime_end;
1532 rename(F);
1533 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1534 F->getArg(0)->getType());
1535 return true;
1536 }
1537 break;
1538 case 'm': {
1539 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1540 // alignment parameter to embedding the alignment as an attribute of
1541 // the pointer args.
1542 if (unsigned ID = StringSwitch<unsigned>(Name)
1543 .StartsWith("memcpy.", Intrinsic::memcpy)
1544 .StartsWith("memmove.", Intrinsic::memmove)
1545 .Default(0)) {
1546 if (F->arg_size() == 5) {
1547 rename(F);
1548 // Get the types of dest, src, and len
1549 ArrayRef<Type *> ParamTypes =
1550 F->getFunctionType()->params().slice(0, 3);
1551 NewFn =
1552 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1553 return true;
1554 }
1555 }
1556 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1557 rename(F);
1558 // Get the types of dest, and len
1559 const auto *FT = F->getFunctionType();
1560 Type *ParamTypes[2] = {
1561 FT->getParamType(0), // Dest
1562 FT->getParamType(2) // len
1563 };
1564 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1565 Intrinsic::memset, ParamTypes);
1566 return true;
1567 }
1568
1569 unsigned MaskedID =
1571 .StartsWith("masked.load", Intrinsic::masked_load)
1572 .StartsWith("masked.gather", Intrinsic::masked_gather)
1573 .StartsWith("masked.store", Intrinsic::masked_store)
1574 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1575 .Default(0);
1576 if (MaskedID && F->arg_size() == 4) {
1577 rename(F);
1578 if (MaskedID == Intrinsic::masked_load ||
1579 MaskedID == Intrinsic::masked_gather) {
1581 F->getParent(), MaskedID,
1582 {F->getReturnType(), F->getArg(0)->getType()});
1583 return true;
1584 }
1586 F->getParent(), MaskedID,
1587 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1588 return true;
1589 }
1590 break;
1591 }
1592 case 'n': {
1593 if (Name.consume_front("nvvm.")) {
1594 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1595 if (F->arg_size() == 1) {
1596 Intrinsic::ID IID =
1598 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1599 .Case("clz.i", Intrinsic::ctlz)
1600 .Case("popc.i", Intrinsic::ctpop)
1602 if (IID != Intrinsic::not_intrinsic) {
1603 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1604 {F->getReturnType()});
1605 return true;
1606 }
1607 } else if (F->arg_size() == 2) {
1608 Intrinsic::ID IID =
1610 .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
1611 .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
1612 .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
1613 .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
1615 if (IID != Intrinsic::not_intrinsic) {
1616 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1617 {F->getReturnType()});
1618 return true;
1619 }
1620 }
1621
1622 // Check for nvvm intrinsics that need a return type adjustment.
1623 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1625 if (IID != Intrinsic::not_intrinsic) {
1626 NewFn = nullptr;
1627 return true;
1628 }
1629 }
1630
1631 // Upgrade Distributed Shared Memory Intrinsics
1633 if (IID != Intrinsic::not_intrinsic) {
1634 rename(F);
1635 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1636 return true;
1637 }
1638
1639 // Upgrade TMA copy G2S Intrinsics
1641 if (IID != Intrinsic::not_intrinsic) {
1642 rename(F);
1643 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1644 return true;
1645 }
1646
1647 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1648 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1649 //
1650 // TODO: We could add lohi.i2d.
1651 bool Expand = false;
1652 if (Name.consume_front("abs."))
1653 // nvvm.abs.{i,ii}
1654 Expand =
1655 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1656 else if (Name.consume_front("fabs."))
1657 // nvvm.fabs.{f,ftz.f,d}
1658 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1659 else if (Name.consume_front("ex2.approx."))
1660 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1661 Expand =
1662 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1663 else if (Name.consume_front("atomic.load."))
1664 // nvvm.atomic.load.add.{f32,f64}.p
1665 // nvvm.atomic.load.{inc,dec}.32.p
1666 Expand = StringSwitch<bool>(Name)
1667 .StartsWith("add.f32.p", true)
1668 .StartsWith("add.f64.p", true)
1669 .StartsWith("inc.32.p", true)
1670 .StartsWith("dec.32.p", true)
1671 .Default(false);
1672 else if (Name.consume_front("bitcast."))
1673 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1674 Expand =
1675 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1676 else if (Name.consume_front("rotate."))
1677 // nvvm.rotate.{b32,b64,right.b64}
1678 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1679 else if (Name.consume_front("ptr.gen.to."))
1680 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1681 Expand = consumeNVVMPtrAddrSpace(Name);
1682 else if (Name.consume_front("ptr."))
1683 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1684 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1685 else if (Name.consume_front("ldg.global."))
1686 // nvvm.ldg.global.{i,p,f}
1687 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1688 Name.starts_with("p."));
1689 else
1690 Expand = StringSwitch<bool>(Name)
1691 .Case("barrier0", true)
1692 .Case("barrier.n", true)
1693 .Case("barrier.sync.cnt", true)
1694 .Case("barrier.sync", true)
1695 .Case("barrier", true)
1696 .Case("bar.sync", true)
1697 .Case("barrier0.popc", true)
1698 .Case("barrier0.and", true)
1699 .Case("barrier0.or", true)
1700 .Case("clz.ll", true)
1701 .Case("popc.ll", true)
1702 .Case("h2f", true)
1703 .Case("swap.lo.hi.b64", true)
1704 .Case("tanh.approx.f32", true)
1705 .Default(false);
1706
1707 if (Expand) {
1708 NewFn = nullptr;
1709 return true;
1710 }
1711 break; // No other 'nvvm.*'.
1712 }
1713 break;
1714 }
1715 case 'o':
1716 if (Name.starts_with("objectsize.")) {
1717 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1718 if (F->arg_size() == 2 || F->arg_size() == 3) {
1719 rename(F);
1720 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1721 Intrinsic::objectsize, Tys);
1722 return true;
1723 }
1724 }
1725 break;
1726
1727 case 'p':
1728 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1729 rename(F);
1731 F->getParent(), Intrinsic::ptr_annotation,
1732 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1733 return true;
1734 }
1735 break;
1736
1737 case 'r': {
1738 if (Name.consume_front("riscv.")) {
1741 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1742 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1743 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1744 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1747 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1748 rename(F);
1749 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1750 return true;
1751 }
1752 break; // No other applicable upgrades.
1753 }
1754
1756 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1757 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1760 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1761 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1762 rename(F);
1763 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1764 return true;
1765 }
1766 break; // No other applicable upgrades.
1767 }
1768
1770 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1771 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1772 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1773 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1774 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1775 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1778 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1779 rename(F);
1780 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1781 return true;
1782 }
1783 break; // No other applicable upgrades.
1784 }
1785
1786 // Replace llvm.riscv.clmul with llvm.clmul.
1787 if (Name == "clmul.i32" || Name == "clmul.i64") {
1789 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1790 return true;
1791 }
1792
1793 break; // No other 'riscv.*' intrinsics
1794 }
1795 } break;
1796
1797 case 's':
1798 if (Name == "stackprotectorcheck") {
1799 NewFn = nullptr;
1800 return true;
1801 }
1802 break;
1803
1804 case 't':
1805 if (Name == "thread.pointer") {
1807 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1808 return true;
1809 }
1810 break;
1811
1812 case 'v': {
1813 if (Name == "var.annotation" && F->arg_size() == 4) {
1814 rename(F);
1816 F->getParent(), Intrinsic::var_annotation,
1817 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1818 return true;
1819 }
1820 if (Name.consume_front("vector.splice")) {
1821 if (Name.starts_with(".left") || Name.starts_with(".right"))
1822 break;
1823 return true;
1824 }
1825 break;
1826 }
1827
1828 case 'w':
1829 if (Name.consume_front("wasm.")) {
1832 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1833 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1834 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1837 rename(F);
1838 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1839 F->getReturnType());
1840 return true;
1841 }
1842
1843 if (Name.consume_front("dot.i8x16.i7x16.")) {
1845 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1846 .Case("add.signed",
1847 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1850 rename(F);
1851 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1852 return true;
1853 }
1854 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1855 }
1856 break; // No other 'wasm.*'.
1857 }
1858 break;
1859
1860 case 'x':
1861 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1862 return true;
1863 }
1864
1865 auto *ST = dyn_cast<StructType>(F->getReturnType());
1866 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1867 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1868 // Replace return type with literal non-packed struct. Only do this for
1869 // intrinsics declared to return a struct, not for intrinsics with
1870 // overloaded return type, in which case the exact struct type will be
1871 // mangled into the name.
1872 if (Intrinsic::hasStructReturnType(F->getIntrinsicID())) {
1873 FunctionType *FT = F->getFunctionType();
1874 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1875 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1876 std::string Name = F->getName().str();
1877 rename(F);
1878 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1879 Name, F->getParent());
1880
1881 // The new function may also need remangling.
1882 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1883 NewFn = *Result;
1884 return true;
1885 }
1886 }
1887
1888 // Remangle our intrinsic since we upgrade the mangling
1890 if (Result != std::nullopt) {
1891 NewFn = *Result;
1892 return true;
1893 }
1894
1895 // This may not belong here. This function is effectively being overloaded
1896 // to both detect an intrinsic which needs upgrading, and to provide the
1897 // upgraded form of the intrinsic. We should perhaps have two separate
1898 // functions for this.
1899 return false;
1900}
1901
// NOTE(review): the first line of this definition (return type, function name
// and the leading parameters) is absent from this listing. Judging by the body
// the earlier parameters are F and NewFn -- confirm against the full file.
1903 bool CanUpgradeDebugIntrinsicsToRecords) {
// Start from "no replacement"; NewFn is handed to upgradeIntrinsicFunction1,
// which may assign a new declaration to it.
1904 NewFn = nullptr;
1905 bool Upgraded =
1906 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1907
1908 // Upgrade intrinsic attributes. This does not change the function.
// When a replacement declaration was produced, the attributes are refreshed on
// it instead of on the original function.
1909 if (NewFn)
1910 F = NewFn;
1911 if (Intrinsic::ID id = F->getIntrinsicID()) {
1912 // Only do this if the intrinsic signature is valid.
1913 SmallVector<Type *> OverloadTys;
1914 if (Intrinsic::isSignatureValid(id, F->getFunctionType(), OverloadTys))
1915 F->setAttributes(
1916 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1917 }
// The attribute refresh above does not influence the returned flag.
1918 return Upgraded;
1919}
1920
// NOTE(review): several lines of this definition are absent from this listing:
// the signature, the declarations of ATy and STy, and the last operand of the
// ConstantStruct::get call in the loop. The comments below describe only what
// is visible.
//
// Visible behaviour: a global named "llvm.global_ctors" or "llvm.global_dtors"
// that has an initializer and whose element struct has exactly two fields is
// rebuilt with three-field elements (the third field type is a pointer) and
// returned as a newly allocated GlobalVariable. In every other case the
// function returns nullptr.
1922 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1923 GV->getName() == "llvm.global_dtors")) ||
1924 !GV->hasInitializer())
1925 return nullptr;
1927 if (!ATy)
1928 return nullptr;
// Only two-field element structs are rewritten.
1930 if (!STy || STy->getNumElements() != 2)
1931 return nullptr;
1932
1933 LLVMContext &C = GV->getContext();
1934 IRBuilder<> IRB(C);
// New element type: the two existing field types followed by a pointer type.
1935 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1936 IRB.getPtrTy());
1937 Constant *Init = GV->getInitializer();
1938 unsigned N = Init->getNumOperands();
1939 std::vector<Constant *> NewCtors(N);
// Copy fields 0 and 1 of every existing entry into a widened entry.
// NOTE(review): the value used for the new third field is on a line missing
// from this listing.
1940 for (unsigned i = 0; i != N; ++i) {
1941 auto Ctor = cast<Constant>(Init->getOperand(i));
1942 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1943 Ctor->getAggregateElement(1),
1945 }
1946 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1947
// The replacement reuses GV's linkage and name.
// NOTE(review): the literal `false` is presumably the isConstant flag --
// confirm against GlobalVariable's constructor.
1948 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1949 NewInit, GV->getName());
1950}
1951
1952// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1953// to byte shuffles.
// NOTE(review): the first line of the signature is missing from this listing;
// the body uses Builder and Op in addition to the visible Shift parameter.
1955 unsigned Shift) {
1956 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// Byte count of the vector: eight bytes per (64-bit) element.
1957 unsigned NumElts = ResultTy->getNumElements() * 8;
1958
1959 // Bitcast from a 64-bit element type to a byte element type.
1960 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1961 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1962
1963 // We'll be shuffling in zeroes.
1964 Value *Res = Constant::getNullValue(VecTy);
1965
1966 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1967 // we'll just return the zero vector.
1968 if (Shift < 16) {
// Index buffer sized for the largest case handled here (64 bytes).
1969 int Idxs[64];
1970 // 256/512-bit version is split into 2/4 16-byte lanes.
1971 for (unsigned l = 0; l != NumElts; l += 16)
1972 for (unsigned i = 0; i != 16; ++i) {
// The shuffle below takes the zero vector as first operand and Op as second,
// so indices >= NumElts read from Op. Byte i of a lane reads Op byte
// (i - Shift); when that would fall before the lane start the index is moved
// into the first (zero) operand instead.
1973 unsigned Idx = NumElts + i - Shift;
1974 if (Idx < NumElts)
1975 Idx -= NumElts - 16; // end of lane, switch operand.
1976 Idxs[l + i] = Idx + l;
1977 }
1978
1979 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1980 }
1981
1982 // Bitcast back to a 64-bit element type.
1983 return Builder.CreateBitCast(Res, ResultTy, "cast");
1984}
1985
1986// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1987// to byte shuffles.
// NOTE(review): the first line of the signature is missing from this listing;
// the body uses Builder and Op in addition to the visible Shift parameter.
1989 unsigned Shift) {
1990 auto *ResultTy = cast<FixedVectorType>(Op->getType());
// Byte count of the vector: eight bytes per (64-bit) element.
1991 unsigned NumElts = ResultTy->getNumElements() * 8;
1992
1993 // Bitcast from a 64-bit element type to a byte element type.
1994 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1995 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1996
1997 // We'll be shuffling in zeroes.
1998 Value *Res = Constant::getNullValue(VecTy);
1999
2000 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
2001 // we'll just return the zero vector.
2002 if (Shift < 16) {
// Index buffer sized for the largest case handled here (64 bytes).
2003 int Idxs[64];
2004 // 256/512-bit version is split into 2/4 16-byte lanes.
2005 for (unsigned l = 0; l != NumElts; l += 16)
2006 for (unsigned i = 0; i != 16; ++i) {
// The shuffle below takes Op as first operand and the zero vector as second.
// Byte i of a lane reads Op byte (i + Shift); once that runs past the 16-byte
// lane the index is moved into the second (zero) operand.
2007 unsigned Idx = i + Shift;
2008 if (Idx >= 16)
2009 Idx += NumElts - 16; // end of lane, switch operand.
2010 Idxs[l + i] = Idx + l;
2011 }
2012
2013 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
2014 }
2015
2016 // Bitcast back to a 64-bit element type.
2017 return Builder.CreateBitCast(Res, ResultTy, "cast");
2018}
2019
// Reinterpret the integer Mask as a vector of i1 (one element per mask bit)
// and, when NumElts is 4 or fewer, keep only the first NumElts elements.
// NumElts must be a power of two (asserted).
// NOTE(review): the line that opens the MaskTy declaration is missing from
// this listing; only its argument list is visible.
2020static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2021 unsigned NumElts) {
2022 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
2024 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
2025 Mask = Builder.CreateBitCast(Mask, MaskTy);
2026
2027 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2028 // i8 and we need to extract down to the right number of elements.
2029 if (NumElts <= 4) {
// Identity indices 0..NumElts-1: the shuffle just truncates the vector.
2030 int Indices[4];
2031 for (unsigned i = 0; i != NumElts; ++i)
2032 Indices[i] = i;
2033 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
2034 "extract");
2035 }
2036
2037 return Mask;
2038}
2039
2040static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2041 Value *Op1) {
2042 // If the mask is all ones just emit the first operation.
2043 if (const auto *C = dyn_cast<Constant>(Mask))
2044 if (C->isAllOnesValue())
2045 return Op0;
2046
2047 Mask = getX86MaskVec(Builder, Mask,
2048 cast<FixedVectorType>(Op0->getType())->getNumElements());
2049 return Builder.CreateSelect(Mask, Op0, Op1);
2050}
2051
2052static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2053 Value *Op1) {
2054 // If the mask is all ones just emit the first operation.
2055 if (const auto *C = dyn_cast<Constant>(Mask))
2056 if (C->isAllOnesValue())
2057 return Op0;
2058
2059 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2060 Mask->getType()->getIntegerBitWidth());
2061 Mask = Builder.CreateBitCast(Mask, MaskTy);
2062 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2063 return Builder.CreateSelect(Mask, Op0, Op1);
2064}
2065
2066// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2067// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2068// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
// NOTE(review): three lines of this definition are missing from this listing:
// the first line of the signature, the statement guarded by
// `if (ShiftVal >= 32)`, and one statement inside the `ShiftVal > 16` block.
2070 Value *Op1, Value *Shift,
2071 Value *Passthru, Value *Mask,
2072 bool IsVALIGN) {
// Shift must be a ConstantInt; the cast asserts otherwise.
2073 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2074
2075 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2076 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2077 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2078 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2079
2080 // Mask the immediate for VALIGN.
2081 if (IsVALIGN)
2082 ShiftVal &= (NumElts - 1);
2083
2084 // If palignr is shifting the pair of vectors more than the size of two
2085 // lanes, emit zero.
2086 if (ShiftVal >= 32)
2088
2089 // If palignr is shifting the pair of input vectors more than one lane,
2090 // but less than two lanes, convert to shifting in zeroes.
2091 if (ShiftVal > 16) {
2092 ShiftVal -= 16;
2093 Op1 = Op0;
2095 }
2096
2097 int Indices[64];
2098 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2099 for (unsigned l = 0; l < NumElts; l += 16) {
2100 for (unsigned i = 0; i != 16; ++i) {
// The shuffle below takes Op1 as first operand and Op0 as second, so adding
// NumElts - 16 once the index leaves the lane redirects it into Op0.
2101 unsigned Idx = ShiftVal + i;
2102 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2103 Idx += NumElts - 16; // End of lane, switch operand.
2104 Indices[l + i] = Idx + l;
2105 }
2106 }
2107
2108 Value *Align = Builder.CreateShuffleVector(
2109 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2110
// Apply the write mask: the shuffled value where selected, else Passthru.
2111 return emitX86Select(Builder, Mask, Align, Passthru);
2112}
2113
// NOTE(review): the first line of the signature is missing from this listing;
// the body uses Builder and CI in addition to the visible parameters.
//
// Picks the x86_avx512_vpermi2var_* intrinsic that matches the call's result
// type (total width, element width, float vs. integer), emits it on the call's
// first three operands and selects the result under the mask in operand 3.
2115 bool ZeroMask, bool IndexForm) {
2116 Type *Ty = CI.getType();
2117 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2118 unsigned EltWidth = Ty->getScalarSizeInBits();
2119 bool IsFloat = Ty->isFPOrFPVectorTy();
2120 Intrinsic::ID IID;
// 32- and 64-bit elements distinguish float from integer variants; 16- and
// 8-bit elements have a single variant per vector width.
2121 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2122 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2123 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2124 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2125 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2126 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2127 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2128 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2129 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2130 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2131 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2132 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2133 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2134 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2135 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2136 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2137 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2138 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2139 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2140 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2141 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2142 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2143 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2144 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2145 else if (VecWidth == 128 && EltWidth == 16)
2146 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2147 else if (VecWidth == 256 && EltWidth == 16)
2148 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2149 else if (VecWidth == 512 && EltWidth == 16)
2150 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2151 else if (VecWidth == 128 && EltWidth == 8)
2152 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2153 else if (VecWidth == 256 && EltWidth == 8)
2154 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2155 else if (VecWidth == 512 && EltWidth == 8)
2156 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2157 else
2158 llvm_unreachable("Unexpected intrinsic");
2159
2160 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2161 CI.getArgOperand(2) };
2162
2163 // If this isn't index form we need to swap operand 0 and 1.
2164 if (!IndexForm)
2165 std::swap(Args[0], Args[1]);
2166
2167 Value *V = Builder.CreateIntrinsic(IID, Args);
// Masked-off lanes are zero when ZeroMask is set; otherwise they come from the
// call's original operand 1 (taken from CI, so unaffected by the swap above),
// bitcast to the result type.
2168 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2169 : Builder.CreateBitCast(CI.getArgOperand(1),
2170 Ty);
2171 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2172}
2173
// NOTE(review): the first line of the signature is missing from this listing;
// the body uses Builder and CI in addition to the visible IID parameter.
//
// Emits intrinsic IID, overloaded on the call's result type, on the call's
// first two operands. A four-argument call is treated as the masked form.
2175 Intrinsic::ID IID) {
2176 Type *Ty = CI.getType();
2177 Value *Op0 = CI.getOperand(0);
2178 Value *Op1 = CI.getOperand(1);
2179 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2180
2181 if (CI.arg_size() == 4) { // For masked intrinsics.
// Operand 2 supplies the masked-off lanes, operand 3 is the mask.
2182 Value *VecSrc = CI.getOperand(2);
2183 Value *Mask = CI.getOperand(3);
2184 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2185 }
2186 return Res;
2187}
2188
// NOTE(review): the first line of the signature is missing from this listing;
// the body uses Builder and CI in addition to the visible IsRotateRight flag.
//
// Expresses a rotate as a funnel shift with the same value in both data
// operands: fshr for a right rotate, fshl otherwise. A four-argument call is
// treated as the masked form.
2190 bool IsRotateRight) {
2191 Type *Ty = CI.getType();
2192 Value *Src = CI.getArgOperand(0);
2193 Value *Amt = CI.getArgOperand(1);
2194
2195 // Amount may be scalar immediate, in which case create a splat vector.
2196 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2197 // we only care about the lowest log2 bits anyway.
2198 if (Amt->getType() != Ty) {
2199 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// Unsigned (isSigned = false) cast to the element type, then broadcast.
2200 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2201 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2202 }
2203
2204 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2205 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2206
2207 if (CI.arg_size() == 4) { // For masked intrinsics.
// Operand 2 supplies the masked-off lanes, operand 3 is the mask.
2208 Value *VecSrc = CI.getOperand(2);
2209 Value *Mask = CI.getOperand(3);
2210 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2211 }
2212 return Res;
2213}
2214
2215static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2216 bool IsSigned) {
2217 Type *Ty = CI.getType();
2218 Value *LHS = CI.getArgOperand(0);
2219 Value *RHS = CI.getArgOperand(1);
2220
2221 CmpInst::Predicate Pred;
2222 switch (Imm) {
2223 case 0x0:
2224 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2225 break;
2226 case 0x1:
2227 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2228 break;
2229 case 0x2:
2230 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2231 break;
2232 case 0x3:
2233 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2234 break;
2235 case 0x4:
2236 Pred = ICmpInst::ICMP_EQ;
2237 break;
2238 case 0x5:
2239 Pred = ICmpInst::ICMP_NE;
2240 break;
2241 case 0x6:
2242 return Constant::getNullValue(Ty); // FALSE
2243 case 0x7:
2244 return Constant::getAllOnesValue(Ty); // TRUE
2245 default:
2246 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2247 }
2248
2249 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2250 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2251 return Ext;
2252}
2253
// NOTE(review): the first line of the signature is missing from this listing;
// the body uses Builder and CI in addition to the visible parameters.
//
// Lowers a three-operand concat-and-shift call to a funnel shift: fshr for a
// right shift (with the two data operands swapped), fshl otherwise. Calls with
// four or more arguments are treated as masked forms.
2255 bool IsShiftRight, bool ZeroMask) {
2256 Type *Ty = CI.getType();
2257 Value *Op0 = CI.getArgOperand(0);
2258 Value *Op1 = CI.getArgOperand(1);
2259 Value *Amt = CI.getArgOperand(2);
2260
2261 if (IsShiftRight)
2262 std::swap(Op0, Op1);
2263
2264 // Amount may be scalar immediate, in which case create a splat vector.
2265 // Funnel shift amounts are treated as modulo and types are all power-of-2 so
2266 // we only care about the lowest log2 bits anyway.
2267 if (Amt->getType() != Ty) {
2268 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
// Unsigned (isSigned = false) cast to the element type, then broadcast.
2269 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2270 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2271 }
2272
2273 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2274 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2275
2276 unsigned NumArgs = CI.arg_size();
2277 if (NumArgs >= 4) { // For masked intrinsics.
// Masked-off lanes: operand 3 for five-argument calls; otherwise zero when
// ZeroMask is set, else the call's original operand 0 (read from CI, so not
// affected by the swap above). The mask is always the last argument.
2278 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2279 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2280 CI.getArgOperand(0);
2281 Value *Mask = CI.getOperand(NumArgs - 1);
2282 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2283 }
2284 return Res;
2285}
2286
// NOTE(review): the first line of the signature is missing from this listing;
// the body uses Builder, Ptr and Data in addition to the visible parameters.
//
// Stores Data to Ptr under an x86 integer mask: a plain aligned store when the
// mask is a constant all-ones value, a masked store otherwise.
2288 Value *Mask, bool Aligned) {
// "Aligned" means alignment equal to the full byte size of Data's type;
// otherwise alignment 1 is used.
2289 const Align Alignment =
2290 Aligned
2291 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2292 : Align(1);
2293
2294 // If the mask is all ones just emit a regular store.
2295 if (const auto *C = dyn_cast<Constant>(Mask))
2296 if (C->isAllOnesValue())
2297 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2298
2299 // Convert the mask from an integer type to a vector of i1.
2300 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2301 Mask = getX86MaskVec(Builder, Mask, NumElts);
2302 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2303}
2304
// NOTE(review): the first line of the signature and one line inside the
// Align(...) expression are missing from this listing; the body uses Builder
// and Ptr in addition to the visible parameters.
//
// Loads a value of Passthru's type from Ptr under an x86 integer mask: a plain
// aligned load when the mask is a constant all-ones value, a masked load with
// Passthru as the pass-through value otherwise.
2306 Value *Passthru, Value *Mask, bool Aligned) {
2307 Type *ValTy = Passthru->getType();
2308 const Align Alignment =
2309 Aligned
2310 ? Align(
2312 8)
2313 : Align(1);
2314
2315 // If the mask is all ones just emit a regular load.
2316 if (const auto *C = dyn_cast<Constant>(Mask))
2317 if (C->isAllOnesValue())
2318 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2319
2320 // Convert the mask from an integer type to a vector of i1.
2321 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2322 Mask = getX86MaskVec(Builder, Mask, NumElts);
2323 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2324}
2325
2326static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2327 Type *Ty = CI.getType();
2328 Value *Op0 = CI.getArgOperand(0);
2329 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2330 {Op0, Builder.getInt1(false)});
2331 if (CI.arg_size() == 3)
2332 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2333 return Res;
2334}
2335
2336static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2337 Type *Ty = CI.getType();
2338
2339 // Arguments have a vXi32 type so cast to vXi64.
2340 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2341 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2342
2343 if (IsSigned) {
2344 // Shift left then arithmetic shift right.
2345 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2346 LHS = Builder.CreateShl(LHS, ShiftAmt);
2347 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2348 RHS = Builder.CreateShl(RHS, ShiftAmt);
2349 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2350 } else {
2351 // Clear the upper bits.
2352 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2353 LHS = Builder.CreateAnd(LHS, Mask);
2354 RHS = Builder.CreateAnd(RHS, Mask);
2355 }
2356
2357 Value *Res = Builder.CreateMul(LHS, RHS);
2358
2359 if (CI.arg_size() == 4)
2360 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2361
2362 return Res;
2363}
2364
2365// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
// NOTE(review): the first line of the signature and the second operand of the
// CreateShuffleVector call are on lines missing from this listing; the body
// uses Builder and Vec in addition to the visible Mask parameter.
2367 Value *Mask) {
2368 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
// A null Mask or a constant all-ones Mask leaves Vec untouched; anything else
// is converted to a vector of i1 and ANDed in.
2369 if (Mask) {
2370 const auto *C = dyn_cast<Constant>(Mask);
2371 if (!C || !C->isAllOnesValue())
2372 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2373 }
2374
2375 if (NumElts < 8) {
// Widen to 8 elements: the first NumElts indices keep Vec's own elements, the
// remaining indices are >= NumElts and therefore read from the shuffle's
// second operand.
2376 int Indices[8];
2377 for (unsigned i = 0; i != NumElts; ++i)
2378 Indices[i] = i;
2379 for (unsigned i = NumElts; i != 8; ++i)
2380 Indices[i] = NumElts + i % NumElts;
2381 Vec = Builder.CreateShuffleVector(Vec,
2383 Indices);
2384 }
// Reinterpret the i1 vector as one integer of max(NumElts, 8) bits.
2385 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2386}
2387
// NOTE(review): four lines of this definition are missing from this listing:
// the first line of the signature, the statements that assign Cmp for CC == 3
// and CC == 7, and the declaration of Pred. The body uses Builder and CI in
// addition to the visible parameters.
//
// Builds a per-element comparison result for condition code CC, then applies
// the call's last argument as a mask and packs the result into an integer via
// applyX86MaskOn1BitsVec.
2389 unsigned CC, bool Signed) {
2390 Value *Op0 = CI.getArgOperand(0);
2391 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2392
2393 Value *Cmp;
// CC 3 and CC 7 do not compare anything; each builds a value of type
// <NumElts x i1> (presumably constant false / true -- the producing calls are
// on the missing lines, confirm against the full file).
2394 if (CC == 3) {
2396 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2397 } else if (CC == 7) {
2399 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2400 } else {
// Remaining codes map to icmp predicates; the ordering ones honour Signed.
2402 switch (CC) {
2403 default: llvm_unreachable("Unknown condition code");
2404 case 0: Pred = ICmpInst::ICMP_EQ; break;
2405 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2406 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2407 case 4: Pred = ICmpInst::ICMP_NE; break;
2408 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2409 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2410 }
2411 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2412 }
2413
// The mask is always the call's last argument.
2414 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2415
2416 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2417}
2418
2419// Replace a masked intrinsic with an older unmasked intrinsic.
// NOTE(review): the first line of the signature is missing from this listing;
// the body uses Builder and CI in addition to the visible IID parameter.
// The unmasked intrinsic IID is called on operands 0 and 1; operand 3 is the
// mask and operand 2 supplies the masked-off lanes.
2421 Intrinsic::ID IID) {
2422 Value *Rep =
2423 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2424 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2425}
2426
// NOTE(review): the signature line is missing from this listing; the body
// uses Builder and CI.
//
// Masked move of element 0: when the low bit of the mask is set, element 0 of
// B is written into element 0 of A, otherwise element 0 of Src is. All other
// elements of the result come from A.
2428 Value* A = CI.getArgOperand(0);
2429 Value* B = CI.getArgOperand(1);
2430 Value* Src = CI.getArgOperand(2);
2431 Value* Mask = CI.getArgOperand(3);
2432
// Isolate the low mask bit; the APInt(8, 1) constant is 8 bits wide.
2433 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2434 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2435 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2436 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2437 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2438 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2439}
2440
// NOTE(review): the signature line is missing from this listing; the body
// uses Builder and CI.
//
// Expands the integer mask in operand 0 into the call's vector result type:
// the mask becomes a vector of i1 with one element per result element, which
// is then sign-extended.
2442 Value* Op = CI.getArgOperand(0);
2443 Type* ReturnOp = CI.getType();
2444 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2445 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2446 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2447}
2448
2449// Replace intrinsic with unmasked version and a select.
// NOTE(review): the first line of the signature is missing from this listing;
// the body uses Name and Builder in addition to the visible CI and Rep.
//
// Maps the masked intrinsic name (after its 12-character "avx512.mask."
// prefix) plus the call's vector and element widths to an unmasked intrinsic
// ID. Returns false when the name matches none of the handled prefixes;
// otherwise stores the replacement value in Rep and returns true.
2451 CallBase &CI, Value *&Rep) {
2452 Name = Name.substr(12); // Remove avx512.mask.
2453
2454 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2455 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2456 Intrinsic::ID IID;
2457 if (Name.starts_with("max.p")) {
2458 if (VecWidth == 128 && EltWidth == 32)
2459 IID = Intrinsic::x86_sse_max_ps;
2460 else if (VecWidth == 128 && EltWidth == 64)
2461 IID = Intrinsic::x86_sse2_max_pd;
2462 else if (VecWidth == 256 && EltWidth == 32)
2463 IID = Intrinsic::x86_avx_max_ps_256;
2464 else if (VecWidth == 256 && EltWidth == 64)
2465 IID = Intrinsic::x86_avx_max_pd_256;
2466 else
2467 llvm_unreachable("Unexpected intrinsic");
2468 } else if (Name.starts_with("min.p")) {
2469 if (VecWidth == 128 && EltWidth == 32)
2470 IID = Intrinsic::x86_sse_min_ps;
2471 else if (VecWidth == 128 && EltWidth == 64)
2472 IID = Intrinsic::x86_sse2_min_pd;
2473 else if (VecWidth == 256 && EltWidth == 32)
2474 IID = Intrinsic::x86_avx_min_ps_256;
2475 else if (VecWidth == 256 && EltWidth == 64)
2476 IID = Intrinsic::x86_avx_min_pd_256;
2477 else
2478 llvm_unreachable("Unexpected intrinsic");
2479 } else if (Name.starts_with("pshuf.b.")) {
2480 if (VecWidth == 128)
2481 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2482 else if (VecWidth == 256)
2483 IID = Intrinsic::x86_avx2_pshuf_b;
2484 else if (VecWidth == 512)
2485 IID = Intrinsic::x86_avx512_pshuf_b_512;
2486 else
2487 llvm_unreachable("Unexpected intrinsic");
2488 } else if (Name.starts_with("pmul.hr.sw.")) {
2489 if (VecWidth == 128)
2490 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2491 else if (VecWidth == 256)
2492 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2493 else if (VecWidth == 512)
2494 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2495 else
2496 llvm_unreachable("Unexpected intrinsic");
2497 } else if (Name.starts_with("pmulh.w.")) {
2498 if (VecWidth == 128)
2499 IID = Intrinsic::x86_sse2_pmulh_w;
2500 else if (VecWidth == 256)
2501 IID = Intrinsic::x86_avx2_pmulh_w;
2502 else if (VecWidth == 512)
2503 IID = Intrinsic::x86_avx512_pmulh_w_512;
2504 else
2505 llvm_unreachable("Unexpected intrinsic");
2506 } else if (Name.starts_with("pmulhu.w.")) {
2507 if (VecWidth == 128)
2508 IID = Intrinsic::x86_sse2_pmulhu_w;
2509 else if (VecWidth == 256)
2510 IID = Intrinsic::x86_avx2_pmulhu_w;
2511 else if (VecWidth == 512)
2512 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2513 else
2514 llvm_unreachable("Unexpected intrinsic");
2515 } else if (Name.starts_with("pmaddw.d.")) {
2516 if (VecWidth == 128)
2517 IID = Intrinsic::x86_sse2_pmadd_wd;
2518 else if (VecWidth == 256)
2519 IID = Intrinsic::x86_avx2_pmadd_wd;
2520 else if (VecWidth == 512)
2521 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2522 else
2523 llvm_unreachable("Unexpected intrinsic");
2524 } else if (Name.starts_with("pmaddubs.w.")) {
2525 if (VecWidth == 128)
2526 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2527 else if (VecWidth == 256)
2528 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2529 else if (VecWidth == 512)
2530 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2531 else
2532 llvm_unreachable("Unexpected intrinsic");
2533 } else if (Name.starts_with("packsswb.")) {
2534 if (VecWidth == 128)
2535 IID = Intrinsic::x86_sse2_packsswb_128;
2536 else if (VecWidth == 256)
2537 IID = Intrinsic::x86_avx2_packsswb;
2538 else if (VecWidth == 512)
2539 IID = Intrinsic::x86_avx512_packsswb_512;
2540 else
2541 llvm_unreachable("Unexpected intrinsic");
2542 } else if (Name.starts_with("packssdw.")) {
2543 if (VecWidth == 128)
2544 IID = Intrinsic::x86_sse2_packssdw_128;
2545 else if (VecWidth == 256)
2546 IID = Intrinsic::x86_avx2_packssdw;
2547 else if (VecWidth == 512)
2548 IID = Intrinsic::x86_avx512_packssdw_512;
2549 else
2550 llvm_unreachable("Unexpected intrinsic");
2551 } else if (Name.starts_with("packuswb.")) {
2552 if (VecWidth == 128)
2553 IID = Intrinsic::x86_sse2_packuswb_128;
2554 else if (VecWidth == 256)
2555 IID = Intrinsic::x86_avx2_packuswb;
2556 else if (VecWidth == 512)
2557 IID = Intrinsic::x86_avx512_packuswb_512;
2558 else
2559 llvm_unreachable("Unexpected intrinsic");
2560 } else if (Name.starts_with("packusdw.")) {
2561 if (VecWidth == 128)
2562 IID = Intrinsic::x86_sse41_packusdw;
2563 else if (VecWidth == 256)
2564 IID = Intrinsic::x86_avx2_packusdw;
2565 else if (VecWidth == 512)
2566 IID = Intrinsic::x86_avx512_packusdw_512;
2567 else
2568 llvm_unreachable("Unexpected intrinsic");
2569 } else if (Name.starts_with("vpermilvar.")) {
2570 if (VecWidth == 128 && EltWidth == 32)
2571 IID = Intrinsic::x86_avx_vpermilvar_ps;
2572 else if (VecWidth == 128 && EltWidth == 64)
2573 IID = Intrinsic::x86_avx_vpermilvar_pd;
2574 else if (VecWidth == 256 && EltWidth == 32)
2575 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2576 else if (VecWidth == 256 && EltWidth == 64)
2577 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2578 else if (VecWidth == 512 && EltWidth == 32)
2579 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2580 else if (VecWidth == 512 && EltWidth == 64)
2581 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2582 else
2583 llvm_unreachable("Unexpected intrinsic");
// The conversion names below are matched exactly, not by prefix.
2584 } else if (Name == "cvtpd2dq.256") {
2585 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2586 } else if (Name == "cvtpd2ps.256") {
2587 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2588 } else if (Name == "cvttpd2dq.256") {
2589 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2590 } else if (Name == "cvttps2dq.128") {
2591 IID = Intrinsic::x86_sse2_cvttps2dq;
2592 } else if (Name == "cvttps2dq.256") {
2593 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2594 } else if (Name.starts_with("permvar.")) {
2595 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2596 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2597 IID = Intrinsic::x86_avx2_permps;
2598 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2599 IID = Intrinsic::x86_avx2_permd;
2600 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2601 IID = Intrinsic::x86_avx512_permvar_df_256;
2602 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2603 IID = Intrinsic::x86_avx512_permvar_di_256;
2604 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2605 IID = Intrinsic::x86_avx512_permvar_sf_512;
2606 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2607 IID = Intrinsic::x86_avx512_permvar_si_512;
2608 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2609 IID = Intrinsic::x86_avx512_permvar_df_512;
2610 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2611 IID = Intrinsic::x86_avx512_permvar_di_512;
2612 else if (VecWidth == 128 && EltWidth == 16)
2613 IID = Intrinsic::x86_avx512_permvar_hi_128;
2614 else if (VecWidth == 256 && EltWidth == 16)
2615 IID = Intrinsic::x86_avx512_permvar_hi_256;
2616 else if (VecWidth == 512 && EltWidth == 16)
2617 IID = Intrinsic::x86_avx512_permvar_hi_512;
2618 else if (VecWidth == 128 && EltWidth == 8)
2619 IID = Intrinsic::x86_avx512_permvar_qi_128;
2620 else if (VecWidth == 256 && EltWidth == 8)
2621 IID = Intrinsic::x86_avx512_permvar_qi_256;
2622 else if (VecWidth == 512 && EltWidth == 8)
2623 IID = Intrinsic::x86_avx512_permvar_qi_512;
2624 else
2625 llvm_unreachable("Unexpected intrinsic");
2626 } else if (Name.starts_with("dbpsadbw.")) {
2627 if (VecWidth == 128)
2628 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2629 else if (VecWidth == 256)
2630 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2631 else if (VecWidth == 512)
2632 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2633 else
2634 llvm_unreachable("Unexpected intrinsic");
2635 } else if (Name.starts_with("pmultishift.qb.")) {
2636 if (VecWidth == 128)
2637 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2638 else if (VecWidth == 256)
2639 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2640 else if (VecWidth == 512)
2641 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2642 else
2643 llvm_unreachable("Unexpected intrinsic");
2644 } else if (Name.starts_with("conflict.")) {
// Name[9] is the character right after the 9-character "conflict." prefix.
2645 if (Name[9] == 'd' && VecWidth == 128)
2646 IID = Intrinsic::x86_avx512_conflict_d_128;
2647 else if (Name[9] == 'd' && VecWidth == 256)
2648 IID = Intrinsic::x86_avx512_conflict_d_256;
2649 else if (Name[9] == 'd' && VecWidth == 512)
2650 IID = Intrinsic::x86_avx512_conflict_d_512;
2651 else if (Name[9] == 'q' && VecWidth == 128)
2652 IID = Intrinsic::x86_avx512_conflict_q_128;
2653 else if (Name[9] == 'q' && VecWidth == 256)
2654 IID = Intrinsic::x86_avx512_conflict_q_256;
2655 else if (Name[9] == 'q' && VecWidth == 512)
2656 IID = Intrinsic::x86_avx512_conflict_q_512;
2657 else
2658 llvm_unreachable("Unexpected intrinsic");
2659 } else if (Name.starts_with("pavg.")) {
// Name[5] is the character right after the 5-character "pavg." prefix.
2660 if (Name[5] == 'b' && VecWidth == 128)
2661 IID = Intrinsic::x86_sse2_pavg_b;
2662 else if (Name[5] == 'b' && VecWidth == 256)
2663 IID = Intrinsic::x86_avx2_pavg_b;
2664 else if (Name[5] == 'b' && VecWidth == 512)
2665 IID = Intrinsic::x86_avx512_pavg_b_512;
2666 else if (Name[5] == 'w' && VecWidth == 128)
2667 IID = Intrinsic::x86_sse2_pavg_w;
2668 else if (Name[5] == 'w' && VecWidth == 256)
2669 IID = Intrinsic::x86_avx2_pavg_w;
2670 else if (Name[5] == 'w' && VecWidth == 512)
2671 IID = Intrinsic::x86_avx512_pavg_w_512;
2672 else
2673 llvm_unreachable("Unexpected intrinsic");
2674 } else
2675 return false;
2676
// Call the unmasked intrinsic with every argument except the last two, then
// select under the last argument (mask) with the second-to-last argument
// supplying the masked-off lanes.
2677 SmallVector<Value *, 4> Args(CI.args());
2678 Args.pop_back();
2679 Args.pop_back();
2680 Rep = Builder.CreateIntrinsic(IID, Args);
2681 unsigned NumArgs = CI.arg_size();
2682 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2683 CI.getArgOperand(NumArgs - 2));
2684 return true;
2685}
2686
/// Upgrade comment in call to inline asm that represents an objc retain release
/// marker.
///
/// If \p AsmStr begins with "mov\tfp", mentions
/// "objc_retainAutoreleaseReturnValue" and contains "# marker", the '#' of the
/// first "# marker" is rewritten to ';' in place. Any other string is left
/// untouched. \p AsmStr must not be null.
void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
  // rfind(Prefix, 0) only inspects position 0, so this is a pure prefix test;
  // find(Prefix) == 0 would scan the whole string before giving up.
  if (AsmStr->rfind("mov\tfp", 0) != 0)
    return;
  if (AsmStr->find("objc_retainAutoreleaseReturnValue") == std::string::npos)
    return;

  const size_t Pos = AsmStr->find("# marker");
  if (Pos != std::string::npos)
    AsmStr->replace(Pos, 1, ";");
}
2697
2699 Function *F, IRBuilder<> &Builder) {
2700 Value *Rep = nullptr;
2701
2702 if (Name == "abs.i" || Name == "abs.ll") {
2703 Value *Arg = CI->getArgOperand(0);
2704 Value *Neg = Builder.CreateNeg(Arg, "neg");
2705 Value *Cmp = Builder.CreateICmpSGE(
2706 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2707 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2708 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2709 Type *Ty = (Name == "abs.bf16")
2710 ? Builder.getBFloatTy()
2711 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2712 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2713 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2714 Rep = Builder.CreateBitCast(Abs, CI->getType());
2715 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2716 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2717 : Intrinsic::nvvm_fabs;
2718 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2719 } else if (Name.consume_front("ex2.approx.")) {
2720 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2721 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2722 : Intrinsic::nvvm_ex2_approx;
2723 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2724 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2725 Name.starts_with("atomic.load.add.f64.p")) {
2726 Value *Ptr = CI->getArgOperand(0);
2727 Value *Val = CI->getArgOperand(1);
2728 Rep = Builder.CreateAtomicRMW(
2730 CI->getContext().getOrInsertSyncScopeID("device"));
2731 // The default scope for atomic.load.* intrinsics is device
2732 // (= gpu scope in ptx), but the default LLVM atomic scope is
2733 // "system"
2734 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2735 Name.starts_with("atomic.load.dec.32.p")) {
2736 Value *Ptr = CI->getArgOperand(0);
2737 Value *Val = CI->getArgOperand(1);
2738 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2740 Rep = Builder.CreateAtomicRMW(
2742 CI->getContext().getOrInsertSyncScopeID("device"));
2743 // See comment above.
2744 } else if (Name == "clz.ll") {
2745 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2746 Value *Arg = CI->getArgOperand(0);
2747 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2748 {Arg, Builder.getFalse()},
2749 /*FMFSource=*/nullptr, "ctlz");
2750 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2751 } else if (Name == "popc.ll") {
2752 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2753 // i64.
2754 Value *Arg = CI->getArgOperand(0);
2755 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2756 Arg, /*FMFSource=*/nullptr, "ctpop");
2757 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2758 } else if (Name == "h2f") {
2759 Value *Cast =
2760 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
2761 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2762 } else if (Name.consume_front("bitcast.") &&
2763 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2764 Name == "d2ll")) {
2765 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2766 } else if (Name == "rotate.b32") {
2767 Value *Arg = CI->getOperand(0);
2768 Value *ShiftAmt = CI->getOperand(1);
2769 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2770 {Arg, Arg, ShiftAmt});
2771 } else if (Name == "rotate.b64") {
2772 Type *Int64Ty = Builder.getInt64Ty();
2773 Value *Arg = CI->getOperand(0);
2774 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2775 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2776 {Arg, Arg, ZExtShiftAmt});
2777 } else if (Name == "rotate.right.b64") {
2778 Type *Int64Ty = Builder.getInt64Ty();
2779 Value *Arg = CI->getOperand(0);
2780 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2781 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2782 {Arg, Arg, ZExtShiftAmt});
2783 } else if (Name == "swap.lo.hi.b64") {
2784 Type *Int64Ty = Builder.getInt64Ty();
2785 Value *Arg = CI->getOperand(0);
2786 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2787 {Arg, Arg, Builder.getInt64(32)});
2788 } else if ((Name.consume_front("ptr.gen.to.") &&
2789 consumeNVVMPtrAddrSpace(Name)) ||
2790 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2791 Name.starts_with(".to.gen"))) {
2792 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2793 } else if (Name.consume_front("ldg.global")) {
2794 Value *Ptr = CI->getArgOperand(0);
2795 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2796 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2797 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2798 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2799 MDNode *MD = MDNode::get(Builder.getContext(), {});
2800 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2801 return LD;
2802 } else if (Name == "tanh.approx.f32") {
2803 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2804 FastMathFlags FMF;
2805 FMF.setApproxFunc();
2806 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2807 FMF);
2808 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2809 Value *Arg =
2810 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2811 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2812 {}, {Arg});
2813 } else if (Name == "barrier") {
2814 Rep = Builder.CreateIntrinsic(
2815 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2816 {CI->getArgOperand(0), CI->getArgOperand(1)});
2817 } else if (Name == "barrier.sync") {
2818 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2819 {CI->getArgOperand(0)});
2820 } else if (Name == "barrier.sync.cnt") {
2821 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2822 {CI->getArgOperand(0), CI->getArgOperand(1)});
2823 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2824 Name == "barrier0.or") {
2825 Value *C = CI->getArgOperand(0);
2826 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2827
2828 Intrinsic::ID IID =
2830 .Case("barrier0.popc",
2831 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2832 .Case("barrier0.and",
2833 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2834 .Case("barrier0.or",
2835 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2836 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2837 Rep = Builder.CreateZExt(Bar, CI->getType());
2838 } else {
2840 if (IID != Intrinsic::not_intrinsic &&
2841 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2842 rename(F);
2843 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2845 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2846 Value *Arg = CI->getArgOperand(I);
2847 Type *OldType = Arg->getType();
2848 Type *NewType = NewFn->getArg(I)->getType();
2849 Args.push_back(
2850 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2851 ? Builder.CreateBitCast(Arg, NewType)
2852 : Arg);
2853 }
2854 Rep = Builder.CreateCall(NewFn, Args);
2855 if (F->getReturnType()->isIntegerTy())
2856 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2857 }
2858 }
2859
2860 return Rep;
2861}
2862
2864 IRBuilder<> &Builder) {
2865 LLVMContext &C = F->getContext();
2866 Value *Rep = nullptr;
2867
2868 if (Name.starts_with("sse4a.movnt.")) {
2870 Elts.push_back(
2871 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2872 MDNode *Node = MDNode::get(C, Elts);
2873
2874 Value *Arg0 = CI->getArgOperand(0);
2875 Value *Arg1 = CI->getArgOperand(1);
2876
2877 // Nontemporal (unaligned) store of the 0'th element of the float/double
2878 // vector.
2879 Value *Extract =
2880 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2881
2882 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2883 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2884 } else if (Name.starts_with("avx.movnt.") ||
2885 Name.starts_with("avx512.storent.")) {
2887 Elts.push_back(
2888 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2889 MDNode *Node = MDNode::get(C, Elts);
2890
2891 Value *Arg0 = CI->getArgOperand(0);
2892 Value *Arg1 = CI->getArgOperand(1);
2893
2894 StoreInst *SI = Builder.CreateAlignedStore(
2895 Arg1, Arg0,
2897 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2898 } else if (Name == "sse2.storel.dq") {
2899 Value *Arg0 = CI->getArgOperand(0);
2900 Value *Arg1 = CI->getArgOperand(1);
2901
2902 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2903 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2904 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2905 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2906 } else if (Name.starts_with("sse.storeu.") ||
2907 Name.starts_with("sse2.storeu.") ||
2908 Name.starts_with("avx.storeu.")) {
2909 Value *Arg0 = CI->getArgOperand(0);
2910 Value *Arg1 = CI->getArgOperand(1);
2911 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2912 } else if (Name == "avx512.mask.store.ss") {
2913 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2914 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2915 Mask, false);
2916 } else if (Name.starts_with("avx512.mask.store")) {
2917 // "avx512.mask.storeu." or "avx512.mask.store."
2918 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2919 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2920 CI->getArgOperand(2), Aligned);
2921 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2922 // Upgrade packed integer vector compare intrinsics to compare instructions.
2923 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2924 bool CmpEq = Name[9] == 'e';
2925 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2926 CI->getArgOperand(0), CI->getArgOperand(1));
2927 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2928 } else if (Name.starts_with("avx512.broadcastm")) {
2929 Type *ExtTy = Type::getInt32Ty(C);
2930 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2931 ExtTy = Type::getInt64Ty(C);
2932 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2933 ExtTy->getPrimitiveSizeInBits();
2934 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2935 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2936 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2937 Value *Vec = CI->getArgOperand(0);
2938 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2939 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2940 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2941 } else if (Name.starts_with("avx.sqrt.p") ||
2942 Name.starts_with("sse2.sqrt.p") ||
2943 Name.starts_with("sse.sqrt.p")) {
2944 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2945 {CI->getArgOperand(0)});
2946 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2947 if (CI->arg_size() == 4 &&
2948 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2949 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2950 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2951 : Intrinsic::x86_avx512_sqrt_pd_512;
2952
2953 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2954 Rep = Builder.CreateIntrinsic(IID, Args);
2955 } else {
2956 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2957 {CI->getArgOperand(0)});
2958 }
2959 Rep =
2960 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2961 } else if (Name.starts_with("avx512.ptestm") ||
2962 Name.starts_with("avx512.ptestnm")) {
2963 Value *Op0 = CI->getArgOperand(0);
2964 Value *Op1 = CI->getArgOperand(1);
2965 Value *Mask = CI->getArgOperand(2);
2966 Rep = Builder.CreateAnd(Op0, Op1);
2967 llvm::Type *Ty = Op0->getType();
2969 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2972 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2973 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2974 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2975 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2976 ->getNumElements();
2977 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2978 Rep =
2979 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2980 } else if (Name.starts_with("avx512.kunpck")) {
2981 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2982 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2983 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2984 int Indices[64];
2985 for (unsigned i = 0; i != NumElts; ++i)
2986 Indices[i] = i;
2987
2988 // First extract half of each vector. This gives better codegen than
2989 // doing it in a single shuffle.
2990 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2991 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2992 // Concat the vectors.
2993 // NOTE: Operands have to be swapped to match intrinsic definition.
2994 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2995 Rep = Builder.CreateBitCast(Rep, CI->getType());
2996 } else if (Name == "avx512.kand.w") {
2997 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2998 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2999 Rep = Builder.CreateAnd(LHS, RHS);
3000 Rep = Builder.CreateBitCast(Rep, CI->getType());
3001 } else if (Name == "avx512.kandn.w") {
3002 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3003 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3004 LHS = Builder.CreateNot(LHS);
3005 Rep = Builder.CreateAnd(LHS, RHS);
3006 Rep = Builder.CreateBitCast(Rep, CI->getType());
3007 } else if (Name == "avx512.kor.w") {
3008 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3009 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3010 Rep = Builder.CreateOr(LHS, RHS);
3011 Rep = Builder.CreateBitCast(Rep, CI->getType());
3012 } else if (Name == "avx512.kxor.w") {
3013 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3014 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3015 Rep = Builder.CreateXor(LHS, RHS);
3016 Rep = Builder.CreateBitCast(Rep, CI->getType());
3017 } else if (Name == "avx512.kxnor.w") {
3018 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3019 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3020 LHS = Builder.CreateNot(LHS);
3021 Rep = Builder.CreateXor(LHS, RHS);
3022 Rep = Builder.CreateBitCast(Rep, CI->getType());
3023 } else if (Name == "avx512.knot.w") {
3024 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3025 Rep = Builder.CreateNot(Rep);
3026 Rep = Builder.CreateBitCast(Rep, CI->getType());
3027 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3028 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3029 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3030 Rep = Builder.CreateOr(LHS, RHS);
3031 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3032 Value *C;
3033 if (Name[14] == 'c')
3034 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
3035 else
3036 C = ConstantInt::getNullValue(Builder.getInt16Ty());
3037 Rep = Builder.CreateICmpEQ(Rep, C);
3038 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3039 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3040 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3041 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3042 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3043 Type *I32Ty = Type::getInt32Ty(C);
3044 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
3045 ConstantInt::get(I32Ty, 0));
3046 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
3047 ConstantInt::get(I32Ty, 0));
3048 Value *EltOp;
3049 if (Name.contains(".add."))
3050 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3051 else if (Name.contains(".sub."))
3052 EltOp = Builder.CreateFSub(Elt0, Elt1);
3053 else if (Name.contains(".mul."))
3054 EltOp = Builder.CreateFMul(Elt0, Elt1);
3055 else
3056 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3057 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3058 ConstantInt::get(I32Ty, 0));
3059 } else if (Name.starts_with("avx512.mask.pcmp")) {
3060 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3061 bool CmpEq = Name[16] == 'e';
3062 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3063 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3064 Type *OpTy = CI->getArgOperand(0)->getType();
3065 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3066 Intrinsic::ID IID;
3067 switch (VecWidth) {
3068 default:
3069 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3070 break;
3071 case 128:
3072 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3073 break;
3074 case 256:
3075 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3076 break;
3077 case 512:
3078 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3079 break;
3080 }
3081
3082 Rep =
3083 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3084 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3085 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3086 Type *OpTy = CI->getArgOperand(0)->getType();
3087 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3088 unsigned EltWidth = OpTy->getScalarSizeInBits();
3089 Intrinsic::ID IID;
3090 if (VecWidth == 128 && EltWidth == 32)
3091 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3092 else if (VecWidth == 256 && EltWidth == 32)
3093 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3094 else if (VecWidth == 512 && EltWidth == 32)
3095 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3096 else if (VecWidth == 128 && EltWidth == 64)
3097 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3098 else if (VecWidth == 256 && EltWidth == 64)
3099 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3100 else if (VecWidth == 512 && EltWidth == 64)
3101 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3102 else
3103 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3104
3105 Rep =
3106 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3107 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3108 } else if (Name.starts_with("avx512.cmp.p")) {
3109 SmallVector<Value *, 4> Args(CI->args());
3110 Type *OpTy = Args[0]->getType();
3111 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3112 unsigned EltWidth = OpTy->getScalarSizeInBits();
3113 Intrinsic::ID IID;
3114 if (VecWidth == 128 && EltWidth == 32)
3115 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3116 else if (VecWidth == 256 && EltWidth == 32)
3117 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3118 else if (VecWidth == 512 && EltWidth == 32)
3119 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3120 else if (VecWidth == 128 && EltWidth == 64)
3121 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3122 else if (VecWidth == 256 && EltWidth == 64)
3123 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3124 else if (VecWidth == 512 && EltWidth == 64)
3125 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3126 else
3127 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3128
3130 if (VecWidth == 512)
3131 std::swap(Mask, Args.back());
3132 Args.push_back(Mask);
3133
3134 Rep = Builder.CreateIntrinsic(IID, Args);
3135 } else if (Name.starts_with("avx512.mask.cmp.")) {
3136 // Integer compare intrinsics.
3137 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3138 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3139 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3140 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3141 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3142 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3143 Name.starts_with("avx512.cvtw2mask.") ||
3144 Name.starts_with("avx512.cvtd2mask.") ||
3145 Name.starts_with("avx512.cvtq2mask.")) {
3146 Value *Op = CI->getArgOperand(0);
3147 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3148 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3149 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3150 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3151 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3152 Name.starts_with("avx512.mask.pabs")) {
3153 Rep = upgradeAbs(Builder, *CI);
3154 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3155 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3156 Name.starts_with("avx512.mask.pmaxs")) {
3157 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3158 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3159 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3160 Name.starts_with("avx512.mask.pmaxu")) {
3161 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3162 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3163 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3164 Name.starts_with("avx512.mask.pmins")) {
3165 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3166 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3167 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3168 Name.starts_with("avx512.mask.pminu")) {
3169 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3170 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3171 Name == "avx512.pmulu.dq.512" ||
3172 Name.starts_with("avx512.mask.pmulu.dq.")) {
3173 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3174 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3175 Name == "avx512.pmul.dq.512" ||
3176 Name.starts_with("avx512.mask.pmul.dq.")) {
3177 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3178 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3179 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3180 Rep =
3181 Builder.CreateSIToFP(CI->getArgOperand(1),
3182 cast<VectorType>(CI->getType())->getElementType());
3183 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3184 } else if (Name == "avx512.cvtusi2sd") {
3185 Rep =
3186 Builder.CreateUIToFP(CI->getArgOperand(1),
3187 cast<VectorType>(CI->getType())->getElementType());
3188 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3189 } else if (Name == "sse2.cvtss2sd") {
3190 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3191 Rep = Builder.CreateFPExt(
3192 Rep, cast<VectorType>(CI->getType())->getElementType());
3193 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3194 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3195 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3196 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3197 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3198 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3199 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3200 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3201 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3202 Name == "avx512.mask.cvtqq2ps.256" ||
3203 Name == "avx512.mask.cvtqq2ps.512" ||
3204 Name == "avx512.mask.cvtuqq2ps.256" ||
3205 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3206 Name == "avx.cvt.ps2.pd.256" ||
3207 Name == "avx512.mask.cvtps2pd.128" ||
3208 Name == "avx512.mask.cvtps2pd.256") {
3209 auto *DstTy = cast<FixedVectorType>(CI->getType());
3210 Rep = CI->getArgOperand(0);
3211 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3212
3213 unsigned NumDstElts = DstTy->getNumElements();
3214 if (NumDstElts < SrcTy->getNumElements()) {
3215 assert(NumDstElts == 2 && "Unexpected vector size");
3216 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3217 }
3218
3219 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3220 bool IsUnsigned = Name.contains("cvtu");
3221 if (IsPS2PD)
3222 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3223 else if (CI->arg_size() == 4 &&
3224 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3225 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3226 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3227 : Intrinsic::x86_avx512_sitofp_round;
3228 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3229 {Rep, CI->getArgOperand(3)});
3230 } else {
3231 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3232 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3233 }
3234
3235 if (CI->arg_size() >= 3)
3236 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3237 CI->getArgOperand(1));
3238 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3239 Name.starts_with("vcvtph2ps.")) {
3240 auto *DstTy = cast<FixedVectorType>(CI->getType());
3241 Rep = CI->getArgOperand(0);
3242 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3243 unsigned NumDstElts = DstTy->getNumElements();
3244 if (NumDstElts != SrcTy->getNumElements()) {
3245 assert(NumDstElts == 4 && "Unexpected vector size");
3246 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3247 }
3248 Rep = Builder.CreateBitCast(
3249 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3250 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3251 if (CI->arg_size() >= 3)
3252 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3253 CI->getArgOperand(1));
3254 } else if (Name.starts_with("avx512.mask.load")) {
3255 // "avx512.mask.loadu." or "avx512.mask.load."
3256 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3257 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3258 CI->getArgOperand(2), Aligned);
3259 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3260 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3261 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3262 ResultTy->getNumElements());
3263
3264 Rep = Builder.CreateIntrinsic(
3265 Intrinsic::masked_expandload, ResultTy,
3266 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3267 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3268 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3269 Value *MaskVec =
3270 getX86MaskVec(Builder, CI->getArgOperand(2),
3271 cast<FixedVectorType>(ResultTy)->getNumElements());
3272
3273 Rep = Builder.CreateIntrinsic(
3274 Intrinsic::masked_compressstore, ResultTy,
3275 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3276 } else if (Name.starts_with("avx512.mask.compress.") ||
3277 Name.starts_with("avx512.mask.expand.")) {
3278 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3279
3280 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3281 ResultTy->getNumElements());
3282
3283 bool IsCompress = Name[12] == 'c';
3284 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3285 : Intrinsic::x86_avx512_mask_expand;
3286 Rep = Builder.CreateIntrinsic(
3287 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3288 } else if (Name.starts_with("xop.vpcom")) {
3289 bool IsSigned;
3290 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3291 Name.ends_with("uq"))
3292 IsSigned = false;
3293 else if (Name.ends_with("b") || Name.ends_with("w") ||
3294 Name.ends_with("d") || Name.ends_with("q"))
3295 IsSigned = true;
3296 else
3297 reportFatalUsageErrorWithCI("Intrinsic has unknown suffix", CI);
3298
3299 unsigned Imm;
3300 if (CI->arg_size() == 3) {
3301 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3302 } else {
3303 Name = Name.substr(9); // strip off "xop.vpcom"
3304 if (Name.starts_with("lt"))
3305 Imm = 0;
3306 else if (Name.starts_with("le"))
3307 Imm = 1;
3308 else if (Name.starts_with("gt"))
3309 Imm = 2;
3310 else if (Name.starts_with("ge"))
3311 Imm = 3;
3312 else if (Name.starts_with("eq"))
3313 Imm = 4;
3314 else if (Name.starts_with("ne"))
3315 Imm = 5;
3316 else if (Name.starts_with("false"))
3317 Imm = 6;
3318 else if (Name.starts_with("true"))
3319 Imm = 7;
3320 else
3321 llvm_unreachable("Unknown condition");
3322 }
3323
3324 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3325 } else if (Name.starts_with("xop.vpcmov")) {
3326 Value *Sel = CI->getArgOperand(2);
3327 Value *NotSel = Builder.CreateNot(Sel);
3328 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3329 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3330 Rep = Builder.CreateOr(Sel0, Sel1);
3331 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3332 Name.starts_with("avx512.mask.prol")) {
3333 Rep = upgradeX86Rotate(Builder, *CI, false);
3334 } else if (Name.starts_with("avx512.pror") ||
3335 Name.starts_with("avx512.mask.pror")) {
3336 Rep = upgradeX86Rotate(Builder, *CI, true);
3337 } else if (Name.starts_with("avx512.vpshld.") ||
3338 Name.starts_with("avx512.mask.vpshld") ||
3339 Name.starts_with("avx512.maskz.vpshld")) {
3340 bool ZeroMask = Name[11] == 'z';
3341 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3342 } else if (Name.starts_with("avx512.vpshrd.") ||
3343 Name.starts_with("avx512.mask.vpshrd") ||
3344 Name.starts_with("avx512.maskz.vpshrd")) {
3345 bool ZeroMask = Name[11] == 'z';
3346 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3347 } else if (Name == "sse42.crc32.64.8") {
3348 Value *Trunc0 =
3349 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3350 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3351 {Trunc0, CI->getArgOperand(1)});
3352 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3353 } else if (Name.starts_with("avx.vbroadcast.s") ||
3354 Name.starts_with("avx512.vbroadcast.s")) {
3355 // Replace broadcasts with a series of insertelements.
3356 auto *VecTy = cast<FixedVectorType>(CI->getType());
3357 Type *EltTy = VecTy->getElementType();
3358 unsigned EltNum = VecTy->getNumElements();
3359 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3360 Type *I32Ty = Type::getInt32Ty(C);
3361 Rep = PoisonValue::get(VecTy);
3362 for (unsigned I = 0; I < EltNum; ++I)
3363 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3364 } else if (Name.starts_with("sse41.pmovsx") ||
3365 Name.starts_with("sse41.pmovzx") ||
3366 Name.starts_with("avx2.pmovsx") ||
3367 Name.starts_with("avx2.pmovzx") ||
3368 Name.starts_with("avx512.mask.pmovsx") ||
3369 Name.starts_with("avx512.mask.pmovzx")) {
3370 auto *DstTy = cast<FixedVectorType>(CI->getType());
3371 unsigned NumDstElts = DstTy->getNumElements();
3372
3373 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3374 SmallVector<int, 8> ShuffleMask(NumDstElts);
3375 for (unsigned i = 0; i != NumDstElts; ++i)
3376 ShuffleMask[i] = i;
3377
3378 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3379
3380 bool DoSext = Name.contains("pmovsx");
3381 Rep =
3382 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3383 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3384 if (CI->arg_size() == 3)
3385 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3386 CI->getArgOperand(1));
3387 } else if (Name == "avx512.mask.pmov.qd.256" ||
3388 Name == "avx512.mask.pmov.qd.512" ||
3389 Name == "avx512.mask.pmov.wb.256" ||
3390 Name == "avx512.mask.pmov.wb.512") {
3391 Type *Ty = CI->getArgOperand(1)->getType();
3392 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3393 Rep =
3394 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3395 } else if (Name.starts_with("avx.vbroadcastf128") ||
3396 Name == "avx2.vbroadcasti128") {
3397 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3398 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3399 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3400 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3401 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3402 if (NumSrcElts == 2)
3403 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3404 else
3405 Rep = Builder.CreateShuffleVector(Load,
3406 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3407 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3408 Name.starts_with("avx512.mask.shuf.f")) {
3409 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3410 Type *VT = CI->getType();
3411 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3412 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3413 unsigned ControlBitsMask = NumLanes - 1;
3414 unsigned NumControlBits = NumLanes / 2;
3415 SmallVector<int, 8> ShuffleMask(0);
3416
3417 for (unsigned l = 0; l != NumLanes; ++l) {
3418 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3419 // We actually need the other source.
3420 if (l >= NumLanes / 2)
3421 LaneMask += NumLanes;
3422 for (unsigned i = 0; i != NumElementsInLane; ++i)
3423 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3424 }
3425 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3426 CI->getArgOperand(1), ShuffleMask);
3427 Rep =
3428 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3429 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3430 Name.starts_with("avx512.mask.broadcasti")) {
3431 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3432 ->getNumElements();
3433 unsigned NumDstElts =
3434 cast<FixedVectorType>(CI->getType())->getNumElements();
3435
3436 SmallVector<int, 8> ShuffleMask(NumDstElts);
3437 for (unsigned i = 0; i != NumDstElts; ++i)
3438 ShuffleMask[i] = i % NumSrcElts;
3439
3440 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3441 CI->getArgOperand(0), ShuffleMask);
3442 Rep =
3443 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3444 } else if (Name.starts_with("avx2.pbroadcast") ||
3445 Name.starts_with("avx2.vbroadcast") ||
3446 Name.starts_with("avx512.pbroadcast") ||
3447 Name.starts_with("avx512.mask.broadcast.s")) {
3448 // Replace vp?broadcasts with a vector shuffle.
3449 Value *Op = CI->getArgOperand(0);
3450 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3451 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3454 Rep = Builder.CreateShuffleVector(Op, M);
3455
3456 if (CI->arg_size() == 3)
3457 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3458 CI->getArgOperand(1));
3459 } else if (Name.starts_with("sse2.padds.") ||
3460 Name.starts_with("avx2.padds.") ||
3461 Name.starts_with("avx512.padds.") ||
3462 Name.starts_with("avx512.mask.padds.")) {
3463 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3464 } else if (Name.starts_with("sse2.psubs.") ||
3465 Name.starts_with("avx2.psubs.") ||
3466 Name.starts_with("avx512.psubs.") ||
3467 Name.starts_with("avx512.mask.psubs.")) {
3468 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3469 } else if (Name.starts_with("sse2.paddus.") ||
3470 Name.starts_with("avx2.paddus.") ||
3471 Name.starts_with("avx512.mask.paddus.")) {
3472 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3473 } else if (Name.starts_with("sse2.psubus.") ||
3474 Name.starts_with("avx2.psubus.") ||
3475 Name.starts_with("avx512.mask.psubus.")) {
3476 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3477 } else if (Name.starts_with("avx512.mask.palignr.")) {
3478 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3479 CI->getArgOperand(1), CI->getArgOperand(2),
3480 CI->getArgOperand(3), CI->getArgOperand(4),
3481 false);
3482 } else if (Name.starts_with("avx512.mask.valign.")) {
3484 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3485 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3486 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3487 // 128/256-bit shift left specified in bits.
3488 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3489 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3490 Shift / 8); // Shift is in bits.
3491 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3492 // 128/256-bit shift right specified in bits.
3493 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3494 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3495 Shift / 8); // Shift is in bits.
3496 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3497 Name == "avx512.psll.dq.512") {
3498 // 128/256/512-bit shift left specified in bytes.
3499 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3500 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3501 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3502 Name == "avx512.psrl.dq.512") {
3503 // 128/256/512-bit shift right specified in bytes.
3504 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3505 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3506 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3507 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3508 Name.starts_with("avx2.pblendd.")) {
3509 Value *Op0 = CI->getArgOperand(0);
3510 Value *Op1 = CI->getArgOperand(1);
3511 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3512 auto *VecTy = cast<FixedVectorType>(CI->getType());
3513 unsigned NumElts = VecTy->getNumElements();
3514
3515 SmallVector<int, 16> Idxs(NumElts);
3516 for (unsigned i = 0; i != NumElts; ++i)
3517 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3518
3519 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3520 } else if (Name.starts_with("avx.vinsertf128.") ||
3521 Name == "avx2.vinserti128" ||
3522 Name.starts_with("avx512.mask.insert")) {
3523 Value *Op0 = CI->getArgOperand(0);
3524 Value *Op1 = CI->getArgOperand(1);
3525 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3526 unsigned DstNumElts =
3527 cast<FixedVectorType>(CI->getType())->getNumElements();
3528 unsigned SrcNumElts =
3529 cast<FixedVectorType>(Op1->getType())->getNumElements();
3530 unsigned Scale = DstNumElts / SrcNumElts;
3531
3532 // Mask off the high bits of the immediate value; hardware ignores those.
3533 Imm = Imm % Scale;
3534
3535 // Extend the second operand into a vector the size of the destination.
3536 SmallVector<int, 8> Idxs(DstNumElts);
3537 for (unsigned i = 0; i != SrcNumElts; ++i)
3538 Idxs[i] = i;
3539 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3540 Idxs[i] = SrcNumElts;
3541 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3542
3543 // Insert the second operand into the first operand.
3544
3545 // Note that there is no guarantee that instruction lowering will actually
3546 // produce a vinsertf128 instruction for the created shuffles. In
3547 // particular, the 0 immediate case involves no lane changes, so it can
3548 // be handled as a blend.
3549
3550 // Example of shuffle mask for 32-bit elements:
3551 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3552 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3553
3554 // First fill with identity mask.
3555 for (unsigned i = 0; i != DstNumElts; ++i)
3556 Idxs[i] = i;
3557 // Then replace the elements where we need to insert.
3558 for (unsigned i = 0; i != SrcNumElts; ++i)
3559 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3560 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3561
3562 // If the intrinsic has a mask operand, handle that.
3563 if (CI->arg_size() == 5)
3564 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3565 CI->getArgOperand(3));
3566 } else if (Name.starts_with("avx.vextractf128.") ||
3567 Name == "avx2.vextracti128" ||
3568 Name.starts_with("avx512.mask.vextract")) {
3569 Value *Op0 = CI->getArgOperand(0);
3570 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3571 unsigned DstNumElts =
3572 cast<FixedVectorType>(CI->getType())->getNumElements();
3573 unsigned SrcNumElts =
3574 cast<FixedVectorType>(Op0->getType())->getNumElements();
3575 unsigned Scale = SrcNumElts / DstNumElts;
3576
3577 // Mask off the high bits of the immediate value; hardware ignores those.
3578 Imm = Imm % Scale;
3579
3580 // Get indexes for the subvector of the input vector.
3581 SmallVector<int, 8> Idxs(DstNumElts);
3582 for (unsigned i = 0; i != DstNumElts; ++i) {
3583 Idxs[i] = i + (Imm * DstNumElts);
3584 }
3585 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3586
3587 // If the intrinsic has a mask operand, handle that.
3588 if (CI->arg_size() == 4)
3589 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3590 CI->getArgOperand(2));
3591 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3592 Name.starts_with("avx512.mask.perm.di.")) {
3593 Value *Op0 = CI->getArgOperand(0);
3594 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3595 auto *VecTy = cast<FixedVectorType>(CI->getType());
3596 unsigned NumElts = VecTy->getNumElements();
3597
3598 SmallVector<int, 8> Idxs(NumElts);
3599 for (unsigned i = 0; i != NumElts; ++i)
3600 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3601
3602 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3603
3604 if (CI->arg_size() == 4)
3605 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3606 CI->getArgOperand(2));
3607 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3608 // The immediate permute control byte looks like this:
3609 // [1:0] - select 128 bits from sources for low half of destination
3610 // [2] - ignore
3611 // [3] - zero low half of destination
3612 // [5:4] - select 128 bits from sources for high half of destination
3613 // [6] - ignore
3614 // [7] - zero high half of destination
3615
3616 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3617
3618 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3619 unsigned HalfSize = NumElts / 2;
3620 SmallVector<int, 8> ShuffleMask(NumElts);
3621
3622 // Determine which operand(s) are actually in use for this instruction.
3623 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3624 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3625
3626 // If needed, replace operands based on zero mask.
3627 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3628 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3629
3630 // Permute low half of result.
3631 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3632 for (unsigned i = 0; i < HalfSize; ++i)
3633 ShuffleMask[i] = StartIndex + i;
3634
3635 // Permute high half of result.
3636 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3637 for (unsigned i = 0; i < HalfSize; ++i)
3638 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3639
3640 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3641
3642 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3643 Name.starts_with("avx512.mask.vpermil.p") ||
3644 Name.starts_with("avx512.mask.pshuf.d.")) {
3645 Value *Op0 = CI->getArgOperand(0);
3646 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3647 auto *VecTy = cast<FixedVectorType>(CI->getType());
3648 unsigned NumElts = VecTy->getNumElements();
3649 // Calculate the size of each index in the immediate.
3650 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3651 unsigned IdxMask = ((1 << IdxSize) - 1);
3652
3653 SmallVector<int, 8> Idxs(NumElts);
3654 // Lookup the bits for this element, wrapping around the immediate every
3655 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3656 // to offset by the first index of each group.
3657 for (unsigned i = 0; i != NumElts; ++i)
3658 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3659
3660 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3661
3662 if (CI->arg_size() == 4)
3663 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3664 CI->getArgOperand(2));
3665 } else if (Name == "sse2.pshufl.w" ||
3666 Name.starts_with("avx512.mask.pshufl.w.")) {
3667 Value *Op0 = CI->getArgOperand(0);
3668 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3669 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3670
3671 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3672 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3673
3674 SmallVector<int, 16> Idxs(NumElts);
3675 for (unsigned l = 0; l != NumElts; l += 8) {
3676 for (unsigned i = 0; i != 4; ++i)
3677 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3678 for (unsigned i = 4; i != 8; ++i)
3679 Idxs[i + l] = i + l;
3680 }
3681
3682 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3683
3684 if (CI->arg_size() == 4)
3685 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3686 CI->getArgOperand(2));
3687 } else if (Name == "sse2.pshufh.w" ||
3688 Name.starts_with("avx512.mask.pshufh.w.")) {
3689 Value *Op0 = CI->getArgOperand(0);
3690 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3691 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3692
3693 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3694 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3695
3696 SmallVector<int, 16> Idxs(NumElts);
3697 for (unsigned l = 0; l != NumElts; l += 8) {
3698 for (unsigned i = 0; i != 4; ++i)
3699 Idxs[i + l] = i + l;
3700 for (unsigned i = 0; i != 4; ++i)
3701 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3702 }
3703
3704 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3705
3706 if (CI->arg_size() == 4)
3707 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3708 CI->getArgOperand(2));
3709 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3710 Value *Op0 = CI->getArgOperand(0);
3711 Value *Op1 = CI->getArgOperand(1);
3712 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3713 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3714
3715 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3716 unsigned HalfLaneElts = NumLaneElts / 2;
3717
3718 SmallVector<int, 16> Idxs(NumElts);
3719 for (unsigned i = 0; i != NumElts; ++i) {
3720 // Base index is the starting element of the lane.
3721 Idxs[i] = i - (i % NumLaneElts);
3722 // If we are half way through the lane switch to the other source.
3723 if ((i % NumLaneElts) >= HalfLaneElts)
3724 Idxs[i] += NumElts;
3725 // Now select the specific element by adding HalfLaneElts bits from
3726 // the immediate, wrapping around the immediate every 8 bits.
3727 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3728 }
3729
3730 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3731
3732 Rep =
3733 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3734 } else if (Name.starts_with("avx512.mask.movddup") ||
3735 Name.starts_with("avx512.mask.movshdup") ||
3736 Name.starts_with("avx512.mask.movsldup")) {
3737 Value *Op0 = CI->getArgOperand(0);
3738 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3739 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3740
3741 unsigned Offset = 0;
3742 if (Name.starts_with("avx512.mask.movshdup."))
3743 Offset = 1;
3744
3745 SmallVector<int, 16> Idxs(NumElts);
3746 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3747 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3748 Idxs[i + l + 0] = i + l + Offset;
3749 Idxs[i + l + 1] = i + l + Offset;
3750 }
3751
3752 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3753
3754 Rep =
3755 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3756 } else if (Name.starts_with("avx512.mask.punpckl") ||
3757 Name.starts_with("avx512.mask.unpckl.")) {
3758 Value *Op0 = CI->getArgOperand(0);
3759 Value *Op1 = CI->getArgOperand(1);
3760 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3761 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3762
3763 SmallVector<int, 64> Idxs(NumElts);
3764 for (int l = 0; l != NumElts; l += NumLaneElts)
3765 for (int i = 0; i != NumLaneElts; ++i)
3766 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3767
3768 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3769
3770 Rep =
3771 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3772 } else if (Name.starts_with("avx512.mask.punpckh") ||
3773 Name.starts_with("avx512.mask.unpckh.")) {
3774 Value *Op0 = CI->getArgOperand(0);
3775 Value *Op1 = CI->getArgOperand(1);
3776 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3777 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3778
3779 SmallVector<int, 64> Idxs(NumElts);
3780 for (int l = 0; l != NumElts; l += NumLaneElts)
3781 for (int i = 0; i != NumLaneElts; ++i)
3782 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3783
3784 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3785
3786 Rep =
3787 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3788 } else if (Name.starts_with("avx512.mask.and.") ||
3789 Name.starts_with("avx512.mask.pand.")) {
3790 VectorType *FTy = cast<VectorType>(CI->getType());
3792 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3793 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3794 Rep = Builder.CreateBitCast(Rep, FTy);
3795 Rep =
3796 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3797 } else if (Name.starts_with("avx512.mask.andn.") ||
3798 Name.starts_with("avx512.mask.pandn.")) {
3799 VectorType *FTy = cast<VectorType>(CI->getType());
3801 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3802 Rep = Builder.CreateAnd(Rep,
3803 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3804 Rep = Builder.CreateBitCast(Rep, FTy);
3805 Rep =
3806 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3807 } else if (Name.starts_with("avx512.mask.or.") ||
3808 Name.starts_with("avx512.mask.por.")) {
3809 VectorType *FTy = cast<VectorType>(CI->getType());
3811 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3812 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3813 Rep = Builder.CreateBitCast(Rep, FTy);
3814 Rep =
3815 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3816 } else if (Name.starts_with("avx512.mask.xor.") ||
3817 Name.starts_with("avx512.mask.pxor.")) {
3818 VectorType *FTy = cast<VectorType>(CI->getType());
3820 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3821 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3822 Rep = Builder.CreateBitCast(Rep, FTy);
3823 Rep =
3824 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3825 } else if (Name.starts_with("avx512.mask.padd.")) {
3826 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3827 Rep =
3828 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3829 } else if (Name.starts_with("avx512.mask.psub.")) {
3830 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3831 Rep =
3832 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3833 } else if (Name.starts_with("avx512.mask.pmull.")) {
3834 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3835 Rep =
3836 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3837 } else if (Name.starts_with("avx512.mask.add.p")) {
3838 if (Name.ends_with(".512")) {
3839 Intrinsic::ID IID;
3840 if (Name[17] == 's')
3841 IID = Intrinsic::x86_avx512_add_ps_512;
3842 else
3843 IID = Intrinsic::x86_avx512_add_pd_512;
3844
3845 Rep = Builder.CreateIntrinsic(
3846 IID,
3847 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3848 } else {
3849 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3850 }
3851 Rep =
3852 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3853 } else if (Name.starts_with("avx512.mask.div.p")) {
3854 if (Name.ends_with(".512")) {
3855 Intrinsic::ID IID;
3856 if (Name[17] == 's')
3857 IID = Intrinsic::x86_avx512_div_ps_512;
3858 else
3859 IID = Intrinsic::x86_avx512_div_pd_512;
3860
3861 Rep = Builder.CreateIntrinsic(
3862 IID,
3863 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3864 } else {
3865 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3866 }
3867 Rep =
3868 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3869 } else if (Name.starts_with("avx512.mask.mul.p")) {
3870 if (Name.ends_with(".512")) {
3871 Intrinsic::ID IID;
3872 if (Name[17] == 's')
3873 IID = Intrinsic::x86_avx512_mul_ps_512;
3874 else
3875 IID = Intrinsic::x86_avx512_mul_pd_512;
3876
3877 Rep = Builder.CreateIntrinsic(
3878 IID,
3879 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3880 } else {
3881 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3882 }
3883 Rep =
3884 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3885 } else if (Name.starts_with("avx512.mask.sub.p")) {
3886 if (Name.ends_with(".512")) {
3887 Intrinsic::ID IID;
3888 if (Name[17] == 's')
3889 IID = Intrinsic::x86_avx512_sub_ps_512;
3890 else
3891 IID = Intrinsic::x86_avx512_sub_pd_512;
3892
3893 Rep = Builder.CreateIntrinsic(
3894 IID,
3895 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3896 } else {
3897 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3898 }
3899 Rep =
3900 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3901 } else if ((Name.starts_with("avx512.mask.max.p") ||
3902 Name.starts_with("avx512.mask.min.p")) &&
3903 Name.drop_front(18) == ".512") {
3904 bool IsDouble = Name[17] == 'd';
3905 bool IsMin = Name[13] == 'i';
3906 static const Intrinsic::ID MinMaxTbl[2][2] = {
3907 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3908 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3909 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3910
3911 Rep = Builder.CreateIntrinsic(
3912 IID,
3913 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3914 Rep =
3915 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3916 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3917 Rep =
3918 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3919 {CI->getArgOperand(0), Builder.getInt1(false)});
3920 Rep =
3921 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3922 } else if (Name.starts_with("avx512.mask.psll")) {
3923 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3924 bool IsVariable = Name[16] == 'v';
3925 char Size = Name[16] == '.' ? Name[17]
3926 : Name[17] == '.' ? Name[18]
3927 : Name[18] == '.' ? Name[19]
3928 : Name[20];
3929
3930 Intrinsic::ID IID;
3931 if (IsVariable && Name[17] != '.') {
3932 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3933 IID = Intrinsic::x86_avx2_psllv_q;
3934 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3935 IID = Intrinsic::x86_avx2_psllv_q_256;
3936 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3937 IID = Intrinsic::x86_avx2_psllv_d;
3938 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3939 IID = Intrinsic::x86_avx2_psllv_d_256;
3940 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3941 IID = Intrinsic::x86_avx512_psllv_w_128;
3942 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3943 IID = Intrinsic::x86_avx512_psllv_w_256;
3944 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3945 IID = Intrinsic::x86_avx512_psllv_w_512;
3946 else
3947 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3948 } else if (Name.ends_with(".128")) {
3949 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3950 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3951 : Intrinsic::x86_sse2_psll_d;
3952 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3953 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3954 : Intrinsic::x86_sse2_psll_q;
3955 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3956 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3957 : Intrinsic::x86_sse2_psll_w;
3958 else
3959 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3960 } else if (Name.ends_with(".256")) {
3961 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3962 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3963 : Intrinsic::x86_avx2_psll_d;
3964 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3965 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3966 : Intrinsic::x86_avx2_psll_q;
3967 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3968 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3969 : Intrinsic::x86_avx2_psll_w;
3970 else
3971 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3972 } else {
3973 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3974 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3975 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3976 : Intrinsic::x86_avx512_psll_d_512;
3977 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3978 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3979 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3980 : Intrinsic::x86_avx512_psll_q_512;
3981 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3982 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3983 : Intrinsic::x86_avx512_psll_w_512;
3984 else
3985 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3986 }
3987
3988 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3989 } else if (Name.starts_with("avx512.mask.psrl")) {
3990 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3991 bool IsVariable = Name[16] == 'v';
3992 char Size = Name[16] == '.' ? Name[17]
3993 : Name[17] == '.' ? Name[18]
3994 : Name[18] == '.' ? Name[19]
3995 : Name[20];
3996
3997 Intrinsic::ID IID;
3998 if (IsVariable && Name[17] != '.') {
3999 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
4000 IID = Intrinsic::x86_avx2_psrlv_q;
4001 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
4002 IID = Intrinsic::x86_avx2_psrlv_q_256;
4003 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
4004 IID = Intrinsic::x86_avx2_psrlv_d;
4005 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
4006 IID = Intrinsic::x86_avx2_psrlv_d_256;
4007 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
4008 IID = Intrinsic::x86_avx512_psrlv_w_128;
4009 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
4010 IID = Intrinsic::x86_avx512_psrlv_w_256;
4011 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
4012 IID = Intrinsic::x86_avx512_psrlv_w_512;
4013 else
4014 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4015 } else if (Name.ends_with(".128")) {
4016 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
4017 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4018 : Intrinsic::x86_sse2_psrl_d;
4019 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
4020 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4021 : Intrinsic::x86_sse2_psrl_q;
4022 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
4023 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4024 : Intrinsic::x86_sse2_psrl_w;
4025 else
4026 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4027 } else if (Name.ends_with(".256")) {
4028 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4029 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4030 : Intrinsic::x86_avx2_psrl_d;
4031 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4032 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4033 : Intrinsic::x86_avx2_psrl_q;
4034 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4035 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4036 : Intrinsic::x86_avx2_psrl_w;
4037 else
4038 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4039 } else {
4040 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4041 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4042 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4043 : Intrinsic::x86_avx512_psrl_d_512;
4044 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4045 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4046 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4047 : Intrinsic::x86_avx512_psrl_q_512;
4048 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
4049 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4050 : Intrinsic::x86_avx512_psrl_w_512;
4051 else
4052 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4053 }
4054
4055 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4056 } else if (Name.starts_with("avx512.mask.psra")) {
4057 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4058 bool IsVariable = Name[16] == 'v';
4059 char Size = Name[16] == '.' ? Name[17]
4060 : Name[17] == '.' ? Name[18]
4061 : Name[18] == '.' ? Name[19]
4062 : Name[20];
4063
4064 Intrinsic::ID IID;
4065 if (IsVariable && Name[17] != '.') {
4066 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4067 IID = Intrinsic::x86_avx2_psrav_d;
4068 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4069 IID = Intrinsic::x86_avx2_psrav_d_256;
4070 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4071 IID = Intrinsic::x86_avx512_psrav_w_128;
4072 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4073 IID = Intrinsic::x86_avx512_psrav_w_256;
4074 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4075 IID = Intrinsic::x86_avx512_psrav_w_512;
4076 else
4077 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4078 } else if (Name.ends_with(".128")) {
4079 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4080 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4081 : Intrinsic::x86_sse2_psra_d;
4082 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4083 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4084 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4085 : Intrinsic::x86_avx512_psra_q_128;
4086 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4087 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4088 : Intrinsic::x86_sse2_psra_w;
4089 else
4090 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4091 } else if (Name.ends_with(".256")) {
4092 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4093 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4094 : Intrinsic::x86_avx2_psra_d;
4095 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4096 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4097 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4098 : Intrinsic::x86_avx512_psra_q_256;
4099 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4100 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4101 : Intrinsic::x86_avx2_psra_w;
4102 else
4103 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4104 } else {
4105 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4106 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4107 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4108 : Intrinsic::x86_avx512_psra_d_512;
4109 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4110 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4111 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4112 : Intrinsic::x86_avx512_psra_q_512;
4113 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4114 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4115 : Intrinsic::x86_avx512_psra_w_512;
4116 else
4117 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4118 }
4119
4120 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4121 } else if (Name.starts_with("avx512.mask.move.s")) {
4122 Rep = upgradeMaskedMove(Builder, *CI);
4123 } else if (Name.starts_with("avx512.cvtmask2")) {
4124 Rep = upgradeMaskToInt(Builder, *CI);
4125 } else if (Name.ends_with(".movntdqa")) {
4127 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4128
4129 LoadInst *LI = Builder.CreateAlignedLoad(
4130 CI->getType(), CI->getArgOperand(0),
4132 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4133 Rep = LI;
4134 } else if (Name.starts_with("fma.vfmadd.") ||
4135 Name.starts_with("fma.vfmsub.") ||
4136 Name.starts_with("fma.vfnmadd.") ||
4137 Name.starts_with("fma.vfnmsub.")) {
4138 bool NegMul = Name[6] == 'n';
4139 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4140 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4141
4142 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4143 CI->getArgOperand(2)};
4144
4145 if (IsScalar) {
4146 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4147 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4148 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4149 }
4150
4151 if (NegMul && !IsScalar)
4152 Ops[0] = Builder.CreateFNeg(Ops[0]);
4153 if (NegMul && IsScalar)
4154 Ops[1] = Builder.CreateFNeg(Ops[1]);
4155 if (NegAcc)
4156 Ops[2] = Builder.CreateFNeg(Ops[2]);
4157
4158 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4159
4160 if (IsScalar)
4161 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4162 } else if (Name.starts_with("fma4.vfmadd.s")) {
4163 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4164 CI->getArgOperand(2)};
4165
4166 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4167 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4168 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4169
4170 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4171
4172 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4173 Rep, (uint64_t)0);
4174 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4175 Name.starts_with("avx512.maskz.vfmadd.s") ||
4176 Name.starts_with("avx512.mask3.vfmadd.s") ||
4177 Name.starts_with("avx512.mask3.vfmsub.s") ||
4178 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4179 bool IsMask3 = Name[11] == '3';
4180 bool IsMaskZ = Name[11] == 'z';
4181 // Drop the "avx512.mask." to make it easier.
4182 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4183 bool NegMul = Name[2] == 'n';
4184 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4185
4186 Value *A = CI->getArgOperand(0);
4187 Value *B = CI->getArgOperand(1);
4188 Value *C = CI->getArgOperand(2);
4189
4190 if (NegMul && (IsMask3 || IsMaskZ))
4191 A = Builder.CreateFNeg(A);
4192 if (NegMul && !(IsMask3 || IsMaskZ))
4193 B = Builder.CreateFNeg(B);
4194 if (NegAcc)
4195 C = Builder.CreateFNeg(C);
4196
4197 A = Builder.CreateExtractElement(A, (uint64_t)0);
4198 B = Builder.CreateExtractElement(B, (uint64_t)0);
4199 C = Builder.CreateExtractElement(C, (uint64_t)0);
4200
4201 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4202 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4203 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4204
4205 Intrinsic::ID IID;
4206 if (Name.back() == 'd')
4207 IID = Intrinsic::x86_avx512_vfmadd_f64;
4208 else
4209 IID = Intrinsic::x86_avx512_vfmadd_f32;
4210 Rep = Builder.CreateIntrinsic(IID, Ops);
4211 } else {
4212 Rep = Builder.CreateFMA(A, B, C);
4213 }
4214
4215 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4216 : IsMask3 ? C
4217 : A;
4218
4219 // For Mask3 with NegAcc, we need to create a new extractelement that
4220 // avoids the negation above.
4221 if (NegAcc && IsMask3)
4222 PassThru =
4223 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4224
4225 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4226 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4227 (uint64_t)0);
4228 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4229 Name.starts_with("avx512.mask.vfnmadd.p") ||
4230 Name.starts_with("avx512.mask.vfnmsub.p") ||
4231 Name.starts_with("avx512.mask3.vfmadd.p") ||
4232 Name.starts_with("avx512.mask3.vfmsub.p") ||
4233 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4234 Name.starts_with("avx512.maskz.vfmadd.p")) {
4235 bool IsMask3 = Name[11] == '3';
4236 bool IsMaskZ = Name[11] == 'z';
4237 // Drop the "avx512.mask." to make it easier.
4238 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4239 bool NegMul = Name[2] == 'n';
4240 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4241
4242 Value *A = CI->getArgOperand(0);
4243 Value *B = CI->getArgOperand(1);
4244 Value *C = CI->getArgOperand(2);
4245
4246 if (NegMul && (IsMask3 || IsMaskZ))
4247 A = Builder.CreateFNeg(A);
4248 if (NegMul && !(IsMask3 || IsMaskZ))
4249 B = Builder.CreateFNeg(B);
4250 if (NegAcc)
4251 C = Builder.CreateFNeg(C);
4252
4253 if (CI->arg_size() == 5 &&
4254 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4255 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4256 Intrinsic::ID IID;
4257 // Check the character before ".512" in string.
4258 if (Name[Name.size() - 5] == 's')
4259 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4260 else
4261 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4262
4263 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4264 } else {
4265 Rep = Builder.CreateFMA(A, B, C);
4266 }
4267
4268 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4269 : IsMask3 ? CI->getArgOperand(2)
4270 : CI->getArgOperand(0);
4271
4272 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4273 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4274 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4275 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4276 Intrinsic::ID IID;
4277 if (VecWidth == 128 && EltWidth == 32)
4278 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4279 else if (VecWidth == 256 && EltWidth == 32)
4280 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4281 else if (VecWidth == 128 && EltWidth == 64)
4282 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4283 else if (VecWidth == 256 && EltWidth == 64)
4284 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4285 else
4286 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4287
4288 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4289 CI->getArgOperand(2)};
4290 Ops[2] = Builder.CreateFNeg(Ops[2]);
4291 Rep = Builder.CreateIntrinsic(IID, Ops);
4292 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4293 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4294 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4295 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4296 bool IsMask3 = Name[11] == '3';
4297 bool IsMaskZ = Name[11] == 'z';
4298 // Drop the "avx512.mask." to make it easier.
4299 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4300 bool IsSubAdd = Name[3] == 's';
4301 if (CI->arg_size() == 5) {
4302 Intrinsic::ID IID;
4303 // Check the character before ".512" in string.
4304 if (Name[Name.size() - 5] == 's')
4305 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4306 else
4307 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4308
4309 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4310 CI->getArgOperand(2), CI->getArgOperand(4)};
4311 if (IsSubAdd)
4312 Ops[2] = Builder.CreateFNeg(Ops[2]);
4313
4314 Rep = Builder.CreateIntrinsic(IID, Ops);
4315 } else {
4316 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4317
4318 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4319 CI->getArgOperand(2)};
4320
4322 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4323 Value *Odd = Builder.CreateCall(FMA, Ops);
4324 Ops[2] = Builder.CreateFNeg(Ops[2]);
4325 Value *Even = Builder.CreateCall(FMA, Ops);
4326
4327 if (IsSubAdd)
4328 std::swap(Even, Odd);
4329
4330 SmallVector<int, 32> Idxs(NumElts);
4331 for (int i = 0; i != NumElts; ++i)
4332 Idxs[i] = i + (i % 2) * NumElts;
4333
4334 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4335 }
4336
4337 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4338 : IsMask3 ? CI->getArgOperand(2)
4339 : CI->getArgOperand(0);
4340
4341 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4342 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4343 Name.starts_with("avx512.maskz.pternlog.")) {
4344 bool ZeroMask = Name[11] == 'z';
4345 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4346 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4347 Intrinsic::ID IID;
4348 if (VecWidth == 128 && EltWidth == 32)
4349 IID = Intrinsic::x86_avx512_pternlog_d_128;
4350 else if (VecWidth == 256 && EltWidth == 32)
4351 IID = Intrinsic::x86_avx512_pternlog_d_256;
4352 else if (VecWidth == 512 && EltWidth == 32)
4353 IID = Intrinsic::x86_avx512_pternlog_d_512;
4354 else if (VecWidth == 128 && EltWidth == 64)
4355 IID = Intrinsic::x86_avx512_pternlog_q_128;
4356 else if (VecWidth == 256 && EltWidth == 64)
4357 IID = Intrinsic::x86_avx512_pternlog_q_256;
4358 else if (VecWidth == 512 && EltWidth == 64)
4359 IID = Intrinsic::x86_avx512_pternlog_q_512;
4360 else
4361 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4362
4363 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4364 CI->getArgOperand(2), CI->getArgOperand(3)};
4365 Rep = Builder.CreateIntrinsic(IID, Args);
4366 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4367 : CI->getArgOperand(0);
4368 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4369 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4370 Name.starts_with("avx512.maskz.vpmadd52")) {
4371 bool ZeroMask = Name[11] == 'z';
4372 bool High = Name[20] == 'h' || Name[21] == 'h';
4373 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4374 Intrinsic::ID IID;
4375 if (VecWidth == 128 && !High)
4376 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4377 else if (VecWidth == 256 && !High)
4378 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4379 else if (VecWidth == 512 && !High)
4380 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4381 else if (VecWidth == 128 && High)
4382 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4383 else if (VecWidth == 256 && High)
4384 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4385 else if (VecWidth == 512 && High)
4386 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4387 else
4388 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4389
4390 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4391 CI->getArgOperand(2)};
4392 Rep = Builder.CreateIntrinsic(IID, Args);
4393 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4394 : CI->getArgOperand(0);
4395 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4396 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4397 Name.starts_with("avx512.mask.vpermt2var.") ||
4398 Name.starts_with("avx512.maskz.vpermt2var.")) {
4399 bool ZeroMask = Name[11] == 'z';
4400 bool IndexForm = Name[17] == 'i';
4401 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4402 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4403 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4404 Name.starts_with("avx512.mask.vpdpbusds.") ||
4405 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4406 bool ZeroMask = Name[11] == 'z';
4407 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4408 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4409 Intrinsic::ID IID;
4410 if (VecWidth == 128 && !IsSaturating)
4411 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4412 else if (VecWidth == 256 && !IsSaturating)
4413 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4414 else if (VecWidth == 512 && !IsSaturating)
4415 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4416 else if (VecWidth == 128 && IsSaturating)
4417 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4418 else if (VecWidth == 256 && IsSaturating)
4419 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4420 else if (VecWidth == 512 && IsSaturating)
4421 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4422 else
4423 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4424
4425 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4426 CI->getArgOperand(2)};
4427
4428 // Input arguments types were incorrectly set to vectors of i32 before but
4429 // they should be vectors of i8. Insert bit cast when encountering the old
4430 // types
4431 if (Args[1]->getType()->isVectorTy() &&
4432 cast<VectorType>(Args[1]->getType())
4433 ->getElementType()
4434 ->isIntegerTy(32) &&
4435 Args[2]->getType()->isVectorTy() &&
4436 cast<VectorType>(Args[2]->getType())
4437 ->getElementType()
4438 ->isIntegerTy(32)) {
4439 Type *NewArgType = nullptr;
4440 if (VecWidth == 128)
4441 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4442 else if (VecWidth == 256)
4443 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4444 else if (VecWidth == 512)
4445 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4446 else
4447 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4448 CI);
4449
4450 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4451 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4452 }
4453
4454 Rep = Builder.CreateIntrinsic(IID, Args);
4455 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4456 : CI->getArgOperand(0);
4457 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4458 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4459 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4460 Name.starts_with("avx512.mask.vpdpwssds.") ||
4461 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4462 bool ZeroMask = Name[11] == 'z';
4463 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4464 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4465 Intrinsic::ID IID;
4466 if (VecWidth == 128 && !IsSaturating)
4467 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4468 else if (VecWidth == 256 && !IsSaturating)
4469 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4470 else if (VecWidth == 512 && !IsSaturating)
4471 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4472 else if (VecWidth == 128 && IsSaturating)
4473 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4474 else if (VecWidth == 256 && IsSaturating)
4475 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4476 else if (VecWidth == 512 && IsSaturating)
4477 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4478 else
4479 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4480
4481 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4482 CI->getArgOperand(2)};
4483
4484 // Input arguments types were incorrectly set to vectors of i32 before but
4485 // they should be vectors of i16. Insert bit cast when encountering the old
4486 // types
4487 if (Args[1]->getType()->isVectorTy() &&
4488 cast<VectorType>(Args[1]->getType())
4489 ->getElementType()
4490 ->isIntegerTy(32) &&
4491 Args[2]->getType()->isVectorTy() &&
4492 cast<VectorType>(Args[2]->getType())
4493 ->getElementType()
4494 ->isIntegerTy(32)) {
4495 Type *NewArgType = nullptr;
4496 if (VecWidth == 128)
4497 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4498 else if (VecWidth == 256)
4499 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4500 else if (VecWidth == 512)
4501 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4502 else
4503 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4504 CI);
4505
4506 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4507 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4508 }
4509
4510 Rep = Builder.CreateIntrinsic(IID, Args);
4511 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4512 : CI->getArgOperand(0);
4513 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4514 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4515 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4516 Name == "subborrow.u32" || Name == "subborrow.u64") {
4517 Intrinsic::ID IID;
4518 if (Name[0] == 'a' && Name.back() == '2')
4519 IID = Intrinsic::x86_addcarry_32;
4520 else if (Name[0] == 'a' && Name.back() == '4')
4521 IID = Intrinsic::x86_addcarry_64;
4522 else if (Name[0] == 's' && Name.back() == '2')
4523 IID = Intrinsic::x86_subborrow_32;
4524 else if (Name[0] == 's' && Name.back() == '4')
4525 IID = Intrinsic::x86_subborrow_64;
4526 else
4527 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4528
4529 // Make a call with 3 operands.
4530 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4531 CI->getArgOperand(2)};
4532 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4533
4534 // Extract the second result and store it.
4535 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4536 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4537 // Replace the original call result with the first result of the new call.
4538 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4539
4540 CI->replaceAllUsesWith(CF);
4541 Rep = nullptr;
4542 } else if (Name.starts_with("avx512.mask.") &&
4543 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4544 // Rep will be updated by the call in the condition.
4545 } else
4546 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4547
4548 return Rep;
4549}
4550
4552 Function *F, IRBuilder<> &Builder) {
// NOTE(review): the first line of this signature (return type, function name
// and the parameters that precede F) is missing from this listing. Name and CI
// are used below, so they are presumably declared there -- confirm against
// the complete file.
//
// Builds the replacement for a call CI to an old intrinsic whose name starts
// with "neon.bfcvt" or "sve.fcvt" and returns the new value. Any other Name
// falls through to the llvm_unreachable at the bottom.
4553 if (Name.starts_with("neon.bfcvt")) {
// "neon.bfcvtn2" must be tested first: "neon.bfcvtn" is a prefix of it.
4554 if (Name.starts_with("neon.bfcvtn2")) {
// LoMask is {0,1,2,3}; ConcatMask is {0,...,7}.
4555 SmallVector<int, 32> LoMask(4);
4556 std::iota(LoMask.begin(), LoMask.end(), 0);
4557 SmallVector<int, 32> ConcatMask(8);
4558 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
// Keep lanes 0..3 of operand 0, fptrunc operand 1 to the same type, then
// concatenate the two halves with an identity shuffle over both inputs.
4559 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4560 Value *Trunc =
4561 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4562 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4563 } else if (Name.starts_with("neon.bfcvtn")) {
4564 SmallVector<int, 32> ConcatMask(8);
4565 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4566 Type *V4BF16 =
4567 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
// fptrunc operand 0 to <4 x bfloat>, then widen to 8 lanes by concatenating
// with a zero vector (lanes 4..7 come from the zero operand).
4568 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4570 return Builder.CreateShuffleVector(
4571 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4572 } else {
// Remaining "neon.bfcvt*" names: a plain fptrunc of operand 0 to bfloat.
4573 return Builder.CreateFPTrunc(CI->getOperand(0),
4574 Type::getBFloatTy(F->getContext()));
4575 }
4576 } else if (Name.starts_with("sve.fcvt")) {
// Map the old name to its "_v2" intrinsic ID.
// NOTE(review): the lines that open and close this name-to-ID switch are
// missing from this listing; only the two .Case entries are visible.
4577 Intrinsic::ID NewID =
4579 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4580 .Case("sve.fcvtnt.bf16f32",
4581 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4583 if (NewID == Intrinsic::not_intrinsic)
4584 llvm_unreachable("Unhandled Intrinsic!");
4585
4586 SmallVector<Value *, 3> Args(CI->args());
4587
4588 // The original intrinsics incorrectly used a predicate based on the
4589 // smallest element type rather than the largest.
4590 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4591 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4592
4593 if (Args[1]->getType() != BadPredTy)
4594 llvm_unreachable("Unexpected predicate type!");
4595
// Re-type the predicate operand: <vscale x 8 x i1> -> svbool -> <vscale x 4 x i1>.
4596 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4597 BadPredTy, Args[1]);
4598 Args[1] = Builder.CreateIntrinsic(
4599 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4600
// The new call reuses the old call's name.
4601 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4602 CI->getName());
4603 }
4604
4605 llvm_unreachable("Unhandled Intrinsic!");
4606}
4607
4609 IRBuilder<> &Builder) {
// NOTE(review): the first line of this signature (return type, function name
// and the parameters that precede Builder) is missing from this listing. Name
// and CI are used below, so they are presumably declared there -- confirm
// against the complete file.
//
// Builds and returns the replacement for a call CI to one of the ARM MVE/CDE
// intrinsics listed below. Any other Name reaches the llvm_unreachable at the
// bottom.
4610 if (Name == "mve.vctp64.old") {
4611 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4612 // correct type.
4613 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4614 CI->getArgOperand(0),
4615 /*FMFSource=*/nullptr, CI->getName());
// Round-trip through pred_v2i (overloaded on <2 x i1>) and pred_i2v
// (overloaded on <4 x i1>) so the result has the <4 x i1> type.
4616 Value *C1 = Builder.CreateIntrinsic(
4617 Intrinsic::arm_mve_pred_v2i,
4618 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4619 return Builder.CreateIntrinsic(
4620 Intrinsic::arm_mve_pred_i2v,
4621 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4622 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4623 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4624 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4625 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4626 Name ==
4627 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4628 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4629 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4630 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4631 Name ==
4632 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4633 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4634 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4635 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4636 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4637 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4638 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4639 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
// Every name above ends in ".v4i1"; the call is re-created with the same
// intrinsic ID but with <2 x i1> (V2I1Ty) as the last overload type.
4640 std::vector<Type *> Tys;
4641 unsigned ID = CI->getIntrinsicID();
4642 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
// Choose the overload type list for the new call; which operand/result types
// participate depends on the intrinsic.
4643 switch (ID) {
4644 case Intrinsic::arm_mve_mull_int_predicated:
4645 case Intrinsic::arm_mve_vqdmull_predicated:
4646 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4647 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4648 break;
4649 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4650 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4651 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4652 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4653 V2I1Ty};
4654 break;
4655 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4656 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4657 CI->getOperand(1)->getType(), V2I1Ty};
4658 break;
4659 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4660 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4661 CI->getOperand(2)->getType(), V2I1Ty};
4662 break;
4663 case Intrinsic::arm_cde_vcx1q_predicated:
4664 case Intrinsic::arm_cde_vcx1qa_predicated:
4665 case Intrinsic::arm_cde_vcx2q_predicated:
4666 case Intrinsic::arm_cde_vcx2qa_predicated:
4667 case Intrinsic::arm_cde_vcx3q_predicated:
4668 case Intrinsic::arm_cde_vcx3qa_predicated:
4669 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4670 break;
4671 default:
4672 llvm_unreachable("Unhandled Intrinsic!");
4673 }
4674
// Copy the call arguments, converting each argument whose scalar type is one
// bit wide via pred_v2i (overloaded on <4 x i1>) followed by pred_i2v
// (overloaded on V2I1Ty). All other arguments are passed through unchanged.
4675 std::vector<Value *> Ops;
4676 for (Value *Op : CI->args()) {
4677 Type *Ty = Op->getType();
4678 if (Ty->getScalarSizeInBits() == 1) {
4679 Value *C1 = Builder.CreateIntrinsic(
4680 Intrinsic::arm_mve_pred_v2i,
4681 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4682 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4683 }
4684 Ops.push_back(Op);
4685 }
4686
// The new call reuses the old call's name.
4687 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4688 CI->getName());
4689 }
4690 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4691}
4692
4693// These are expected to have the arguments:
4694// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4695//
4696// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4697//
4699 Function *F, IRBuilder<> &Builder) {
// NOTE(review): the first line of this signature (return type, function name
// and the parameters that precede F) is missing from this listing. Name and CI
// are used below, so they are presumably declared there -- confirm against
// the complete file.
//
// Three groups of calls are handled, in this order:
//  1. the two legacy iu8 WMMA/SWMMAC intrinsics, re-created with an extra
//     trailing `false` (clamp) argument;
//  2. the WMMA intrinsics in the switch below, re-created without their
//     matrix A/B modifier operands;
//  3. everything else, treated as a legacy atomic intrinsic and replaced by an
//     atomicrmw instruction. This path returns nullptr for malformed calls.
4700 // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
4701 // for compatibility.
4702 auto UpgradeLegacyWMMAIUIntrinsicCall =
4703 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4704 ArrayRef<Type *> OverloadTys) -> Value * {
4705 // Prepare arguments, append clamp=0 for compatibility
4706 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4707 Args.push_back(Builder.getFalse());
4708
4709 // Insert the declaration for the right overload types
// NOTE(review): the line that declares NewDecl is missing from this listing.
4711 F->getParent(), F->getIntrinsicID(), OverloadTys);
4712
4713 // Copy operand bundles if any
// NOTE(review): the line that declares Bundles is missing from this listing.
4715 CI->getOperandBundlesAsDefs(Bundles);
4716
4717 // Create the new call and copy calling properties
4718 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4719 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4720 NewCall->setCallingConv(CI->getCallingConv());
4721 NewCall->setAttributes(CI->getAttributes());
4722 NewCall->setDebugLoc(CI->getDebugLoc());
4723 NewCall->copyMetadata(*CI);
4724 return NewCall;
4725 };
4726
// The overload types for the two legacy iu8 intrinsics are taken from the
// types of specific call operands.
4727 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4728 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4729 "intrinsic should have 7 arguments");
4730 Type *T1 = CI->getArgOperand(4)->getType();
4731 Type *T2 = CI->getArgOperand(1)->getType();
4732 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4733 }
4734 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4735 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4736 "intrinsic should have 8 arguments");
4737 Type *T1 = CI->getArgOperand(4)->getType();
4738 Type *T2 = CI->getArgOperand(1)->getType();
4739 Type *T3 = CI->getArgOperand(3)->getType();
4740 Type *T4 = CI->getArgOperand(5)->getType();
4741 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4742 }
4743
4744 switch (F->getIntrinsicID()) {
4745 default:
4746 break;
4747 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
4748 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
4749 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
4750 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
4751 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
4752 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
4753 // Drop src0 and src1 modifiers.
4754 const Value *Op0 = CI->getArgOperand(0);
4755 const Value *Op2 = CI->getArgOperand(2);
4756 assert(Op0->getType()->isIntegerTy() && Op2->getType()->isIntegerTy());
4757 const ConstantInt *ModA = dyn_cast<ConstantInt>(Op0);
4758 const ConstantInt *ModB = dyn_cast<ConstantInt>(Op2);
// dyn_cast yields null for a non-constant operand. Such a modifier cannot be
// shown to be zero, so it is rejected with the same usage error instead of
// being dereferenced.
4759 if (!ModA || !ModB || !ModA->isZero() || !ModB->isZero())
4760 reportFatalUsageError(Name + " matrix A and B modifiers shall be zero");
4761
// NOTE(review): the line that declares and seeds Args is missing from this
// listing; the loop below appends operands 4 and up to it.
4763 for (int I = 4, E = CI->arg_size(); I < E; ++I)
4764 Args.push_back(CI->getArgOperand(I));
4765
4766 SmallVector<Type *, 3> Overloads{F->getReturnType(), Args[0]->getType()};
4767 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
4768 Overloads.push_back(Args[3]->getType());
// NOTE(review): the line that declares NewDecl is missing from this listing.
4770 F->getParent(), F->getIntrinsicID(), Overloads);
4771
// NOTE(review): the line that declares Bundles is missing from this listing.
4773 CI->getOperandBundlesAsDefs(Bundles);
4774
// Re-create the call and carry over tail-call kind, calling convention,
// attributes, debug location, metadata and the old call's name.
4775 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4776 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4777 NewCall->setCallingConv(CI->getCallingConv());
4778 NewCall->setAttributes(CI->getAttributes());
4779 NewCall->setDebugLoc(CI->getDebugLoc());
4780 NewCall->copyMetadata(*CI);
4781 NewCall->takeName(CI);
4782 return NewCall;
4783 }
4784 }
4785
// Legacy atomic intrinsics: pick the atomicrmw operation from the name prefix.
// NOTE(review): the line that opens this prefix switch is missing from this
// listing.
4786 AtomicRMWInst::BinOp RMWOp =
4788 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4789 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4790 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4791 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4792 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4793 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4794 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4795 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4796 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4797 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4798 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4799 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4800 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4801
// NumOperands is the operand count of the call, which per the comment further
// down includes the callee in addition to the arguments.
4802 unsigned NumOperands = CI->getNumOperands();
4803 if (NumOperands < 3) // Malformed bitcode.
4804 return nullptr;
4805
4806 Value *Ptr = CI->getArgOperand(0);
4807 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4808 if (!PtrTy) // Malformed.
4809 return nullptr;
4810
4811 Value *Val = CI->getArgOperand(1);
4812 if (Val->getType() != CI->getType()) // Malformed.
4813 return nullptr;
4814
4815 ConstantInt *OrderArg = nullptr;
4816 bool IsVolatile = false;
4817
4818 // These should have 5 arguments (plus the callee). A separate version of the
4819 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4820 if (NumOperands > 3)
4821 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4822
4823 // Ignore scope argument at 3
4824
// A volatile argument that is not a constant is treated as volatile.
4825 if (NumOperands > 5) {
4826 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4827 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4828 }
4829
// Use the ordering argument only when it is a constant holding a valid
// atomic ordering.
// NOTE(review): the declaration of Order (with its default) and the lines that
// follow this if-statement are missing from this listing.
4831 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4832 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4835
4836 LLVMContext &Ctx = F->getContext();
4837
4838 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4839 Type *RetTy = CI->getType();
4840 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4841 if (VT->getElementType()->isIntegerTy(16)) {
4842 VectorType *AsBF16 =
4843 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4844 Val = Builder.CreateBitCast(Val, AsBF16);
4845 }
4846 }
4847
4848 // The scope argument never really worked correctly. Use agent as the most
4849 // conservative option which should still always produce the instruction.
4850 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4851 AtomicRMWInst *RMW =
4852 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4853
// Outside the LOCAL address space, tag the atomicrmw with empty metadata
// nodes; the denormal-mode tag is added only for FAdd with a float result.
4854 unsigned AddrSpace = PtrTy->getAddressSpace();
4855 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4856 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4857 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4858 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4859 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4860 }
4861
// For FLAT pointers, attach noalias.addrspace metadata.
// NOTE(review): the lines that build RangeNotPrivate are missing from this
// listing.
4862 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4863 MDBuilder MDB(F->getContext());
4864 MDNode *RangeNotPrivate =
4867 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4868 }
4869
4870 if (IsVolatile)
4871 RMW->setVolatile(true);
4872
// Cast back to the call's type, undoing the bfloat bitcast applied to Val for
// the i16-vector form above.
4873 return Builder.CreateBitCast(RMW, RetTy);
4874}
4875
4876/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4877/// plain MDNode, as it's the verifier's job to check these are the correct
4878/// types later.
4879static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
// Returns nullptr when Op is not a valid argument index, when the inner
// condition below does not hold, or when the wrapped metadata is null or not
// an MDNode (dyn_cast_if_present).
// NOTE(review): the line that completes the `MetadataAsValue *MAV =`
// initializer is missing from this listing.
4880 if (Op < CI->arg_size()) {
4881 if (MetadataAsValue *MAV =
4883 Metadata *MD = MAV->getMetadata();
4884 return dyn_cast_if_present<MDNode>(MD);
4885 }
4886 }
4887 return nullptr;
4888}
4889
4890/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4891static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
// Unlike unwrapMAVOp, the wrapped Metadata is returned as-is, without
// narrowing it to MDNode. Returns nullptr when Op is not a valid argument
// index.
// NOTE(review): the line that introduces MAV (between the range check and the
// return) is missing from this listing.
4892 if (Op < CI->arg_size())
4894 return MAV->getMetadata();
4895 return nullptr;
4896}
4897
// NOTE(review): the signature line of this helper is missing from this
// listing; the body reads a debug location from a pointer named I.
4899 // The MDNode attached to this instruction might not be the correct type,
4900 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4901 return I->getDebugLoc().getAsMDNode();
4902}
4903
4904/// Convert debug intrinsic calls to non-instruction debug records.
4905/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4906/// \p CI - The debug intrinsic call.
// NOTE(review): the signature line and the lines that begin each record
// construction are missing from this listing; only the trailing argument lines
// of those constructions are visible below.
//
// One record is built for the "label", "assign", "declare", "addr" or "value"
// form and inserted immediately before CI. Operands are unwrapped leniently
// (missing or ill-typed operands become nullptr). A 4-argument "value" call
// with a non-zero or non-constant offset produces no record at all.
4908 DbgRecord *DR = nullptr;
4909 if (Name == "label") {
4911 CI->getDebugLoc());
4912 } else if (Name == "assign") {
4915 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4916 unwrapMAVMetadataOp(CI, 4),
4917 /*The address is a Value ref, it will be stored as a Metadata */
4918 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4919 } else if (Name == "declare") {
4922 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4923 getDebugLocSafe(CI));
4924 } else if (Name == "addr") {
4925 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4926 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4927 // Don't try to add something to the expression if it's not an expression.
4928 // Instead, allow the verifier to fail later.
// unwrapMAVOp can return nullptr (for example when the operand index is out of
// range), so the null-tolerant cast is used here; a null ExprNode is passed
// through unchanged.
4929 if (DIExpression *Expr = dyn_cast_if_present<DIExpression>(ExprNode)) {
4930 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4931 }
4934 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4935 getDebugLocSafe(CI));
4936 } else if (Name == "value") {
4937 // An old version of dbg.value had an extra offset argument.
4938 unsigned VarOp = 1;
4939 unsigned ExprOp = 2;
4940 if (CI->arg_size() == 4) {
// NOTE(review): the line that defines Offset is missing from this listing.
4942 // Nonzero offset dbg.values get dropped without a replacement.
4943 if (!Offset || !Offset->isNullValue())
4944 return;
// In the 4-argument form the variable and expression operands sit one
// position later.
4945 VarOp = 2;
4946 ExprOp = 3;
4947 }
4950 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4951 nullptr, getDebugLocSafe(CI));
4952 }
4953 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4954 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4955}
4956
4959 if (!Offset)
4960 reportFatalUsageError("Invalid llvm.vector.splice offset argument");
4961 int64_t OffsetVal = Offset->getSExtValue();
4962 return Builder.CreateIntrinsic(OffsetVal >= 0
4963 ? Intrinsic::vector_splice_left
4964 : Intrinsic::vector_splice_right,
4965 CI->getType(),
4966 {CI->getArgOperand(0), CI->getArgOperand(1),
4967 Builder.getInt32(std::abs(OffsetVal))});
4968}
4969
4971 Function *F, IRBuilder<> &Builder) {
4972 if (Name.starts_with("to.fp16")) {
4973 Value *Cast =
4974 Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
4975 return Builder.CreateBitCast(Cast, CI->getType());
4976 }
4977
4978 if (Name.starts_with("from.fp16")) {
4979 Value *Cast =
4980 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
4981 return Builder.CreateFPExt(Cast, CI->getType());
4982 }
4983
4984 return nullptr;
4985}
4986
4987/// Upgrade a call to an old intrinsic. All argument and return casting must be
4988/// provided to seamlessly integrate with existing context.
4990 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4991 // checks the callee's function type matches. It's likely we need to handle
4992 // type changes here.
4994 if (!F)
4995 return;
4996
4997 LLVMContext &C = CI->getContext();
4998 IRBuilder<> Builder(C);
4999 if (isa<FPMathOperator>(CI))
5000 Builder.setFastMathFlags(CI->getFastMathFlags());
5001 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
5002
5003 if (!NewFn) {
5004 // Get the Function's name.
5005 StringRef Name = F->getName();
5006 if (!Name.consume_front("llvm."))
5007 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
5008
5009 bool IsX86 = Name.consume_front("x86.");
5010 bool IsNVVM = Name.consume_front("nvvm.");
5011 bool IsAArch64 = Name.consume_front("aarch64.");
5012 bool IsARM = Name.consume_front("arm.");
5013 bool IsAMDGCN = Name.consume_front("amdgcn.");
5014 bool IsDbg = Name.consume_front("dbg.");
5015 bool IsOldSplice =
5016 (Name.consume_front("experimental.vector.splice") ||
5017 Name.consume_front("vector.splice")) &&
5018 !(Name.starts_with(".left") || Name.starts_with(".right"));
5019 Value *Rep = nullptr;
5020
5021 if (!IsX86 && Name == "stackprotectorcheck") {
5022 Rep = nullptr;
5023 } else if (IsNVVM) {
5024 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
5025 } else if (IsX86) {
5026 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
5027 } else if (IsAArch64) {
5028 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
5029 } else if (IsARM) {
5030 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
5031 } else if (IsAMDGCN) {
5032 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
5033 } else if (IsDbg) {
5035 } else if (IsOldSplice) {
5036 Rep = upgradeVectorSplice(CI, Builder);
5037 } else if (Name.consume_front("convert.")) {
5038 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
5039 } else {
5040 llvm_unreachable("Unknown function for CallBase upgrade.");
5041 }
5042
5043 if (Rep)
5044 CI->replaceAllUsesWith(Rep);
5045 CI->eraseFromParent();
5046 return;
5047 }
5048
5049 const auto &DefaultCase = [&]() -> void {
5050 if (F == NewFn)
5051 return;
5052
5053 if (CI->getFunctionType() == NewFn->getFunctionType()) {
5054 // Handle generic mangling change.
5055 assert(
5056 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
5057 "Unknown function for CallBase upgrade and isn't just a name change");
5058 CI->setCalledFunction(NewFn);
5059 return;
5060 }
5061
5062 // This must be an upgrade from a named to a literal struct.
5063 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
5064 assert(OldST != NewFn->getReturnType() &&
5065 "Return type must have changed");
5066 assert(OldST->getNumElements() ==
5067 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
5068 "Must have same number of elements");
5069
5070 SmallVector<Value *> Args(CI->args());
5071 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5072 NewCI->setAttributes(CI->getAttributes());
5073 Value *Res = PoisonValue::get(OldST);
5074 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5075 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5076 Res = Builder.CreateInsertValue(Res, Elem, Idx);
5077 }
5078 CI->replaceAllUsesWith(Res);
5079 CI->eraseFromParent();
5080 return;
5081 }
5082
5083 // We're probably about to produce something invalid. Let the verifier catch
5084 // it instead of dying here.
5085 CI->setCalledOperand(
5087 return;
5088 };
5089 CallInst *NewCall = nullptr;
5090 switch (NewFn->getIntrinsicID()) {
5091 default: {
5092 DefaultCase();
5093 return;
5094 }
5095 case Intrinsic::arm_neon_vst1:
5096 case Intrinsic::arm_neon_vst2:
5097 case Intrinsic::arm_neon_vst3:
5098 case Intrinsic::arm_neon_vst4:
5099 case Intrinsic::arm_neon_vst2lane:
5100 case Intrinsic::arm_neon_vst3lane:
5101 case Intrinsic::arm_neon_vst4lane: {
5102 SmallVector<Value *, 4> Args(CI->args());
5103 NewCall = Builder.CreateCall(NewFn, Args);
5104 break;
5105 }
5106 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5107 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5108 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5109 LLVMContext &Ctx = F->getParent()->getContext();
5110 SmallVector<Value *, 4> Args(CI->args());
5111 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
5112 cast<ConstantInt>(Args[3])->getZExtValue());
5113 NewCall = Builder.CreateCall(NewFn, Args);
5114 break;
5115 }
5116 case Intrinsic::aarch64_sve_ld3_sret:
5117 case Intrinsic::aarch64_sve_ld4_sret:
5118 case Intrinsic::aarch64_sve_ld2_sret: {
5119 // Is this a trivial remangle of the name to support ptr address spaces?
5120 if (isa<StructType>(F->getReturnType())) {
5121 DefaultCase();
5122 return;
5123 }
5124
5125 StringRef Name = F->getName();
5126 Name = Name.substr(5);
5127 unsigned N = StringSwitch<unsigned>(Name)
5128 .StartsWith("aarch64.sve.ld2", 2)
5129 .StartsWith("aarch64.sve.ld3", 3)
5130 .StartsWith("aarch64.sve.ld4", 4)
5131 .Default(0);
5132 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5133 unsigned MinElts = RetTy->getMinNumElements() / N;
5134 SmallVector<Value *, 2> Args(CI->args());
5135 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5136 Value *Ret = llvm::PoisonValue::get(RetTy);
5137 for (unsigned I = 0; I < N; I++) {
5138 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5139 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5140 }
5141 NewCall = dyn_cast<CallInst>(Ret);
5142 break;
5143 }
5144
5145 case Intrinsic::coro_end: {
5146 SmallVector<Value *, 3> Args(CI->args());
5147 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5148 NewCall = Builder.CreateCall(NewFn, Args);
5149 break;
5150 }
5151
5152 case Intrinsic::vector_extract: {
5153 StringRef Name = F->getName();
5154 Name = Name.substr(5); // Strip llvm
5155 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5156 DefaultCase();
5157 return;
5158 }
5159 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5160 unsigned MinElts = RetTy->getMinNumElements();
5161 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5162 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5163 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5164 break;
5165 }
5166
5167 case Intrinsic::vector_insert: {
5168 StringRef Name = F->getName();
5169 Name = Name.substr(5);
5170 if (!Name.starts_with("aarch64.sve.tuple")) {
5171 DefaultCase();
5172 return;
5173 }
5174 if (Name.starts_with("aarch64.sve.tuple.set")) {
5175 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5176 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5177 Value *NewIdx =
5178 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5179 NewCall = Builder.CreateCall(
5180 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5181 break;
5182 }
5183 if (Name.starts_with("aarch64.sve.tuple.create")) {
5184 unsigned N = StringSwitch<unsigned>(Name)
5185 .StartsWith("aarch64.sve.tuple.create2", 2)
5186 .StartsWith("aarch64.sve.tuple.create3", 3)
5187 .StartsWith("aarch64.sve.tuple.create4", 4)
5188 .Default(0);
5189 assert(N > 1 && "Create is expected to be between 2-4");
5190 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5191 Value *Ret = llvm::PoisonValue::get(RetTy);
5192 unsigned MinElts = RetTy->getMinNumElements() / N;
5193 for (unsigned I = 0; I < N; I++) {
5194 Value *V = CI->getArgOperand(I);
5195 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5196 }
5197 NewCall = dyn_cast<CallInst>(Ret);
5198 }
5199 break;
5200 }
5201
5202 case Intrinsic::arm_neon_bfdot:
5203 case Intrinsic::arm_neon_bfmmla:
5204 case Intrinsic::arm_neon_bfmlalb:
5205 case Intrinsic::arm_neon_bfmlalt:
5206 case Intrinsic::aarch64_neon_bfdot:
5207 case Intrinsic::aarch64_neon_bfmmla:
5208 case Intrinsic::aarch64_neon_bfmlalb:
5209 case Intrinsic::aarch64_neon_bfmlalt: {
5211 assert(CI->arg_size() == 3 &&
5212 "Mismatch between function args and call args");
5213 size_t OperandWidth =
5215 assert((OperandWidth == 64 || OperandWidth == 128) &&
5216 "Unexpected operand width");
5217 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5218 auto Iter = CI->args().begin();
5219 Args.push_back(*Iter++);
5220 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5221 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5222 NewCall = Builder.CreateCall(NewFn, Args);
5223 break;
5224 }
5225
5226 case Intrinsic::bitreverse:
5227 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5228 break;
5229
5230 case Intrinsic::ctlz:
5231 case Intrinsic::cttz: {
5232 if (CI->arg_size() != 1) {
5233 DefaultCase();
5234 return;
5235 }
5236
5237 NewCall =
5238 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5239 break;
5240 }
5241
5242 case Intrinsic::objectsize: {
5243 Value *NullIsUnknownSize =
5244 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5245 Value *Dynamic =
5246 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5247 NewCall = Builder.CreateCall(
5248 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5249 break;
5250 }
5251
5252 case Intrinsic::ctpop:
5253 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5254 break;
5255 case Intrinsic::dbg_value: {
5256 StringRef Name = F->getName();
5257 Name = Name.substr(5); // Strip llvm.
5258 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5259 if (Name.starts_with("dbg.addr")) {
5261 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5262 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5263 NewCall =
5264 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5265 MetadataAsValue::get(C, Expr)});
5266 break;
5267 }
5268
5269 // Upgrade from the old version that had an extra offset argument.
5270 assert(CI->arg_size() == 4);
5271 // Drop nonzero offsets instead of attempting to upgrade them.
5273 if (Offset->isNullValue()) {
5274 NewCall = Builder.CreateCall(
5275 NewFn,
5276 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5277 break;
5278 }
5279 CI->eraseFromParent();
5280 return;
5281 }
5282
5283 case Intrinsic::ptr_annotation:
5284 // Upgrade from versions that lacked the annotation attribute argument.
5285 if (CI->arg_size() != 4) {
5286 DefaultCase();
5287 return;
5288 }
5289
5290 // Create a new call with an added null annotation attribute argument.
5291 NewCall = Builder.CreateCall(
5292 NewFn,
5293 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5294 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5295 NewCall->takeName(CI);
5296 CI->replaceAllUsesWith(NewCall);
5297 CI->eraseFromParent();
5298 return;
5299
5300 case Intrinsic::var_annotation:
5301 // Upgrade from versions that lacked the annotation attribute argument.
5302 if (CI->arg_size() != 4) {
5303 DefaultCase();
5304 return;
5305 }
5306 // Create a new call with an added null annotation attribute argument.
5307 NewCall = Builder.CreateCall(
5308 NewFn,
5309 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5310 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5311 NewCall->takeName(CI);
5312 CI->replaceAllUsesWith(NewCall);
5313 CI->eraseFromParent();
5314 return;
5315
5316 case Intrinsic::riscv_aes32dsi:
5317 case Intrinsic::riscv_aes32dsmi:
5318 case Intrinsic::riscv_aes32esi:
5319 case Intrinsic::riscv_aes32esmi:
5320 case Intrinsic::riscv_sm4ks:
5321 case Intrinsic::riscv_sm4ed: {
5322 // The last argument to these intrinsics used to be i8 and changed to i32.
5323 // The type overload for sm4ks and sm4ed was removed.
5324 Value *Arg2 = CI->getArgOperand(2);
5325 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5326 return;
5327
5328 Value *Arg0 = CI->getArgOperand(0);
5329 Value *Arg1 = CI->getArgOperand(1);
5330 if (CI->getType()->isIntegerTy(64)) {
5331 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5332 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5333 }
5334
5335 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5336 cast<ConstantInt>(Arg2)->getZExtValue());
5337
5338 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5339 Value *Res = NewCall;
5340 if (Res->getType() != CI->getType())
5341 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5342 NewCall->takeName(CI);
5343 CI->replaceAllUsesWith(Res);
5344 CI->eraseFromParent();
5345 return;
5346 }
5347 case Intrinsic::nvvm_mapa_shared_cluster: {
5348 // Create a new call with the correct address space.
5349 NewCall =
5350 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5351 Value *Res = NewCall;
5352 Res = Builder.CreateAddrSpaceCast(
5353 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5354 NewCall->takeName(CI);
5355 CI->replaceAllUsesWith(Res);
5356 CI->eraseFromParent();
5357 return;
5358 }
5359 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5360 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5361 // Create a new call with the correct address space.
5362 SmallVector<Value *, 4> Args(CI->args());
5363 Args[0] = Builder.CreateAddrSpaceCast(
5364 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5365
5366 NewCall = Builder.CreateCall(NewFn, Args);
5367 NewCall->takeName(CI);
5368 CI->replaceAllUsesWith(NewCall);
5369 CI->eraseFromParent();
5370 return;
5371 }
5372 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5373 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5374 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5375 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5376 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5377 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5378 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5379 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5380 SmallVector<Value *, 16> Args(CI->args());
5381
5382 // Create AddrSpaceCast to shared_cluster if needed.
5383 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5384 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5386 Args[0] = Builder.CreateAddrSpaceCast(
5387 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5388
5389 // Attach the flag argument for cta_group, with a
5390 // default value of 0. This handles case (2) in
5391 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5392 size_t NumArgs = CI->arg_size();
5393 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5394 if (!FlagArg->getType()->isIntegerTy(1))
5395 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5396
5397 NewCall = Builder.CreateCall(NewFn, Args);
5398 NewCall->takeName(CI);
5399 CI->replaceAllUsesWith(NewCall);
5400 CI->eraseFromParent();
5401 return;
5402 }
5403 case Intrinsic::riscv_sha256sig0:
5404 case Intrinsic::riscv_sha256sig1:
5405 case Intrinsic::riscv_sha256sum0:
5406 case Intrinsic::riscv_sha256sum1:
5407 case Intrinsic::riscv_sm3p0:
5408 case Intrinsic::riscv_sm3p1: {
5409 // Only the old i64 form of these intrinsics needs upgrading: truncate the
5410 // operand to i32, call the new intrinsic and cast the result back.
5411 if (!CI->getType()->isIntegerTy(64))
5412 return;
5413
5414 Value *Arg =
5415 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5416
5417 NewCall = Builder.CreateCall(NewFn, Arg);
5418 Value *Res =
5419 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5420 NewCall->takeName(CI);
5421 CI->replaceAllUsesWith(Res);
5422 CI->eraseFromParent();
5423 return;
5424 }
5425
5426 case Intrinsic::x86_xop_vfrcz_ss:
5427 case Intrinsic::x86_xop_vfrcz_sd:
5428 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5429 break;
5430
5431 case Intrinsic::x86_xop_vpermil2pd:
5432 case Intrinsic::x86_xop_vpermil2ps:
5433 case Intrinsic::x86_xop_vpermil2pd_256:
5434 case Intrinsic::x86_xop_vpermil2ps_256: {
5435 SmallVector<Value *, 4> Args(CI->args());
5436 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5437 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5438 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5439 NewCall = Builder.CreateCall(NewFn, Args);
5440 break;
5441 }
5442
5443 case Intrinsic::x86_sse41_ptestc:
5444 case Intrinsic::x86_sse41_ptestz:
5445 case Intrinsic::x86_sse41_ptestnzc: {
5446 // The arguments for these intrinsics used to be v4f32, and changed
5447 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5448 // So, the only thing required is a bitcast for both arguments.
5449 // First, check the arguments have the old type.
5450 Value *Arg0 = CI->getArgOperand(0);
5451 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5452 return;
5453
5454 // Old intrinsic, add bitcasts
5455 Value *Arg1 = CI->getArgOperand(1);
5456
5457 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5458
5459 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5460 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5461
5462 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5463 break;
5464 }
5465
5466 case Intrinsic::x86_rdtscp: {
5467 // This used to take one argument. If we have no arguments, it is already
5468 // upgraded.
5469 if (CI->getNumOperands() == 0)
5470 return;
5471
5472 NewCall = Builder.CreateCall(NewFn);
5473 // Extract the second result and store it.
5474 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5475 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5476 // Replace the original call result with the first result of the new call.
5477 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5478
5479 NewCall->takeName(CI);
5480 CI->replaceAllUsesWith(TSC);
5481 CI->eraseFromParent();
5482 return;
5483 }
5484
5485 case Intrinsic::x86_sse41_insertps:
5486 case Intrinsic::x86_sse41_dppd:
5487 case Intrinsic::x86_sse41_dpps:
5488 case Intrinsic::x86_sse41_mpsadbw:
5489 case Intrinsic::x86_avx_dp_ps_256:
5490 case Intrinsic::x86_avx2_mpsadbw: {
5491 // Need to truncate the last argument from i32 to i8 -- this argument models
5492 // an inherently 8-bit immediate operand to these x86 instructions.
5493 SmallVector<Value *, 4> Args(CI->args());
5494
5495 // Replace the last argument with a trunc.
5496 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5497 NewCall = Builder.CreateCall(NewFn, Args);
5498 break;
5499 }
5500
5501 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5502 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5503 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5504 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5505 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5506 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5507 SmallVector<Value *, 4> Args(CI->args());
5508 unsigned NumElts =
5509 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5510 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5511
5512 NewCall = Builder.CreateCall(NewFn, Args);
5513 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5514
5515 NewCall->takeName(CI);
5516 CI->replaceAllUsesWith(Res);
5517 CI->eraseFromParent();
5518 return;
5519 }
5520
5521 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5522 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5523 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5524 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5525 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5526 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5527 SmallVector<Value *, 4> Args(CI->args());
5528 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5529 if (NewFn->getIntrinsicID() ==
5530 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5531 Args[1] = Builder.CreateBitCast(
5532 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5533
5534 NewCall = Builder.CreateCall(NewFn, Args);
5535 Value *Res = Builder.CreateBitCast(
5536 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5537
5538 NewCall->takeName(CI);
5539 CI->replaceAllUsesWith(Res);
5540 CI->eraseFromParent();
5541 return;
5542 }
5543 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5544 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5545 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5546 SmallVector<Value *, 4> Args(CI->args());
5547 unsigned NumElts =
5548 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5549 Args[1] = Builder.CreateBitCast(
5550 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5551 Args[2] = Builder.CreateBitCast(
5552 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5553
5554 NewCall = Builder.CreateCall(NewFn, Args);
5555 break;
5556 }
5557
5558 case Intrinsic::thread_pointer: {
5559 NewCall = Builder.CreateCall(NewFn, {});
5560 break;
5561 }
5562
5563 case Intrinsic::memcpy:
5564 case Intrinsic::memmove:
5565 case Intrinsic::memset: {
5566 // We have to make sure that the call signature is what we're expecting.
5567 // We only want to change the old signatures by removing the alignment arg:
5568 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5569 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5570 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5571 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5572 // Note: i8*'s in the above can be any pointer type
5573 if (CI->arg_size() != 5) {
5574 DefaultCase();
5575 return;
5576 }
5577 // Remove alignment argument (3), and add alignment attributes to the
5578 // dest/src pointers.
5579 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5580 CI->getArgOperand(2), CI->getArgOperand(4)};
5581 NewCall = Builder.CreateCall(NewFn, Args);
5582 AttributeList OldAttrs = CI->getAttributes();
5583 AttributeList NewAttrs = AttributeList::get(
5584 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5585 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5586 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5587 NewCall->setAttributes(NewAttrs);
5588 auto *MemCI = cast<MemIntrinsic>(NewCall);
5589 // All mem intrinsics support dest alignment.
5591 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5592 // Memcpy/Memmove also support source alignment.
5593 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5594 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5595 break;
5596 }
5597
5598 case Intrinsic::masked_load:
5599 case Intrinsic::masked_gather:
5600 case Intrinsic::masked_store:
5601 case Intrinsic::masked_scatter: {
5602 if (CI->arg_size() != 4) {
5603 DefaultCase();
5604 return;
5605 }
5606
5607 auto GetMaybeAlign = [](Value *Op) {
5608 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5609 uint64_t Val = CI->getZExtValue();
5610 if (Val == 0)
5611 return MaybeAlign();
5612 if (isPowerOf2_64(Val))
5613 return MaybeAlign(Val);
5614 }
5615 reportFatalUsageError("Invalid alignment argument");
5616 };
5617 auto GetAlign = [&](Value *Op) {
5618 MaybeAlign Align = GetMaybeAlign(Op);
5619 if (Align)
5620 return *Align;
5621 reportFatalUsageError("Invalid zero alignment argument");
5622 };
5623
5624 const DataLayout &DL = CI->getDataLayout();
5625 switch (NewFn->getIntrinsicID()) {
5626 case Intrinsic::masked_load:
5627 NewCall = Builder.CreateMaskedLoad(
5628 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5629 CI->getArgOperand(2), CI->getArgOperand(3));
5630 break;
5631 case Intrinsic::masked_gather:
5632 NewCall = Builder.CreateMaskedGather(
5633 CI->getType(), CI->getArgOperand(0),
5634 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5635 CI->getType()->getScalarType()),
5636 CI->getArgOperand(2), CI->getArgOperand(3));
5637 break;
5638 case Intrinsic::masked_store:
5639 NewCall = Builder.CreateMaskedStore(
5640 CI->getArgOperand(0), CI->getArgOperand(1),
5641 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5642 break;
5643 case Intrinsic::masked_scatter:
5644 NewCall = Builder.CreateMaskedScatter(
5645 CI->getArgOperand(0), CI->getArgOperand(1),
5646 DL.getValueOrABITypeAlignment(
5647 GetMaybeAlign(CI->getArgOperand(2)),
5648 CI->getArgOperand(0)->getType()->getScalarType()),
5649 CI->getArgOperand(3));
5650 break;
5651 default:
5652 llvm_unreachable("Unexpected intrinsic ID");
5653 }
5654 // Previous metadata is still valid.
5655 NewCall->copyMetadata(*CI);
5656 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5657 break;
5658 }
5659
5660 case Intrinsic::lifetime_start:
5661 case Intrinsic::lifetime_end: {
5662 if (CI->arg_size() != 2) {
5663 DefaultCase();
5664 return;
5665 }
5666
5667 Value *Ptr = CI->getArgOperand(1);
5668 // Try to strip pointer casts, such that the lifetime works on an alloca.
5669 Ptr = Ptr->stripPointerCasts();
5670 if (isa<AllocaInst>(Ptr)) {
5671 // Don't use NewFn, as we might have looked through an addrspacecast.
5672 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5673 NewCall = Builder.CreateLifetimeStart(Ptr);
5674 else
5675 NewCall = Builder.CreateLifetimeEnd(Ptr);
5676 break;
5677 }
5678
5679 // Otherwise remove the lifetime marker.
5680 CI->eraseFromParent();
5681 return;
5682 }
5683
5684 case Intrinsic::x86_avx512_vpdpbusd_128:
5685 case Intrinsic::x86_avx512_vpdpbusd_256:
5686 case Intrinsic::x86_avx512_vpdpbusd_512:
5687 case Intrinsic::x86_avx512_vpdpbusds_128:
5688 case Intrinsic::x86_avx512_vpdpbusds_256:
5689 case Intrinsic::x86_avx512_vpdpbusds_512:
5690 case Intrinsic::x86_avx2_vpdpbssd_128:
5691 case Intrinsic::x86_avx2_vpdpbssd_256:
5692 case Intrinsic::x86_avx10_vpdpbssd_512:
5693 case Intrinsic::x86_avx2_vpdpbssds_128:
5694 case Intrinsic::x86_avx2_vpdpbssds_256:
5695 case Intrinsic::x86_avx10_vpdpbssds_512:
5696 case Intrinsic::x86_avx2_vpdpbsud_128:
5697 case Intrinsic::x86_avx2_vpdpbsud_256:
5698 case Intrinsic::x86_avx10_vpdpbsud_512:
5699 case Intrinsic::x86_avx2_vpdpbsuds_128:
5700 case Intrinsic::x86_avx2_vpdpbsuds_256:
5701 case Intrinsic::x86_avx10_vpdpbsuds_512:
5702 case Intrinsic::x86_avx2_vpdpbuud_128:
5703 case Intrinsic::x86_avx2_vpdpbuud_256:
5704 case Intrinsic::x86_avx10_vpdpbuud_512:
5705 case Intrinsic::x86_avx2_vpdpbuuds_128:
5706 case Intrinsic::x86_avx2_vpdpbuuds_256:
5707 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5708 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5709 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5710 CI->getArgOperand(2)};
5711 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5712 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5713 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5714
5715 NewCall = Builder.CreateCall(NewFn, Args);
5716 break;
5717 }
5718 case Intrinsic::x86_avx512_vpdpwssd_128:
5719 case Intrinsic::x86_avx512_vpdpwssd_256:
5720 case Intrinsic::x86_avx512_vpdpwssd_512:
5721 case Intrinsic::x86_avx512_vpdpwssds_128:
5722 case Intrinsic::x86_avx512_vpdpwssds_256:
5723 case Intrinsic::x86_avx512_vpdpwssds_512:
5724 case Intrinsic::x86_avx2_vpdpwsud_128:
5725 case Intrinsic::x86_avx2_vpdpwsud_256:
5726 case Intrinsic::x86_avx10_vpdpwsud_512:
5727 case Intrinsic::x86_avx2_vpdpwsuds_128:
5728 case Intrinsic::x86_avx2_vpdpwsuds_256:
5729 case Intrinsic::x86_avx10_vpdpwsuds_512:
5730 case Intrinsic::x86_avx2_vpdpwusd_128:
5731 case Intrinsic::x86_avx2_vpdpwusd_256:
5732 case Intrinsic::x86_avx10_vpdpwusd_512:
5733 case Intrinsic::x86_avx2_vpdpwusds_128:
5734 case Intrinsic::x86_avx2_vpdpwusds_256:
5735 case Intrinsic::x86_avx10_vpdpwusds_512:
5736 case Intrinsic::x86_avx2_vpdpwuud_128:
5737 case Intrinsic::x86_avx2_vpdpwuud_256:
5738 case Intrinsic::x86_avx10_vpdpwuud_512:
5739 case Intrinsic::x86_avx2_vpdpwuuds_128:
5740 case Intrinsic::x86_avx2_vpdpwuuds_256:
5741 case Intrinsic::x86_avx10_vpdpwuuds_512:
5742 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5743 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5744 CI->getArgOperand(2)};
5745 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5746 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5747 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5748
5749 NewCall = Builder.CreateCall(NewFn, Args);
5750 break;
5751 }
5752 assert(NewCall && "Should have either set this variable or returned through "
5753 "the default case");
5754 NewCall->takeName(CI);
5755 CI->replaceAllUsesWith(NewCall);
5756 CI->eraseFromParent();
5757}
5758
5760 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5761
5762 // Check if this function should be upgraded and get the replacement function
5763 // if there is one.
5764 Function *NewFn;
5765 if (UpgradeIntrinsicFunction(F, NewFn)) {
5766 // Replace all users of the old function with the new function or new
5767 // instructions. This is not a range loop because the call is deleted.
5768 for (User *U : make_early_inc_range(F->users()))
5769 if (CallBase *CB = dyn_cast<CallBase>(U))
5770 UpgradeIntrinsicCall(CB, NewFn);
5771
5772 // Remove old function, no longer used, from the module.
5773 if (F != NewFn)
5774 F->eraseFromParent();
5775 }
5776}
5777
5779 const unsigned NumOperands = MD.getNumOperands();
5780 if (NumOperands == 0)
5781 return &MD; // Invalid, punt to a verifier error.
5782
5783 // Check if the tag uses struct-path aware TBAA format.
5784 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5785 return &MD;
5786
5787 auto &Context = MD.getContext();
5788 if (NumOperands == 3) {
5789 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5790 MDNode *ScalarType = MDNode::get(Context, Elts);
5791 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5792 Metadata *Elts2[] = {ScalarType, ScalarType,
5795 MD.getOperand(2)};
5796 return MDNode::get(Context, Elts2);
5797 }
5798 // Create a MDNode <MD, MD, offset 0>
5800 Type::getInt64Ty(Context)))};
5801 return MDNode::get(Context, Elts);
5802}
5803
// NOTE(review): the first line of this definition's signature is missing from
// this listing; only the trailing `Instruction *&Temp` parameter is visible.
//
// Handles a bitcast between pointer (or pointer-vector) types whose address
// spaces differ by creating a ptrtoint followed by an inttoptr. Returns the
// inttoptr instruction and stores the intermediate ptrtoint in Temp, or
// returns nullptr when no replacement is needed.
5805 Instruction *&Temp) {
// Only bitcasts are rewritten. Note that Temp is not written on this path.
5806 if (Opc != Instruction::BitCast)
5807 return nullptr;
5808
5809 Temp = nullptr;
5810 Type *SrcTy = V->getType();
5811 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5812 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5813 LLVMContext &Context = V->getContext();
5814
5815 // We have no information about target data layout, so we assume that
5816 // the maximum pointer size is 64bit.
5817 Type *MidTy = Type::getInt64Ty(Context);
5818 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5819
5820 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5821 }
5822
5823 return nullptr;
5824}
5825
// NOTE(review): the declaration line of this definition and the first line of
// the return statement inside the `if` (listing line 5839) are missing from
// this listing.
//
// Constant-expression counterpart of the instruction-based bitcast upgrade:
// it applies the same check (a bitcast between pointer or pointer-vector types
// in different address spaces) to the constant C and returns nullptr when no
// replacement is needed.
5827 if (Opc != Instruction::BitCast)
5828 return nullptr;
5829
5830 Type *SrcTy = C->getType();
5831 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5832 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5833 LLVMContext &Context = C->getContext();
5834
5835 // We have no information about target data layout, so we assume that
5836 // the maximum pointer size is 64bit.
5837 Type *MidTy = Type::getInt64Ty(Context);
5838
5840 DestTy);
5841 }
5842
5843 return nullptr;
5844}
5845
5846/// Check the debug info version number, if it is out-dated, drop the debug
5847/// info. Return true if module is modified.
// NOTE(review): listing lines 5848-5849 (the declaration and the condition
// guarding the early return below), 5872, 5881, 5886 and 5888 are missing from
// this listing, so some scopes below appear to close without being opened.
5850 return false;
5851
5852 llvm::TimeTraceScope timeScope("Upgrade debug info");
5853 // We need to get metadata before the module is verified (i.e., getModuleFlag
5854 // makes assumptions that we haven't verified yet). Carefully extract the flag
5855 // from the metadata.
// Version stays 0 when the flag is absent or its value is not a ConstantInt.
5856 unsigned Version = 0;
5857 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5858 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5859 if (Flag->getNumOperands() < 3)
5860 return false;
5861 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5862 return K->getString() == "Debug Info Version";
5863 return false;
5864 });
5865 if (OpIt != ModFlags->op_end()) {
5866 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5867 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5868 Version = CI->getZExtValue();
5869 }
5870 }
5871
// NOTE(review): the line that opens this scope (5872) is not shown; presumably
// it compares Version against the current debug metadata version - confirm.
// A module that fails verification outright is fatal; broken debug info alone
// only produces a diagnostic and falls through to the strip below.
5873 bool BrokenDebugInfo = false;
5874 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5875 report_fatal_error("Broken module found, compilation aborted!");
5876 if (!BrokenDebugInfo)
5877 // Everything is ok.
5878 return false;
5879 else {
5880 // Diagnose malformed debug info.
5882 M.getContext().diagnose(Diag);
5883 }
5884 }
// The return value reports whether StripDebugInfo changed the module.
5885 bool Modified = StripDebugInfo(M);
5887 // Diagnose a version mismatch.
5889 M.getContext().diagnose(DiagVersion);
5890 }
5891 return Modified;
5892}
5893
5894static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5895 GlobalValue *GV, const Metadata *V) {
5896 Function *F = cast<Function>(GV);
5897
5898 constexpr StringLiteral DefaultValue = "1";
5899 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5900 unsigned Length = 0;
5901
5902 if (F->hasFnAttribute(Attr)) {
5903 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5904 // parse these elements placing them into Vect3
5905 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5906 for (; Length < 3 && !S.empty(); Length++) {
5907 auto [Part, Rest] = S.split(',');
5908 Vect3[Length] = Part.trim();
5909 S = Rest;
5910 }
5911 }
5912
5913 const unsigned Dim = DimC - 'x';
5914 assert(Dim < 3 && "Unexpected dim char");
5915
5916 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5917
5918 // local variable required for StringRef in Vect3 to point to.
5919 const std::string VStr = llvm::utostr(VInt);
5920 Vect3[Dim] = VStr;
5921 Length = std::max(Length, Dim + 1);
5922
5923 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5924 F->addFnAttr(Attr, NewAttr);
5925}
5926
5927static inline bool isXYZ(StringRef S) {
5928 return S == "x" || S == "y" || S == "z";
5929}
5930
// NOTE(review): the first line of this definition's signature (listing line
// 5931) and several statements (5934, 5953, 5967, 5971, 5975) are missing from
// this listing.
//
// Translates a single key/value pair K/V attached to the global GV into the
// corresponding calling convention or attribute. Returns true when the key was
// recognised and handled, false when the pair should be kept as metadata.
5932 const Metadata *V) {
5933 if (K == "kernel") {
5935 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5936 return true;
5937 }
5938 if (K == "align") {
5939 // V is a bitfield specifying two 16-bit values. The alignment value is
5940 // specified in low 16-bits, The index is specified in the high bits. For the
5941 // index, 0 indicates the return value while higher values correspond to
5942 // each parameter (idx = param + 1).
5943 const uint64_t AlignIdxValuePair =
5944 mdconst::extract<ConstantInt>(V)->getZExtValue();
5945 const unsigned Idx = (AlignIdxValuePair >> 16);
5946 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5947 cast<Function>(GV)->addAttributeAtIndex(
5948 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5949 return true;
5950 }
5951 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5952 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
// NOTE(review): the statement that consumes CV (listing line 5953) is missing.
5954 return true;
5955 }
5956 if (K == "minctasm") {
5957 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5958 cast<Function>(GV)->addFnAttr(NVVMAttr::MinCTASm, llvm::utostr(CV));
5959 return true;
5960 }
5961 if (K == "maxnreg") {
5962 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5963 cast<Function>(GV)->addFnAttr(NVVMAttr::MaxNReg, llvm::utostr(CV));
5964 return true;
5965 }
// In the three checks below consume_front strips the matched prefix from K, so
// isXYZ only sees what follows it.
// NOTE(review): the statement handling each of these keys (listing lines 5967,
// 5971 and 5975) is missing from this listing.
5966 if (K.consume_front("maxntid") && isXYZ(K)) {
5968 return true;
5969 }
5970 if (K.consume_front("reqntid") && isXYZ(K)) {
5972 return true;
5973 }
5974 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5976 return true;
5977 }
5978 if (K == "grid_constant") {
5979 const auto Attr = Attribute::get(GV->getContext(), NVVMAttr::GridConstant);
5980 for (const auto &Op : cast<MDNode>(V)->operands()) {
5981 // For some reason, the index is 1-based in the metadata. Good thing we're
5982 // able to auto-upgrade it!
5983 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5984 cast<Function>(GV)->addParamAttr(Index, Attr);
5985 }
5986 return true;
5987 }
5988
// Unknown key: the caller keeps the pair.
5989 return false;
5990}
5991
// NOTE(review): the declaration line of this definition and the declaration of
// SeenNodes (listing line 5998) are missing from this listing.
//
// Rewrites the module's "nvvm.annotations" named metadata: every key/value
// pair that upgradeSingleNVVMAnnotation handles is dropped, the remaining
// pairs are collected into new nodes, and the named metadata is rebuilt from
// those nodes.
5993 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5994 if (!NamedMD)
5995 return;
5996
5997 SmallVector<MDNode *, 8> NewNodes;
5999 for (MDNode *MD : NamedMD->operands()) {
// Process each distinct node only once.
6000 if (!SeenNodes.insert(MD).second)
6001 continue;
6002
// Nodes whose first operand is not a GlobalValue are skipped and therefore do
// not appear in NewNodes.
6003 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
6004 if (!GV)
6005 continue;
6006
6007 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
6008
6009 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
6010 // Each nvvm.annotations metadata entry will be of the following form:
6011 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
6012 // start index = 1, to skip the global variable key
6013 // increment = 2, to skip the value for each property-value pairs
6014 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
6015 MDString *K = cast<MDString>(MD->getOperand(j));
6016 const MDOperand &V = MD->getOperand(j + 1);
6017 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
6018 if (!Upgraded)
6019 NewOperands.append({K, V});
6020 }
6021
// A size of 1 means only the global itself is left, so no node is emitted.
6022 if (NewOperands.size() > 1)
6023 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
6024 }
6025
6026 NamedMD->clearOperands();
6027 for (MDNode *N : NewNodes)
6028 NamedMD->addOperand(N);
6029}
6030
6031/// This checks for objc retain release marker which should be upgraded. It
6032/// returns true if module is modified.
// NOTE(review): the declaration line of this definition (listing line 6033) is
// missing from this listing.
//
// The marker is read from the first operand of the named metadata
// "clang.arc.retainAutoreleasedReturnValueMarker", re-added as a module flag
// under the same key, and the named metadata is erased.
6034 bool Changed = false;
6035 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
6036 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
6037 if (ModRetainReleaseMarker) {
6038 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
6039 if (Op) {
6040 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
6041 if (ID) {
// A marker made of exactly two '#'-separated parts is rewritten with ';' as
// the separator; any other shape is carried over unchanged.
6042 SmallVector<StringRef, 4> ValueComp;
6043 ID->getString().split(ValueComp, "#");
6044 if (ValueComp.size() == 2) {
6045 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
6046 ID = MDString::get(M.getContext(), NewValue);
6047 }
6048 M.addModuleFlag(Module::Error, MarkerKey, ID);
6049 M.eraseNamedMetadata(ModRetainReleaseMarker);
6050 Changed = true;
6051 }
6052 }
6053 }
6054 return Changed;
6055}
6056
// NOTE(review): the declaration line of this definition (listing line 6057)
// and three statements (6071, 6077 and 6133) are missing from this listing.
//
// Replaces calls to the named ObjC ARC runtime functions with calls to the
// matching llvm.objc.* intrinsics. "clang.arc.use" is always converted; the
// remaining runtime functions are only converted when the early return below
// is not taken.
6058 // This lambda converts normal function calls to ARC runtime functions to
6059 // intrinsic calls.
6060 auto UpgradeToIntrinsic = [&](const char *OldFunc,
6061 llvm::Intrinsic::ID IntrinsicFunc) {
6062 Function *Fn = M.getFunction(OldFunc);
6063
6064 if (!Fn)
6065 return;
6066
6067 Function *NewFn =
6068 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
6069
// Early-increment iteration: calls are erased from Fn's use list inside the
// loop body.
6070 for (User *U : make_early_inc_range(Fn->users())) {
// NOTE(review): the definition of CI (listing line 6071) is missing; it is
// presumably derived from U - confirm. Users that do not call Fn directly are
// skipped.
6072 if (!CI || CI->getCalledFunction() != Fn)
6073 continue;
6074
6075 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
6076 FunctionType *NewFuncTy = NewFn->getFunctionType();
// NOTE(review): the declaration of Args (listing line 6077) is missing.
6078
6079 // Don't upgrade the intrinsic if it's not valid to bitcast the return
6080 // value to the return type of the old function.
6081 if (NewFuncTy->getReturnType() != CI->getType() &&
6082 !CastInst::castIsValid(Instruction::BitCast, CI,
6083 NewFuncTy->getReturnType()))
6084 continue;
6085
6086 bool InvalidCast = false;
6087
6088 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
6089 Value *Arg = CI->getArgOperand(I);
6090
6091 // Bitcast argument to the parameter type of the new function if it's
6092 // not a variadic argument.
6093 if (I < NewFuncTy->getNumParams()) {
6094 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
6095 // to the parameter type of the new function.
6096 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
6097 NewFuncTy->getParamType(I))) {
6098 InvalidCast = true;
6099 break;
6100 }
6101 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
6102 }
6103 Args.push_back(Arg);
6104 }
6105
// Leave this call untouched if any argument could not be converted.
6106 if (InvalidCast)
6107 continue;
6108
6109 // Create a call instruction that calls the new function.
6110 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6111 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
6112 NewCall->takeName(CI);
6113
6114 // Bitcast the return value back to the type of the old call.
6115 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
6116
6117 if (!CI->use_empty())
6118 CI->replaceAllUsesWith(NewRetVal);
6119 CI->eraseFromParent();
6120 }
6121
// The old declaration is only removed once nothing refers to it any more; it
// survives when some user was skipped above.
6122 if (Fn->use_empty())
6123 Fn->eraseFromParent();
6124 };
6125
6126 // Unconditionally convert a call to "clang.arc.use" to a call to
6127 // "llvm.objc.clang.arc.use".
6128 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6129
6130 // Upgrade the retain release marker. If there is no need to upgrade
6131 // the marker, that means either the module is already new enough to contain
6132 // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
// NOTE(review): the condition guarding this return (listing line 6133) is
// missing from this listing.
6134 return;
6135
// Table mapping each runtime function name to the intrinsic that replaces it.
6136 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6137 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6138 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6139 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6140 {"objc_autoreleaseReturnValue",
6141 llvm::Intrinsic::objc_autoreleaseReturnValue},
6142 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6143 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6144 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6145 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6146 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6147 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6148 {"objc_release", llvm::Intrinsic::objc_release},
6149 {"objc_retain", llvm::Intrinsic::objc_retain},
6150 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6151 {"objc_retainAutoreleaseReturnValue",
6152 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6153 {"objc_retainAutoreleasedReturnValue",
6154 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6155 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6156 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6157 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6158 {"objc_unsafeClaimAutoreleasedReturnValue",
6159 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6160 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6161 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6162 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6163 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6164 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6165 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6166 {"objc_arc_annotation_topdown_bbstart",
6167 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6168 {"objc_arc_annotation_topdown_bbend",
6169 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6170 {"objc_arc_annotation_bottomup_bbstart",
6171 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6172 {"objc_arc_annotation_bottomup_bbend",
6173 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6174
6175 for (auto &I : RuntimeFuncs)
6176 UpgradeToIntrinsic(I.first, I.second);
6177}
6178
// NOTE(review): the declaration line of this definition (listing line 6179)
// and four continuation lines (6214, 6223, 6232 and 6280) are missing from
// this listing.
//
// Walks the module flags metadata and rewrites individual flags in place
// (behavior changes, value normalisation, renames), then appends flags that
// older modules lack. Returns true if anything was changed.
6180 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6181 if (!ModFlags)
6182 return false;
6183
6184 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6185 bool HasSwiftVersionFlag = false;
// The three Swift values below are only assigned, and only read, when
// HasSwiftVersionFlag is set.
6186 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6187 uint32_t SwiftABIVersion;
6188 auto Int8Ty = Type::getInt8Ty(M.getContext());
6189 auto Int32Ty = Type::getInt32Ty(M.getContext());
6190
6191 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6192 MDNode *Op = ModFlags->getOperand(I);
// Only well-formed <behavior, name, value> triples with a string name are
// considered.
6193 if (Op->getNumOperands() != 3)
6194 continue;
6195 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6196 if (!ID)
6197 continue;
// Helper: replace operand I with a copy of the flag that has behavior B and
// the same name and value.
6198 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6199 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6200 Type::getInt32Ty(M.getContext()), B)),
6201 MDString::get(M.getContext(), ID->getString()),
6202 Op->getOperand(2)};
6203 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6204 Changed = true;
6205 };
6206
6207 if (ID->getString() == "Objective-C Image Info Version")
6208 HasObjCFlag = true;
6209 if (ID->getString() == "Objective-C Class Properties")
6210 HasClassProperties = true;
6211 // Upgrade PIC from Error/Max to Min.
6212 if (ID->getString() == "PIC Level") {
6213 if (auto *Behavior =
6215 uint64_t V = Behavior->getLimitedValue();
6216 if (V == Module::Error || V == Module::Max)
6217 SetBehavior(Module::Min);
6218 }
6219 }
6220 // Upgrade "PIE Level" from Error to Max.
6221 if (ID->getString() == "PIE Level")
6222 if (auto *Behavior =
6224 if (Behavior->getLimitedValue() == Module::Error)
6225 SetBehavior(Module::Max);
6226
6227 // Upgrade branch protection and return address signing module flags. The
6228 // module flag behavior for these fields were Error and now they are Min.
6229 if (ID->getString() == "branch-target-enforcement" ||
6230 ID->getString().starts_with("sign-return-address")) {
6231 if (auto *Behavior =
6233 if (Behavior->getLimitedValue() == Module::Error) {
// This local Int32Ty shadows the function-level Int32Ty declared above.
6234 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6235 Metadata *Ops[3] = {
6236 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6237 Op->getOperand(1), Op->getOperand(2)};
6238 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6239 Changed = true;
6240 }
6241 }
6242 }
6243
6244 // Upgrade Objective-C Image Info Section. Removed the whitespace in the
6245 // section name so that llvm-lto will not complain about mismatching
6246 // module flags that is functionally the same.
6247 if (ID->getString() == "Objective-C Image Info Section") {
6248 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6249 SmallVector<StringRef, 4> ValueComp;
6250 Value->getString().split(ValueComp, " ");
// More than one component means the value contained at least one space;
// concatenating the components removes them.
6251 if (ValueComp.size() != 1) {
6252 std::string NewValue;
6253 for (auto &S : ValueComp)
6254 NewValue += S.str();
6255 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6256 MDString::get(M.getContext(), NewValue)};
6257 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6258 Changed = true;
6259 }
6260 }
6261 }
6262
6263 // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8 value.
6264 // If the higher bits are set, it adds new module flag for swift info.
6265 if (ID->getString() == "Objective-C Garbage Collection") {
6266 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6267 if (Md) {
6268 assert(Md->getValue() && "Expected non-empty metadata");
6269 auto Type = Md->getValue()->getType();
// An i8 value is already in the upgraded form.
6270 if (Type == Int8Ty)
6271 continue;
6272 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
// Bits above the low byte carry the Swift fields: bits 8-15 the ABI version,
// bits 16-23 the minor version and bits 24-31 the major version.
6273 if ((Val & 0xff) != Val) {
6274 HasSwiftVersionFlag = true;
6275 SwiftABIVersion = (Val & 0xff00) >> 8;
6276 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6277 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6278 }
6279 Metadata *Ops[3] = {
6281 Op->getOperand(1),
6282 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
6283 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6284 Changed = true;
6285 }
6286 }
6287
// Rename the flag while keeping its behavior and value.
6288 if (ID->getString() == "amdgpu_code_object_version") {
6289 Metadata *Ops[3] = {
6290 Op->getOperand(0),
6291 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6292 Op->getOperand(2)};
6293 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6294 Changed = true;
6295 }
6296 }
6297
6298 // "Objective-C Class Properties" is recently added for Objective-C. We
6299 // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
6300 // flag of value 0, so we can correctly downgrade this flag when trying to
6301 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6302 // this module flag.
6303 if (HasObjCFlag && !HasClassProperties) {
6304 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6305 (uint32_t)0);
6306 Changed = true;
6307 }
6308
// Emit the Swift fields extracted from the garbage-collection flag as separate
// module flags.
6309 if (HasSwiftVersionFlag) {
6310 M.addModuleFlag(Module::Error, "Swift ABI Version",
6311 SwiftABIVersion);
6312 M.addModuleFlag(Module::Error, "Swift Major Version",
6313 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6314 M.addModuleFlag(Module::Error, "Swift Minor Version",
6315 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6316 Changed = true;
6317 }
6318
6319 return Changed;
6320}
6321
// NOTE(review): the declaration line of this definition (listing line 6322) is
// missing from this listing.
//
// Normalises the section name of every global whose section starts with
// "__DATA, __objc_catlist" by trimming the whitespace around each
// comma-separated component.
// TrimSpaces splits on ',', trims each component, and re-joins with ','. Every
// component is written with a leading ',' and the first one is dropped by
// substr(1).
6323 auto TrimSpaces = [](StringRef Section) -> std::string {
6324 SmallVector<StringRef, 5> Components;
6325 Section.split(Components, ',');
6326
6327 SmallString<32> Buffer;
6328 raw_svector_ostream OS(Buffer);
6329
6330 for (auto Component : Components)
6331 OS << ',' << Component.trim();
6332
6333 return std::string(OS.str().substr(1));
6334 };
6335
6336 for (auto &GV : M.globals()) {
6337 if (!GV.hasSection())
6338 continue;
6339
6340 StringRef Section = GV.getSection();
6341
6342 if (!Section.starts_with("__DATA, __objc_catlist"))
6343 continue;
6344
// Example of the rewrite (before, then after):
6345 // __DATA, __objc_catlist, regular, no_dead_strip
6346 // __DATA,__objc_catlist,regular,no_dead_strip
6347 GV.setSection(TrimSpaces(Section));
6348 }
6349}
6350
6351namespace {
6352// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6353// callsites within a function that did not also have the strictfp attribute.
6354// Since 10.0, if strict FP semantics are needed within a function, the
6355// function must have the strictfp attribute and all calls within the function
6356// must also have the strictfp attribute. This latter restriction is
6357// necessary to prevent unwanted libcall simplification when a function is
6358// being cloned (such as for inlining).
6359//
6360// The "dangling" strictfp attribute usage was only used to prevent constant
6361// folding and other libcall simplification. The nobuiltin attribute on the
6362// callsite has the same effect.
6363struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6364 StrictFPUpgradeVisitor() = default;
6365
// Visits every call-like instruction; call sites that are not strictfp are
// left alone.
6366 void visitCallBase(CallBase &Call) {
6367 if (!Call.isStrictFP())
6368 return;
// NOTE(review): the condition guarding this second return (listing line 6369)
// is missing; per the comment below it presumably checks whether the calling
// function has the strictfp attribute - confirm.
6370 return;
6371 // If we get here, the caller doesn't have the strictfp attribute
6372 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6373 Call.removeFnAttr(Attribute::StrictFP);
6374 Call.addFnAttr(Attribute::NoBuiltin);
6375 }
6376};
6377
6378/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6379struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6380 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6381 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6382
6383 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6384 if (!RMW.isFloatingPointOperation())
6385 return;
6386
6387 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6388 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6389 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6390 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6391 }
6392};
6393} // namespace
6394
// NOTE(review): the declaration line of this definition (listing line 6395) is
// missing from this listing.
//
// Upgrades the attributes of the function F: fixes dangling call-site strictfp,
// drops attributes that are incompatible with the return/argument types, and
// translates several legacy string attributes into their current form. String
// attribute changes are collected in AttrsToAdd/AttrsToRemove and applied in
// one step at the end.
6396 // If a function definition doesn't have the strictfp attribute,
6397 // convert any callsite strictfp attributes to nobuiltin.
6398 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6399 StrictFPUpgradeVisitor SFPV;
6400 SFPV.visit(F);
6401 }
6402
6403 // Remove all incompatible attributes from function.
6404 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6405 F.getReturnType(), F.getAttributes().getRetAttrs()));
6406 for (auto &Arg : F.args())
6407 Arg.removeAttrs(
6408 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6409
// The two flags record whether the corresponding batch is non-empty so that
// the final add/remove calls are only made when needed.
6410 bool AddingAttrs = false, RemovingAttrs = false;
6411 AttrBuilder AttrsToAdd(F.getContext());
6412 AttributeMask AttrsToRemove;
6413
6414 // Older versions of LLVM treated an "implicit-section-name" attribute
6415 // similarly to directly setting the section on a Function.
6416 if (Attribute A = F.getFnAttribute("implicit-section-name");
6417 A.isValid() && A.isStringAttribute()) {
6418 F.setSection(A.getValueAsString());
6419 AttrsToRemove.addAttribute("implicit-section-name");
6420 RemovingAttrs = true;
6421 }
6422
// The string attribute "nooutline" becomes the enum attribute NoOutline.
6423 if (Attribute A = F.getFnAttribute("nooutline");
6424 A.isValid() && A.isStringAttribute()) {
6425 AttrsToRemove.addAttribute("nooutline");
6426 AttrsToAdd.addAttribute(Attribute::NoOutline);
6427 AddingAttrs = RemovingAttrs = true;
6428 }
6429
// A valued "uniform-work-group-size" is removed; only the value "true" is
// re-added, as a valueless attribute.
6430 if (Attribute A = F.getFnAttribute("uniform-work-group-size");
6431 A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
6432 AttrsToRemove.addAttribute("uniform-work-group-size");
6433 RemovingAttrs = true;
6434 if (A.getValueAsString() == "true") {
6435 AttrsToAdd.addAttribute("uniform-work-group-size");
6436 AddingAttrs = true;
6437 }
6438 }
6439
6440 if (!F.empty()) {
6441 // For some reason this is called twice, and the first time is before any
6442 // instructions are loaded into the body.
6443
6444 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6445 A.isValid()) {
6446
// Only a true value causes the atomicrmw instructions to be annotated; the
// attribute itself is removed either way.
6447 if (A.getValueAsBool()) {
6448 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6449 Visitor.visit(F);
6450 }
6451
6452 // We will leave behind dead attribute uses on external declarations, but
6453 // clang never added these to declarations anyway.
6454 AttrsToRemove.addAttribute("amdgpu-unsafe-fp-atomics");
6455 RemovingAttrs = true;
6456 }
6457 }
6458
// Defaults used when only one of the two denormal attributes is present.
6459 DenormalMode DenormalFPMath = DenormalMode::getIEEE();
6460 DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();
6461
6462 bool HandleDenormalMode = false;
6463
// An attribute whose value does not parse to a valid mode is left in place.
6464 if (Attribute Attr = F.getFnAttribute("denormal-fp-math"); Attr.isValid()) {
6465 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6466 if (ParsedMode.isValid()) {
6467 DenormalFPMath = ParsedMode;
6468 AttrsToRemove.addAttribute("denormal-fp-math");
6469 AddingAttrs = RemovingAttrs = true;
6470 HandleDenormalMode = true;
6471 }
6472 }
6473
6474 if (Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
6475 Attr.isValid()) {
6476 DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
6477 if (ParsedMode.isValid()) {
6478 DenormalFPMathF32 = ParsedMode;
6479 AttrsToRemove.addAttribute("denormal-fp-math-f32");
6480 AddingAttrs = RemovingAttrs = true;
6481 HandleDenormalMode = true;
6482 }
6483 }
6484
// Both legacy attributes are folded into a single denormal FP environment
// attribute.
6485 if (HandleDenormalMode)
6486 AttrsToAdd.addDenormalFPEnvAttr(
6487 DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));
6488
// Removal runs before addition, so an attribute name that is both removed and
// re-added (e.g. "uniform-work-group-size") ends up present.
6489 if (RemovingAttrs)
6490 F.removeFnAttrs(AttrsToRemove);
6491
6492 if (AddingAttrs)
6493 F.addFnAttrs(AttrsToAdd);
6494}
6495
6496// Check if the function attribute is not present and set it.
// An attribute that already exists on F is kept as it is, whatever its value.
// NOTE(review): the first line of this definition's signature (listing line
// 6497) is missing from this listing.
6498 StringRef Value) {
6499 if (!F.hasFnAttribute(FnAttrName))
6500 F.addFnAttr(FnAttrName, Value);
6501}
6502
6503// Check if the function attribute is not present and set it if needed.
6504// If the attribute is "false" then removes it.
6505// If the attribute is "true" resets it to a valueless attribute.
6506static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6507 if (!F.hasFnAttribute(FnAttrName)) {
6508 if (Set)
6509 F.addFnAttr(FnAttrName);
6510 } else {
6511 auto A = F.getFnAttribute(FnAttrName);
6512 if ("false" == A.getValueAsString())
6513 F.removeFnAttr(FnAttrName);
6514 else if ("true" == A.getValueAsString()) {
6515 F.removeFnAttr(FnAttrName);
6516 F.addFnAttr(FnAttrName);
6517 }
6518 }
6519}
6520
// NOTE(review): the declaration line of this definition (listing line 6521) is
// missing from this listing.
//
// For ARM, Thumb and AArch64 modules, reads the branch-protection related
// module flags, applies the matching function attributes to every function
// definition, and then rewrites the flags that were set with value 2.
6522 Triple T(M.getTargetTriple());
6523 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6524 return;
6525
6526 uint64_t BTEValue = 0;
6527 uint64_t BPPLRValue = 0;
6528 uint64_t GCSValue = 0;
6529 uint64_t SRAValue = 0;
6530 uint64_t SRAALLValue = 0;
6531 uint64_t SRABKeyValue = 0;
6532
6533 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6534 if (ModFlags) {
6535 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6536 MDNode *Op = ModFlags->getOperand(I);
6537 if (Op->getNumOperands() != 3)
6538 continue;
6539
6540 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6541 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6542 if (!ID || !CI)
6543 continue;
6544
// Map the flag name to the local that stores its value; flags this function
// does not care about yield nullptr and are skipped.
6545 StringRef IDStr = ID->getString();
6546 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6547 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6548 : IDStr == "guarded-control-stack" ? &GCSValue
6549 : IDStr == "sign-return-address" ? &SRAValue
6550 : IDStr == "sign-return-address-all" ? &SRAALLValue
6551 : IDStr == "sign-return-address-with-bkey"
6552 ? &SRABKeyValue
6553 : nullptr;
6554 if (!ValPtr)
6555 continue;
6556
// 2 is the value this function itself writes back at the end, so finding it
// presumably means the module has already been processed - nothing to do.
6557 *ValPtr = CI->getZExtValue();
6558 if (*ValPtr == 2)
6559 return;
6560 }
6561 }
6562
// Only a flag value of exactly 1 counts as enabled.
6563 bool BTE = BTEValue == 1;
6564 bool BPPLR = BPPLRValue == 1;
6565 bool GCS = GCSValue == 1;
6566 bool SRA = SRAValue == 1;
6567
6568 StringRef SignTypeValue = "non-leaf";
6569 if (SRA && SRAALLValue == 1)
6570 SignTypeValue = "all";
6571
6572 StringRef SignKeyValue = "a_key";
6573 if (SRA && SRABKeyValue == 1)
6574 SignKeyValue = "b_key";
6575
// Declarations are skipped; only function definitions receive attributes.
6576 for (Function &F : M.getFunctionList()) {
6577 if (F.isDeclaration())
6578 continue;
6579
6580 if (SRA) {
6581 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6582 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6583 } else {
// Without the module-level flag, an explicit "none" is dropped together with
// the key attribute.
6584 if (auto A = F.getFnAttribute("sign-return-address");
6585 A.isValid() && "none" == A.getValueAsString()) {
6586 F.removeFnAttr("sign-return-address");
6587 F.removeFnAttr("sign-return-address-key");
6588 }
6589 }
6590 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6591 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6592 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6593 }
6594
// Rewrite each enabled flag with value 2 and Min behavior.
6595 if (BTE)
6596 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6597 if (BPPLR)
6598 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6599 if (GCS)
6600 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6601 if (SRA) {
6602 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6603 if (SRAALLValue == 1)
6604 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6605 if (SRABKeyValue == 1)
6606 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6607 }
6608}
6609
6610static bool isOldLoopArgument(Metadata *MD) {
6611 auto *T = dyn_cast_or_null<MDTuple>(MD);
6612 if (!T)
6613 return false;
6614 if (T->getNumOperands() < 1)
6615 return false;
6616 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6617 if (!S)
6618 return false;
6619 return S->getString().starts_with("llvm.vectorizer.");
6620}
6621
// NOTE(review): the declaration line of this definition (listing line 6622) is
// missing from this listing.
//
// Maps a legacy "llvm.vectorizer.*" tag name OldTag to the MDString holding
// its current name, created in context C.
6623 StringRef OldPrefix = "llvm.vectorizer.";
6624 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6625
// "unroll" is the one tag that is renamed rather than just re-prefixed.
6626 if (OldTag == "llvm.vectorizer.unroll")
6627 return MDString::get(C, "llvm.loop.interleave.count");
6628
// Every other tag keeps its suffix under the "llvm.loop.vectorize." prefix.
6629 return MDString::get(
6630 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6631 .str());
6632}
6633
// NOTE(review): the declaration line of this definition (listing line 6634)
// and the declaration of Ops (listing line 6647) are missing from this listing.
//
// Returns MD unchanged unless it is a tuple whose first operand is a string
// starting with "llvm.vectorizer."; in that case a new tuple is returned with
// the tag replaced by upgradeLoopTag's result and all other operands copied.
6635 auto *T = dyn_cast_or_null<MDTuple>(MD);
6636 if (!T)
6637 return MD;
6638 if (T->getNumOperands() < 1)
6639 return MD;
6640 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6641 if (!OldTag)
6642 return MD;
6643 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6644 return MD;
6645
6646 // This has an old tag. Upgrade it.
6648 Ops.reserve(T->getNumOperands());
6649 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
// Operands after the tag are carried over as they are.
6650 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6651 Ops.push_back(T->getOperand(I));
6652
6653 return MDTuple::get(T->getContext(), Ops);
6654}
6655
// NOTE(review): the declaration line of this definition (listing line 6656)
// and the declaration of Ops (listing line 6664) are missing from this listing.
//
// Returns &N unchanged unless N is a tuple with at least one operand that
// isOldLoopArgument accepts; otherwise returns a new tuple in which every
// operand has been passed through upgradeLoopArgument.
6657 auto *T = dyn_cast<MDTuple>(&N);
6658 if (!T)
6659 return &N;
6660
// Avoid building a new node when there is nothing to upgrade.
6661 if (none_of(T->operands(), isOldLoopArgument))
6662 return &N;
6663
6665 Ops.reserve(T->getNumOperands());
6666 for (Metadata *MD : T->operands())
6667 Ops.push_back(upgradeLoopArgument(MD));
6668
6669 return MDTuple::get(T->getContext(), Ops);
6670}
6671
6673 Triple T(TT);
6674 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6675 // the address space of globals to 1. This does not apply to SPIRV Logical.
6676 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6677 !DL.contains("-G") && !DL.starts_with("G")) {
6678 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6679 }
6680
6681 if (T.isLoongArch64() || T.isRISCV64()) {
6682 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6683 auto I = DL.find("-n64-");
6684 if (I != StringRef::npos)
6685 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6686 return DL.str();
6687 }
6688
6689 // AMDGPU data layout upgrades.
6690 std::string Res = DL.str();
6691 if (T.isAMDGPU()) {
6692 // Define address spaces for constants.
6693 if (!DL.contains("-G") && !DL.starts_with("G"))
6694 Res.append(Res.empty() ? "G1" : "-G1");
6695
6696 // AMDGCN data layout upgrades.
6697 if (T.isAMDGCN()) {
6698
6699 // Add missing non-integral declarations.
6700 // This goes before adding new address spaces to prevent incoherent string
6701 // values.
6702 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6703 Res.append("-ni:7:8:9");
6704 // Update ni:7 to ni:7:8:9.
6705 if (DL.ends_with("ni:7"))
6706 Res.append(":8:9");
6707 if (DL.ends_with("ni:7:8"))
6708 Res.append(":9");
6709
6710 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6711 // resources) An empty data layout has already been upgraded to G1 by now.
6712 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6713 Res.append("-p7:160:256:256:32");
6714 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6715 Res.append("-p8:128:128:128:48");
6716 constexpr StringRef OldP8("-p8:128:128-");
6717 if (DL.contains(OldP8))
6718 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6719 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6720 Res.append("-p9:192:256:256:32");
6721 }
6722
6723 // Upgrade the ELF mangling mode.
6724 if (!DL.contains("m:e"))
6725 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6726
6727 return Res;
6728 }
6729
6730 if (T.isSystemZ() && !DL.empty()) {
6731 // Make sure the stack alignment is present.
6732 if (!DL.contains("-S64"))
6733 return "E-S64" + DL.drop_front(1).str();
6734 return DL.str();
6735 }
6736
6737 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6738 // If the datalayout matches the expected format, add pointer size address
6739 // spaces to the datalayout.
6740 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6741 if (!DL.contains(AddrSpaces)) {
6743 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6744 if (R.match(Res, &Groups))
6745 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6746 }
6747 };
6748
6749 // AArch64 data layout upgrades.
6750 if (T.isAArch64()) {
6751 // Add "-Fn32"
6752 if (!DL.empty() && !DL.contains("-Fn32"))
6753 Res.append("-Fn32");
6754 AddPtr32Ptr64AddrSpaces();
6755 return Res;
6756 }
6757
6758 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6759 T.isWasm()) {
6760 // Mips64 with o32 ABI did not add "-i128:128".
6761 // Add "-i128:128"
6762 std::string I64 = "-i64:64";
6763 std::string I128 = "-i128:128";
6764 if (!StringRef(Res).contains(I128)) {
6765 size_t Pos = Res.find(I64);
6766 if (Pos != size_t(-1))
6767 Res.insert(Pos + I64.size(), I128);
6768 }
6769 }
6770
6771 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6772 size_t Pos = Res.find("-S128");
6773 if (Pos == StringRef::npos)
6774 Pos = Res.size();
6775 Res.insert(Pos, "-f64:32:64");
6776 }
6777
6778 if (!T.isX86())
6779 return Res;
6780
6781 AddPtr32Ptr64AddrSpaces();
6782
6783 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6784 // for i128 operations prior to this being reflected in the data layout, and
6785 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6786 // boundaries, so although this is a breaking change, the upgrade is expected
6787 // to fix more IR than it breaks.
6788 // Intel MCU is an exception and uses 4-byte-alignment.
6789 if (!T.isOSIAMCU()) {
6790 std::string I128 = "-i128:128";
6791 if (StringRef Ref = Res; !Ref.contains(I128)) {
6793 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6794 if (R.match(Res, &Groups))
6795 Res = (Groups[1] + I128 + Groups[3]).str();
6796 }
6797 }
6798
6799 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6800 // Raising the alignment is safe because Clang did not produce f80 values in
6801 // the MSVC environment before this upgrade was added.
6802 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6803 StringRef Ref = Res;
6804 auto I = Ref.find("-f80:32-");
6805 if (I != StringRef::npos)
6806 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6807 }
6808
6809 return Res;
6810}
6811
6812void llvm::UpgradeAttributes(AttrBuilder &B) {
6813 StringRef FramePointer;
6814 Attribute A = B.getAttribute("no-frame-pointer-elim");
6815 if (A.isValid()) {
6816 // The value can be "true" or "false".
6817 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6818 B.removeAttribute("no-frame-pointer-elim");
6819 }
6820 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6821 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6822 if (FramePointer != "all")
6823 FramePointer = "non-leaf";
6824 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6825 }
6826 if (!FramePointer.empty())
6827 B.addAttribute("frame-pointer", FramePointer);
6828
6829 A = B.getAttribute("null-pointer-is-valid");
6830 if (A.isValid()) {
6831 // The value can be "true" or "false".
6832 bool NullPointerIsValid = A.getValueAsString() == "true";
6833 B.removeAttribute("null-pointer-is-valid");
6834 if (NullPointerIsValid)
6835 B.addAttribute(Attribute::NullPointerIsValid);
6836 }
6837
6838 A = B.getAttribute("uniform-work-group-size");
6839 if (A.isValid()) {
6840 StringRef Val = A.getValueAsString();
6841 if (!Val.empty()) {
6842 bool IsTrue = Val == "true";
6843 B.removeAttribute("uniform-work-group-size");
6844 if (IsTrue)
6845 B.addAttribute("uniform-work-group-size");
6846 }
6847 }
6848}
6849
6850void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6851 // clang.arc.attachedcall bundles are now required to have an operand.
6852 // If they don't, it's okay to drop them entirely: when there is an operand,
6853 // the "attachedcall" is meaningful and required, but without an operand,
6854 // it's just a marker NOP. Dropping it merely prevents an optimization.
6855 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6856 return OBD.getTag() == "clang.arc.attachedcall" &&
6857 OBD.inputs().empty();
6858 });
6859}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define R2(n)
This file contains the declarations for metadata subclasses.
#define T
#define T1
NVPTX address space definition.
uint64_t High
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:740
@ ICMP_SLT
signed less than
Definition InstrTypes.h:769
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:770
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:764
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:763
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:767
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:765
@ ICMP_NE
not equal
Definition InstrTypes.h:762
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:768
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:766
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DWARF expression.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setApproxFunc(bool B=true)
Definition FMF.h:96
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:246
const Function & getFunction() const
Definition Function.h:166
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Function.cpp:449
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
Argument * getArg(unsigned i) const
Definition Function.h:886
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:629
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2858
Base class for instruction visitors.
Definition InstVisitor.h:78
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr if the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
LLVM_ABI SyncScope::ID getOrInsertSyncScopeID(StringRef SSN)
getOrInsertSyncScopeID - Maps synchronization scope name to synchronization scope ID.
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
LLVMContext & getContext() const
Definition Metadata.h:1244
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
A single uniqued string.
Definition Metadata.h:722
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1529
Metadata wrapper in the Value hierarchy.
Definition Metadata.h:184
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition Module.h:117
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition Module.h:138
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition Module.h:120
@ Min
Takes the min of the two values, which are required to be integers.
Definition Module.h:152
@ Max
Takes the max of the two values, which are required to be integers.
Definition Module.h:149
A tuple of MDNodes.
Definition Metadata.h:1760
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
Definition Metadata.h:1856
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
StringRef getTag() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return a 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition Type.cpp:895
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
Represent a constant reference to a string, i.e.
Definition StringRef.h:56
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
static constexpr size_t npos
Definition StringRef.h:58
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
Check if the string is empty.
Definition StringRef.h:141
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
Get the string size.
Definition StringRef.h:144
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition StringRef.h:844
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:483
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:311
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:287
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:288
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:393
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
bool use_empty() const
Definition Value.h:346
bool hasName() const
Definition Value.h:261
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct a VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows arbitrary numbers to be used as calling convention identifiers.
Definition CallingConv.h:24
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool isSignatureValid(Intrinsic::ID ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys, raw_ostream &OS=nulls())
Returns true if FT is a valid function type for intrinsic ID.
LLVM_ABI bool hasStructReturnType(ID id)
Returns true if id has a struct return type.
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
Definition Metadata.h:709
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:558
@ Length
Definition DWP.cpp:558
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:328
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading, and returns true if it requires upgrading.
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case of optionals) value is accepted.
Definition Casting.h:732
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the function updated as well as all calls updated to the new function.
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting iteration.
Definition STLExtras.h:633
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition MathExtras.h:284
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address spaces: the instruction is replaced by a pair ptrtoint+inttoptr.
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type arguments.
Definition Casting.h:547
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction is replaced by a pair ptrtoint+inttoptr.
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent to: C.erase(remove_if(C, pred), C.end());
Definition STLExtras.h:2191
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to the struct-path aware TBAA format.
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker to new module flag format.
@ DEBUG_METADATA_VERSION
Definition Metadata.h:54
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:876
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106