LLVM 23.0.0git
AutoUpgrade.cpp
Go to the documentation of this file.
1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>
58
59using namespace llvm;
60
61static cl::opt<bool>
62 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
63 cl::desc("Disable autoupgrade of debug info"));
64
65static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
66
67// Report a fatal error along with the
68// Call Instruction which caused the error
69[[noreturn]] static void reportFatalUsageErrorWithCI(StringRef reason,
70 CallBase *CI) {
71 CI->print(llvm::errs());
72 llvm::errs() << "\n";
74}
75
76// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
77// changed their type from v4f32 to v2i64.
79 Function *&NewFn) {
80 // Check whether this is an old version of the function, which received
81 // v4f32 arguments.
82 Type *Arg0Type = F->getFunctionType()->getParamType(0);
83 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
84 return false;
85
86 // Yes, it's old, replace it with new version.
87 rename(F);
88 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
89 return true;
90}
91
92// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
93// arguments have changed their type from i32 to i8.
95 Function *&NewFn) {
96 // Check that the last argument is an i32.
97 Type *LastArgType = F->getFunctionType()->getParamType(
98 F->getFunctionType()->getNumParams() - 1);
99 if (!LastArgType->isIntegerTy(32))
100 return false;
101
102 // Move this function aside and map down.
103 rename(F);
104 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
105 return true;
106}
107
108// Upgrade the declaration of fp compare intrinsics that change return type
109// from scalar to vXi1 mask.
111 Function *&NewFn) {
112 // Check if the return type is a vector.
113 if (F->getReturnType()->isVectorTy())
114 return false;
115
116 rename(F);
117 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
118 return true;
119}
120
121// Upgrade the declaration of multiply and add bytes intrinsics whose input
122// arguments' types have changed from vectors of i32 to vectors of i8
124 Function *&NewFn) {
125 // check if input argument type is a vector of i8
126 Type *Arg1Type = F->getFunctionType()->getParamType(1);
127 Type *Arg2Type = F->getFunctionType()->getParamType(2);
128 if (Arg1Type->isVectorTy() &&
129 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
130 Arg2Type->isVectorTy() &&
131 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
132 return false;
133
134 rename(F);
135 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
136 return true;
137}
138
139// Upgrade the declaration of multipy and add words intrinsics whose input
140// arguments' types have changed to vectors of i32 to vectors of i16
142 Function *&NewFn) {
143 // check if input argument type is a vector of i16
144 Type *Arg1Type = F->getFunctionType()->getParamType(1);
145 Type *Arg2Type = F->getFunctionType()->getParamType(2);
146 if (Arg1Type->isVectorTy() &&
147 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
148 Arg2Type->isVectorTy() &&
149 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
150 return false;
151
152 rename(F);
153 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
154 return true;
155}
156
158 Function *&NewFn) {
159 if (F->getReturnType()->getScalarType()->isBFloatTy())
160 return false;
161
162 rename(F);
163 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
164 return true;
165}
166
168 Function *&NewFn) {
169 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
170 return false;
171
172 rename(F);
173 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
174 return true;
175}
176
178 // All of the intrinsics matches below should be marked with which llvm
179 // version started autoupgrading them. At some point in the future we would
180 // like to use this information to remove upgrade code for some older
181 // intrinsics. It is currently undecided how we will determine that future
182 // point.
183 if (Name.consume_front("avx."))
184 return (Name.starts_with("blend.p") || // Added in 3.7
185 Name == "cvt.ps2.pd.256" || // Added in 3.9
186 Name == "cvtdq2.pd.256" || // Added in 3.9
187 Name == "cvtdq2.ps.256" || // Added in 7.0
188 Name.starts_with("movnt.") || // Added in 3.2
189 Name.starts_with("sqrt.p") || // Added in 7.0
190 Name.starts_with("storeu.") || // Added in 3.9
191 Name.starts_with("vbroadcast.s") || // Added in 3.5
192 Name.starts_with("vbroadcastf128") || // Added in 4.0
193 Name.starts_with("vextractf128.") || // Added in 3.7
194 Name.starts_with("vinsertf128.") || // Added in 3.7
195 Name.starts_with("vperm2f128.") || // Added in 6.0
196 Name.starts_with("vpermil.")); // Added in 3.1
197
198 if (Name.consume_front("avx2."))
199 return (Name == "movntdqa" || // Added in 5.0
200 Name.starts_with("pabs.") || // Added in 6.0
201 Name.starts_with("padds.") || // Added in 8.0
202 Name.starts_with("paddus.") || // Added in 8.0
203 Name.starts_with("pblendd.") || // Added in 3.7
204 Name == "pblendw" || // Added in 3.7
205 Name.starts_with("pbroadcast") || // Added in 3.8
206 Name.starts_with("pcmpeq.") || // Added in 3.1
207 Name.starts_with("pcmpgt.") || // Added in 3.1
208 Name.starts_with("pmax") || // Added in 3.9
209 Name.starts_with("pmin") || // Added in 3.9
210 Name.starts_with("pmovsx") || // Added in 3.9
211 Name.starts_with("pmovzx") || // Added in 3.9
212 Name == "pmul.dq" || // Added in 7.0
213 Name == "pmulu.dq" || // Added in 7.0
214 Name.starts_with("psll.dq") || // Added in 3.7
215 Name.starts_with("psrl.dq") || // Added in 3.7
216 Name.starts_with("psubs.") || // Added in 8.0
217 Name.starts_with("psubus.") || // Added in 8.0
218 Name.starts_with("vbroadcast") || // Added in 3.8
219 Name == "vbroadcasti128" || // Added in 3.7
220 Name == "vextracti128" || // Added in 3.7
221 Name == "vinserti128" || // Added in 3.7
222 Name == "vperm2i128"); // Added in 6.0
223
224 if (Name.consume_front("avx512.")) {
225 if (Name.consume_front("mask."))
226 // 'avx512.mask.*'
227 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
228 Name.starts_with("and.") || // Added in 3.9
229 Name.starts_with("andn.") || // Added in 3.9
230 Name.starts_with("broadcast.s") || // Added in 3.9
231 Name.starts_with("broadcastf32x4.") || // Added in 6.0
232 Name.starts_with("broadcastf32x8.") || // Added in 6.0
233 Name.starts_with("broadcastf64x2.") || // Added in 6.0
234 Name.starts_with("broadcastf64x4.") || // Added in 6.0
235 Name.starts_with("broadcasti32x4.") || // Added in 6.0
236 Name.starts_with("broadcasti32x8.") || // Added in 6.0
237 Name.starts_with("broadcasti64x2.") || // Added in 6.0
238 Name.starts_with("broadcasti64x4.") || // Added in 6.0
239 Name.starts_with("cmp.b") || // Added in 5.0
240 Name.starts_with("cmp.d") || // Added in 5.0
241 Name.starts_with("cmp.q") || // Added in 5.0
242 Name.starts_with("cmp.w") || // Added in 5.0
243 Name.starts_with("compress.b") || // Added in 9.0
244 Name.starts_with("compress.d") || // Added in 9.0
245 Name.starts_with("compress.p") || // Added in 9.0
246 Name.starts_with("compress.q") || // Added in 9.0
247 Name.starts_with("compress.store.") || // Added in 7.0
248 Name.starts_with("compress.w") || // Added in 9.0
249 Name.starts_with("conflict.") || // Added in 9.0
250 Name.starts_with("cvtdq2pd.") || // Added in 4.0
251 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
252 Name == "cvtpd2dq.256" || // Added in 7.0
253 Name == "cvtpd2ps.256" || // Added in 7.0
254 Name == "cvtps2pd.128" || // Added in 7.0
255 Name == "cvtps2pd.256" || // Added in 7.0
256 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
257 Name == "cvtqq2ps.256" || // Added in 9.0
258 Name == "cvtqq2ps.512" || // Added in 9.0
259 Name == "cvttpd2dq.256" || // Added in 7.0
260 Name == "cvttps2dq.128" || // Added in 7.0
261 Name == "cvttps2dq.256" || // Added in 7.0
262 Name.starts_with("cvtudq2pd.") || // Added in 4.0
263 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
264 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
265 Name == "cvtuqq2ps.256" || // Added in 9.0
266 Name == "cvtuqq2ps.512" || // Added in 9.0
267 Name.starts_with("dbpsadbw.") || // Added in 7.0
268 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
269 Name.starts_with("expand.b") || // Added in 9.0
270 Name.starts_with("expand.d") || // Added in 9.0
271 Name.starts_with("expand.load.") || // Added in 7.0
272 Name.starts_with("expand.p") || // Added in 9.0
273 Name.starts_with("expand.q") || // Added in 9.0
274 Name.starts_with("expand.w") || // Added in 9.0
275 Name.starts_with("fpclass.p") || // Added in 7.0
276 Name.starts_with("insert") || // Added in 4.0
277 Name.starts_with("load.") || // Added in 3.9
278 Name.starts_with("loadu.") || // Added in 3.9
279 Name.starts_with("lzcnt.") || // Added in 5.0
280 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
281 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
282 Name.starts_with("movddup") || // Added in 3.9
283 Name.starts_with("move.s") || // Added in 4.0
284 Name.starts_with("movshdup") || // Added in 3.9
285 Name.starts_with("movsldup") || // Added in 3.9
286 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
287 Name.starts_with("or.") || // Added in 3.9
288 Name.starts_with("pabs.") || // Added in 6.0
289 Name.starts_with("packssdw.") || // Added in 5.0
290 Name.starts_with("packsswb.") || // Added in 5.0
291 Name.starts_with("packusdw.") || // Added in 5.0
292 Name.starts_with("packuswb.") || // Added in 5.0
293 Name.starts_with("padd.") || // Added in 4.0
294 Name.starts_with("padds.") || // Added in 8.0
295 Name.starts_with("paddus.") || // Added in 8.0
296 Name.starts_with("palignr.") || // Added in 3.9
297 Name.starts_with("pand.") || // Added in 3.9
298 Name.starts_with("pandn.") || // Added in 3.9
299 Name.starts_with("pavg") || // Added in 6.0
300 Name.starts_with("pbroadcast") || // Added in 6.0
301 Name.starts_with("pcmpeq.") || // Added in 3.9
302 Name.starts_with("pcmpgt.") || // Added in 3.9
303 Name.starts_with("perm.df.") || // Added in 3.9
304 Name.starts_with("perm.di.") || // Added in 3.9
305 Name.starts_with("permvar.") || // Added in 7.0
306 Name.starts_with("pmaddubs.w.") || // Added in 7.0
307 Name.starts_with("pmaddw.d.") || // Added in 7.0
308 Name.starts_with("pmax") || // Added in 4.0
309 Name.starts_with("pmin") || // Added in 4.0
310 Name == "pmov.qd.256" || // Added in 9.0
311 Name == "pmov.qd.512" || // Added in 9.0
312 Name == "pmov.wb.256" || // Added in 9.0
313 Name == "pmov.wb.512" || // Added in 9.0
314 Name.starts_with("pmovsx") || // Added in 4.0
315 Name.starts_with("pmovzx") || // Added in 4.0
316 Name.starts_with("pmul.dq.") || // Added in 4.0
317 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
318 Name.starts_with("pmulh.w.") || // Added in 7.0
319 Name.starts_with("pmulhu.w.") || // Added in 7.0
320 Name.starts_with("pmull.") || // Added in 4.0
321 Name.starts_with("pmultishift.qb.") || // Added in 8.0
322 Name.starts_with("pmulu.dq.") || // Added in 4.0
323 Name.starts_with("por.") || // Added in 3.9
324 Name.starts_with("prol.") || // Added in 8.0
325 Name.starts_with("prolv.") || // Added in 8.0
326 Name.starts_with("pror.") || // Added in 8.0
327 Name.starts_with("prorv.") || // Added in 8.0
328 Name.starts_with("pshuf.b.") || // Added in 4.0
329 Name.starts_with("pshuf.d.") || // Added in 3.9
330 Name.starts_with("pshufh.w.") || // Added in 3.9
331 Name.starts_with("pshufl.w.") || // Added in 3.9
332 Name.starts_with("psll.d") || // Added in 4.0
333 Name.starts_with("psll.q") || // Added in 4.0
334 Name.starts_with("psll.w") || // Added in 4.0
335 Name.starts_with("pslli") || // Added in 4.0
336 Name.starts_with("psllv") || // Added in 4.0
337 Name.starts_with("psra.d") || // Added in 4.0
338 Name.starts_with("psra.q") || // Added in 4.0
339 Name.starts_with("psra.w") || // Added in 4.0
340 Name.starts_with("psrai") || // Added in 4.0
341 Name.starts_with("psrav") || // Added in 4.0
342 Name.starts_with("psrl.d") || // Added in 4.0
343 Name.starts_with("psrl.q") || // Added in 4.0
344 Name.starts_with("psrl.w") || // Added in 4.0
345 Name.starts_with("psrli") || // Added in 4.0
346 Name.starts_with("psrlv") || // Added in 4.0
347 Name.starts_with("psub.") || // Added in 4.0
348 Name.starts_with("psubs.") || // Added in 8.0
349 Name.starts_with("psubus.") || // Added in 8.0
350 Name.starts_with("pternlog.") || // Added in 7.0
351 Name.starts_with("punpckh") || // Added in 3.9
352 Name.starts_with("punpckl") || // Added in 3.9
353 Name.starts_with("pxor.") || // Added in 3.9
354 Name.starts_with("shuf.f") || // Added in 6.0
355 Name.starts_with("shuf.i") || // Added in 6.0
356 Name.starts_with("shuf.p") || // Added in 4.0
357 Name.starts_with("sqrt.p") || // Added in 7.0
358 Name.starts_with("store.b.") || // Added in 3.9
359 Name.starts_with("store.d.") || // Added in 3.9
360 Name.starts_with("store.p") || // Added in 3.9
361 Name.starts_with("store.q.") || // Added in 3.9
362 Name.starts_with("store.w.") || // Added in 3.9
363 Name == "store.ss" || // Added in 7.0
364 Name.starts_with("storeu.") || // Added in 3.9
365 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
366 Name.starts_with("ucmp.") || // Added in 5.0
367 Name.starts_with("unpckh.") || // Added in 3.9
368 Name.starts_with("unpckl.") || // Added in 3.9
369 Name.starts_with("valign.") || // Added in 4.0
370 Name == "vcvtph2ps.128" || // Added in 11.0
371 Name == "vcvtph2ps.256" || // Added in 11.0
372 Name.starts_with("vextract") || // Added in 4.0
373 Name.starts_with("vfmadd.") || // Added in 7.0
374 Name.starts_with("vfmaddsub.") || // Added in 7.0
375 Name.starts_with("vfnmadd.") || // Added in 7.0
376 Name.starts_with("vfnmsub.") || // Added in 7.0
377 Name.starts_with("vpdpbusd.") || // Added in 7.0
378 Name.starts_with("vpdpbusds.") || // Added in 7.0
379 Name.starts_with("vpdpwssd.") || // Added in 7.0
380 Name.starts_with("vpdpwssds.") || // Added in 7.0
381 Name.starts_with("vpermi2var.") || // Added in 7.0
382 Name.starts_with("vpermil.p") || // Added in 3.9
383 Name.starts_with("vpermilvar.") || // Added in 4.0
384 Name.starts_with("vpermt2var.") || // Added in 7.0
385 Name.starts_with("vpmadd52") || // Added in 7.0
386 Name.starts_with("vpshld.") || // Added in 7.0
387 Name.starts_with("vpshldv.") || // Added in 8.0
388 Name.starts_with("vpshrd.") || // Added in 7.0
389 Name.starts_with("vpshrdv.") || // Added in 8.0
390 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
391 Name.starts_with("xor.")); // Added in 3.9
392
393 if (Name.consume_front("mask3."))
394 // 'avx512.mask3.*'
395 return (Name.starts_with("vfmadd.") || // Added in 7.0
396 Name.starts_with("vfmaddsub.") || // Added in 7.0
397 Name.starts_with("vfmsub.") || // Added in 7.0
398 Name.starts_with("vfmsubadd.") || // Added in 7.0
399 Name.starts_with("vfnmsub.")); // Added in 7.0
400
401 if (Name.consume_front("maskz."))
402 // 'avx512.maskz.*'
403 return (Name.starts_with("pternlog.") || // Added in 7.0
404 Name.starts_with("vfmadd.") || // Added in 7.0
405 Name.starts_with("vfmaddsub.") || // Added in 7.0
406 Name.starts_with("vpdpbusd.") || // Added in 7.0
407 Name.starts_with("vpdpbusds.") || // Added in 7.0
408 Name.starts_with("vpdpwssd.") || // Added in 7.0
409 Name.starts_with("vpdpwssds.") || // Added in 7.0
410 Name.starts_with("vpermt2var.") || // Added in 7.0
411 Name.starts_with("vpmadd52") || // Added in 7.0
412 Name.starts_with("vpshldv.") || // Added in 8.0
413 Name.starts_with("vpshrdv.")); // Added in 8.0
414
415 // 'avx512.*'
416 return (Name == "movntdqa" || // Added in 5.0
417 Name == "pmul.dq.512" || // Added in 7.0
418 Name == "pmulu.dq.512" || // Added in 7.0
419 Name.starts_with("broadcastm") || // Added in 6.0
420 Name.starts_with("cmp.p") || // Added in 12.0
421 Name.starts_with("cvtb2mask.") || // Added in 7.0
422 Name.starts_with("cvtd2mask.") || // Added in 7.0
423 Name.starts_with("cvtmask2") || // Added in 5.0
424 Name.starts_with("cvtq2mask.") || // Added in 7.0
425 Name == "cvtusi2sd" || // Added in 7.0
426 Name.starts_with("cvtw2mask.") || // Added in 7.0
427 Name == "kand.w" || // Added in 7.0
428 Name == "kandn.w" || // Added in 7.0
429 Name == "knot.w" || // Added in 7.0
430 Name == "kor.w" || // Added in 7.0
431 Name == "kortestc.w" || // Added in 7.0
432 Name == "kortestz.w" || // Added in 7.0
433 Name.starts_with("kunpck") || // added in 6.0
434 Name == "kxnor.w" || // Added in 7.0
435 Name == "kxor.w" || // Added in 7.0
436 Name.starts_with("padds.") || // Added in 8.0
437 Name.starts_with("pbroadcast") || // Added in 3.9
438 Name.starts_with("prol") || // Added in 8.0
439 Name.starts_with("pror") || // Added in 8.0
440 Name.starts_with("psll.dq") || // Added in 3.9
441 Name.starts_with("psrl.dq") || // Added in 3.9
442 Name.starts_with("psubs.") || // Added in 8.0
443 Name.starts_with("ptestm") || // Added in 6.0
444 Name.starts_with("ptestnm") || // Added in 6.0
445 Name.starts_with("storent.") || // Added in 3.9
446 Name.starts_with("vbroadcast.s") || // Added in 7.0
447 Name.starts_with("vpshld.") || // Added in 8.0
448 Name.starts_with("vpshrd.")); // Added in 8.0
449 }
450
451 if (Name.consume_front("fma."))
452 return (Name.starts_with("vfmadd.") || // Added in 7.0
453 Name.starts_with("vfmsub.") || // Added in 7.0
454 Name.starts_with("vfmsubadd.") || // Added in 7.0
455 Name.starts_with("vfnmadd.") || // Added in 7.0
456 Name.starts_with("vfnmsub.")); // Added in 7.0
457
458 if (Name.consume_front("fma4."))
459 return Name.starts_with("vfmadd.s"); // Added in 7.0
460
461 if (Name.consume_front("sse."))
462 return (Name == "add.ss" || // Added in 4.0
463 Name == "cvtsi2ss" || // Added in 7.0
464 Name == "cvtsi642ss" || // Added in 7.0
465 Name == "div.ss" || // Added in 4.0
466 Name == "mul.ss" || // Added in 4.0
467 Name.starts_with("sqrt.p") || // Added in 7.0
468 Name == "sqrt.ss" || // Added in 7.0
469 Name.starts_with("storeu.") || // Added in 3.9
470 Name == "sub.ss"); // Added in 4.0
471
472 if (Name.consume_front("sse2."))
473 return (Name == "add.sd" || // Added in 4.0
474 Name == "cvtdq2pd" || // Added in 3.9
475 Name == "cvtdq2ps" || // Added in 7.0
476 Name == "cvtps2pd" || // Added in 3.9
477 Name == "cvtsi2sd" || // Added in 7.0
478 Name == "cvtsi642sd" || // Added in 7.0
479 Name == "cvtss2sd" || // Added in 7.0
480 Name == "div.sd" || // Added in 4.0
481 Name == "mul.sd" || // Added in 4.0
482 Name.starts_with("padds.") || // Added in 8.0
483 Name.starts_with("paddus.") || // Added in 8.0
484 Name.starts_with("pcmpeq.") || // Added in 3.1
485 Name.starts_with("pcmpgt.") || // Added in 3.1
486 Name == "pmaxs.w" || // Added in 3.9
487 Name == "pmaxu.b" || // Added in 3.9
488 Name == "pmins.w" || // Added in 3.9
489 Name == "pminu.b" || // Added in 3.9
490 Name == "pmulu.dq" || // Added in 7.0
491 Name.starts_with("pshuf") || // Added in 3.9
492 Name.starts_with("psll.dq") || // Added in 3.7
493 Name.starts_with("psrl.dq") || // Added in 3.7
494 Name.starts_with("psubs.") || // Added in 8.0
495 Name.starts_with("psubus.") || // Added in 8.0
496 Name.starts_with("sqrt.p") || // Added in 7.0
497 Name == "sqrt.sd" || // Added in 7.0
498 Name == "storel.dq" || // Added in 3.9
499 Name.starts_with("storeu.") || // Added in 3.9
500 Name == "sub.sd"); // Added in 4.0
501
502 if (Name.consume_front("sse41."))
503 return (Name.starts_with("blendp") || // Added in 3.7
504 Name == "movntdqa" || // Added in 5.0
505 Name == "pblendw" || // Added in 3.7
506 Name == "pmaxsb" || // Added in 3.9
507 Name == "pmaxsd" || // Added in 3.9
508 Name == "pmaxud" || // Added in 3.9
509 Name == "pmaxuw" || // Added in 3.9
510 Name == "pminsb" || // Added in 3.9
511 Name == "pminsd" || // Added in 3.9
512 Name == "pminud" || // Added in 3.9
513 Name == "pminuw" || // Added in 3.9
514 Name.starts_with("pmovsx") || // Added in 3.8
515 Name.starts_with("pmovzx") || // Added in 3.9
516 Name == "pmuldq"); // Added in 7.0
517
518 if (Name.consume_front("sse42."))
519 return Name == "crc32.64.8"; // Added in 3.4
520
521 if (Name.consume_front("sse4a."))
522 return Name.starts_with("movnt."); // Added in 3.9
523
524 if (Name.consume_front("ssse3."))
525 return (Name == "pabs.b.128" || // Added in 6.0
526 Name == "pabs.d.128" || // Added in 6.0
527 Name == "pabs.w.128"); // Added in 6.0
528
529 if (Name.consume_front("xop."))
530 return (Name == "vpcmov" || // Added in 3.8
531 Name == "vpcmov.256" || // Added in 5.0
532 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
533 Name.starts_with("vprot")); // Added in 8.0
534
535 return (Name == "addcarry.u32" || // Added in 8.0
536 Name == "addcarry.u64" || // Added in 8.0
537 Name == "addcarryx.u32" || // Added in 8.0
538 Name == "addcarryx.u64" || // Added in 8.0
539 Name == "subborrow.u32" || // Added in 8.0
540 Name == "subborrow.u64" || // Added in 8.0
541 Name.starts_with("vcvtph2ps.")); // Added in 11.0
542}
543
545 Function *&NewFn) {
546 // Only handle intrinsics that start with "x86.".
547 if (!Name.consume_front("x86."))
548 return false;
549
550 if (shouldUpgradeX86Intrinsic(F, Name)) {
551 NewFn = nullptr;
552 return true;
553 }
554
555 if (Name == "rdtscp") { // Added in 8.0
556 // If this intrinsic has 0 operands, it's the new version.
557 if (F->getFunctionType()->getNumParams() == 0)
558 return false;
559
560 rename(F);
561 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
562 Intrinsic::x86_rdtscp);
563 return true;
564 }
565
567
568 // SSE4.1 ptest functions may have an old signature.
569 if (Name.consume_front("sse41.ptest")) { // Added in 3.2
571 .Case("c", Intrinsic::x86_sse41_ptestc)
572 .Case("z", Intrinsic::x86_sse41_ptestz)
573 .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
576 return upgradePTESTIntrinsic(F, ID, NewFn);
577
578 return false;
579 }
580
581 // Several blend and other instructions with masks used the wrong number of
582 // bits.
583
584 // Added in 3.6
586 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
587 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
588 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
589 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
590 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
591 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
594 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
595
596 if (Name.consume_front("avx512.")) {
597 if (Name.consume_front("mask.cmp.")) {
598 // Added in 7.0
600 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
601 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
602 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
603 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
604 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
605 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
608 return upgradeX86MaskedFPCompare(F, ID, NewFn);
609 } else if (Name.starts_with("vpdpbusd.") ||
610 Name.starts_with("vpdpbusds.")) {
611 // Added in 21.1
613 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
614 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
615 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
616 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
617 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
618 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
621 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
622 } else if (Name.starts_with("vpdpwssd.") ||
623 Name.starts_with("vpdpwssds.")) {
624 // Added in 21.1
626 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
627 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
628 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
629 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
630 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
631 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
634 return upgradeX86MultiplyAddWords(F, ID, NewFn);
635 }
636 return false; // No other 'x86.avx512.*'.
637 }
638
639 if (Name.consume_front("avx2.")) {
640 if (Name.consume_front("vpdpb")) {
641 // Added in 21.1
643 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
644 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
645 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
646 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
647 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
648 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
649 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
650 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
651 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
652 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
653 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
654 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
657 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
658 } else if (Name.consume_front("vpdpw")) {
659 // Added in 21.1
661 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
662 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
663 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
664 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
665 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
666 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
667 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
668 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
669 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
670 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
671 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
672 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
675 return upgradeX86MultiplyAddWords(F, ID, NewFn);
676 }
677 return false; // No other 'x86.avx2.*'
678 }
679
680 if (Name.consume_front("avx10.")) {
681 if (Name.consume_front("vpdpb")) {
682 // Added in 21.1
684 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
685 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
686 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
687 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
688 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
689 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
692 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
693 } else if (Name.consume_front("vpdpw")) {
695 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
696 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
697 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
698 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
699 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
700 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
703 return upgradeX86MultiplyAddWords(F, ID, NewFn);
704 }
705 return false; // No other 'x86.avx10.*'
706 }
707
708 if (Name.consume_front("avx512bf16.")) {
709 // Added in 9.0
711 .Case("cvtne2ps2bf16.128",
712 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
713 .Case("cvtne2ps2bf16.256",
714 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
715 .Case("cvtne2ps2bf16.512",
716 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
717 .Case("mask.cvtneps2bf16.128",
718 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
719 .Case("cvtneps2bf16.256",
720 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
721 .Case("cvtneps2bf16.512",
722 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
725 return upgradeX86BF16Intrinsic(F, ID, NewFn);
726
727 // Added in 9.0
729 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
730 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
731 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
734 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
735 return false; // No other 'x86.avx512bf16.*'.
736 }
737
738 if (Name.consume_front("xop.")) {
740 if (Name.starts_with("vpermil2")) { // Added in 3.9
741 // Upgrade any XOP PERMIL2 index operand still using a float/double
742 // vector.
743 auto Idx = F->getFunctionType()->getParamType(2);
744 if (Idx->isFPOrFPVectorTy()) {
745 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
746 unsigned EltSize = Idx->getScalarSizeInBits();
747 if (EltSize == 64 && IdxSize == 128)
748 ID = Intrinsic::x86_xop_vpermil2pd;
749 else if (EltSize == 32 && IdxSize == 128)
750 ID = Intrinsic::x86_xop_vpermil2ps;
751 else if (EltSize == 64 && IdxSize == 256)
752 ID = Intrinsic::x86_xop_vpermil2pd_256;
753 else
754 ID = Intrinsic::x86_xop_vpermil2ps_256;
755 }
756 } else if (F->arg_size() == 2)
757 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
759 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
760 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
762
764 rename(F);
765 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
766 return true;
767 }
768 return false; // No other 'x86.xop.*'
769 }
770
771 if (Name == "seh.recoverfp") {
772 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
773 Intrinsic::eh_recoverfp);
774 return true;
775 }
776
777 return false;
778}
779
780// Upgrade ARM (IsArm) or Aarch64 (!IsArm) intrinsic fns. Return true iff so.
781// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
783 StringRef Name,
784 Function *&NewFn) {
785 if (Name.starts_with("rbit")) {
786 // '(arm|aarch64).rbit'.
788 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
789 return true;
790 }
791
792 if (Name == "thread.pointer") {
793 // '(arm|aarch64).thread.pointer'.
795 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
796 return true;
797 }
798
799 bool Neon = Name.consume_front("neon.");
800 if (Neon) {
801 // '(arm|aarch64).neon.*'.
802 // Changed in 12.0: bfdot accept v4bf16 and v8bf16 instead of v8i8 and
803 // v16i8 respectively.
804 if (Name.consume_front("bfdot.")) {
805 // (arm|aarch64).neon.bfdot.*'.
808 .Cases({"v2f32.v8i8", "v4f32.v16i8"},
809 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
810 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
813 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
814 assert((OperandWidth == 64 || OperandWidth == 128) &&
815 "Unexpected operand width");
816 LLVMContext &Ctx = F->getParent()->getContext();
817 std::array<Type *, 2> Tys{
818 {F->getReturnType(),
819 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
820 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
821 return true;
822 }
823 return false; // No other '(arm|aarch64).neon.bfdot.*'.
824 }
825
826 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
827 // anymore and accept v8bf16 instead of v16i8.
828 if (Name.consume_front("bfm")) {
829 // (arm|aarch64).neon.bfm*'.
830 if (Name.consume_back(".v4f32.v16i8")) {
831 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
834 .Case("mla",
835 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
836 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
837 .Case("lalb",
838 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
839 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
840 .Case("lalt",
841 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
842 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
845 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
846 return true;
847 }
848 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
849 }
850 return false; // No other '(arm|aarch64).neon.bfm*.
851 }
852 // Continue on to Aarch64 Neon or Arm Neon.
853 }
854 // Continue on to Arm or Aarch64.
855
856 if (IsArm) {
857 // 'arm.*'.
858 if (Neon) {
859 // 'arm.neon.*'.
861 .StartsWith("vclz.", Intrinsic::ctlz)
862 .StartsWith("vcnt.", Intrinsic::ctpop)
863 .StartsWith("vqadds.", Intrinsic::sadd_sat)
864 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
865 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
866 .StartsWith("vqsubu.", Intrinsic::usub_sat)
867 .StartsWith("vrinta.", Intrinsic::round)
868 .StartsWith("vrintn.", Intrinsic::roundeven)
869 .StartsWith("vrintm.", Intrinsic::floor)
870 .StartsWith("vrintp.", Intrinsic::ceil)
871 .StartsWith("vrintx.", Intrinsic::rint)
872 .StartsWith("vrintz.", Intrinsic::trunc)
875 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
876 F->arg_begin()->getType());
877 return true;
878 }
879
880 if (Name.consume_front("vst")) {
881 // 'arm.neon.vst*'.
882 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
884 if (vstRegex.match(Name, &Groups)) {
885 static const Intrinsic::ID StoreInts[] = {
886 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
887 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
888
889 static const Intrinsic::ID StoreLaneInts[] = {
890 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
891 Intrinsic::arm_neon_vst4lane};
892
893 auto fArgs = F->getFunctionType()->params();
894 Type *Tys[] = {fArgs[0], fArgs[1]};
895 if (Groups[1].size() == 1)
897 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
898 else
900 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
901 return true;
902 }
903 return false; // No other 'arm.neon.vst*'.
904 }
905
906 return false; // No other 'arm.neon.*'.
907 }
908
909 if (Name.consume_front("mve.")) {
910 // 'arm.mve.*'.
911 if (Name == "vctp64") {
912 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
913 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
914 // the function and deal with it below in UpgradeIntrinsicCall.
915 rename(F);
916 return true;
917 }
918 return false; // Not 'arm.mve.vctp64'.
919 }
920
921 if (Name.starts_with("vrintn.v")) {
923 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
924 return true;
925 }
926
927 // These too are changed to accept a v2i1 instead of the old v4i1.
928 if (Name.consume_back(".v4i1")) {
929 // 'arm.mve.*.v4i1'.
930 if (Name.consume_back(".predicated.v2i64.v4i32"))
931 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
932 return Name == "mull.int" || Name == "vqdmull";
933
934 if (Name.consume_back(".v2i64")) {
935 // 'arm.mve.*.v2i64.v4i1'
936 bool IsGather = Name.consume_front("vldr.gather.");
937 if (IsGather || Name.consume_front("vstr.scatter.")) {
938 if (Name.consume_front("base.")) {
939 // Optional 'wb.' prefix.
940 Name.consume_front("wb.");
941 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
942 // predicated.v2i64.v2i64.v4i1'.
943 return Name == "predicated.v2i64";
944 }
945
946 if (Name.consume_front("offset.predicated."))
947 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
948 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
949
950 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
951 return false;
952 }
953
954 return false; // No other 'arm.mve.*.v2i64.v4i1'.
955 }
956 return false; // No other 'arm.mve.*.v4i1'.
957 }
958 return false; // No other 'arm.mve.*'.
959 }
960
961 if (Name.consume_front("cde.vcx")) {
962 // 'arm.cde.vcx*'.
963 if (Name.consume_back(".predicated.v2i64.v4i1"))
964 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
965 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
966 Name == "3q" || Name == "3qa";
967
968 return false; // No other 'arm.cde.vcx*'.
969 }
970 } else {
971 // 'aarch64.*'.
972 if (Neon) {
973 // 'aarch64.neon.*'.
975 .StartsWith("frintn", Intrinsic::roundeven)
976 .StartsWith("rbit", Intrinsic::bitreverse)
979 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
980 F->arg_begin()->getType());
981 return true;
982 }
983
984 if (Name.starts_with("addp")) {
985 // 'aarch64.neon.addp*'.
986 if (F->arg_size() != 2)
987 return false; // Invalid IR.
988 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
989 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
991 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
992 return true;
993 }
994 }
995
996 // Changed in 20.0: bfcvt/bfcvtn/bcvtn2 have been replaced with fptrunc.
997 if (Name.starts_with("bfcvt")) {
998 NewFn = nullptr;
999 return true;
1000 }
1001
1002 return false; // No other 'aarch64.neon.*'.
1003 }
1004 if (Name.consume_front("sve.")) {
1005 // 'aarch64.sve.*'.
1006 if (Name.consume_front("bf")) {
1007 if (Name == "mmla") {
1008 Type *Tys[] = {F->getReturnType(),
1009 std::next(F->arg_begin())->getType()};
1011 F->getParent(), Intrinsic::aarch64_sve_fmmla, Tys);
1012 return true;
1013 }
1014 if (Name.consume_back(".lane")) {
1015 // 'aarch64.sve.bf*.lane'.
1018 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1019 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1020 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1023 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1024 return true;
1025 }
1026 return false; // No other 'aarch64.sve.bf*.lane'.
1027 }
1028 return false; // No other 'aarch64.sve.bf*'.
1029 }
1030
1031 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
1032 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1033 NewFn = nullptr;
1034 return true;
1035 }
1036
1037 if (Name.consume_front("addqv")) {
1038 // 'aarch64.sve.addqv'.
1039 if (!F->getReturnType()->isFPOrFPVectorTy())
1040 return false;
1041
1042 auto Args = F->getFunctionType()->params();
1043 Type *Tys[] = {F->getReturnType(), Args[1]};
1045 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1046 return true;
1047 }
1048
1049 if (Name.consume_front("ld")) {
1050 // 'aarch64.sve.ld*'.
1051 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1052 if (LdRegex.match(Name)) {
1053 Type *ScalarTy =
1054 cast<VectorType>(F->getReturnType())->getElementType();
1055 ElementCount EC =
1056 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
1057 assert(F->arg_size() == 2 &&
1058 "Expected 2 arguments for ld* intrinsic.");
1059 Type *PtrTy = F->getArg(1)->getType();
1060 Type *Ty = VectorType::get(ScalarTy, EC);
1061 static const Intrinsic::ID LoadIDs[] = {
1062 Intrinsic::aarch64_sve_ld2_sret,
1063 Intrinsic::aarch64_sve_ld3_sret,
1064 Intrinsic::aarch64_sve_ld4_sret,
1065 };
1067 F->getParent(), LoadIDs[Name[0] - '2'], {Ty, PtrTy});
1068 return true;
1069 }
1070 return false; // No other 'aarch64.sve.ld*'.
1071 }
1072
1073 if (Name.consume_front("tuple.")) {
1074 // 'aarch64.sve.tuple.*'.
1075 if (Name.starts_with("get")) {
1076 // 'aarch64.sve.tuple.get*'.
1077 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1079 F->getParent(), Intrinsic::vector_extract, Tys);
1080 return true;
1081 }
1082
1083 if (Name.starts_with("set")) {
1084 // 'aarch64.sve.tuple.set*'.
1085 auto Args = F->getFunctionType()->params();
1086 Type *Tys[] = {Args[0], Args[2], Args[1]};
1088 F->getParent(), Intrinsic::vector_insert, Tys);
1089 return true;
1090 }
1091
1092 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1093 if (CreateTupleRegex.match(Name)) {
1094 // 'aarch64.sve.tuple.create*'.
1095 auto Args = F->getFunctionType()->params();
1096 Type *Tys[] = {F->getReturnType(), Args[1]};
1098 F->getParent(), Intrinsic::vector_insert, Tys);
1099 return true;
1100 }
1101 return false; // No other 'aarch64.sve.tuple.*'.
1102 }
1103
1104 if (Name.starts_with("rev.nxv")) {
1105 // 'aarch64.sve.rev.<Ty>'
1107 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
1108 return true;
1109 }
1110
1111 return false; // No other 'aarch64.sve.*'.
1112 }
1113 }
1114 return false; // No other 'arm.*', 'aarch64.*'.
1115}
1116
// NOTE(review): extraction artifact — each line below begins with the original
// file's line number, and several hyperlink-anchored lines were elided (the
// function header at orig. 1117, the StringSwitch opener at 1120-1121, the
// .Default at 1133, the not_intrinsic guard at 1135, the address-space
// constant at 1142, and the trailing return at 1161). Code bytes untouched.
//
// Decides whether an nvvm.cp.async.bulk.tensor.g2s.* declaration needs
// upgrading, returning the (presumably new) Intrinsic::ID when it does:
// either the first pointer argument is in the old address space, or the
// declaration lacks the trailing i32 cta_group_flag (detected by the
// [N-3]rd parameter not being i1).
1118 StringRef Name) {
1119 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
1122 .Case("im2col.3d",
1123 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1124 .Case("im2col.4d",
1125 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1126 .Case("im2col.5d",
1127 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1128 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1129 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1130 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1131 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1132 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1134
1136 return ID;
1137
1138 // These intrinsics may need upgrade for two reasons:
1139 // (1) When the address-space of the first argument is shared[AS=3]
1140 // (and we upgrade it to use shared_cluster address-space[AS=7])
1141 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1143 return ID;
1144
1145 // (2) When there are only two boolean flag arguments at the end:
1146 //
1147 // The last three parameters of the older version of these
1148 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1149 //
1150 // The newer version reads as:
1151 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1152 //
1153 // So, when the type of the [N-3]rd argument is "not i1", then
1154 // it is the older version and we need to upgrade.
1155 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1156 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
1157 if (!ArgType->isIntegerTy(1))
1158 return ID;
1159 }
1160
1162}
1163
// NOTE(review): extraction artifact — lines carry the original file's line
// numbers, and hyperlink-anchored lines were elided (the function header at
// orig. 1164, the StringSwitch opener at 1172-1173, the .Default at 1178, the
// not_intrinsic guard at 1180, the address-space constants at 1168/1182, and
// the trailing return at 1186). Code bytes untouched.
//
// Maps old nvvm mapa.shared.cluster / cp.async.bulk.* intrinsic names to an
// upgraded Intrinsic::ID, gated on the pointer address space of the return
// value (mapa) or of the first argument (cp.async.bulk) — presumably the old
// shared address space vs. the newer shared_cluster one; confirm upstream.
1165 StringRef Name) {
1166 if (Name.consume_front("mapa.shared.cluster"))
1167 if (F->getReturnType()->getPointerAddressSpace() ==
1169 return Intrinsic::nvvm_mapa_shared_cluster;
1170
1171 if (Name.consume_front("cp.async.bulk.")) {
1174 .Case("global.to.shared.cluster",
1175 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1176 .Case("shared.cta.to.cluster",
1177 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1179
1181 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1183 return ID;
1184 }
1185
1187}
1188
// NOTE(review): extraction artifact — lines carry the original file's line
// numbers, and hyperlink-anchored lines were elided (the function header at
// orig. 1189, the .Default closers at 1196/1220/1244/1250, and the trailing
// return at 1252). Code bytes untouched.
//
// Maps an nvvm bf16/bf16x2 intrinsic name suffix (after "llvm.nvvm." has been
// stripped by the caller) to the corresponding typed Intrinsic::ID; used by
// the return-type-adjustment check in upgradeIntrinsicFunction1 below.
// fma.rn.{relu.}bf16{x2} variants.
1190 if (Name.consume_front("fma.rn."))
1191 return StringSwitch<Intrinsic::ID>(Name)
1192 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1193 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1194 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1195 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1197
// fmax.* with every combination of ftz / nan / xorsign.abs modifiers.
1198 if (Name.consume_front("fmax."))
1199 return StringSwitch<Intrinsic::ID>(Name)
1200 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1201 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1202 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1203 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1204 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1205 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1206 .Case("ftz.nan.xorsign.abs.bf16",
1207 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1208 .Case("ftz.nan.xorsign.abs.bf16x2",
1209 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1210 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1211 .Case("ftz.xorsign.abs.bf16x2",
1212 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1213 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1214 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1215 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1216 .Case("nan.xorsign.abs.bf16x2",
1217 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1218 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1219 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1221
// fmin.* — mirror image of the fmax table above.
1222 if (Name.consume_front("fmin."))
1223 return StringSwitch<Intrinsic::ID>(Name)
1224 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1225 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1226 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1227 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1228 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1229 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1230 .Case("ftz.nan.xorsign.abs.bf16",
1231 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1232 .Case("ftz.nan.xorsign.abs.bf16x2",
1233 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1234 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1235 .Case("ftz.xorsign.abs.bf16x2",
1236 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1237 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1238 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1239 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1240 .Case("nan.xorsign.abs.bf16x2",
1241 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1242 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1243 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1245
// neg.bf16{x2}.
1246 if (Name.consume_front("neg."))
1247 return StringSwitch<Intrinsic::ID>(Name)
1248 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1249 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1251
1253}
1254
// NOTE(review): extraction artifact — lines carry the original file's line
// numbers, and the function header (orig. 1255) was elided; per the call
// sites below (nvvm.ptr.gen.to.* / nvvm.ptr.*.to.gen handling) this is
// consumeNVVMPtrAddrSpace(StringRef &Name). Code bytes untouched.
//
// Consumes a leading NVVM address-space token ("local", "shared", "global",
// "constant" or "param") from Name; returns true iff one matched. Note the
// matched prefix is removed from Name as a side effect.
1256 return Name.consume_front("local") || Name.consume_front("shared") ||
1257 Name.consume_front("global") || Name.consume_front("constant") ||
1258 Name.consume_front("param");
1259}
1260
// NOTE(review): extraction artifact — lines carry the original file's line
// numbers, and the function header (orig. 1261) was elided; per the call site
// in the llvm.convert.* handling below this is
// convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy).
// Code bytes untouched.
//
// Returns true iff an llvm.convert.{to,from}.fp16 declaration has types the
// upgrade can lower legally: to.fp16 becomes fptrunc(param)->half then
// bitcast(half)->ret; from.fp16 becomes bitcast(param)->half then
// fpext(half)->ret. Both casts must be valid per CastInst::castIsValid.
1262 const FunctionType *FuncTy) {
1263 Type *HalfTy = Type::getHalfTy(FuncTy->getContext());
1264 if (Name.starts_with("to.fp16")) {
// to.fp16: param -> half via fptrunc, then half -> return type via bitcast.
1265 return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0),
1266 HalfTy) &&
1267 CastInst::castIsValid(Instruction::BitCast, HalfTy,
1268 FuncTy->getReturnType());
1269 }
1270
1271 if (Name.starts_with("from.fp16")) {
// from.fp16: param -> half via bitcast, then half -> return type via fpext.
1272 return CastInst::castIsValid(Instruction::BitCast, FuncTy->getParamType(0),
1273 HalfTy) &&
1274 CastInst::castIsValid(Instruction::FPExt, HalfTy,
1275 FuncTy->getReturnType());
1276 }
1277
// Any other llvm.convert.* name is not a recognized fp16 conversion.
1278 return false;
1279}
1280
1282 bool CanUpgradeDebugIntrinsicsToRecords) {
1283 assert(F && "Illegal to upgrade a non-existent Function.");
1284
1285 StringRef Name = F->getName();
1286
1287 // Quickly eliminate it, if it's not a candidate.
1288 if (!Name.consume_front("llvm.") || Name.empty())
1289 return false;
1290
1291 switch (Name[0]) {
1292 default: break;
1293 case 'a': {
1294 bool IsArm = Name.consume_front("arm.");
1295 if (IsArm || Name.consume_front("aarch64.")) {
1296 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1297 return true;
1298 break;
1299 }
1300
1301 if (Name.consume_front("amdgcn.")) {
1302 if (Name == "alignbit") {
1303 // Target specific intrinsic became redundant
1305 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1306 return true;
1307 }
1308
1309 if (Name.consume_front("atomic.")) {
1310 if (Name.starts_with("inc") || Name.starts_with("dec") ||
1311 Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1312 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1313 // and usub_sat so there's no new declaration.
1314 NewFn = nullptr;
1315 return true;
1316 }
1317 break; // No other 'amdgcn.atomic.*'
1318 }
1319
1320 switch (F->getIntrinsicID()) {
1321 default:
1322 break;
1323 // Legacy wmma iu intrinsics without the optional clamp operand.
1324 case Intrinsic::amdgcn_wmma_i32_16x16x64_iu8:
1325 if (F->arg_size() == 7) {
1326 NewFn = nullptr;
1327 return true;
1328 }
1329 break;
1330 case Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8:
1331 case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
1332 case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
1333 case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
1334 case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
1335 case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
1336 case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16:
1337 if (F->arg_size() == 8) {
1338 NewFn = nullptr;
1339 return true;
1340 }
1341 break;
1342 }
1343
1344 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1345 Name.consume_front("flat.atomic.")) {
1346 if (Name.starts_with("fadd") ||
1347 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1348 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1349 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1350 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1351 // declaration.
1352 NewFn = nullptr;
1353 return true;
1354 }
1355 }
1356
1357 if (Name.starts_with("ldexp.")) {
1358 // Target specific intrinsic became redundant
1360 F->getParent(), Intrinsic::ldexp,
1361 {F->getReturnType(), F->getArg(1)->getType()});
1362 return true;
1363 }
1364 break; // No other 'amdgcn.*'
1365 }
1366
1367 break;
1368 }
1369 case 'c': {
1370 if (F->arg_size() == 1) {
1371 if (Name.consume_front("convert.")) {
1372 if (convertIntrinsicValidType(Name, F->getFunctionType())) {
1373 NewFn = nullptr;
1374 return true;
1375 }
1376 }
1377
1379 .StartsWith("ctlz.", Intrinsic::ctlz)
1380 .StartsWith("cttz.", Intrinsic::cttz)
1383 rename(F);
1384 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1385 F->arg_begin()->getType());
1386 return true;
1387 }
1388 }
1389
1390 if (F->arg_size() == 2 && Name == "coro.end") {
1391 rename(F);
1392 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1393 Intrinsic::coro_end);
1394 return true;
1395 }
1396
1397 break;
1398 }
1399 case 'd':
1400 if (Name.consume_front("dbg.")) {
1401 // Mark debug intrinsics for upgrade to new debug format.
1402 if (CanUpgradeDebugIntrinsicsToRecords) {
1403 if (Name == "addr" || Name == "value" || Name == "assign" ||
1404 Name == "declare" || Name == "label") {
1405 // There's no function to replace these with.
1406 NewFn = nullptr;
1407 // But we do want these to get upgraded.
1408 return true;
1409 }
1410 }
1411 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1412 // converted to DbgVariableRecords later.
1413 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1414 rename(F);
1415 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1416 Intrinsic::dbg_value);
1417 return true;
1418 }
1419 break; // No other 'dbg.*'.
1420 }
1421 break;
1422 case 'e':
1423 if (Name.consume_front("experimental.vector.")) {
1426 // Skip over extract.last.active, otherwise it will be 'upgraded'
1427 // to a regular vector extract which is a different operation.
1428 .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1429 .StartsWith("extract.", Intrinsic::vector_extract)
1430 .StartsWith("insert.", Intrinsic::vector_insert)
1431 .StartsWith("reverse.", Intrinsic::vector_reverse)
1432 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1433 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1434 .StartsWith("partial.reduce.add",
1435 Intrinsic::vector_partial_reduce_add)
1438 const auto *FT = F->getFunctionType();
1440 if (ID == Intrinsic::vector_extract ||
1441 ID == Intrinsic::vector_interleave2)
1442 // Extracting overloads the return type.
1443 Tys.push_back(FT->getReturnType());
1444 if (ID != Intrinsic::vector_interleave2)
1445 Tys.push_back(FT->getParamType(0));
1446 if (ID == Intrinsic::vector_insert ||
1447 ID == Intrinsic::vector_partial_reduce_add)
1448 // Inserting overloads the inserted type.
1449 Tys.push_back(FT->getParamType(1));
1450 rename(F);
1451 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1452 return true;
1453 }
1454
1455 if (Name.consume_front("reduce.")) {
1457 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1458 if (R.match(Name, &Groups))
1460 .Case("add", Intrinsic::vector_reduce_add)
1461 .Case("mul", Intrinsic::vector_reduce_mul)
1462 .Case("and", Intrinsic::vector_reduce_and)
1463 .Case("or", Intrinsic::vector_reduce_or)
1464 .Case("xor", Intrinsic::vector_reduce_xor)
1465 .Case("smax", Intrinsic::vector_reduce_smax)
1466 .Case("smin", Intrinsic::vector_reduce_smin)
1467 .Case("umax", Intrinsic::vector_reduce_umax)
1468 .Case("umin", Intrinsic::vector_reduce_umin)
1469 .Case("fmax", Intrinsic::vector_reduce_fmax)
1470 .Case("fmin", Intrinsic::vector_reduce_fmin)
1472
1473 bool V2 = false;
1475 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1476 Groups.clear();
1477 V2 = true;
1478 if (R2.match(Name, &Groups))
1480 .Case("fadd", Intrinsic::vector_reduce_fadd)
1481 .Case("fmul", Intrinsic::vector_reduce_fmul)
1483 }
1485 rename(F);
1486 auto Args = F->getFunctionType()->params();
1487 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1488 {Args[V2 ? 1 : 0]});
1489 return true;
1490 }
1491 break; // No other 'expermental.vector.reduce.*'.
1492 }
1493
1494 if (Name.consume_front("splice"))
1495 return true;
1496 break; // No other 'experimental.vector.*'.
1497 }
1498 if (Name.consume_front("experimental.stepvector.")) {
1499 Intrinsic::ID ID = Intrinsic::stepvector;
1500 rename(F);
1502 F->getParent(), ID, F->getFunctionType()->getReturnType());
1503 return true;
1504 }
1505 break; // No other 'e*'.
1506 case 'f':
1507 if (Name.starts_with("flt.rounds")) {
1508 rename(F);
1509 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1510 Intrinsic::get_rounding);
1511 return true;
1512 }
1513 break;
1514 case 'i':
1515 if (Name.starts_with("invariant.group.barrier")) {
1516 // Rename invariant.group.barrier to launder.invariant.group
1517 auto Args = F->getFunctionType()->params();
1518 Type* ObjectPtr[1] = {Args[0]};
1519 rename(F);
1521 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1522 return true;
1523 }
1524 break;
1525 case 'l':
1526 if ((Name.starts_with("lifetime.start") ||
1527 Name.starts_with("lifetime.end")) &&
1528 F->arg_size() == 2) {
1529 Intrinsic::ID IID = Name.starts_with("lifetime.start")
1530 ? Intrinsic::lifetime_start
1531 : Intrinsic::lifetime_end;
1532 rename(F);
1533 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1534 F->getArg(0)->getType());
1535 return true;
1536 }
1537 break;
1538 case 'm': {
1539 // Updating the memory intrinsics (memcpy/memmove/memset) that have an
1540 // alignment parameter to embedding the alignment as an attribute of
1541 // the pointer args.
1542 if (unsigned ID = StringSwitch<unsigned>(Name)
1543 .StartsWith("memcpy.", Intrinsic::memcpy)
1544 .StartsWith("memmove.", Intrinsic::memmove)
1545 .Default(0)) {
1546 if (F->arg_size() == 5) {
1547 rename(F);
1548 // Get the types of dest, src, and len
1549 ArrayRef<Type *> ParamTypes =
1550 F->getFunctionType()->params().slice(0, 3);
1551 NewFn =
1552 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1553 return true;
1554 }
1555 }
1556 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1557 rename(F);
1558 // Get the types of dest, and len
1559 const auto *FT = F->getFunctionType();
1560 Type *ParamTypes[2] = {
1561 FT->getParamType(0), // Dest
1562 FT->getParamType(2) // len
1563 };
1564 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1565 Intrinsic::memset, ParamTypes);
1566 return true;
1567 }
1568
1569 unsigned MaskedID =
1571 .StartsWith("masked.load", Intrinsic::masked_load)
1572 .StartsWith("masked.gather", Intrinsic::masked_gather)
1573 .StartsWith("masked.store", Intrinsic::masked_store)
1574 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1575 .Default(0);
1576 if (MaskedID && F->arg_size() == 4) {
1577 rename(F);
1578 if (MaskedID == Intrinsic::masked_load ||
1579 MaskedID == Intrinsic::masked_gather) {
1581 F->getParent(), MaskedID,
1582 {F->getReturnType(), F->getArg(0)->getType()});
1583 return true;
1584 }
1586 F->getParent(), MaskedID,
1587 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1588 return true;
1589 }
1590 break;
1591 }
1592 case 'n': {
1593 if (Name.consume_front("nvvm.")) {
1594 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1595 if (F->arg_size() == 1) {
1596 Intrinsic::ID IID =
1598 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1599 .Case("clz.i", Intrinsic::ctlz)
1600 .Case("popc.i", Intrinsic::ctpop)
1602 if (IID != Intrinsic::not_intrinsic) {
1603 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1604 {F->getReturnType()});
1605 return true;
1606 }
1607 } else if (F->arg_size() == 2) {
1608 Intrinsic::ID IID =
1610 .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
1611 .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
1612 .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
1613 .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
1615 if (IID != Intrinsic::not_intrinsic) {
1616 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1617 {F->getReturnType()});
1618 return true;
1619 }
1620 }
1621
1622 // Check for nvvm intrinsics that need a return type adjustment.
1623 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1625 if (IID != Intrinsic::not_intrinsic) {
1626 NewFn = nullptr;
1627 return true;
1628 }
1629 }
1630
1631 // Upgrade Distributed Shared Memory Intrinsics
1633 if (IID != Intrinsic::not_intrinsic) {
1634 rename(F);
1635 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1636 return true;
1637 }
1638
1639 // Upgrade TMA copy G2S Intrinsics
1641 if (IID != Intrinsic::not_intrinsic) {
1642 rename(F);
1643 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1644 return true;
1645 }
1646
1647 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1648 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1649 //
1650 // TODO: We could add lohi.i2d.
1651 bool Expand = false;
1652 if (Name.consume_front("abs."))
1653 // nvvm.abs.{i,ii}
1654 Expand =
1655 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1656 else if (Name.consume_front("fabs."))
1657 // nvvm.fabs.{f,ftz.f,d}
1658 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1659 else if (Name.consume_front("ex2.approx."))
1660 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1661 Expand =
1662 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1663 else if (Name.consume_front("atomic.load."))
1664 // nvvm.atomic.load.add.{f32,f64}.p
1665 // nvvm.atomic.load.{inc,dec}.32.p
1666 Expand = StringSwitch<bool>(Name)
1667 .StartsWith("add.f32.p", true)
1668 .StartsWith("add.f64.p", true)
1669 .StartsWith("inc.32.p", true)
1670 .StartsWith("dec.32.p", true)
1671 .Default(false);
1672 else if (Name.consume_front("bitcast."))
1673 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1674 Expand =
1675 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1676 else if (Name.consume_front("rotate."))
1677 // nvvm.rotate.{b32,b64,right.b64}
1678 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1679 else if (Name.consume_front("ptr.gen.to."))
1680 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1681 Expand = consumeNVVMPtrAddrSpace(Name);
1682 else if (Name.consume_front("ptr."))
1683 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1684 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1685 else if (Name.consume_front("ldg.global."))
1686 // nvvm.ldg.global.{i,p,f}
1687 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1688 Name.starts_with("p."));
1689 else
1690 Expand = StringSwitch<bool>(Name)
1691 .Case("barrier0", true)
1692 .Case("barrier.n", true)
1693 .Case("barrier.sync.cnt", true)
1694 .Case("barrier.sync", true)
1695 .Case("barrier", true)
1696 .Case("bar.sync", true)
1697 .Case("barrier0.popc", true)
1698 .Case("barrier0.and", true)
1699 .Case("barrier0.or", true)
1700 .Case("clz.ll", true)
1701 .Case("popc.ll", true)
1702 .Case("h2f", true)
1703 .Case("swap.lo.hi.b64", true)
1704 .Case("tanh.approx.f32", true)
1705 .Default(false);
1706
1707 if (Expand) {
1708 NewFn = nullptr;
1709 return true;
1710 }
1711 break; // No other 'nvvm.*'.
1712 }
1713 break;
1714 }
1715 case 'o':
1716 if (Name.starts_with("objectsize.")) {
1717 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1718 if (F->arg_size() == 2 || F->arg_size() == 3) {
1719 rename(F);
1720 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1721 Intrinsic::objectsize, Tys);
1722 return true;
1723 }
1724 }
1725 break;
1726
1727 case 'p':
1728 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1729 rename(F);
1731 F->getParent(), Intrinsic::ptr_annotation,
1732 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1733 return true;
1734 }
1735 break;
1736
1737 case 'r': {
1738 if (Name.consume_front("riscv.")) {
1741 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1742 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1743 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1744 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1747 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1748 rename(F);
1749 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1750 return true;
1751 }
1752 break; // No other applicable upgrades.
1753 }
1754
1756 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1757 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1760 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1761 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1762 rename(F);
1763 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1764 return true;
1765 }
1766 break; // No other applicable upgrades.
1767 }
1768
1770 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1771 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1772 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1773 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1774 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1775 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1778 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1779 rename(F);
1780 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1781 return true;
1782 }
1783 break; // No other applicable upgrades.
1784 }
1785
1786 // Replace llvm.riscv.clmul with llvm.clmul.
1787 if (Name == "clmul.i32" || Name == "clmul.i64") {
1789 F->getParent(), Intrinsic::clmul, {F->getReturnType()});
1790 return true;
1791 }
1792
1793 break; // No other 'riscv.*' intrinsics
1794 }
1795 } break;
1796
1797 case 's':
1798 if (Name == "stackprotectorcheck") {
1799 NewFn = nullptr;
1800 return true;
1801 }
1802 break;
1803
1804 case 't':
1805 if (Name == "thread.pointer") {
1807 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1808 return true;
1809 }
1810 break;
1811
1812 case 'v': {
1813 if (Name == "var.annotation" && F->arg_size() == 4) {
1814 rename(F);
1816 F->getParent(), Intrinsic::var_annotation,
1817 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1818 return true;
1819 }
1820 if (Name.consume_front("vector.splice")) {
1821 if (Name.starts_with(".left") || Name.starts_with(".right"))
1822 break;
1823 return true;
1824 }
1825 break;
1826 }
1827
1828 case 'w':
1829 if (Name.consume_front("wasm.")) {
1832 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1833 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1834 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1837 rename(F);
1838 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1839 F->getReturnType());
1840 return true;
1841 }
1842
1843 if (Name.consume_front("dot.i8x16.i7x16.")) {
1845 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1846 .Case("add.signed",
1847 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1850 rename(F);
1851 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1852 return true;
1853 }
1854 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1855 }
1856 break; // No other 'wasm.*'.
1857 }
1858 break;
1859
1860 case 'x':
1861 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1862 return true;
1863 }
1864
1865 auto *ST = dyn_cast<StructType>(F->getReturnType());
1866 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1867 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1868 // Replace return type with literal non-packed struct. Only do this for
1869 // intrinsics declared to return a struct, not for intrinsics with
1870 // overloaded return type, in which case the exact struct type will be
1871 // mangled into the name.
1874 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1875 auto *FT = F->getFunctionType();
1876 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1877 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1878 std::string Name = F->getName().str();
1879 rename(F);
1880 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1881 Name, F->getParent());
1882
1883 // The new function may also need remangling.
1884 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1885 NewFn = *Result;
1886 return true;
1887 }
1888 }
1889
1890 // Remangle our intrinsic since we upgrade the mangling
1892 if (Result != std::nullopt) {
1893 NewFn = *Result;
1894 return true;
1895 }
1896
1897 // This may not belong here. This function is effectively being overloaded
1898 // to both detect an intrinsic which needs upgrading, and to provide the
1899 // upgraded form of the intrinsic. We should perhaps have two separate
1900 // functions for this.
1901 return false;
1902}
1903
1905 bool CanUpgradeDebugIntrinsicsToRecords) {
1906 NewFn = nullptr;
1907 bool Upgraded =
1908 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1909
1910 // Upgrade intrinsic attributes. This does not change the function.
1911 if (NewFn)
1912 F = NewFn;
1913 if (Intrinsic::ID id = F->getIntrinsicID()) {
1914 // Only do this if the intrinsic signature is valid.
1915 SmallVector<Type *> OverloadTys;
1916 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1917 F->setAttributes(
1918 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1919 }
1920 return Upgraded;
1921}
1922
// Upgrade a two-field {priority, function} llvm.global_ctors /
// llvm.global_dtors initializer to the current three-field form by appending
// an associated-data pointer to each element; returns the replacement global,
// or nullptr when no upgrade applies.
// NOTE(review): this excerpt appears to be missing several lines — the
// enclosing function signature, the initializations of ATy/STy (presumably
// dyn_casts of the value/element types), and the third argument of the
// ConstantStruct::get call below. Reconcile against the upstream file.
  if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
                          GV->getName() == "llvm.global_dtors")) ||
      !GV->hasInitializer())
    return nullptr;
  if (!ATy)
    return nullptr;
  // Only the legacy two-element struct form needs upgrading.
  if (!STy || STy->getNumElements() != 2)
    return nullptr;

  LLVMContext &C = GV->getContext();
  IRBuilder<> IRB(C);
  // New element type: the original two fields plus a pointer field.
  auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
                               IRB.getPtrTy());
  Constant *Init = GV->getInitializer();
  unsigned N = Init->getNumOperands();
  std::vector<Constant *> NewCtors(N);
  // Rebuild each entry, copying the two existing fields into the wider struct.
  for (unsigned i = 0; i != N; ++i) {
    auto Ctor = cast<Constant>(Init->getOperand(i));
    NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
                                      Ctor->getAggregateElement(1),
  }
  Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);

  // The caller is expected to splice this replacement in for the old global.
  return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
                            NewInit, GV->getName());
}
1953
1954// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1955// to byte shuffles.
1957 unsigned Shift) {
1958 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1959 unsigned NumElts = ResultTy->getNumElements() * 8;
1960
1961 // Bitcast from a 64-bit element type to a byte element type.
1962 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1963 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1964
1965 // We'll be shuffling in zeroes.
1966 Value *Res = Constant::getNullValue(VecTy);
1967
1968 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1969 // we'll just return the zero vector.
1970 if (Shift < 16) {
1971 int Idxs[64];
1972 // 256/512-bit version is split into 2/4 16-byte lanes.
1973 for (unsigned l = 0; l != NumElts; l += 16)
1974 for (unsigned i = 0; i != 16; ++i) {
1975 unsigned Idx = NumElts + i - Shift;
1976 if (Idx < NumElts)
1977 Idx -= NumElts - 16; // end of lane, switch operand.
1978 Idxs[l + i] = Idx + l;
1979 }
1980
1981 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1982 }
1983
1984 // Bitcast back to a 64-bit element type.
1985 return Builder.CreateBitCast(Res, ResultTy, "cast");
1986}
1987
1988// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1989// to byte shuffles.
1991 unsigned Shift) {
1992 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1993 unsigned NumElts = ResultTy->getNumElements() * 8;
1994
1995 // Bitcast from a 64-bit element type to a byte element type.
1996 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1997 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1998
1999 // We'll be shuffling in zeroes.
2000 Value *Res = Constant::getNullValue(VecTy);
2001
2002 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
2003 // we'll just return the zero vector.
2004 if (Shift < 16) {
2005 int Idxs[64];
2006 // 256/512-bit version is split into 2/4 16-byte lanes.
2007 for (unsigned l = 0; l != NumElts; l += 16)
2008 for (unsigned i = 0; i != 16; ++i) {
2009 unsigned Idx = i + Shift;
2010 if (Idx >= 16)
2011 Idx += NumElts - 16; // end of lane, switch operand.
2012 Idxs[l + i] = Idx + l;
2013 }
2014
2015 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
2016 }
2017
2018 // Bitcast back to a 64-bit element type.
2019 return Builder.CreateBitCast(Res, ResultTy, "cast");
2020}
2021
2022static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
2023 unsigned NumElts) {
2024 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
2026 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
2027 Mask = Builder.CreateBitCast(Mask, MaskTy);
2028
2029 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
2030 // i8 and we need to extract down to the right number of elements.
2031 if (NumElts <= 4) {
2032 int Indices[4];
2033 for (unsigned i = 0; i != NumElts; ++i)
2034 Indices[i] = i;
2035 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
2036 "extract");
2037 }
2038
2039 return Mask;
2040}
2041
2042static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2043 Value *Op1) {
2044 // If the mask is all ones just emit the first operation.
2045 if (const auto *C = dyn_cast<Constant>(Mask))
2046 if (C->isAllOnesValue())
2047 return Op0;
2048
2049 Mask = getX86MaskVec(Builder, Mask,
2050 cast<FixedVectorType>(Op0->getType())->getNumElements());
2051 return Builder.CreateSelect(Mask, Op0, Op1);
2052}
2053
2054static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2055 Value *Op1) {
2056 // If the mask is all ones just emit the first operation.
2057 if (const auto *C = dyn_cast<Constant>(Mask))
2058 if (C->isAllOnesValue())
2059 return Op0;
2060
2061 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2062 Mask->getType()->getIntegerBitWidth());
2063 Mask = Builder.CreateBitCast(Mask, MaskTy);
2064 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2065 return Builder.CreateSelect(Mask, Op0, Op1);
2066}
2067
2068// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2069// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2070// so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
2072 Value *Op1, Value *Shift,
2073 Value *Passthru, Value *Mask,
2074 bool IsVALIGN) {
2075 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2076
2077 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2078 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2079 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2080 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2081
2082 // Mask the immediate for VALIGN.
2083 if (IsVALIGN)
2084 ShiftVal &= (NumElts - 1);
2085
2086 // If palignr is shifting the pair of vectors more than the size of two
2087 // lanes, emit zero.
2088 if (ShiftVal >= 32)
2090
2091 // If palignr is shifting the pair of input vectors more than one lane,
2092 // but less than two lanes, convert to shifting in zeroes.
2093 if (ShiftVal > 16) {
2094 ShiftVal -= 16;
2095 Op1 = Op0;
2097 }
2098
2099 int Indices[64];
2100 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2101 for (unsigned l = 0; l < NumElts; l += 16) {
2102 for (unsigned i = 0; i != 16; ++i) {
2103 unsigned Idx = ShiftVal + i;
2104 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2105 Idx += NumElts - 16; // End of lane, switch operand.
2106 Indices[l + i] = Idx + l;
2107 }
2108 }
2109
2110 Value *Align = Builder.CreateShuffleVector(
2111 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2112
2113 return emitX86Select(Builder, Mask, Align, Passthru);
2114}
2115
2117 bool ZeroMask, bool IndexForm) {
2118 Type *Ty = CI.getType();
2119 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2120 unsigned EltWidth = Ty->getScalarSizeInBits();
2121 bool IsFloat = Ty->isFPOrFPVectorTy();
2122 Intrinsic::ID IID;
2123 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2124 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2125 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2126 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2127 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2128 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2129 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2130 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2131 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2132 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2133 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2134 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2135 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2136 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2137 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2138 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2139 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2140 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2141 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2142 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2143 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2144 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2145 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2146 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2147 else if (VecWidth == 128 && EltWidth == 16)
2148 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2149 else if (VecWidth == 256 && EltWidth == 16)
2150 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2151 else if (VecWidth == 512 && EltWidth == 16)
2152 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2153 else if (VecWidth == 128 && EltWidth == 8)
2154 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2155 else if (VecWidth == 256 && EltWidth == 8)
2156 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2157 else if (VecWidth == 512 && EltWidth == 8)
2158 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2159 else
2160 llvm_unreachable("Unexpected intrinsic");
2161
2162 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2163 CI.getArgOperand(2) };
2164
2165 // If this isn't index form we need to swap operand 0 and 1.
2166 if (!IndexForm)
2167 std::swap(Args[0], Args[1]);
2168
2169 Value *V = Builder.CreateIntrinsic(IID, Args);
2170 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2171 : Builder.CreateBitCast(CI.getArgOperand(1),
2172 Ty);
2173 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2174}
2175
2177 Intrinsic::ID IID) {
2178 Type *Ty = CI.getType();
2179 Value *Op0 = CI.getOperand(0);
2180 Value *Op1 = CI.getOperand(1);
2181 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2182
2183 if (CI.arg_size() == 4) { // For masked intrinsics.
2184 Value *VecSrc = CI.getOperand(2);
2185 Value *Mask = CI.getOperand(3);
2186 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2187 }
2188 return Res;
2189}
2190
2192 bool IsRotateRight) {
2193 Type *Ty = CI.getType();
2194 Value *Src = CI.getArgOperand(0);
2195 Value *Amt = CI.getArgOperand(1);
2196
2197 // Amount may be scalar immediate, in which case create a splat vector.
2198 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2199 // we only care about the lowest log2 bits anyway.
2200 if (Amt->getType() != Ty) {
2201 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2202 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2203 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2204 }
2205
2206 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2207 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2208
2209 if (CI.arg_size() == 4) { // For masked intrinsics.
2210 Value *VecSrc = CI.getOperand(2);
2211 Value *Mask = CI.getOperand(3);
2212 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2213 }
2214 return Res;
2215}
2216
2217static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2218 bool IsSigned) {
2219 Type *Ty = CI.getType();
2220 Value *LHS = CI.getArgOperand(0);
2221 Value *RHS = CI.getArgOperand(1);
2222
2223 CmpInst::Predicate Pred;
2224 switch (Imm) {
2225 case 0x0:
2226 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2227 break;
2228 case 0x1:
2229 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2230 break;
2231 case 0x2:
2232 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2233 break;
2234 case 0x3:
2235 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2236 break;
2237 case 0x4:
2238 Pred = ICmpInst::ICMP_EQ;
2239 break;
2240 case 0x5:
2241 Pred = ICmpInst::ICMP_NE;
2242 break;
2243 case 0x6:
2244 return Constant::getNullValue(Ty); // FALSE
2245 case 0x7:
2246 return Constant::getAllOnesValue(Ty); // TRUE
2247 default:
2248 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2249 }
2250
2251 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2252 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2253 return Ext;
2254}
2255
2257 bool IsShiftRight, bool ZeroMask) {
2258 Type *Ty = CI.getType();
2259 Value *Op0 = CI.getArgOperand(0);
2260 Value *Op1 = CI.getArgOperand(1);
2261 Value *Amt = CI.getArgOperand(2);
2262
2263 if (IsShiftRight)
2264 std::swap(Op0, Op1);
2265
2266 // Amount may be scalar immediate, in which case create a splat vector.
2267 // Funnel shifts amounts are treated as modulo and types are all power-of-2 so
2268 // we only care about the lowest log2 bits anyway.
2269 if (Amt->getType() != Ty) {
2270 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2271 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2272 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2273 }
2274
2275 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2276 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2277
2278 unsigned NumArgs = CI.arg_size();
2279 if (NumArgs >= 4) { // For masked intrinsics.
2280 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2281 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2282 CI.getArgOperand(0);
2283 Value *Mask = CI.getOperand(NumArgs - 1);
2284 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2285 }
2286 return Res;
2287}
2288
2290 Value *Mask, bool Aligned) {
2291 const Align Alignment =
2292 Aligned
2293 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2294 : Align(1);
2295
2296 // If the mask is all ones just emit a regular store.
2297 if (const auto *C = dyn_cast<Constant>(Mask))
2298 if (C->isAllOnesValue())
2299 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2300
2301 // Convert the mask from an integer type to a vector of i1.
2302 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2303 Mask = getX86MaskVec(Builder, Mask, NumElts);
2304 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2305}
2306
2308 Value *Passthru, Value *Mask, bool Aligned) {
2309 Type *ValTy = Passthru->getType();
2310 const Align Alignment =
2311 Aligned
2312 ? Align(
2314 8)
2315 : Align(1);
2316
2317 // If the mask is all ones just emit a regular store.
2318 if (const auto *C = dyn_cast<Constant>(Mask))
2319 if (C->isAllOnesValue())
2320 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2321
2322 // Convert the mask from an integer type to a vector of i1.
2323 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2324 Mask = getX86MaskVec(Builder, Mask, NumElts);
2325 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2326}
2327
2328static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2329 Type *Ty = CI.getType();
2330 Value *Op0 = CI.getArgOperand(0);
2331 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2332 {Op0, Builder.getInt1(false)});
2333 if (CI.arg_size() == 3)
2334 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2335 return Res;
2336}
2337
2338static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2339 Type *Ty = CI.getType();
2340
2341 // Arguments have a vXi32 type so cast to vXi64.
2342 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2343 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2344
2345 if (IsSigned) {
2346 // Shift left then arithmetic shift right.
2347 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2348 LHS = Builder.CreateShl(LHS, ShiftAmt);
2349 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2350 RHS = Builder.CreateShl(RHS, ShiftAmt);
2351 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2352 } else {
2353 // Clear the upper bits.
2354 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2355 LHS = Builder.CreateAnd(LHS, Mask);
2356 RHS = Builder.CreateAnd(RHS, Mask);
2357 }
2358
2359 Value *Res = Builder.CreateMul(LHS, RHS);
2360
2361 if (CI.arg_size() == 4)
2362 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2363
2364 return Res;
2365}
2366
2367// Applying mask on vector of i1's and make sure result is at least 8 bits wide.
2369 Value *Mask) {
2370 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2371 if (Mask) {
2372 const auto *C = dyn_cast<Constant>(Mask);
2373 if (!C || !C->isAllOnesValue())
2374 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2375 }
2376
2377 if (NumElts < 8) {
2378 int Indices[8];
2379 for (unsigned i = 0; i != NumElts; ++i)
2380 Indices[i] = i;
2381 for (unsigned i = NumElts; i != 8; ++i)
2382 Indices[i] = NumElts + i % NumElts;
2383 Vec = Builder.CreateShuffleVector(Vec,
2385 Indices);
2386 }
2387 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2388}
2389
2391 unsigned CC, bool Signed) {
2392 Value *Op0 = CI.getArgOperand(0);
2393 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2394
2395 Value *Cmp;
2396 if (CC == 3) {
2398 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2399 } else if (CC == 7) {
2401 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2402 } else {
2404 switch (CC) {
2405 default: llvm_unreachable("Unknown condition code");
2406 case 0: Pred = ICmpInst::ICMP_EQ; break;
2407 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2408 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2409 case 4: Pred = ICmpInst::ICMP_NE; break;
2410 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2411 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2412 }
2413 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2414 }
2415
2416 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2417
2418 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2419}
2420
2421// Replace a masked intrinsic with an older unmasked intrinsic.
2423 Intrinsic::ID IID) {
2424 Value *Rep =
2425 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2426 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2427}
2428
2430 Value* A = CI.getArgOperand(0);
2431 Value* B = CI.getArgOperand(1);
2432 Value* Src = CI.getArgOperand(2);
2433 Value* Mask = CI.getArgOperand(3);
2434
2435 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2436 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2437 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2438 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2439 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2440 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2441}
2442
2444 Value* Op = CI.getArgOperand(0);
2445 Type* ReturnOp = CI.getType();
2446 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2447 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2448 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2449}
2450
2451// Replace intrinsic with unmasked version and a select.
2453 CallBase &CI, Value *&Rep) {
2454 Name = Name.substr(12); // Remove avx512.mask.
2455
2456 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2457 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2458 Intrinsic::ID IID;
2459 if (Name.starts_with("max.p")) {
2460 if (VecWidth == 128 && EltWidth == 32)
2461 IID = Intrinsic::x86_sse_max_ps;
2462 else if (VecWidth == 128 && EltWidth == 64)
2463 IID = Intrinsic::x86_sse2_max_pd;
2464 else if (VecWidth == 256 && EltWidth == 32)
2465 IID = Intrinsic::x86_avx_max_ps_256;
2466 else if (VecWidth == 256 && EltWidth == 64)
2467 IID = Intrinsic::x86_avx_max_pd_256;
2468 else
2469 llvm_unreachable("Unexpected intrinsic");
2470 } else if (Name.starts_with("min.p")) {
2471 if (VecWidth == 128 && EltWidth == 32)
2472 IID = Intrinsic::x86_sse_min_ps;
2473 else if (VecWidth == 128 && EltWidth == 64)
2474 IID = Intrinsic::x86_sse2_min_pd;
2475 else if (VecWidth == 256 && EltWidth == 32)
2476 IID = Intrinsic::x86_avx_min_ps_256;
2477 else if (VecWidth == 256 && EltWidth == 64)
2478 IID = Intrinsic::x86_avx_min_pd_256;
2479 else
2480 llvm_unreachable("Unexpected intrinsic");
2481 } else if (Name.starts_with("pshuf.b.")) {
2482 if (VecWidth == 128)
2483 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2484 else if (VecWidth == 256)
2485 IID = Intrinsic::x86_avx2_pshuf_b;
2486 else if (VecWidth == 512)
2487 IID = Intrinsic::x86_avx512_pshuf_b_512;
2488 else
2489 llvm_unreachable("Unexpected intrinsic");
2490 } else if (Name.starts_with("pmul.hr.sw.")) {
2491 if (VecWidth == 128)
2492 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2493 else if (VecWidth == 256)
2494 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2495 else if (VecWidth == 512)
2496 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2497 else
2498 llvm_unreachable("Unexpected intrinsic");
2499 } else if (Name.starts_with("pmulh.w.")) {
2500 if (VecWidth == 128)
2501 IID = Intrinsic::x86_sse2_pmulh_w;
2502 else if (VecWidth == 256)
2503 IID = Intrinsic::x86_avx2_pmulh_w;
2504 else if (VecWidth == 512)
2505 IID = Intrinsic::x86_avx512_pmulh_w_512;
2506 else
2507 llvm_unreachable("Unexpected intrinsic");
2508 } else if (Name.starts_with("pmulhu.w.")) {
2509 if (VecWidth == 128)
2510 IID = Intrinsic::x86_sse2_pmulhu_w;
2511 else if (VecWidth == 256)
2512 IID = Intrinsic::x86_avx2_pmulhu_w;
2513 else if (VecWidth == 512)
2514 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2515 else
2516 llvm_unreachable("Unexpected intrinsic");
2517 } else if (Name.starts_with("pmaddw.d.")) {
2518 if (VecWidth == 128)
2519 IID = Intrinsic::x86_sse2_pmadd_wd;
2520 else if (VecWidth == 256)
2521 IID = Intrinsic::x86_avx2_pmadd_wd;
2522 else if (VecWidth == 512)
2523 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2524 else
2525 llvm_unreachable("Unexpected intrinsic");
2526 } else if (Name.starts_with("pmaddubs.w.")) {
2527 if (VecWidth == 128)
2528 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2529 else if (VecWidth == 256)
2530 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2531 else if (VecWidth == 512)
2532 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2533 else
2534 llvm_unreachable("Unexpected intrinsic");
2535 } else if (Name.starts_with("packsswb.")) {
2536 if (VecWidth == 128)
2537 IID = Intrinsic::x86_sse2_packsswb_128;
2538 else if (VecWidth == 256)
2539 IID = Intrinsic::x86_avx2_packsswb;
2540 else if (VecWidth == 512)
2541 IID = Intrinsic::x86_avx512_packsswb_512;
2542 else
2543 llvm_unreachable("Unexpected intrinsic");
2544 } else if (Name.starts_with("packssdw.")) {
2545 if (VecWidth == 128)
2546 IID = Intrinsic::x86_sse2_packssdw_128;
2547 else if (VecWidth == 256)
2548 IID = Intrinsic::x86_avx2_packssdw;
2549 else if (VecWidth == 512)
2550 IID = Intrinsic::x86_avx512_packssdw_512;
2551 else
2552 llvm_unreachable("Unexpected intrinsic");
2553 } else if (Name.starts_with("packuswb.")) {
2554 if (VecWidth == 128)
2555 IID = Intrinsic::x86_sse2_packuswb_128;
2556 else if (VecWidth == 256)
2557 IID = Intrinsic::x86_avx2_packuswb;
2558 else if (VecWidth == 512)
2559 IID = Intrinsic::x86_avx512_packuswb_512;
2560 else
2561 llvm_unreachable("Unexpected intrinsic");
2562 } else if (Name.starts_with("packusdw.")) {
2563 if (VecWidth == 128)
2564 IID = Intrinsic::x86_sse41_packusdw;
2565 else if (VecWidth == 256)
2566 IID = Intrinsic::x86_avx2_packusdw;
2567 else if (VecWidth == 512)
2568 IID = Intrinsic::x86_avx512_packusdw_512;
2569 else
2570 llvm_unreachable("Unexpected intrinsic");
2571 } else if (Name.starts_with("vpermilvar.")) {
2572 if (VecWidth == 128 && EltWidth == 32)
2573 IID = Intrinsic::x86_avx_vpermilvar_ps;
2574 else if (VecWidth == 128 && EltWidth == 64)
2575 IID = Intrinsic::x86_avx_vpermilvar_pd;
2576 else if (VecWidth == 256 && EltWidth == 32)
2577 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2578 else if (VecWidth == 256 && EltWidth == 64)
2579 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2580 else if (VecWidth == 512 && EltWidth == 32)
2581 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2582 else if (VecWidth == 512 && EltWidth == 64)
2583 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2584 else
2585 llvm_unreachable("Unexpected intrinsic");
2586 } else if (Name == "cvtpd2dq.256") {
2587 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2588 } else if (Name == "cvtpd2ps.256") {
2589 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2590 } else if (Name == "cvttpd2dq.256") {
2591 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2592 } else if (Name == "cvttps2dq.128") {
2593 IID = Intrinsic::x86_sse2_cvttps2dq;
2594 } else if (Name == "cvttps2dq.256") {
2595 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2596 } else if (Name.starts_with("permvar.")) {
2597 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2598 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2599 IID = Intrinsic::x86_avx2_permps;
2600 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2601 IID = Intrinsic::x86_avx2_permd;
2602 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2603 IID = Intrinsic::x86_avx512_permvar_df_256;
2604 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2605 IID = Intrinsic::x86_avx512_permvar_di_256;
2606 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2607 IID = Intrinsic::x86_avx512_permvar_sf_512;
2608 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2609 IID = Intrinsic::x86_avx512_permvar_si_512;
2610 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2611 IID = Intrinsic::x86_avx512_permvar_df_512;
2612 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2613 IID = Intrinsic::x86_avx512_permvar_di_512;
2614 else if (VecWidth == 128 && EltWidth == 16)
2615 IID = Intrinsic::x86_avx512_permvar_hi_128;
2616 else if (VecWidth == 256 && EltWidth == 16)
2617 IID = Intrinsic::x86_avx512_permvar_hi_256;
2618 else if (VecWidth == 512 && EltWidth == 16)
2619 IID = Intrinsic::x86_avx512_permvar_hi_512;
2620 else if (VecWidth == 128 && EltWidth == 8)
2621 IID = Intrinsic::x86_avx512_permvar_qi_128;
2622 else if (VecWidth == 256 && EltWidth == 8)
2623 IID = Intrinsic::x86_avx512_permvar_qi_256;
2624 else if (VecWidth == 512 && EltWidth == 8)
2625 IID = Intrinsic::x86_avx512_permvar_qi_512;
2626 else
2627 llvm_unreachable("Unexpected intrinsic");
2628 } else if (Name.starts_with("dbpsadbw.")) {
2629 if (VecWidth == 128)
2630 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2631 else if (VecWidth == 256)
2632 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2633 else if (VecWidth == 512)
2634 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2635 else
2636 llvm_unreachable("Unexpected intrinsic");
2637 } else if (Name.starts_with("pmultishift.qb.")) {
2638 if (VecWidth == 128)
2639 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2640 else if (VecWidth == 256)
2641 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2642 else if (VecWidth == 512)
2643 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2644 else
2645 llvm_unreachable("Unexpected intrinsic");
2646 } else if (Name.starts_with("conflict.")) {
2647 if (Name[9] == 'd' && VecWidth == 128)
2648 IID = Intrinsic::x86_avx512_conflict_d_128;
2649 else if (Name[9] == 'd' && VecWidth == 256)
2650 IID = Intrinsic::x86_avx512_conflict_d_256;
2651 else if (Name[9] == 'd' && VecWidth == 512)
2652 IID = Intrinsic::x86_avx512_conflict_d_512;
2653 else if (Name[9] == 'q' && VecWidth == 128)
2654 IID = Intrinsic::x86_avx512_conflict_q_128;
2655 else if (Name[9] == 'q' && VecWidth == 256)
2656 IID = Intrinsic::x86_avx512_conflict_q_256;
2657 else if (Name[9] == 'q' && VecWidth == 512)
2658 IID = Intrinsic::x86_avx512_conflict_q_512;
2659 else
2660 llvm_unreachable("Unexpected intrinsic");
2661 } else if (Name.starts_with("pavg.")) {
2662 if (Name[5] == 'b' && VecWidth == 128)
2663 IID = Intrinsic::x86_sse2_pavg_b;
2664 else if (Name[5] == 'b' && VecWidth == 256)
2665 IID = Intrinsic::x86_avx2_pavg_b;
2666 else if (Name[5] == 'b' && VecWidth == 512)
2667 IID = Intrinsic::x86_avx512_pavg_b_512;
2668 else if (Name[5] == 'w' && VecWidth == 128)
2669 IID = Intrinsic::x86_sse2_pavg_w;
2670 else if (Name[5] == 'w' && VecWidth == 256)
2671 IID = Intrinsic::x86_avx2_pavg_w;
2672 else if (Name[5] == 'w' && VecWidth == 512)
2673 IID = Intrinsic::x86_avx512_pavg_w_512;
2674 else
2675 llvm_unreachable("Unexpected intrinsic");
2676 } else
2677 return false;
2678
2679 SmallVector<Value *, 4> Args(CI.args());
2680 Args.pop_back();
2681 Args.pop_back();
2682 Rep = Builder.CreateIntrinsic(IID, Args);
2683 unsigned NumArgs = CI.arg_size();
2684 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2685 CI.getArgOperand(NumArgs - 2));
2686 return true;
2687}
2688
2689/// Upgrade comment in call to inline asm that represents an objc retain release
2690/// marker.
2691void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2692 size_t Pos;
2693 if (AsmStr->find("mov\tfp") == 0 &&
2694 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2695 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2696 AsmStr->replace(Pos, 1, ";");
2697 }
2698}
2699
// NOTE(review): the opening line of this definition (original line 2700 --
// the return type, function name, and leading parameters) is elided in this
// rendering. From the body it is the NVVM intrinsic-call upgrade helper,
// presumably `upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
// Function *F, IRBuilder<> &Builder)` -- confirm against the full file.
// It expands a legacy llvm.nvvm.* call (whose stripped name suffix is in
// `Name`) into current IR, returning the replacement value, or nullptr when
// this helper produces no replacement.
2701 Function *F, IRBuilder<> &Builder) {
2702 Value *Rep = nullptr;
2703
// abs.i / abs.ll: expand integer absolute value as select(x >= 0, x, -x).
2704 if (Name == "abs.i" || Name == "abs.ll") {
2705 Value *Arg = CI->getArgOperand(0);
2706 Value *Neg = Builder.CreateNeg(Arg, "neg");
2707 Value *Cmp = Builder.CreateICmpSGE(
2708 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2709 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
// abs.bf16 / abs.bf16x2: bitcast the (integer-typed) operand to bfloat or
// <2 x bfloat>, apply nvvm.fabs, and bitcast back to the call's type.
2710 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2711 Type *Ty = (Name == "abs.bf16")
2712 ? Builder.getBFloatTy()
2713 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2714 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2715 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2716 Rep = Builder.CreateBitCast(Abs, CI->getType());
// fabs variants: forward to nvvm.fabs, using the ftz flavor for fabs.ftz.f.
2717 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2718 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2719 : Intrinsic::nvvm_fabs;
2720 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2721 } else if (Name.consume_front("ex2.approx.")) {
2722 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2723 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2724 : Intrinsic::nvvm_ex2_approx;
2725 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
// atomic.load.add.{f32,f64}.p*: lower to an atomicrmw fadd instruction.
2726 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2727 Name.starts_with("atomic.load.add.f64.p")) {
2728 Value *Ptr = CI->getArgOperand(0);
2729 Value *Val = CI->getArgOperand(1);
2730 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
// NOTE(review): continuation line 2731 is elided in this rendering --
// presumably the AtomicOrdering argument (SequentiallyConsistent upstream);
// confirm against the full file.
// atomic.load.{inc,dec}.32.p*: lower to atomicrmw uinc_wrap / udec_wrap.
2732 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2733 Name.starts_with("atomic.load.dec.32.p")) {
2734 Value *Ptr = CI->getArgOperand(0);
2735 Value *Val = CI->getArgOperand(1);
2736 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
// NOTE(review): line 2737 (the ':' arm of this conditional, presumably
// AtomicRMWInst::UDecWrap) is elided in this rendering; confirm.
2738 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
// NOTE(review): continuation line 2739 (the ordering argument) is elided in
// this rendering; confirm against the full file.
2740 } else if (Name == "clz.ll") {
2741 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2742 Value *Arg = CI->getArgOperand(0);
2743 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2744 {Arg, Builder.getFalse()},
2745 /*FMFSource=*/nullptr, "ctlz");
2746 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2747 } else if (Name == "popc.ll") {
2748 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2749 // i64.
2750 Value *Arg = CI->getArgOperand(0);
2751 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2752 Arg, /*FMFSource=*/nullptr, "ctpop");
2753 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
// h2f: half-to-float conversion as bitcast-to-half + fpext.
2754 } else if (Name == "h2f") {
2755 Value *Cast =
2756 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
2757 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
// bitcast.{f2i,i2f,ll2d,d2ll}: plain same-width bitcasts.
2758 } else if (Name.consume_front("bitcast.") &&
2759 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2760 Name == "d2ll")) {
2761 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
// rotate.b32: left rotate expressed as funnel shift fshl(x, x, amt).
2762 } else if (Name == "rotate.b32") {
2763 Value *Arg = CI->getOperand(0);
2764 Value *ShiftAmt = CI->getOperand(1);
2765 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2766 {Arg, Arg, ShiftAmt});
// rotate.b64: i64 left rotate; the i32 shift amount is zext'd to i64.
2767 } else if (Name == "rotate.b64") {
2768 Type *Int64Ty = Builder.getInt64Ty();
2769 Value *Arg = CI->getOperand(0);
2770 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2771 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2772 {Arg, Arg, ZExtShiftAmt});
// rotate.right.b64: same, but a right rotate via fshr.
2773 } else if (Name == "rotate.right.b64") {
2774 Type *Int64Ty = Builder.getInt64Ty();
2775 Value *Arg = CI->getOperand(0);
2776 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2777 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2778 {Arg, Arg, ZExtShiftAmt});
// swap.lo.hi.b64: swap i64 halves == rotate by 32 bits.
2779 } else if (Name == "swap.lo.hi.b64") {
2780 Type *Int64Ty = Builder.getInt64Ty();
2781 Value *Arg = CI->getOperand(0);
2782 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2783 {Arg, Arg, Builder.getInt64(32)});
// ptr.gen.to.<space> / ptr.<space>.to.gen: both become addrspacecast.
2784 } else if ((Name.consume_front("ptr.gen.to.") &&
2785 consumeNVVMPtrAddrSpace(Name)) ||
2786 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2787 Name.starts_with(".to.gen"))) {
2788 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
// ldg.global.*: cast the pointer into the global address space and emit an
// aligned load tagged !invariant.load. Note this returns the load directly
// instead of falling through to the common `return Rep` below.
2789 } else if (Name.consume_front("ldg.global")) {
2790 Value *Ptr = CI->getArgOperand(0);
2791 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2792 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2793 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2794 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2795 MDNode *MD = MDNode::get(Builder.getContext(), {});
2796 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2797 return LD;
2798 } else if (Name == "tanh.approx.f32") {
2799 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2800 FastMathFlags FMF;
2801 FMF.setApproxFunc();
2802 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2803 FMF);
// barrier0 / barrier.n / bar.sync: aligned all-threads CTA barrier; barrier0
// (name ends with '0') implies barrier id 0, the others pass operand 0.
2804 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2805 Value *Arg =
2806 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2807 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2808 {}, {Arg});
// barrier: aligned CTA barrier with an explicit thread count.
2809 } else if (Name == "barrier") {
2810 Rep = Builder.CreateIntrinsic(
2811 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2812 {CI->getArgOperand(0), CI->getArgOperand(1)});
// barrier.sync / barrier.sync.cnt: non-aligned CTA barrier variants.
2813 } else if (Name == "barrier.sync") {
2814 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2815 {CI->getArgOperand(0)});
2816 } else if (Name == "barrier.sync.cnt") {
2817 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2818 {CI->getArgOperand(0), CI->getArgOperand(1)});
// barrier0.{popc,and,or}: reduction barriers. The i32 predicate operand is
// converted to i1 (!= 0), the reduction runs on barrier id 0, and the result
// is zext'd back to the call's integer type.
2819 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2820 Name == "barrier0.or") {
2821 Value *C = CI->getArgOperand(0);
2822 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2823
2824 Intrinsic::ID IID =
// NOTE(review): line 2825 is elided in this rendering -- presumably the
// StringSwitch<Intrinsic::ID>(Name) head of this .Case chain; confirm.
2826 .Case("barrier0.popc",
2827 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2828 .Case("barrier0.and",
2829 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2830 .Case("barrier0.or",
2831 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2832 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2833 Rep = Builder.CreateZExt(Bar, CI->getType());
// Fallback: a table-driven 1:1 rename, bitcasting i16/i32-typed arguments
// into bfloat form where the new intrinsic expects bfloat operands.
2834 } else {
// NOTE(review): line 2835 is elided in this rendering -- presumably the
// lookup that initializes `IID` for the legacy name; confirm.
2836 if (IID != Intrinsic::not_intrinsic &&
2837 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2838 rename(F);
2839 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
// NOTE(review): line 2840 is elided in this rendering -- presumably the
// declaration of the `Args` vector pushed to below; confirm.
2841 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2842 Value *Arg = CI->getArgOperand(I);
2843 Type *OldType = Arg->getType();
2844 Type *NewType = NewFn->getArg(I)->getType();
// Integer-typed legacy operands are bitcast when the replacement intrinsic
// takes a bfloat (or bfloat-vector) parameter in that position.
2845 Args.push_back(
2846 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2847 ? Builder.CreateBitCast(Arg, NewType)
2848 : Arg);
2849 }
2850 Rep = Builder.CreateCall(NewFn, Args);
// If the legacy intrinsic returned an integer, cast the (bfloat) result back.
2851 if (F->getReturnType()->isIntegerTy())
2852 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2853 }
2854 }
2855
// Rep is still nullptr when no case above matched and the fallback declined;
// the caller decides what to do in that case.
2856 return Rep;
2857}
2858
2860 IRBuilder<> &Builder) {
2861 LLVMContext &C = F->getContext();
2862 Value *Rep = nullptr;
2863
2864 if (Name.starts_with("sse4a.movnt.")) {
2866 Elts.push_back(
2867 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2868 MDNode *Node = MDNode::get(C, Elts);
2869
2870 Value *Arg0 = CI->getArgOperand(0);
2871 Value *Arg1 = CI->getArgOperand(1);
2872
2873 // Nontemporal (unaligned) store of the 0'th element of the float/double
2874 // vector.
2875 Value *Extract =
2876 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2877
2878 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2879 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2880 } else if (Name.starts_with("avx.movnt.") ||
2881 Name.starts_with("avx512.storent.")) {
2883 Elts.push_back(
2884 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2885 MDNode *Node = MDNode::get(C, Elts);
2886
2887 Value *Arg0 = CI->getArgOperand(0);
2888 Value *Arg1 = CI->getArgOperand(1);
2889
2890 StoreInst *SI = Builder.CreateAlignedStore(
2891 Arg1, Arg0,
2893 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2894 } else if (Name == "sse2.storel.dq") {
2895 Value *Arg0 = CI->getArgOperand(0);
2896 Value *Arg1 = CI->getArgOperand(1);
2897
2898 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2899 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2900 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2901 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2902 } else if (Name.starts_with("sse.storeu.") ||
2903 Name.starts_with("sse2.storeu.") ||
2904 Name.starts_with("avx.storeu.")) {
2905 Value *Arg0 = CI->getArgOperand(0);
2906 Value *Arg1 = CI->getArgOperand(1);
2907 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2908 } else if (Name == "avx512.mask.store.ss") {
2909 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2910 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2911 Mask, false);
2912 } else if (Name.starts_with("avx512.mask.store")) {
2913 // "avx512.mask.storeu." or "avx512.mask.store."
2914 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2915 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2916 CI->getArgOperand(2), Aligned);
2917 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2918 // Upgrade packed integer vector compare intrinsics to compare instructions.
2919 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2920 bool CmpEq = Name[9] == 'e';
2921 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2922 CI->getArgOperand(0), CI->getArgOperand(1));
2923 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2924 } else if (Name.starts_with("avx512.broadcastm")) {
2925 Type *ExtTy = Type::getInt32Ty(C);
2926 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2927 ExtTy = Type::getInt64Ty(C);
2928 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2929 ExtTy->getPrimitiveSizeInBits();
2930 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2931 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2932 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2933 Value *Vec = CI->getArgOperand(0);
2934 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2935 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2936 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2937 } else if (Name.starts_with("avx.sqrt.p") ||
2938 Name.starts_with("sse2.sqrt.p") ||
2939 Name.starts_with("sse.sqrt.p")) {
2940 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2941 {CI->getArgOperand(0)});
2942 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2943 if (CI->arg_size() == 4 &&
2944 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2945 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2946 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2947 : Intrinsic::x86_avx512_sqrt_pd_512;
2948
2949 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2950 Rep = Builder.CreateIntrinsic(IID, Args);
2951 } else {
2952 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2953 {CI->getArgOperand(0)});
2954 }
2955 Rep =
2956 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2957 } else if (Name.starts_with("avx512.ptestm") ||
2958 Name.starts_with("avx512.ptestnm")) {
2959 Value *Op0 = CI->getArgOperand(0);
2960 Value *Op1 = CI->getArgOperand(1);
2961 Value *Mask = CI->getArgOperand(2);
2962 Rep = Builder.CreateAnd(Op0, Op1);
2963 llvm::Type *Ty = Op0->getType();
2965 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2968 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2969 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2970 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2971 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2972 ->getNumElements();
2973 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2974 Rep =
2975 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2976 } else if (Name.starts_with("avx512.kunpck")) {
2977 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2978 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2979 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2980 int Indices[64];
2981 for (unsigned i = 0; i != NumElts; ++i)
2982 Indices[i] = i;
2983
2984 // First extract half of each vector. This gives better codegen than
2985 // doing it in a single shuffle.
2986 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2987 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2988 // Concat the vectors.
2989 // NOTE: Operands have to be swapped to match intrinsic definition.
2990 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2991 Rep = Builder.CreateBitCast(Rep, CI->getType());
2992 } else if (Name == "avx512.kand.w") {
2993 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2994 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2995 Rep = Builder.CreateAnd(LHS, RHS);
2996 Rep = Builder.CreateBitCast(Rep, CI->getType());
2997 } else if (Name == "avx512.kandn.w") {
2998 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2999 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3000 LHS = Builder.CreateNot(LHS);
3001 Rep = Builder.CreateAnd(LHS, RHS);
3002 Rep = Builder.CreateBitCast(Rep, CI->getType());
3003 } else if (Name == "avx512.kor.w") {
3004 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3005 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3006 Rep = Builder.CreateOr(LHS, RHS);
3007 Rep = Builder.CreateBitCast(Rep, CI->getType());
3008 } else if (Name == "avx512.kxor.w") {
3009 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3010 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3011 Rep = Builder.CreateXor(LHS, RHS);
3012 Rep = Builder.CreateBitCast(Rep, CI->getType());
3013 } else if (Name == "avx512.kxnor.w") {
3014 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3015 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3016 LHS = Builder.CreateNot(LHS);
3017 Rep = Builder.CreateXor(LHS, RHS);
3018 Rep = Builder.CreateBitCast(Rep, CI->getType());
3019 } else if (Name == "avx512.knot.w") {
3020 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3021 Rep = Builder.CreateNot(Rep);
3022 Rep = Builder.CreateBitCast(Rep, CI->getType());
3023 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
3024 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
3025 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
3026 Rep = Builder.CreateOr(LHS, RHS);
3027 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
3028 Value *C;
3029 if (Name[14] == 'c')
3030 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
3031 else
3032 C = ConstantInt::getNullValue(Builder.getInt16Ty());
3033 Rep = Builder.CreateICmpEQ(Rep, C);
3034 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
3035 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
3036 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
3037 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
3038 Name == "sse.div.ss" || Name == "sse2.div.sd") {
3039 Type *I32Ty = Type::getInt32Ty(C);
3040 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
3041 ConstantInt::get(I32Ty, 0));
3042 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
3043 ConstantInt::get(I32Ty, 0));
3044 Value *EltOp;
3045 if (Name.contains(".add."))
3046 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3047 else if (Name.contains(".sub."))
3048 EltOp = Builder.CreateFSub(Elt0, Elt1);
3049 else if (Name.contains(".mul."))
3050 EltOp = Builder.CreateFMul(Elt0, Elt1);
3051 else
3052 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3053 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3054 ConstantInt::get(I32Ty, 0));
3055 } else if (Name.starts_with("avx512.mask.pcmp")) {
3056 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3057 bool CmpEq = Name[16] == 'e';
3058 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3059 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3060 Type *OpTy = CI->getArgOperand(0)->getType();
3061 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3062 Intrinsic::ID IID;
3063 switch (VecWidth) {
3064 default:
3065 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3066 break;
3067 case 128:
3068 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3069 break;
3070 case 256:
3071 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3072 break;
3073 case 512:
3074 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3075 break;
3076 }
3077
3078 Rep =
3079 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3080 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3081 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3082 Type *OpTy = CI->getArgOperand(0)->getType();
3083 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3084 unsigned EltWidth = OpTy->getScalarSizeInBits();
3085 Intrinsic::ID IID;
3086 if (VecWidth == 128 && EltWidth == 32)
3087 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3088 else if (VecWidth == 256 && EltWidth == 32)
3089 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3090 else if (VecWidth == 512 && EltWidth == 32)
3091 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3092 else if (VecWidth == 128 && EltWidth == 64)
3093 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3094 else if (VecWidth == 256 && EltWidth == 64)
3095 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3096 else if (VecWidth == 512 && EltWidth == 64)
3097 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3098 else
3099 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3100
3101 Rep =
3102 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3103 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3104 } else if (Name.starts_with("avx512.cmp.p")) {
3105 SmallVector<Value *, 4> Args(CI->args());
3106 Type *OpTy = Args[0]->getType();
3107 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3108 unsigned EltWidth = OpTy->getScalarSizeInBits();
3109 Intrinsic::ID IID;
3110 if (VecWidth == 128 && EltWidth == 32)
3111 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3112 else if (VecWidth == 256 && EltWidth == 32)
3113 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3114 else if (VecWidth == 512 && EltWidth == 32)
3115 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3116 else if (VecWidth == 128 && EltWidth == 64)
3117 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3118 else if (VecWidth == 256 && EltWidth == 64)
3119 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3120 else if (VecWidth == 512 && EltWidth == 64)
3121 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3122 else
3123 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
3124
3126 if (VecWidth == 512)
3127 std::swap(Mask, Args.back());
3128 Args.push_back(Mask);
3129
3130 Rep = Builder.CreateIntrinsic(IID, Args);
3131 } else if (Name.starts_with("avx512.mask.cmp.")) {
3132 // Integer compare intrinsics.
3133 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3134 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3135 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3136 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3137 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3138 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3139 Name.starts_with("avx512.cvtw2mask.") ||
3140 Name.starts_with("avx512.cvtd2mask.") ||
3141 Name.starts_with("avx512.cvtq2mask.")) {
3142 Value *Op = CI->getArgOperand(0);
3143 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3144 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3145 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3146 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3147 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3148 Name.starts_with("avx512.mask.pabs")) {
3149 Rep = upgradeAbs(Builder, *CI);
3150 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3151 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3152 Name.starts_with("avx512.mask.pmaxs")) {
3153 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3154 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3155 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3156 Name.starts_with("avx512.mask.pmaxu")) {
3157 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3158 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3159 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3160 Name.starts_with("avx512.mask.pmins")) {
3161 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3162 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3163 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3164 Name.starts_with("avx512.mask.pminu")) {
3165 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3166 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3167 Name == "avx512.pmulu.dq.512" ||
3168 Name.starts_with("avx512.mask.pmulu.dq.")) {
3169 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3170 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3171 Name == "avx512.pmul.dq.512" ||
3172 Name.starts_with("avx512.mask.pmul.dq.")) {
3173 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3174 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3175 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3176 Rep =
3177 Builder.CreateSIToFP(CI->getArgOperand(1),
3178 cast<VectorType>(CI->getType())->getElementType());
3179 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3180 } else if (Name == "avx512.cvtusi2sd") {
3181 Rep =
3182 Builder.CreateUIToFP(CI->getArgOperand(1),
3183 cast<VectorType>(CI->getType())->getElementType());
3184 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3185 } else if (Name == "sse2.cvtss2sd") {
3186 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3187 Rep = Builder.CreateFPExt(
3188 Rep, cast<VectorType>(CI->getType())->getElementType());
3189 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3190 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3191 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3192 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3193 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3194 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3195 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3196 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3197 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3198 Name == "avx512.mask.cvtqq2ps.256" ||
3199 Name == "avx512.mask.cvtqq2ps.512" ||
3200 Name == "avx512.mask.cvtuqq2ps.256" ||
3201 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3202 Name == "avx.cvt.ps2.pd.256" ||
3203 Name == "avx512.mask.cvtps2pd.128" ||
3204 Name == "avx512.mask.cvtps2pd.256") {
3205 auto *DstTy = cast<FixedVectorType>(CI->getType());
3206 Rep = CI->getArgOperand(0);
3207 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3208
3209 unsigned NumDstElts = DstTy->getNumElements();
3210 if (NumDstElts < SrcTy->getNumElements()) {
3211 assert(NumDstElts == 2 && "Unexpected vector size");
3212 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3213 }
3214
3215 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3216 bool IsUnsigned = Name.contains("cvtu");
3217 if (IsPS2PD)
3218 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3219 else if (CI->arg_size() == 4 &&
3220 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3221 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3222 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3223 : Intrinsic::x86_avx512_sitofp_round;
3224 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3225 {Rep, CI->getArgOperand(3)});
3226 } else {
3227 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3228 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3229 }
3230
3231 if (CI->arg_size() >= 3)
3232 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3233 CI->getArgOperand(1));
3234 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3235 Name.starts_with("vcvtph2ps.")) {
3236 auto *DstTy = cast<FixedVectorType>(CI->getType());
3237 Rep = CI->getArgOperand(0);
3238 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3239 unsigned NumDstElts = DstTy->getNumElements();
3240 if (NumDstElts != SrcTy->getNumElements()) {
3241 assert(NumDstElts == 4 && "Unexpected vector size");
3242 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3243 }
3244 Rep = Builder.CreateBitCast(
3245 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3246 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3247 if (CI->arg_size() >= 3)
3248 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3249 CI->getArgOperand(1));
3250 } else if (Name.starts_with("avx512.mask.load")) {
3251 // "avx512.mask.loadu." or "avx512.mask.load."
3252 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3253 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3254 CI->getArgOperand(2), Aligned);
3255 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3256 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3257 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3258 ResultTy->getNumElements());
3259
3260 Rep = Builder.CreateIntrinsic(
3261 Intrinsic::masked_expandload, ResultTy,
3262 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3263 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3264 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3265 Value *MaskVec =
3266 getX86MaskVec(Builder, CI->getArgOperand(2),
3267 cast<FixedVectorType>(ResultTy)->getNumElements());
3268
3269 Rep = Builder.CreateIntrinsic(
3270 Intrinsic::masked_compressstore, ResultTy,
3271 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3272 } else if (Name.starts_with("avx512.mask.compress.") ||
3273 Name.starts_with("avx512.mask.expand.")) {
3274 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3275
3276 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3277 ResultTy->getNumElements());
3278
3279 bool IsCompress = Name[12] == 'c';
3280 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3281 : Intrinsic::x86_avx512_mask_expand;
3282 Rep = Builder.CreateIntrinsic(
3283 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3284 } else if (Name.starts_with("xop.vpcom")) {
3285 bool IsSigned;
3286 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3287 Name.ends_with("uq"))
3288 IsSigned = false;
3289 else if (Name.ends_with("b") || Name.ends_with("w") ||
3290 Name.ends_with("d") || Name.ends_with("q"))
3291 IsSigned = true;
3292 else
3293 reportFatalUsageErrorWithCI("Intrinsic has unknown suffix", CI);
3294
3295 unsigned Imm;
3296 if (CI->arg_size() == 3) {
3297 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3298 } else {
3299 Name = Name.substr(9); // strip off "xop.vpcom"
3300 if (Name.starts_with("lt"))
3301 Imm = 0;
3302 else if (Name.starts_with("le"))
3303 Imm = 1;
3304 else if (Name.starts_with("gt"))
3305 Imm = 2;
3306 else if (Name.starts_with("ge"))
3307 Imm = 3;
3308 else if (Name.starts_with("eq"))
3309 Imm = 4;
3310 else if (Name.starts_with("ne"))
3311 Imm = 5;
3312 else if (Name.starts_with("false"))
3313 Imm = 6;
3314 else if (Name.starts_with("true"))
3315 Imm = 7;
3316 else
3317 llvm_unreachable("Unknown condition");
3318 }
3319
3320 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3321 } else if (Name.starts_with("xop.vpcmov")) {
3322 Value *Sel = CI->getArgOperand(2);
3323 Value *NotSel = Builder.CreateNot(Sel);
3324 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3325 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3326 Rep = Builder.CreateOr(Sel0, Sel1);
3327 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3328 Name.starts_with("avx512.mask.prol")) {
3329 Rep = upgradeX86Rotate(Builder, *CI, false);
3330 } else if (Name.starts_with("avx512.pror") ||
3331 Name.starts_with("avx512.mask.pror")) {
3332 Rep = upgradeX86Rotate(Builder, *CI, true);
3333 } else if (Name.starts_with("avx512.vpshld.") ||
3334 Name.starts_with("avx512.mask.vpshld") ||
3335 Name.starts_with("avx512.maskz.vpshld")) {
3336 bool ZeroMask = Name[11] == 'z';
3337 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3338 } else if (Name.starts_with("avx512.vpshrd.") ||
3339 Name.starts_with("avx512.mask.vpshrd") ||
3340 Name.starts_with("avx512.maskz.vpshrd")) {
3341 bool ZeroMask = Name[11] == 'z';
3342 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3343 } else if (Name == "sse42.crc32.64.8") {
3344 Value *Trunc0 =
3345 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3346 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3347 {Trunc0, CI->getArgOperand(1)});
3348 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3349 } else if (Name.starts_with("avx.vbroadcast.s") ||
3350 Name.starts_with("avx512.vbroadcast.s")) {
3351 // Replace broadcasts with a series of insertelements.
3352 auto *VecTy = cast<FixedVectorType>(CI->getType());
3353 Type *EltTy = VecTy->getElementType();
3354 unsigned EltNum = VecTy->getNumElements();
3355 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3356 Type *I32Ty = Type::getInt32Ty(C);
3357 Rep = PoisonValue::get(VecTy);
3358 for (unsigned I = 0; I < EltNum; ++I)
3359 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3360 } else if (Name.starts_with("sse41.pmovsx") ||
3361 Name.starts_with("sse41.pmovzx") ||
3362 Name.starts_with("avx2.pmovsx") ||
3363 Name.starts_with("avx2.pmovzx") ||
3364 Name.starts_with("avx512.mask.pmovsx") ||
3365 Name.starts_with("avx512.mask.pmovzx")) {
3366 auto *DstTy = cast<FixedVectorType>(CI->getType());
3367 unsigned NumDstElts = DstTy->getNumElements();
3368
3369 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3370 SmallVector<int, 8> ShuffleMask(NumDstElts);
3371 for (unsigned i = 0; i != NumDstElts; ++i)
3372 ShuffleMask[i] = i;
3373
3374 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3375
3376 bool DoSext = Name.contains("pmovsx");
3377 Rep =
3378 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3379 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3380 if (CI->arg_size() == 3)
3381 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3382 CI->getArgOperand(1));
3383 } else if (Name == "avx512.mask.pmov.qd.256" ||
3384 Name == "avx512.mask.pmov.qd.512" ||
3385 Name == "avx512.mask.pmov.wb.256" ||
3386 Name == "avx512.mask.pmov.wb.512") {
3387 Type *Ty = CI->getArgOperand(1)->getType();
3388 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3389 Rep =
3390 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3391 } else if (Name.starts_with("avx.vbroadcastf128") ||
3392 Name == "avx2.vbroadcasti128") {
3393 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3394 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3395 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3396 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3397 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3398 if (NumSrcElts == 2)
3399 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3400 else
3401 Rep = Builder.CreateShuffleVector(Load,
3402 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3403 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3404 Name.starts_with("avx512.mask.shuf.f")) {
3405 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3406 Type *VT = CI->getType();
3407 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3408 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3409 unsigned ControlBitsMask = NumLanes - 1;
3410 unsigned NumControlBits = NumLanes / 2;
3411 SmallVector<int, 8> ShuffleMask(0);
3412
3413 for (unsigned l = 0; l != NumLanes; ++l) {
3414 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3415 // We actually need the other source.
3416 if (l >= NumLanes / 2)
3417 LaneMask += NumLanes;
3418 for (unsigned i = 0; i != NumElementsInLane; ++i)
3419 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3420 }
3421 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3422 CI->getArgOperand(1), ShuffleMask);
3423 Rep =
3424 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3425 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3426 Name.starts_with("avx512.mask.broadcasti")) {
3427 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3428 ->getNumElements();
3429 unsigned NumDstElts =
3430 cast<FixedVectorType>(CI->getType())->getNumElements();
3431
3432 SmallVector<int, 8> ShuffleMask(NumDstElts);
3433 for (unsigned i = 0; i != NumDstElts; ++i)
3434 ShuffleMask[i] = i % NumSrcElts;
3435
3436 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3437 CI->getArgOperand(0), ShuffleMask);
3438 Rep =
3439 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3440 } else if (Name.starts_with("avx2.pbroadcast") ||
3441 Name.starts_with("avx2.vbroadcast") ||
3442 Name.starts_with("avx512.pbroadcast") ||
3443 Name.starts_with("avx512.mask.broadcast.s")) {
3444 // Replace vp?broadcasts with a vector shuffle.
3445 Value *Op = CI->getArgOperand(0);
3446 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3447 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3450 Rep = Builder.CreateShuffleVector(Op, M);
3451
3452 if (CI->arg_size() == 3)
3453 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3454 CI->getArgOperand(1));
3455 } else if (Name.starts_with("sse2.padds.") ||
3456 Name.starts_with("avx2.padds.") ||
3457 Name.starts_with("avx512.padds.") ||
3458 Name.starts_with("avx512.mask.padds.")) {
3459 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3460 } else if (Name.starts_with("sse2.psubs.") ||
3461 Name.starts_with("avx2.psubs.") ||
3462 Name.starts_with("avx512.psubs.") ||
3463 Name.starts_with("avx512.mask.psubs.")) {
3464 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3465 } else if (Name.starts_with("sse2.paddus.") ||
3466 Name.starts_with("avx2.paddus.") ||
3467 Name.starts_with("avx512.mask.paddus.")) {
3468 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3469 } else if (Name.starts_with("sse2.psubus.") ||
3470 Name.starts_with("avx2.psubus.") ||
3471 Name.starts_with("avx512.mask.psubus.")) {
3472 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3473 } else if (Name.starts_with("avx512.mask.palignr.")) {
3474 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3475 CI->getArgOperand(1), CI->getArgOperand(2),
3476 CI->getArgOperand(3), CI->getArgOperand(4),
3477 false);
3478 } else if (Name.starts_with("avx512.mask.valign.")) {
3480 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3481 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3482 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3483 // 128/256-bit shift left specified in bits.
3484 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3485 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3486 Shift / 8); // Shift is in bits.
3487 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3488 // 128/256-bit shift right specified in bits.
3489 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3490 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3491 Shift / 8); // Shift is in bits.
3492 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3493 Name == "avx512.psll.dq.512") {
3494 // 128/256/512-bit shift left specified in bytes.
3495 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3496 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3497 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3498 Name == "avx512.psrl.dq.512") {
3499 // 128/256/512-bit shift right specified in bytes.
3500 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3501 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3502 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3503 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3504 Name.starts_with("avx2.pblendd.")) {
3505 Value *Op0 = CI->getArgOperand(0);
3506 Value *Op1 = CI->getArgOperand(1);
3507 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3508 auto *VecTy = cast<FixedVectorType>(CI->getType());
3509 unsigned NumElts = VecTy->getNumElements();
3510
3511 SmallVector<int, 16> Idxs(NumElts);
3512 for (unsigned i = 0; i != NumElts; ++i)
3513 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3514
3515 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3516 } else if (Name.starts_with("avx.vinsertf128.") ||
3517 Name == "avx2.vinserti128" ||
3518 Name.starts_with("avx512.mask.insert")) {
3519 Value *Op0 = CI->getArgOperand(0);
3520 Value *Op1 = CI->getArgOperand(1);
3521 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3522 unsigned DstNumElts =
3523 cast<FixedVectorType>(CI->getType())->getNumElements();
3524 unsigned SrcNumElts =
3525 cast<FixedVectorType>(Op1->getType())->getNumElements();
3526 unsigned Scale = DstNumElts / SrcNumElts;
3527
3528 // Mask off the high bits of the immediate value; hardware ignores those.
3529 Imm = Imm % Scale;
3530
3531 // Extend the second operand into a vector the size of the destination.
3532 SmallVector<int, 8> Idxs(DstNumElts);
3533 for (unsigned i = 0; i != SrcNumElts; ++i)
3534 Idxs[i] = i;
3535 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3536 Idxs[i] = SrcNumElts;
3537 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3538
3539 // Insert the second operand into the first operand.
3540
3541 // Note that there is no guarantee that instruction lowering will actually
3542 // produce a vinsertf128 instruction for the created shuffles. In
3543 // particular, the 0 immediate case involves no lane changes, so it can
3544 // be handled as a blend.
3545
3546 // Example of shuffle mask for 32-bit elements:
3547 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3548 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3549
3550 // First fill with identify mask.
3551 for (unsigned i = 0; i != DstNumElts; ++i)
3552 Idxs[i] = i;
3553 // Then replace the elements where we need to insert.
3554 for (unsigned i = 0; i != SrcNumElts; ++i)
3555 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3556 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3557
3558 // If the intrinsic has a mask operand, handle that.
3559 if (CI->arg_size() == 5)
3560 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3561 CI->getArgOperand(3));
3562 } else if (Name.starts_with("avx.vextractf128.") ||
3563 Name == "avx2.vextracti128" ||
3564 Name.starts_with("avx512.mask.vextract")) {
3565 Value *Op0 = CI->getArgOperand(0);
3566 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3567 unsigned DstNumElts =
3568 cast<FixedVectorType>(CI->getType())->getNumElements();
3569 unsigned SrcNumElts =
3570 cast<FixedVectorType>(Op0->getType())->getNumElements();
3571 unsigned Scale = SrcNumElts / DstNumElts;
3572
3573 // Mask off the high bits of the immediate value; hardware ignores those.
3574 Imm = Imm % Scale;
3575
3576 // Get indexes for the subvector of the input vector.
3577 SmallVector<int, 8> Idxs(DstNumElts);
3578 for (unsigned i = 0; i != DstNumElts; ++i) {
3579 Idxs[i] = i + (Imm * DstNumElts);
3580 }
3581 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3582
3583 // If the intrinsic has a mask operand, handle that.
3584 if (CI->arg_size() == 4)
3585 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3586 CI->getArgOperand(2));
3587 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3588 Name.starts_with("avx512.mask.perm.di.")) {
3589 Value *Op0 = CI->getArgOperand(0);
3590 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3591 auto *VecTy = cast<FixedVectorType>(CI->getType());
3592 unsigned NumElts = VecTy->getNumElements();
3593
3594 SmallVector<int, 8> Idxs(NumElts);
3595 for (unsigned i = 0; i != NumElts; ++i)
3596 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3597
3598 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3599
3600 if (CI->arg_size() == 4)
3601 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3602 CI->getArgOperand(2));
3603 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3604 // The immediate permute control byte looks like this:
3605 // [1:0] - select 128 bits from sources for low half of destination
3606 // [2] - ignore
3607 // [3] - zero low half of destination
3608 // [5:4] - select 128 bits from sources for high half of destination
3609 // [6] - ignore
3610 // [7] - zero high half of destination
3611
3612 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3613
3614 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3615 unsigned HalfSize = NumElts / 2;
3616 SmallVector<int, 8> ShuffleMask(NumElts);
3617
3618 // Determine which operand(s) are actually in use for this instruction.
3619 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3620 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3621
3622 // If needed, replace operands based on zero mask.
3623 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3624 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3625
3626 // Permute low half of result.
3627 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3628 for (unsigned i = 0; i < HalfSize; ++i)
3629 ShuffleMask[i] = StartIndex + i;
3630
3631 // Permute high half of result.
3632 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3633 for (unsigned i = 0; i < HalfSize; ++i)
3634 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3635
3636 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3637
3638 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3639 Name.starts_with("avx512.mask.vpermil.p") ||
3640 Name.starts_with("avx512.mask.pshuf.d.")) {
3641 Value *Op0 = CI->getArgOperand(0);
3642 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3643 auto *VecTy = cast<FixedVectorType>(CI->getType());
3644 unsigned NumElts = VecTy->getNumElements();
3645 // Calculate the size of each index in the immediate.
3646 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3647 unsigned IdxMask = ((1 << IdxSize) - 1);
3648
3649 SmallVector<int, 8> Idxs(NumElts);
3650 // Lookup the bits for this element, wrapping around the immediate every
3651 // 8-bits. Elements are grouped into sets of 2 or 4 elements so we need
3652 // to offset by the first index of each group.
3653 for (unsigned i = 0; i != NumElts; ++i)
3654 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3655
3656 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3657
3658 if (CI->arg_size() == 4)
3659 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3660 CI->getArgOperand(2));
3661 } else if (Name == "sse2.pshufl.w" ||
3662 Name.starts_with("avx512.mask.pshufl.w.")) {
3663 Value *Op0 = CI->getArgOperand(0);
3664 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3665 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3666
3667 if (Name == "sse2.pshufl.w" && NumElts % 8 != 0)
3668 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3669
3670 SmallVector<int, 16> Idxs(NumElts);
3671 for (unsigned l = 0; l != NumElts; l += 8) {
3672 for (unsigned i = 0; i != 4; ++i)
3673 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3674 for (unsigned i = 4; i != 8; ++i)
3675 Idxs[i + l] = i + l;
3676 }
3677
3678 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3679
3680 if (CI->arg_size() == 4)
3681 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3682 CI->getArgOperand(2));
3683 } else if (Name == "sse2.pshufh.w" ||
3684 Name.starts_with("avx512.mask.pshufh.w.")) {
3685 Value *Op0 = CI->getArgOperand(0);
3686 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3687 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3688
3689 if (Name == "sse2.pshufh.w" && NumElts % 8 != 0)
3690 reportFatalUsageErrorWithCI("Intrinsic has invalid signature", CI);
3691
3692 SmallVector<int, 16> Idxs(NumElts);
3693 for (unsigned l = 0; l != NumElts; l += 8) {
3694 for (unsigned i = 0; i != 4; ++i)
3695 Idxs[i + l] = i + l;
3696 for (unsigned i = 0; i != 4; ++i)
3697 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3698 }
3699
3700 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3701
3702 if (CI->arg_size() == 4)
3703 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3704 CI->getArgOperand(2));
3705 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3706 Value *Op0 = CI->getArgOperand(0);
3707 Value *Op1 = CI->getArgOperand(1);
3708 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3709 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3710
3711 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3712 unsigned HalfLaneElts = NumLaneElts / 2;
3713
3714 SmallVector<int, 16> Idxs(NumElts);
3715 for (unsigned i = 0; i != NumElts; ++i) {
3716 // Base index is the starting element of the lane.
3717 Idxs[i] = i - (i % NumLaneElts);
3718 // If we are half way through the lane switch to the other source.
3719 if ((i % NumLaneElts) >= HalfLaneElts)
3720 Idxs[i] += NumElts;
3721 // Now select the specific element. By adding HalfLaneElts bits from
3722 // the immediate. Wrapping around the immediate every 8-bits.
3723 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3724 }
3725
3726 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3727
3728 Rep =
3729 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3730 } else if (Name.starts_with("avx512.mask.movddup") ||
3731 Name.starts_with("avx512.mask.movshdup") ||
3732 Name.starts_with("avx512.mask.movsldup")) {
3733 Value *Op0 = CI->getArgOperand(0);
3734 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3735 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3736
3737 unsigned Offset = 0;
3738 if (Name.starts_with("avx512.mask.movshdup."))
3739 Offset = 1;
3740
3741 SmallVector<int, 16> Idxs(NumElts);
3742 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3743 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3744 Idxs[i + l + 0] = i + l + Offset;
3745 Idxs[i + l + 1] = i + l + Offset;
3746 }
3747
3748 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3749
3750 Rep =
3751 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3752 } else if (Name.starts_with("avx512.mask.punpckl") ||
3753 Name.starts_with("avx512.mask.unpckl.")) {
3754 Value *Op0 = CI->getArgOperand(0);
3755 Value *Op1 = CI->getArgOperand(1);
3756 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3757 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3758
3759 SmallVector<int, 64> Idxs(NumElts);
3760 for (int l = 0; l != NumElts; l += NumLaneElts)
3761 for (int i = 0; i != NumLaneElts; ++i)
3762 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3763
3764 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3765
3766 Rep =
3767 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3768 } else if (Name.starts_with("avx512.mask.punpckh") ||
3769 Name.starts_with("avx512.mask.unpckh.")) {
3770 Value *Op0 = CI->getArgOperand(0);
3771 Value *Op1 = CI->getArgOperand(1);
3772 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3773 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3774
3775 SmallVector<int, 64> Idxs(NumElts);
3776 for (int l = 0; l != NumElts; l += NumLaneElts)
3777 for (int i = 0; i != NumLaneElts; ++i)
3778 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3779
3780 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3781
3782 Rep =
3783 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3784 } else if (Name.starts_with("avx512.mask.and.") ||
3785 Name.starts_with("avx512.mask.pand.")) {
3786 VectorType *FTy = cast<VectorType>(CI->getType());
3788 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3789 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3790 Rep = Builder.CreateBitCast(Rep, FTy);
3791 Rep =
3792 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3793 } else if (Name.starts_with("avx512.mask.andn.") ||
3794 Name.starts_with("avx512.mask.pandn.")) {
3795 VectorType *FTy = cast<VectorType>(CI->getType());
3797 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3798 Rep = Builder.CreateAnd(Rep,
3799 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3800 Rep = Builder.CreateBitCast(Rep, FTy);
3801 Rep =
3802 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3803 } else if (Name.starts_with("avx512.mask.or.") ||
3804 Name.starts_with("avx512.mask.por.")) {
3805 VectorType *FTy = cast<VectorType>(CI->getType());
3807 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3808 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3809 Rep = Builder.CreateBitCast(Rep, FTy);
3810 Rep =
3811 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3812 } else if (Name.starts_with("avx512.mask.xor.") ||
3813 Name.starts_with("avx512.mask.pxor.")) {
3814 VectorType *FTy = cast<VectorType>(CI->getType());
3816 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3817 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3818 Rep = Builder.CreateBitCast(Rep, FTy);
3819 Rep =
3820 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3821 } else if (Name.starts_with("avx512.mask.padd.")) {
3822 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3823 Rep =
3824 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3825 } else if (Name.starts_with("avx512.mask.psub.")) {
3826 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3827 Rep =
3828 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3829 } else if (Name.starts_with("avx512.mask.pmull.")) {
3830 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3831 Rep =
3832 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3833 } else if (Name.starts_with("avx512.mask.add.p")) {
3834 if (Name.ends_with(".512")) {
3835 Intrinsic::ID IID;
3836 if (Name[17] == 's')
3837 IID = Intrinsic::x86_avx512_add_ps_512;
3838 else
3839 IID = Intrinsic::x86_avx512_add_pd_512;
3840
3841 Rep = Builder.CreateIntrinsic(
3842 IID,
3843 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3844 } else {
3845 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3846 }
3847 Rep =
3848 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3849 } else if (Name.starts_with("avx512.mask.div.p")) {
3850 if (Name.ends_with(".512")) {
3851 Intrinsic::ID IID;
3852 if (Name[17] == 's')
3853 IID = Intrinsic::x86_avx512_div_ps_512;
3854 else
3855 IID = Intrinsic::x86_avx512_div_pd_512;
3856
3857 Rep = Builder.CreateIntrinsic(
3858 IID,
3859 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3860 } else {
3861 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3862 }
3863 Rep =
3864 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3865 } else if (Name.starts_with("avx512.mask.mul.p")) {
3866 if (Name.ends_with(".512")) {
3867 Intrinsic::ID IID;
3868 if (Name[17] == 's')
3869 IID = Intrinsic::x86_avx512_mul_ps_512;
3870 else
3871 IID = Intrinsic::x86_avx512_mul_pd_512;
3872
3873 Rep = Builder.CreateIntrinsic(
3874 IID,
3875 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3876 } else {
3877 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3878 }
3879 Rep =
3880 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3881 } else if (Name.starts_with("avx512.mask.sub.p")) {
3882 if (Name.ends_with(".512")) {
3883 Intrinsic::ID IID;
3884 if (Name[17] == 's')
3885 IID = Intrinsic::x86_avx512_sub_ps_512;
3886 else
3887 IID = Intrinsic::x86_avx512_sub_pd_512;
3888
3889 Rep = Builder.CreateIntrinsic(
3890 IID,
3891 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3892 } else {
3893 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3894 }
3895 Rep =
3896 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3897 } else if ((Name.starts_with("avx512.mask.max.p") ||
3898 Name.starts_with("avx512.mask.min.p")) &&
3899 Name.drop_front(18) == ".512") {
3900 bool IsDouble = Name[17] == 'd';
3901 bool IsMin = Name[13] == 'i';
3902 static const Intrinsic::ID MinMaxTbl[2][2] = {
3903 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3904 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3905 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3906
3907 Rep = Builder.CreateIntrinsic(
3908 IID,
3909 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3910 Rep =
3911 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3912 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3913 Rep =
3914 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3915 {CI->getArgOperand(0), Builder.getInt1(false)});
3916 Rep =
3917 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3918 } else if (Name.starts_with("avx512.mask.psll")) {
3919 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3920 bool IsVariable = Name[16] == 'v';
3921 char Size = Name[16] == '.' ? Name[17]
3922 : Name[17] == '.' ? Name[18]
3923 : Name[18] == '.' ? Name[19]
3924 : Name[20];
3925
3926 Intrinsic::ID IID;
3927 if (IsVariable && Name[17] != '.') {
3928 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3929 IID = Intrinsic::x86_avx2_psllv_q;
3930 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3931 IID = Intrinsic::x86_avx2_psllv_q_256;
3932 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3933 IID = Intrinsic::x86_avx2_psllv_d;
3934 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3935 IID = Intrinsic::x86_avx2_psllv_d_256;
3936 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3937 IID = Intrinsic::x86_avx512_psllv_w_128;
3938 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3939 IID = Intrinsic::x86_avx512_psllv_w_256;
3940 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3941 IID = Intrinsic::x86_avx512_psllv_w_512;
3942 else
3943 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3944 } else if (Name.ends_with(".128")) {
3945 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3946 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3947 : Intrinsic::x86_sse2_psll_d;
3948 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3949 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3950 : Intrinsic::x86_sse2_psll_q;
3951 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3952 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3953 : Intrinsic::x86_sse2_psll_w;
3954 else
3955 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3956 } else if (Name.ends_with(".256")) {
3957 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3958 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3959 : Intrinsic::x86_avx2_psll_d;
3960 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3961 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3962 : Intrinsic::x86_avx2_psll_q;
3963 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3964 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3965 : Intrinsic::x86_avx2_psll_w;
3966 else
3967 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3968 } else {
3969 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3970 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3971 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3972 : Intrinsic::x86_avx512_psll_d_512;
3973 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3974 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3975 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3976 : Intrinsic::x86_avx512_psll_q_512;
3977 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3978 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3979 : Intrinsic::x86_avx512_psll_w_512;
3980 else
3981 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
3982 }
3983
3984 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3985 } else if (Name.starts_with("avx512.mask.psrl")) {
3986 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3987 bool IsVariable = Name[16] == 'v';
3988 char Size = Name[16] == '.' ? Name[17]
3989 : Name[17] == '.' ? Name[18]
3990 : Name[18] == '.' ? Name[19]
3991 : Name[20];
3992
3993 Intrinsic::ID IID;
3994 if (IsVariable && Name[17] != '.') {
3995 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3996 IID = Intrinsic::x86_avx2_psrlv_q;
3997 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3998 IID = Intrinsic::x86_avx2_psrlv_q_256;
3999 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
4000 IID = Intrinsic::x86_avx2_psrlv_d;
4001 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
4002 IID = Intrinsic::x86_avx2_psrlv_d_256;
4003 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
4004 IID = Intrinsic::x86_avx512_psrlv_w_128;
4005 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
4006 IID = Intrinsic::x86_avx512_psrlv_w_256;
4007 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
4008 IID = Intrinsic::x86_avx512_psrlv_w_512;
4009 else
4010 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4011 } else if (Name.ends_with(".128")) {
4012 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
4013 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
4014 : Intrinsic::x86_sse2_psrl_d;
4015 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
4016 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
4017 : Intrinsic::x86_sse2_psrl_q;
4018 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
4019 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
4020 : Intrinsic::x86_sse2_psrl_w;
4021 else
4022 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4023 } else if (Name.ends_with(".256")) {
4024 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
4025 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
4026 : Intrinsic::x86_avx2_psrl_d;
4027 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
4028 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
4029 : Intrinsic::x86_avx2_psrl_q;
4030 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
4031 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
4032 : Intrinsic::x86_avx2_psrl_w;
4033 else
4034 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4035 } else {
4036 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
4037 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
4038 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
4039 : Intrinsic::x86_avx512_psrl_d_512;
4040 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
4041 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
4042 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
4043 : Intrinsic::x86_avx512_psrl_q_512;
4044 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w)
4045 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
4046 : Intrinsic::x86_avx512_psrl_w_512;
4047 else
4048 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4049 }
4050
4051 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4052 } else if (Name.starts_with("avx512.mask.psra")) {
4053 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4054 bool IsVariable = Name[16] == 'v';
4055 char Size = Name[16] == '.' ? Name[17]
4056 : Name[17] == '.' ? Name[18]
4057 : Name[18] == '.' ? Name[19]
4058 : Name[20];
4059
4060 Intrinsic::ID IID;
4061 if (IsVariable && Name[17] != '.') {
4062 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4063 IID = Intrinsic::x86_avx2_psrav_d;
4064 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4065 IID = Intrinsic::x86_avx2_psrav_d_256;
4066 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4067 IID = Intrinsic::x86_avx512_psrav_w_128;
4068 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4069 IID = Intrinsic::x86_avx512_psrav_w_256;
4070 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4071 IID = Intrinsic::x86_avx512_psrav_w_512;
4072 else
4073 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4074 } else if (Name.ends_with(".128")) {
4075 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4076 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4077 : Intrinsic::x86_sse2_psra_d;
4078 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4079 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4080 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4081 : Intrinsic::x86_avx512_psra_q_128;
4082 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4083 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4084 : Intrinsic::x86_sse2_psra_w;
4085 else
4086 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4087 } else if (Name.ends_with(".256")) {
4088 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4089 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4090 : Intrinsic::x86_avx2_psra_d;
4091 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4092 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4093 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4094 : Intrinsic::x86_avx512_psra_q_256;
4095 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4096 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4097 : Intrinsic::x86_avx2_psra_w;
4098 else
4099 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4100 } else {
4101 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4102 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4103 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4104 : Intrinsic::x86_avx512_psra_d_512;
4105 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4106 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4107 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4108 : Intrinsic::x86_avx512_psra_q_512;
4109 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4110 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4111 : Intrinsic::x86_avx512_psra_w_512;
4112 else
4113 reportFatalUsageErrorWithCI("Intrinsic has unexpected size", CI);
4114 }
4115
4116 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4117 } else if (Name.starts_with("avx512.mask.move.s")) {
4118 Rep = upgradeMaskedMove(Builder, *CI);
4119 } else if (Name.starts_with("avx512.cvtmask2")) {
4120 Rep = upgradeMaskToInt(Builder, *CI);
4121 } else if (Name.ends_with(".movntdqa")) {
4123 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4124
4125 LoadInst *LI = Builder.CreateAlignedLoad(
4126 CI->getType(), CI->getArgOperand(0),
4128 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4129 Rep = LI;
4130 } else if (Name.starts_with("fma.vfmadd.") ||
4131 Name.starts_with("fma.vfmsub.") ||
4132 Name.starts_with("fma.vfnmadd.") ||
4133 Name.starts_with("fma.vfnmsub.")) {
4134 bool NegMul = Name[6] == 'n';
4135 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4136 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4137
4138 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4139 CI->getArgOperand(2)};
4140
4141 if (IsScalar) {
4142 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4143 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4144 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4145 }
4146
4147 if (NegMul && !IsScalar)
4148 Ops[0] = Builder.CreateFNeg(Ops[0]);
4149 if (NegMul && IsScalar)
4150 Ops[1] = Builder.CreateFNeg(Ops[1]);
4151 if (NegAcc)
4152 Ops[2] = Builder.CreateFNeg(Ops[2]);
4153
4154 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4155
4156 if (IsScalar)
4157 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4158 } else if (Name.starts_with("fma4.vfmadd.s")) {
4159 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4160 CI->getArgOperand(2)};
4161
4162 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4163 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4164 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4165
4166 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4167
4168 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4169 Rep, (uint64_t)0);
4170 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4171 Name.starts_with("avx512.maskz.vfmadd.s") ||
4172 Name.starts_with("avx512.mask3.vfmadd.s") ||
4173 Name.starts_with("avx512.mask3.vfmsub.s") ||
4174 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4175 bool IsMask3 = Name[11] == '3';
4176 bool IsMaskZ = Name[11] == 'z';
4177 // Drop the "avx512.mask." to make it easier.
4178 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4179 bool NegMul = Name[2] == 'n';
4180 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4181
4182 Value *A = CI->getArgOperand(0);
4183 Value *B = CI->getArgOperand(1);
4184 Value *C = CI->getArgOperand(2);
4185
4186 if (NegMul && (IsMask3 || IsMaskZ))
4187 A = Builder.CreateFNeg(A);
4188 if (NegMul && !(IsMask3 || IsMaskZ))
4189 B = Builder.CreateFNeg(B);
4190 if (NegAcc)
4191 C = Builder.CreateFNeg(C);
4192
4193 A = Builder.CreateExtractElement(A, (uint64_t)0);
4194 B = Builder.CreateExtractElement(B, (uint64_t)0);
4195 C = Builder.CreateExtractElement(C, (uint64_t)0);
4196
4197 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4198 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4199 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4200
4201 Intrinsic::ID IID;
4202 if (Name.back() == 'd')
4203 IID = Intrinsic::x86_avx512_vfmadd_f64;
4204 else
4205 IID = Intrinsic::x86_avx512_vfmadd_f32;
4206 Rep = Builder.CreateIntrinsic(IID, Ops);
4207 } else {
4208 Rep = Builder.CreateFMA(A, B, C);
4209 }
4210
4211 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4212 : IsMask3 ? C
4213 : A;
4214
4215 // For Mask3 with NegAcc, we need to create a new extractelement that
4216 // avoids the negation above.
4217 if (NegAcc && IsMask3)
4218 PassThru =
4219 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4220
4221 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4222 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4223 (uint64_t)0);
4224 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4225 Name.starts_with("avx512.mask.vfnmadd.p") ||
4226 Name.starts_with("avx512.mask.vfnmsub.p") ||
4227 Name.starts_with("avx512.mask3.vfmadd.p") ||
4228 Name.starts_with("avx512.mask3.vfmsub.p") ||
4229 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4230 Name.starts_with("avx512.maskz.vfmadd.p")) {
4231 bool IsMask3 = Name[11] == '3';
4232 bool IsMaskZ = Name[11] == 'z';
4233 // Drop the "avx512.mask." to make it easier.
4234 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4235 bool NegMul = Name[2] == 'n';
4236 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4237
4238 Value *A = CI->getArgOperand(0);
4239 Value *B = CI->getArgOperand(1);
4240 Value *C = CI->getArgOperand(2);
4241
4242 if (NegMul && (IsMask3 || IsMaskZ))
4243 A = Builder.CreateFNeg(A);
4244 if (NegMul && !(IsMask3 || IsMaskZ))
4245 B = Builder.CreateFNeg(B);
4246 if (NegAcc)
4247 C = Builder.CreateFNeg(C);
4248
4249 if (CI->arg_size() == 5 &&
4250 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4251 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4252 Intrinsic::ID IID;
4253 // Check the character before ".512" in string.
4254 if (Name[Name.size() - 5] == 's')
4255 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4256 else
4257 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4258
4259 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4260 } else {
4261 Rep = Builder.CreateFMA(A, B, C);
4262 }
4263
4264 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4265 : IsMask3 ? CI->getArgOperand(2)
4266 : CI->getArgOperand(0);
4267
4268 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4269 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4270 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4271 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4272 Intrinsic::ID IID;
4273 if (VecWidth == 128 && EltWidth == 32)
4274 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4275 else if (VecWidth == 256 && EltWidth == 32)
4276 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4277 else if (VecWidth == 128 && EltWidth == 64)
4278 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4279 else if (VecWidth == 256 && EltWidth == 64)
4280 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4281 else
4282 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4283
4284 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4285 CI->getArgOperand(2)};
4286 Ops[2] = Builder.CreateFNeg(Ops[2]);
4287 Rep = Builder.CreateIntrinsic(IID, Ops);
4288 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4289 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4290 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4291 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4292 bool IsMask3 = Name[11] == '3';
4293 bool IsMaskZ = Name[11] == 'z';
4294 // Drop the "avx512.mask." to make it easier.
4295 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4296 bool IsSubAdd = Name[3] == 's';
4297 if (CI->arg_size() == 5) {
4298 Intrinsic::ID IID;
4299 // Check the character before ".512" in string.
4300 if (Name[Name.size() - 5] == 's')
4301 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4302 else
4303 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4304
4305 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4306 CI->getArgOperand(2), CI->getArgOperand(4)};
4307 if (IsSubAdd)
4308 Ops[2] = Builder.CreateFNeg(Ops[2]);
4309
4310 Rep = Builder.CreateIntrinsic(IID, Ops);
4311 } else {
4312 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4313
4314 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4315 CI->getArgOperand(2)};
4316
4318 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4319 Value *Odd = Builder.CreateCall(FMA, Ops);
4320 Ops[2] = Builder.CreateFNeg(Ops[2]);
4321 Value *Even = Builder.CreateCall(FMA, Ops);
4322
4323 if (IsSubAdd)
4324 std::swap(Even, Odd);
4325
4326 SmallVector<int, 32> Idxs(NumElts);
4327 for (int i = 0; i != NumElts; ++i)
4328 Idxs[i] = i + (i % 2) * NumElts;
4329
4330 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4331 }
4332
4333 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4334 : IsMask3 ? CI->getArgOperand(2)
4335 : CI->getArgOperand(0);
4336
4337 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4338 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4339 Name.starts_with("avx512.maskz.pternlog.")) {
4340 bool ZeroMask = Name[11] == 'z';
4341 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4342 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4343 Intrinsic::ID IID;
4344 if (VecWidth == 128 && EltWidth == 32)
4345 IID = Intrinsic::x86_avx512_pternlog_d_128;
4346 else if (VecWidth == 256 && EltWidth == 32)
4347 IID = Intrinsic::x86_avx512_pternlog_d_256;
4348 else if (VecWidth == 512 && EltWidth == 32)
4349 IID = Intrinsic::x86_avx512_pternlog_d_512;
4350 else if (VecWidth == 128 && EltWidth == 64)
4351 IID = Intrinsic::x86_avx512_pternlog_q_128;
4352 else if (VecWidth == 256 && EltWidth == 64)
4353 IID = Intrinsic::x86_avx512_pternlog_q_256;
4354 else if (VecWidth == 512 && EltWidth == 64)
4355 IID = Intrinsic::x86_avx512_pternlog_q_512;
4356 else
4357 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4358
4359 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4360 CI->getArgOperand(2), CI->getArgOperand(3)};
4361 Rep = Builder.CreateIntrinsic(IID, Args);
4362 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4363 : CI->getArgOperand(0);
4364 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4365 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4366 Name.starts_with("avx512.maskz.vpmadd52")) {
4367 bool ZeroMask = Name[11] == 'z';
4368 bool High = Name[20] == 'h' || Name[21] == 'h';
4369 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4370 Intrinsic::ID IID;
4371 if (VecWidth == 128 && !High)
4372 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4373 else if (VecWidth == 256 && !High)
4374 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4375 else if (VecWidth == 512 && !High)
4376 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4377 else if (VecWidth == 128 && High)
4378 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4379 else if (VecWidth == 256 && High)
4380 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4381 else if (VecWidth == 512 && High)
4382 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4383 else
4384 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4385
4386 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4387 CI->getArgOperand(2)};
4388 Rep = Builder.CreateIntrinsic(IID, Args);
4389 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4390 : CI->getArgOperand(0);
4391 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4392 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4393 Name.starts_with("avx512.mask.vpermt2var.") ||
4394 Name.starts_with("avx512.maskz.vpermt2var.")) {
4395 bool ZeroMask = Name[11] == 'z';
4396 bool IndexForm = Name[17] == 'i';
4397 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4398 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4399 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4400 Name.starts_with("avx512.mask.vpdpbusds.") ||
4401 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4402 bool ZeroMask = Name[11] == 'z';
4403 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4404 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4405 Intrinsic::ID IID;
4406 if (VecWidth == 128 && !IsSaturating)
4407 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4408 else if (VecWidth == 256 && !IsSaturating)
4409 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4410 else if (VecWidth == 512 && !IsSaturating)
4411 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4412 else if (VecWidth == 128 && IsSaturating)
4413 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4414 else if (VecWidth == 256 && IsSaturating)
4415 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4416 else if (VecWidth == 512 && IsSaturating)
4417 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4418 else
4419 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4420
4421 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4422 CI->getArgOperand(2)};
4423
4424 // Input arguments types were incorrectly set to vectors of i32 before but
4425 // they should be vectors of i8. Insert bit cast when encountering the old
4426 // types
4427 if (Args[1]->getType()->isVectorTy() &&
4428 cast<VectorType>(Args[1]->getType())
4429 ->getElementType()
4430 ->isIntegerTy(32) &&
4431 Args[2]->getType()->isVectorTy() &&
4432 cast<VectorType>(Args[2]->getType())
4433 ->getElementType()
4434 ->isIntegerTy(32)) {
4435 Type *NewArgType = nullptr;
4436 if (VecWidth == 128)
4437 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4438 else if (VecWidth == 256)
4439 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4440 else if (VecWidth == 512)
4441 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4442 else
4443 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4444 CI);
4445
4446 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4447 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4448 }
4449
4450 Rep = Builder.CreateIntrinsic(IID, Args);
4451 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4452 : CI->getArgOperand(0);
4453 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4454 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4455 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4456 Name.starts_with("avx512.mask.vpdpwssds.") ||
4457 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4458 bool ZeroMask = Name[11] == 'z';
4459 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4460 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4461 Intrinsic::ID IID;
4462 if (VecWidth == 128 && !IsSaturating)
4463 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4464 else if (VecWidth == 256 && !IsSaturating)
4465 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4466 else if (VecWidth == 512 && !IsSaturating)
4467 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4468 else if (VecWidth == 128 && IsSaturating)
4469 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4470 else if (VecWidth == 256 && IsSaturating)
4471 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4472 else if (VecWidth == 512 && IsSaturating)
4473 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4474 else
4475 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4476
4477 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4478 CI->getArgOperand(2)};
4479
4480 // Input arguments types were incorrectly set to vectors of i32 before but
4481 // they should be vectors of i16. Insert bit cast when encountering the old
4482 // types
4483 if (Args[1]->getType()->isVectorTy() &&
4484 cast<VectorType>(Args[1]->getType())
4485 ->getElementType()
4486 ->isIntegerTy(32) &&
4487 Args[2]->getType()->isVectorTy() &&
4488 cast<VectorType>(Args[2]->getType())
4489 ->getElementType()
4490 ->isIntegerTy(32)) {
4491 Type *NewArgType = nullptr;
4492 if (VecWidth == 128)
4493 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4494 else if (VecWidth == 256)
4495 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4496 else if (VecWidth == 512)
4497 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4498 else
4499 reportFatalUsageErrorWithCI("Intrinsic has unexpected vector bit width",
4500 CI);
4501
4502 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4503 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4504 }
4505
4506 Rep = Builder.CreateIntrinsic(IID, Args);
4507 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4508 : CI->getArgOperand(0);
4509 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4510 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4511 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4512 Name == "subborrow.u32" || Name == "subborrow.u64") {
4513 Intrinsic::ID IID;
4514 if (Name[0] == 'a' && Name.back() == '2')
4515 IID = Intrinsic::x86_addcarry_32;
4516 else if (Name[0] == 'a' && Name.back() == '4')
4517 IID = Intrinsic::x86_addcarry_64;
4518 else if (Name[0] == 's' && Name.back() == '2')
4519 IID = Intrinsic::x86_subborrow_32;
4520 else if (Name[0] == 's' && Name.back() == '4')
4521 IID = Intrinsic::x86_subborrow_64;
4522 else
4523 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4524
4525 // Make a call with 3 operands.
4526 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4527 CI->getArgOperand(2)};
4528 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4529
4530 // Extract the second result and store it.
4531 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4532 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4533 // Replace the original call result with the first result of the new call.
4534 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4535
4536 CI->replaceAllUsesWith(CF);
4537 Rep = nullptr;
4538 } else if (Name.starts_with("avx512.mask.") &&
4539 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4540 // Rep will be updated by the call in the condition.
4541 } else
4542 reportFatalUsageErrorWithCI("Unexpected intrinsic", CI);
4543
4544 return Rep;
4545}
4546
4548 Function *F, IRBuilder<> &Builder) {
4549 if (Name.starts_with("neon.bfcvt")) {
4550 if (Name.starts_with("neon.bfcvtn2")) {
4551 SmallVector<int, 32> LoMask(4);
4552 std::iota(LoMask.begin(), LoMask.end(), 0);
4553 SmallVector<int, 32> ConcatMask(8);
4554 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4555 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4556 Value *Trunc =
4557 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4558 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4559 } else if (Name.starts_with("neon.bfcvtn")) {
4560 SmallVector<int, 32> ConcatMask(8);
4561 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4562 Type *V4BF16 =
4563 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4564 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4565 dbgs() << "Trunc: " << *Trunc << "\n";
4566 return Builder.CreateShuffleVector(
4567 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4568 } else {
4569 return Builder.CreateFPTrunc(CI->getOperand(0),
4570 Type::getBFloatTy(F->getContext()));
4571 }
4572 } else if (Name.starts_with("sve.fcvt")) {
4573 Intrinsic::ID NewID =
4575 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4576 .Case("sve.fcvtnt.bf16f32",
4577 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4579 if (NewID == Intrinsic::not_intrinsic)
4580 llvm_unreachable("Unhandled Intrinsic!");
4581
4582 SmallVector<Value *, 3> Args(CI->args());
4583
4584 // The original intrinsics incorrectly used a predicate based on the
4585 // smallest element type rather than the largest.
4586 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4587 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4588
4589 if (Args[1]->getType() != BadPredTy)
4590 llvm_unreachable("Unexpected predicate type!");
4591
4592 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4593 BadPredTy, Args[1]);
4594 Args[1] = Builder.CreateIntrinsic(
4595 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4596
4597 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4598 CI->getName());
4599 }
4600
4601 llvm_unreachable("Unhandled Intrinsic!");
4602}
4603
                                      IRBuilder<> &Builder) {
  // Upgrade legacy ARM MVE/CDE intrinsics whose 64-bit-element forms used a
  // v4i1 predicate; the current forms use v2i1. Returns the replacement call.
  if (Name == "mve.vctp64.old") {
    // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
    // correct type.
    Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
                                          CI->getArgOperand(0),
                                          /*FMFSource=*/nullptr, CI->getName());
    Value *C1 = Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_v2i,
        {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
    return Builder.CreateIntrinsic(
        Intrinsic::arm_mve_pred_i2v,
        {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
  } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
             Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
             Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
             Name ==
                 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
             Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
             Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
             Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
    // Recreate the call with the v2i1 overload. Tys is the list of overload
    // types for the new declaration; it depends on which intrinsic this is.
    std::vector<Type *> Tys;
    unsigned ID = CI->getIntrinsicID();
    Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
    switch (ID) {
    case Intrinsic::arm_mve_mull_int_predicated:
    case Intrinsic::arm_mve_vqdmull_predicated:
    case Intrinsic::arm_mve_vldr_gather_base_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_predicated:
    case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
             V2I1Ty};
      break;
    case Intrinsic::arm_mve_vldr_gather_offset_predicated:
      Tys = {CI->getType(), CI->getOperand(0)->getType(),
             CI->getOperand(1)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
      Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
             CI->getOperand(2)->getType(), V2I1Ty};
      break;
    case Intrinsic::arm_cde_vcx1q_predicated:
    case Intrinsic::arm_cde_vcx1qa_predicated:
    case Intrinsic::arm_cde_vcx2q_predicated:
    case Intrinsic::arm_cde_vcx2qa_predicated:
    case Intrinsic::arm_cde_vcx3q_predicated:
    case Intrinsic::arm_cde_vcx3qa_predicated:
      Tys = {CI->getOperand(1)->getType(), V2I1Ty};
      break;
    default:
      llvm_unreachable("Unhandled Intrinsic!");
    }

    // Re-cast every i1-vector operand from v4i1 to v2i1 via the MVE
    // predicate <-> i32 conversion intrinsics; other operands pass through.
    std::vector<Value *> Ops;
    for (Value *Op : CI->args()) {
      Type *Ty = Op->getType();
      if (Ty->getScalarSizeInBits() == 1) {
        Value *C1 = Builder.CreateIntrinsic(
            Intrinsic::arm_mve_pred_v2i,
            {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
        Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
      }
      Ops.push_back(Op);
    }

    return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
                                   CI->getName());
  }
  llvm_unreachable("Unknown function for ARM CallBase upgrade.");
}
4688
// These are expected to have the arguments:
// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
//
// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
//
                                          Function *F, IRBuilder<> &Builder) {
  // Legacy WMMA iu intrinsics missed the optional clamp operand. Append clamp=0
  // for compatibility.
  auto UpgradeLegacyWMMAIUIntrinsicCall =
      [](Function *F, CallBase *CI, IRBuilder<> &Builder,
         ArrayRef<Type *> OverloadTys) -> Value * {
    // Prepare arguments, append clamp=0 for compatibility
    SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
    Args.push_back(Builder.getFalse());

    // Insert the declaration for the right overload types
        F->getParent(), F->getIntrinsicID(), OverloadTys);

    // Copy operand bundles if any
    CI->getOperandBundlesAsDefs(Bundles);

    // Create the new call and copy calling properties from the old one so
    // the replacement is observationally identical (CC, attrs, debug loc,
    // metadata, tail-call kind).
    auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
    NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
    NewCall->setCallingConv(CI->getCallingConv());
    NewCall->setAttributes(CI->getAttributes());
    NewCall->setDebugLoc(CI->getDebugLoc());
    NewCall->copyMetadata(*CI);
    return NewCall;
  };

  if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
    assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
                                  "intrinsic should have 7 arguments");
    Type *T1 = CI->getArgOperand(4)->getType();
    Type *T2 = CI->getArgOperand(1)->getType();
    return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
  }
  if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
    assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
                                  "intrinsic should have 8 arguments");
    Type *T1 = CI->getArgOperand(4)->getType();
    Type *T2 = CI->getArgOperand(1)->getType();
    Type *T3 = CI->getArgOperand(3)->getType();
    Type *T4 = CI->getArgOperand(5)->getType();
    return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
  }

  switch (F->getIntrinsicID()) {
  default:
    break;
  case Intrinsic::amdgcn_wmma_f32_16x16x4_f32:
  case Intrinsic::amdgcn_wmma_f32_16x16x32_bf16:
  case Intrinsic::amdgcn_wmma_f32_16x16x32_f16:
  case Intrinsic::amdgcn_wmma_f16_16x16x32_f16:
  case Intrinsic::amdgcn_wmma_bf16_16x16x32_bf16:
  case Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16: {
    // Drop src0 and src1 modifiers.
    const Value *Op0 = CI->getArgOperand(0);
    const Value *Op2 = CI->getArgOperand(2);
    assert(Op0->getType()->isIntegerTy() && Op2->getType()->isIntegerTy());
    // NOTE(review): dyn_cast returns null if a modifier operand is not a
    // ConstantInt; the isZero() calls below would then dereference null.
    // Consider cast<> (asserting) or an explicit null check — confirm intent.
    const ConstantInt *ModA = dyn_cast<ConstantInt>(Op0);
    const ConstantInt *ModB = dyn_cast<ConstantInt>(Op2);
    if (!ModA->isZero() || !ModB->isZero())
      reportFatalUsageError(Name + " matrix A and B modifiers shall be zero");

    // Forward the remaining operands (from index 4 on) unchanged.
    for (int I = 4, E = CI->arg_size(); I < E; ++I)
      Args.push_back(CI->getArgOperand(I));

    SmallVector<Type *, 3> Overloads{F->getReturnType(), Args[0]->getType()};
    if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_bf16f32_16x16x32_bf16)
      Overloads.push_back(Args[3]->getType());
        F->getParent(), F->getIntrinsicID(), Overloads);

    CI->getOperandBundlesAsDefs(Bundles);

    // Rebuild the call, preserving all calling properties and the name.
    auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
    NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
    NewCall->setCallingConv(CI->getCallingConv());
    NewCall->setAttributes(CI->getAttributes());
    NewCall->setDebugLoc(CI->getDebugLoc());
    NewCall->copyMetadata(*CI);
    NewCall->takeName(CI);
    return NewCall;
  }
  }

  // Everything below upgrades the legacy atomic intrinsics to a plain
  // atomicrmw instruction. Map the intrinsic name prefix to the rmw opcode.
  AtomicRMWInst::BinOp RMWOp =
      .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
      .StartsWith("ds.fmin", AtomicRMWInst::FMin)
      .StartsWith("ds.fmax", AtomicRMWInst::FMax)
      .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
      .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
      .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
      .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
      .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
      .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
      .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
      .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
      .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
      .StartsWith("atomic.csub", AtomicRMWInst::USubSat);

  unsigned NumOperands = CI->getNumOperands();
  if (NumOperands < 3) // Malformed bitcode.
    return nullptr;

  Value *Ptr = CI->getArgOperand(0);
  PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy) // Malformed.
    return nullptr;

  Value *Val = CI->getArgOperand(1);
  if (Val->getType() != CI->getType()) // Malformed.
    return nullptr;

  ConstantInt *OrderArg = nullptr;
  bool IsVolatile = false;

  // These should have 5 arguments (plus the callee). A separate version of the
  // ds_fadd intrinsic was defined for bf16 which was missing arguments.
  if (NumOperands > 3)
    OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));

  // Ignore scope argument at 3

  if (NumOperands > 5) {
    ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
    // A missing or nonzero volatile operand is treated as volatile.
    IsVolatile = !VolatileArg || !VolatileArg->isZero();
  }

  if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
    Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());

  LLVMContext &Ctx = F->getContext();

  // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
  Type *RetTy = CI->getType();
  if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
    if (VT->getElementType()->isIntegerTy(16)) {
      VectorType *AsBF16 =
          VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
      Val = Builder.CreateBitCast(Val, AsBF16);
    }
  }

  // The scope argument never really worked correctly. Use agent as the most
  // conservative option which should still always produce the instruction.
  SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
  AtomicRMWInst *RMW =
      Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);

  unsigned AddrSpace = PtrTy->getAddressSpace();
  if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
    // Non-LDS atomics carry metadata describing the memory they may touch.
    MDNode *EmptyMD = MDNode::get(F->getContext(), {});
    RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
    if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
      RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
  }

  if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
    // Flat atomics are marked as not addressing private memory.
    MDBuilder MDB(F->getContext());
    MDNode *RangeNotPrivate =
    RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
  }

  if (IsVolatile)
    RMW->setVolatile(true);

  // Cast back in case the v2bf16 handling above changed the value type.
  return Builder.CreateBitCast(RMW, RetTy);
}
4871
4872/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4873/// plain MDNode, as it's the verifier's job to check these are the correct
4874/// types later.
4875static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4876 if (Op < CI->arg_size()) {
4877 if (MetadataAsValue *MAV =
4879 Metadata *MD = MAV->getMetadata();
4880 return dyn_cast_if_present<MDNode>(MD);
4881 }
4882 }
4883 return nullptr;
4884}
4885
4886/// Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
4887static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4888 if (Op < CI->arg_size())
4890 return MAV->getMetadata();
4891 return nullptr;
4892}
4893
4895 // The MDNode attached to this instruction might not be the correct type,
4896 // as the verifier has not yet be run. Fetch it as a bare MDNode.
4897 return I->getDebugLoc().getAsMDNode();
4898}
4899
/// Convert debug intrinsic calls to non-instruction debug records.
/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
/// \p CI - The debug intrinsic call.
  DbgRecord *DR = nullptr;
  if (Name == "label") {
    // dbg.label: just the label operand plus the call's debug location.
                         CI->getDebugLoc());
  } else if (Name == "assign") {
    // dbg.assign operands: value, variable, expression, assign-ID,
    // address, address-expression, in that order.
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
        unwrapMAVMetadataOp(CI, 4),
        /*The address is a Value ref, it will be stored as a Metadata */
        unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
  } else if (Name == "declare") {
    // dbg.declare has no assign-ID/address/address-expression operands.
        unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
        getDebugLocSafe(CI));
  } else if (Name == "addr") {
    // Upgrade dbg.addr to dbg.value with DW_OP_deref.
    MDNode *ExprNode = unwrapMAVOp(CI, 2);
    // Don't try to add something to the expression if it's not an expression.
    // Instead, allow the verifier to fail later.
    if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
      ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
    }
        unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
        getDebugLocSafe(CI));
  } else if (Name == "value") {
    // An old version of dbg.value had an extra offset argument.
    unsigned VarOp = 1;
    unsigned ExprOp = 2;
    if (CI->arg_size() == 4) {
      // Nonzero offset dbg.values get dropped without a replacement.
      if (!Offset || !Offset->isNullValue())
        return;
      // With the legacy signature the variable/expression shift right by one.
      VarOp = 2;
      ExprOp = 3;
    }
        unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
        nullptr, getDebugLocSafe(CI));
  }
  assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
  // Splice the new record in immediately before the intrinsic call it
  // replaces; the caller is responsible for erasing the call.
  CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
}
4952
4955  if (!Offset)
4956    reportFatalUsageError("Invalid llvm.vector.splice offset argument");
  // A non-negative offset maps to vector.splice.left, a negative one to
  // vector.splice.right; either way the new intrinsic takes the magnitude.
  // NOTE(review): std::abs(INT64_MIN) is UB — presumably the offset is a small
  // immediate constant in practice; confirm.
4957  int64_t OffsetVal = Offset->getSExtValue();
4958  return Builder.CreateIntrinsic(OffsetVal >= 0
4959                                     ? Intrinsic::vector_splice_left
4960                                     : Intrinsic::vector_splice_right,
4961                                 CI->getType(),
4962                                 {CI->getArgOperand(0), CI->getArgOperand(1),
4963                                  Builder.getInt32(std::abs(OffsetVal))});
4964}
4965
4967                                          Function *F, IRBuilder<> &Builder) {
  // \p Name arrives with the "llvm.convert." prefix already stripped by the
  // caller. "to.fp16" becomes an fptrunc-to-half plus a bitcast to the old
  // integer return type; "from.fp16" is the inverse. Returns nullptr for any
  // other name, leaving the call untouched.
4968  if (Name.starts_with("to.fp16")) {
4969    Value *Cast =
4970        Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
4971    return Builder.CreateBitCast(Cast, CI->getType());
4972  }
4973
4974  if (Name.starts_with("from.fp16")) {
4975    Value *Cast =
4976        Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
4977    return Builder.CreateFPExt(Cast, CI->getType());
4978  }
4979
4980  return nullptr;
4981}
4982
4983/// Upgrade a call to an old intrinsic. All argument and return casting must be
4984/// provided to seamlessly integrate with existing context.
/// Two modes: when \p NewFn is null the call is rewritten in place via the
/// per-target upgrade helpers (or erased outright); otherwise the call is
/// remapped to \p NewFn, adjusting arguments/results per the switch below.
4986  // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4987  // checks the callee's function type matches. It's likely we need to handle
4988  // type changes here.
4990  if (!F)
4991    return;
4992
4993  LLVMContext &C = CI->getContext();
4994  IRBuilder<> Builder(C);
  // Carry the original call's fast-math flags onto any replacement code.
4995  if (isa<FPMathOperator>(CI))
4996    Builder.setFastMathFlags(CI->getFastMathFlags());
4997  Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4998
4999  if (!NewFn) {
5000    // Get the Function's name.
5001    StringRef Name = F->getName();
5002    if (!Name.consume_front("llvm."))
5003      llvm_unreachable("intrinsic doesn't start with 'llvm.'");
5004
5005    bool IsX86 = Name.consume_front("x86.");
5006    bool IsNVVM = Name.consume_front("nvvm.");
5007    bool IsAArch64 = Name.consume_front("aarch64.");
5008    bool IsARM = Name.consume_front("arm.");
5009    bool IsAMDGCN = Name.consume_front("amdgcn.");
5010    bool IsDbg = Name.consume_front("dbg.");
    // Old-style splice intrinsics are the ones without a ".left"/".right"
    // suffix; they are split into the two new direction-specific intrinsics.
5011    bool IsOldSplice =
5012        (Name.consume_front("experimental.vector.splice") ||
5013         Name.consume_front("vector.splice")) &&
5014        !(Name.starts_with(".left") || Name.starts_with(".right"));
5015    Value *Rep = nullptr;
5016
    // Rep == nullptr after dispatch means the call simply gets erased with no
    // replacement value (e.g. stackprotectorcheck).
5017    if (!IsX86 && Name == "stackprotectorcheck") {
5018      Rep = nullptr;
5019    } else if (IsNVVM) {
5020      Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
5021    } else if (IsX86) {
5022      Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
5023    } else if (IsAArch64) {
5024      Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
5025    } else if (IsARM) {
5026      Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
5027    } else if (IsAMDGCN) {
5028      Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
5029    } else if (IsDbg) {
5031    } else if (IsOldSplice) {
5032      Rep = upgradeVectorSplice(CI, Builder);
5033    } else if (Name.consume_front("convert.")) {
5034      Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
5035    } else {
5036      llvm_unreachable("Unknown function for CallBase upgrade.");
5037    }
5038
5039    if (Rep)
5040      CI->replaceAllUsesWith(Rep);
5041    CI->eraseFromParent();
5042    return;
5043  }
5044
  // Fallback for NewFn-based upgrades with no special handling below: either
  // a pure rename (same function type), a named-to-literal struct return-type
  // change, or — as a last resort — a bitcast of the callee that the verifier
  // will reject with a better diagnostic than a crash here.
5045  const auto &DefaultCase = [&]() -> void {
5046    if (F == NewFn)
5047      return;
5048
5049    if (CI->getFunctionType() == NewFn->getFunctionType()) {
5050      // Handle generic mangling change.
5051      assert(
5052          (CI->getCalledFunction()->getName() != NewFn->getName()) &&
5053          "Unknown function for CallBase upgrade and isn't just a name change");
5054      CI->setCalledFunction(NewFn);
5055      return;
5056    }
5057
5058    // This must be an upgrade from a named to a literal struct.
5059    if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
5060      assert(OldST != NewFn->getReturnType() &&
5061             "Return type must have changed");
5062      assert(OldST->getNumElements() ==
5063                 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
5064             "Must have same number of elements");
5065
5066      SmallVector<Value *> Args(CI->args());
5067      CallInst *NewCI = Builder.CreateCall(NewFn, Args);
5068      NewCI->setAttributes(CI->getAttributes());
      // Repack the literal-struct result into the old named struct type
      // element by element.
5069      Value *Res = PoisonValue::get(OldST);
5070      for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
5071        Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
5072        Res = Builder.CreateInsertValue(Res, Elem, Idx);
5073      }
5074      CI->replaceAllUsesWith(Res);
5075      CI->eraseFromParent();
5076      return;
5077    }
5078
5079    // We're probably about to produce something invalid. Let the verifier catch
5080    // it instead of dying here.
5081    CI->setCalledOperand(
5083    return;
5084  };
5085  CallInst *NewCall = nullptr;
5086  switch (NewFn->getIntrinsicID()) {
5087  default: {
5088    DefaultCase();
5089    return;
5090  }
5091  case Intrinsic::arm_neon_vst1:
5092  case Intrinsic::arm_neon_vst2:
5093  case Intrinsic::arm_neon_vst3:
5094  case Intrinsic::arm_neon_vst4:
5095  case Intrinsic::arm_neon_vst2lane:
5096  case Intrinsic::arm_neon_vst3lane:
5097  case Intrinsic::arm_neon_vst4lane: {
    // Arguments are unchanged; only the callee needs replacing.
5098    SmallVector<Value *, 4> Args(CI->args());
5099    NewCall = Builder.CreateCall(NewFn, Args);
5100    break;
5101  }
5102  case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5103  case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5104  case Intrinsic::aarch64_sve_bfdot_lane_v2: {
    // The lane-index operand is now i32; rebuild the constant with the new
    // type, preserving its value.
5105    LLVMContext &Ctx = F->getParent()->getContext();
5106    SmallVector<Value *, 4> Args(CI->args());
5107    Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
5108                               cast<ConstantInt>(Args[3])->getZExtValue());
5109    NewCall = Builder.CreateCall(NewFn, Args);
5110    break;
5111  }
5112  case Intrinsic::aarch64_sve_ld3_sret:
5113  case Intrinsic::aarch64_sve_ld4_sret:
5114  case Intrinsic::aarch64_sve_ld2_sret: {
5115    // Is this a trivial remangle of the name to support ptr address spaces?
5116    if (isa<StructType>(F->getReturnType())) {
5117      DefaultCase();
5118      return;
5119    }
5120
    // The old form returned a single wide scalable vector; the new intrinsic
    // returns a struct of N parts. Reassemble the wide vector by inserting
    // each extracted part at its element offset.
5121    StringRef Name = F->getName();
5122    Name = Name.substr(5);
5123    unsigned N = StringSwitch<unsigned>(Name)
5124                     .StartsWith("aarch64.sve.ld2", 2)
5125                     .StartsWith("aarch64.sve.ld3", 3)
5126                     .StartsWith("aarch64.sve.ld4", 4)
5127                     .Default(0);
5128    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5129    unsigned MinElts = RetTy->getMinNumElements() / N;
5130    SmallVector<Value *, 2> Args(CI->args());
5131    Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5132    Value *Ret = llvm::PoisonValue::get(RetTy);
5133    for (unsigned I = 0; I < N; I++) {
5134      Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5135      Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5136    }
    // CreateInsertVector emits an intrinsic call, so the final insert can
    // serve as NewCall for the name/RAUW handling at the bottom.
5137    NewCall = dyn_cast<CallInst>(Ret);
5138    break;
5139  }
5140
5141  case Intrinsic::coro_end: {
    // coro.end gained a trailing token operand; supply 'none' when upgrading.
5142    SmallVector<Value *, 3> Args(CI->args());
5143    Args.push_back(ConstantTokenNone::get(CI->getContext()));
5144    NewCall = Builder.CreateCall(NewFn, Args);
5145    break;
5146  }
5147
5148  case Intrinsic::vector_extract: {
5149    StringRef Name = F->getName();
5150    Name = Name.substr(5); // Strip llvm
5151    if (!Name.starts_with("aarch64.sve.tuple.get")) {
5152      DefaultCase();
5153      return;
5154    }
    // Tuple index I becomes an element offset of I * MinElts for
    // llvm.vector.extract.
5155    auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5156    unsigned MinElts = RetTy->getMinNumElements();
5157    unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5158    Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5159    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5160    break;
5161  }
5162
5163  case Intrinsic::vector_insert: {
5164    StringRef Name = F->getName();
5165    Name = Name.substr(5);
5166    if (!Name.starts_with("aarch64.sve.tuple")) {
5167      DefaultCase();
5168      return;
5169    }
5170    if (Name.starts_with("aarch64.sve.tuple.set")) {
      // Tuple slot index scales to an element offset, as for tuple.get above.
5171      unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5172      auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5173      Value *NewIdx =
5174          ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5175      NewCall = Builder.CreateCall(
5176          NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5177      break;
5178    }
5179    if (Name.starts_with("aarch64.sve.tuple.create")) {
      // Build the wide tuple vector from the N argument vectors, one insert
      // per part.
5180      unsigned N = StringSwitch<unsigned>(Name)
5181                       .StartsWith("aarch64.sve.tuple.create2", 2)
5182                       .StartsWith("aarch64.sve.tuple.create3", 3)
5183                       .StartsWith("aarch64.sve.tuple.create4", 4)
5184                       .Default(0);
5185      assert(N > 1 && "Create is expected to be between 2-4");
5186      auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5187      Value *Ret = llvm::PoisonValue::get(RetTy);
5188      unsigned MinElts = RetTy->getMinNumElements() / N;
5189      for (unsigned I = 0; I < N; I++) {
5190        Value *V = CI->getArgOperand(I);
5191        Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5192      }
5193      NewCall = dyn_cast<CallInst>(Ret);
5194    }
5195    break;
5196  }
5197
5198  case Intrinsic::arm_neon_bfdot:
5199  case Intrinsic::arm_neon_bfmmla:
5200  case Intrinsic::arm_neon_bfmlalb:
5201  case Intrinsic::arm_neon_bfmlalt:
5202  case Intrinsic::aarch64_neon_bfdot:
5203  case Intrinsic::aarch64_neon_bfmmla:
5204  case Intrinsic::aarch64_neon_bfmlalb:
5205  case Intrinsic::aarch64_neon_bfmlalt: {
    // The two vector operands changed element type to bfloat; bitcast them,
    // keeping the accumulator operand as-is.
5207    assert(CI->arg_size() == 3 &&
5208           "Mismatch between function args and call args");
5209    size_t OperandWidth =
5211    assert((OperandWidth == 64 || OperandWidth == 128) &&
5212           "Unexpected operand width");
5213    Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5214    auto Iter = CI->args().begin();
5215    Args.push_back(*Iter++);
5216    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5217    Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5218    NewCall = Builder.CreateCall(NewFn, Args);
5219    break;
5220  }
5221
5222  case Intrinsic::bitreverse:
5223    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5224    break;
5225
5226  case Intrinsic::ctlz:
5227  case Intrinsic::cttz: {
5228    if (CI->arg_size() != 1) {
5229      DefaultCase();
5230      return;
5231    }
5232
    // Old single-argument form: add the is-zero-poison flag, defaulting to
    // false (zero input is defined).
5233    NewCall =
5234        Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5235    break;
5236  }
5237
5238  case Intrinsic::objectsize: {
    // Fill in defaults for the later-added null-is-unknown-size and dynamic
    // flag arguments when the old call did not carry them.
5239    Value *NullIsUnknownSize =
5240        CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5241    Value *Dynamic =
5242        CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5243    NewCall = Builder.CreateCall(
5244        NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5245    break;
5246  }
5247
5248  case Intrinsic::ctpop:
5249    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5250    break;
5251  case Intrinsic::dbg_value: {
5252    StringRef Name = F->getName();
5253    Name = Name.substr(5); // Strip llvm.
5254    // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5255    if (Name.starts_with("dbg.addr")) {
5257          cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5258      Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5259      NewCall =
5260          Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5261                                     MetadataAsValue::get(C, Expr)});
5262      break;
5263    }
5264
5265    // Upgrade from the old version that had an extra offset argument.
5266    assert(CI->arg_size() == 4);
5267    // Drop nonzero offsets instead of attempting to upgrade them.
5269    if (Offset->isNullValue()) {
5270      NewCall = Builder.CreateCall(
5271          NewFn,
5272          {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5273      break;
5274    }
5275    CI->eraseFromParent();
5276    return;
5277  }
5278
5279  case Intrinsic::ptr_annotation:
5280    // Upgrade from versions that lacked the annotation attribute argument.
5281    if (CI->arg_size() != 4) {
5282      DefaultCase();
5283      return;
5284    }
5285
5286    // Create a new call with an added null annotation attribute argument.
5287    NewCall = Builder.CreateCall(
5288        NewFn,
5289        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5290         CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5291    NewCall->takeName(CI);
5292    CI->replaceAllUsesWith(NewCall);
5293    CI->eraseFromParent();
5294    return;
5295
5296  case Intrinsic::var_annotation:
5297    // Upgrade from versions that lacked the annotation attribute argument.
5298    if (CI->arg_size() != 4) {
5299      DefaultCase();
5300      return;
5301    }
5302    // Create a new call with an added null annotation attribute argument.
5303    NewCall = Builder.CreateCall(
5304        NewFn,
5305        {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5306         CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5307    NewCall->takeName(CI);
5308    CI->replaceAllUsesWith(NewCall);
5309    CI->eraseFromParent();
5310    return;
5311
5312  case Intrinsic::riscv_aes32dsi:
5313  case Intrinsic::riscv_aes32dsmi:
5314  case Intrinsic::riscv_aes32esi:
5315  case Intrinsic::riscv_aes32esmi:
5316  case Intrinsic::riscv_sm4ks:
5317  case Intrinsic::riscv_sm4ed: {
5318    // The last argument to these intrinsics used to be i8 and changed to i32.
5319    // The type overload for sm4ks and sm4ed was removed.
5320    Value *Arg2 = CI->getArgOperand(2);
5321    if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5322      return;
5323
    // The old i64 overload is expressed via the i32 intrinsic: truncate the
    // data operands, then sign-extend the result back below.
5324    Value *Arg0 = CI->getArgOperand(0);
5325    Value *Arg1 = CI->getArgOperand(1);
5326    if (CI->getType()->isIntegerTy(64)) {
5327      Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5328      Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5329    }
5330
5331    Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5332                            cast<ConstantInt>(Arg2)->getZExtValue());
5333
5334    NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5335    Value *Res = NewCall;
5336    if (Res->getType() != CI->getType())
5337      Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5338    NewCall->takeName(CI);
5339    CI->replaceAllUsesWith(Res);
5340    CI->eraseFromParent();
5341    return;
5342  }
5343  case Intrinsic::nvvm_mapa_shared_cluster: {
5344    // Create a new call with the correct address space.
5345    NewCall =
5346        Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5347    Value *Res = NewCall;
    // Cast back to the shared address space the old intrinsic returned.
5348    Res = Builder.CreateAddrSpaceCast(
5349        Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5350    NewCall->takeName(CI);
5351    CI->replaceAllUsesWith(Res);
5352    CI->eraseFromParent();
5353    return;
5354  }
5355  case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5356  case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5357    // Create a new call with the correct address space.
5358    SmallVector<Value *, 4> Args(CI->args());
5359    Args[0] = Builder.CreateAddrSpaceCast(
5360        Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5361
5362    NewCall = Builder.CreateCall(NewFn, Args);
5363    NewCall->takeName(CI);
5364    CI->replaceAllUsesWith(NewCall);
5365    CI->eraseFromParent();
5366    return;
5367  }
5368  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5369  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5370  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5371  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5372  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5373  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5374  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5375  case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5376    SmallVector<Value *, 16> Args(CI->args());
5377
5378    // Create AddrSpaceCast to shared_cluster if needed.
5379    // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5380    unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5382      Args[0] = Builder.CreateAddrSpaceCast(
5383          Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5384
5385    // Attach the flag argument for cta_group, with a
5386    // default value of 0. This handles case (2) in
5387    // shouldUpgradeNVPTXTMAG2SIntrinsics().
5388    size_t NumArgs = CI->arg_size();
5389    Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5390    if (!FlagArg->getType()->isIntegerTy(1))
5391      Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5392
5393    NewCall = Builder.CreateCall(NewFn, Args);
5394    NewCall->takeName(CI);
5395    CI->replaceAllUsesWith(NewCall);
5396    CI->eraseFromParent();
5397    return;
5398  }
5399  case Intrinsic::riscv_sha256sig0:
5400  case Intrinsic::riscv_sha256sig1:
5401  case Intrinsic::riscv_sha256sum0:
5402  case Intrinsic::riscv_sha256sum1:
5403  case Intrinsic::riscv_sm3p0:
5404  case Intrinsic::riscv_sm3p1: {
    // These intrinsics lost their i64 type overload: the i64 form is rewritten
    // in terms of the i32 intrinsic (truncate the input, sign-extend the
    // result). The i32 form needs no change, so return without touching it.
5407    if (!CI->getType()->isIntegerTy(64))
5408      return;
5409
5410    Value *Arg =
5411        Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5412
5413    NewCall = Builder.CreateCall(NewFn, Arg);
5414    Value *Res =
5415        Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5416    NewCall->takeName(CI);
5417    CI->replaceAllUsesWith(Res);
5418    CI->eraseFromParent();
5419    return;
5420  }
5421
5422  case Intrinsic::x86_xop_vfrcz_ss:
5423  case Intrinsic::x86_xop_vfrcz_sd:
    // The old form had a redundant first operand; only operand 1 remains.
5424    NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5425    break;
5426
5427  case Intrinsic::x86_xop_vpermil2pd:
5428  case Intrinsic::x86_xop_vpermil2ps:
5429  case Intrinsic::x86_xop_vpermil2pd_256:
5430  case Intrinsic::x86_xop_vpermil2ps_256: {
    // The selector operand changed from an FP vector to the same-width
    // integer vector; bitcast it across.
5431    SmallVector<Value *, 4> Args(CI->args());
5432    VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5433    VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5434    Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5435    NewCall = Builder.CreateCall(NewFn, Args);
5436    break;
5437  }
5438
5439  case Intrinsic::x86_sse41_ptestc:
5440  case Intrinsic::x86_sse41_ptestz:
5441  case Intrinsic::x86_sse41_ptestnzc: {
5442    // The arguments for these intrinsics used to be v4f32, and changed
5443    // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5444    // So, the only thing required is a bitcast for both arguments.
5445    // First, check the arguments have the old type.
5446    Value *Arg0 = CI->getArgOperand(0);
5447    if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5448      return;
5449
5450    // Old intrinsic, add bitcasts
5451    Value *Arg1 = CI->getArgOperand(1);
5452
5453    auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5454
5455    Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5456    Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5457
5458    NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5459    break;
5460  }
5461
5462  case Intrinsic::x86_rdtscp: {
5463    // This used to take one argument. If we have no arguments, it is already
5464    // upgraded.
    // NOTE(review): getNumOperands() on a call includes the callee operand,
    // so it is never 0 — presumably arg_size() was intended; confirm.
5465    if (CI->getNumOperands() == 0)
5466      return;
5467
5468    NewCall = Builder.CreateCall(NewFn);
5469    // Extract the second result and store it.
5470    Value *Data = Builder.CreateExtractValue(NewCall, 1);
5471    Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5472    // Replace the original call result with the first result of the new call.
5473    Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5474
5475    NewCall->takeName(CI);
5476    CI->replaceAllUsesWith(TSC);
5477    CI->eraseFromParent();
5478    return;
5479  }
5480
5481  case Intrinsic::x86_sse41_insertps:
5482  case Intrinsic::x86_sse41_dppd:
5483  case Intrinsic::x86_sse41_dpps:
5484  case Intrinsic::x86_sse41_mpsadbw:
5485  case Intrinsic::x86_avx_dp_ps_256:
5486  case Intrinsic::x86_avx2_mpsadbw: {
5487    // Need to truncate the last argument from i32 to i8 -- this argument models
5488    // an inherently 8-bit immediate operand to these x86 instructions.
5489    SmallVector<Value *, 4> Args(CI->args());
5490
5491    // Replace the last argument with a trunc.
5492    Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5493    NewCall = Builder.CreateCall(NewFn, Args);
5494    break;
5495  }
5496
5497  case Intrinsic::x86_avx512_mask_cmp_pd_128:
5498  case Intrinsic::x86_avx512_mask_cmp_pd_256:
5499  case Intrinsic::x86_avx512_mask_cmp_pd_512:
5500  case Intrinsic::x86_avx512_mask_cmp_ps_128:
5501  case Intrinsic::x86_avx512_mask_cmp_ps_256:
5502  case Intrinsic::x86_avx512_mask_cmp_ps_512: {
    // The integer mask argument becomes an <N x i1> vector, and the i1-vector
    // result is converted back for the old callers.
5503    SmallVector<Value *, 4> Args(CI->args());
5504    unsigned NumElts =
5505        cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5506    Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5507
5508    NewCall = Builder.CreateCall(NewFn, Args);
5509    Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5510
5511    NewCall->takeName(CI);
5512    CI->replaceAllUsesWith(Res);
5513    CI->eraseFromParent();
5514    return;
5515  }
5516
5517  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5518  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5519  case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5520  case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5521  case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5522  case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
    // Result element type changed from i16 to bfloat; bitcast the result (and
    // the passthru operand for the masked form) between the two views.
5523    SmallVector<Value *, 4> Args(CI->args());
5524    unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5525    if (NewFn->getIntrinsicID() ==
5526        Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5527      Args[1] = Builder.CreateBitCast(
5528          Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5529
5530    NewCall = Builder.CreateCall(NewFn, Args);
5531    Value *Res = Builder.CreateBitCast(
5532        NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5533
5534    NewCall->takeName(CI);
5535    CI->replaceAllUsesWith(Res);
5536    CI->eraseFromParent();
5537    return;
5538  }
5539  case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5540  case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5541  case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
    // Input operands changed from i32 vectors to twice-as-wide bfloat
    // vectors; bitcast them to the new element type.
5542    SmallVector<Value *, 4> Args(CI->args());
5543    unsigned NumElts =
5544        cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5545    Args[1] = Builder.CreateBitCast(
5546        Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5547    Args[2] = Builder.CreateBitCast(
5548        Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5549
5550    NewCall = Builder.CreateCall(NewFn, Args);
5551    break;
5552  }
5553
5554  case Intrinsic::thread_pointer: {
5555    NewCall = Builder.CreateCall(NewFn, {});
5556    break;
5557  }
5558
5559  case Intrinsic::memcpy:
5560  case Intrinsic::memmove:
5561  case Intrinsic::memset: {
5562    // We have to make sure that the call signature is what we're expecting.
5563    // We only want to change the old signatures by removing the alignment arg:
5564    //  @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5565    //    -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5566    //  @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5567    //    -> @llvm.memset...(i8*, i8, i[32|64], i1)
5568    // Note: i8*'s in the above can be any pointer type
5569    if (CI->arg_size() != 5) {
5570      DefaultCase();
5571      return;
5572    }
5573    // Remove alignment argument (3), and add alignment attributes to the
5574    // dest/src pointers.
5575    Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5576                      CI->getArgOperand(2), CI->getArgOperand(4)};
5577    NewCall = Builder.CreateCall(NewFn, Args);
5578    AttributeList OldAttrs = CI->getAttributes();
5579    AttributeList NewAttrs = AttributeList::get(
5580        C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5581        {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5582         OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5583    NewCall->setAttributes(NewAttrs);
5584    auto *MemCI = cast<MemIntrinsic>(NewCall);
5585    // All mem intrinsics support dest alignment.
5587    MemCI->setDestAlignment(Align->getMaybeAlignValue());
5588    // Memcpy/Memmove also support source alignment.
5589    if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5590      MTI->setSourceAlignment(Align->getMaybeAlignValue());
5591    break;
5592  }
5593
5594  case Intrinsic::masked_load:
5595  case Intrinsic::masked_gather:
5596  case Intrinsic::masked_store:
5597  case Intrinsic::masked_scatter: {
    // The explicit alignment argument became an attribute on the pointer;
    // rebuild via the IRBuilder helpers, which set it correctly.
5598    if (CI->arg_size() != 4) {
5599      DefaultCase();
5600      return;
5601    }
5602
5603    auto GetMaybeAlign = [](Value *Op) {
5604      if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5605        uint64_t Val = CI->getZExtValue();
5606        if (Val == 0)
5607          return MaybeAlign();
5608        if (isPowerOf2_64(Val))
5609          return MaybeAlign(Val);
5610      }
5611      reportFatalUsageError("Invalid alignment argument");
5612    };
5613    auto GetAlign = [&](Value *Op) {
5614      MaybeAlign Align = GetMaybeAlign(Op);
5615      if (Align)
5616        return *Align;
5617      reportFatalUsageError("Invalid zero alignment argument");
5618    };
5619
5620    const DataLayout &DL = CI->getDataLayout();
5621    switch (NewFn->getIntrinsicID()) {
5622    case Intrinsic::masked_load:
5623      NewCall = Builder.CreateMaskedLoad(
5624          CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5625          CI->getArgOperand(2), CI->getArgOperand(3));
5626      break;
5627    case Intrinsic::masked_gather:
5628      NewCall = Builder.CreateMaskedGather(
5629          CI->getType(), CI->getArgOperand(0),
5630          DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5631                                        CI->getType()->getScalarType()),
5632          CI->getArgOperand(2), CI->getArgOperand(3));
5633      break;
5634    case Intrinsic::masked_store:
5635      NewCall = Builder.CreateMaskedStore(
5636          CI->getArgOperand(0), CI->getArgOperand(1),
5637          GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5638      break;
5639    case Intrinsic::masked_scatter:
5640      NewCall = Builder.CreateMaskedScatter(
5641          CI->getArgOperand(0), CI->getArgOperand(1),
5642          DL.getValueOrABITypeAlignment(
5643              GetMaybeAlign(CI->getArgOperand(2)),
5644              CI->getArgOperand(0)->getType()->getScalarType()),
5645          CI->getArgOperand(3));
5646      break;
5647    default:
5648      llvm_unreachable("Unexpected intrinsic ID");
5649    }
5650    // Previous metadata is still valid.
5651    NewCall->copyMetadata(*CI);
5652    NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5653    break;
5654  }
5655
5656  case Intrinsic::lifetime_start:
5657  case Intrinsic::lifetime_end: {
5658    if (CI->arg_size() != 2) {
5659      DefaultCase();
5660      return;
5661    }
5662
5663    Value *Ptr = CI->getArgOperand(1);
5664    // Try to strip pointer casts, such that the lifetime works on an alloca.
5665    Ptr = Ptr->stripPointerCasts();
5666    if (isa<AllocaInst>(Ptr)) {
5667      // Don't use NewFn, as we might have looked through an addrspacecast.
5668      if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5669        NewCall = Builder.CreateLifetimeStart(Ptr);
5670      else
5671        NewCall = Builder.CreateLifetimeEnd(Ptr);
5672      break;
5673    }
5674
5675    // Otherwise remove the lifetime marker.
5676    CI->eraseFromParent();
5677    return;
5678  }
5679
5680  case Intrinsic::x86_avx512_vpdpbusd_128:
5681  case Intrinsic::x86_avx512_vpdpbusd_256:
5682  case Intrinsic::x86_avx512_vpdpbusd_512:
5683  case Intrinsic::x86_avx512_vpdpbusds_128:
5684  case Intrinsic::x86_avx512_vpdpbusds_256:
5685  case Intrinsic::x86_avx512_vpdpbusds_512:
5686  case Intrinsic::x86_avx2_vpdpbssd_128:
5687  case Intrinsic::x86_avx2_vpdpbssd_256:
5688  case Intrinsic::x86_avx10_vpdpbssd_512:
5689  case Intrinsic::x86_avx2_vpdpbssds_128:
5690  case Intrinsic::x86_avx2_vpdpbssds_256:
5691  case Intrinsic::x86_avx10_vpdpbssds_512:
5692  case Intrinsic::x86_avx2_vpdpbsud_128:
5693  case Intrinsic::x86_avx2_vpdpbsud_256:
5694  case Intrinsic::x86_avx10_vpdpbsud_512:
5695  case Intrinsic::x86_avx2_vpdpbsuds_128:
5696  case Intrinsic::x86_avx2_vpdpbsuds_256:
5697  case Intrinsic::x86_avx10_vpdpbsuds_512:
5698  case Intrinsic::x86_avx2_vpdpbuud_128:
5699  case Intrinsic::x86_avx2_vpdpbuud_256:
5700  case Intrinsic::x86_avx10_vpdpbuud_512:
5701  case Intrinsic::x86_avx2_vpdpbuuds_128:
5702  case Intrinsic::x86_avx2_vpdpbuuds_256:
5703  case Intrinsic::x86_avx10_vpdpbuuds_512: {
    // Multiplicand operands are now i8 vectors (one lane per byte of the
    // result type); bitcast operands 1 and 2 accordingly.
5704    unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5705    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5706                     CI->getArgOperand(2)};
5707    Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5708    Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5709    Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5710
5711    NewCall = Builder.CreateCall(NewFn, Args);
5712    break;
5713  }
5714  case Intrinsic::x86_avx512_vpdpwssd_128:
5715  case Intrinsic::x86_avx512_vpdpwssd_256:
5716  case Intrinsic::x86_avx512_vpdpwssd_512:
5717  case Intrinsic::x86_avx512_vpdpwssds_128:
5718  case Intrinsic::x86_avx512_vpdpwssds_256:
5719  case Intrinsic::x86_avx512_vpdpwssds_512:
5720  case Intrinsic::x86_avx2_vpdpwsud_128:
5721  case Intrinsic::x86_avx2_vpdpwsud_256:
5722  case Intrinsic::x86_avx10_vpdpwsud_512:
5723  case Intrinsic::x86_avx2_vpdpwsuds_128:
5724  case Intrinsic::x86_avx2_vpdpwsuds_256:
5725  case Intrinsic::x86_avx10_vpdpwsuds_512:
5726  case Intrinsic::x86_avx2_vpdpwusd_128:
5727  case Intrinsic::x86_avx2_vpdpwusd_256:
5728  case Intrinsic::x86_avx10_vpdpwusd_512:
5729  case Intrinsic::x86_avx2_vpdpwusds_128:
5730  case Intrinsic::x86_avx2_vpdpwusds_256:
5731  case Intrinsic::x86_avx10_vpdpwusds_512:
5732  case Intrinsic::x86_avx2_vpdpwuud_128:
5733  case Intrinsic::x86_avx2_vpdpwuud_256:
5734  case Intrinsic::x86_avx10_vpdpwuud_512:
5735  case Intrinsic::x86_avx2_vpdpwuuds_128:
5736  case Intrinsic::x86_avx2_vpdpwuuds_256:
5737  case Intrinsic::x86_avx10_vpdpwuuds_512:
    // Same as above but the multiplicands are i16 vectors.
5738    unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5739    Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5740                     CI->getArgOperand(2)};
5741    Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5742    Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5743    Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5744
5745    NewCall = Builder.CreateCall(NewFn, Args);
5746    break;
5747  }
  // Common tail for every case that fell through with a NewCall: transfer the
  // name, redirect users, and delete the old call.
5748  assert(NewCall && "Should have either set this variable or returned through "
5749                    "the default case");
5750  NewCall->takeName(CI);
5751  CI->replaceAllUsesWith(NewCall);
5752  CI->eraseFromParent();
5753}
5754
5756  assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5757
5758  // Check if this function should be upgraded and get the replacement function
5759  // if there is one.
5760  Function *NewFn;
5761  if (UpgradeIntrinsicFunction(F, NewFn)) {
5762    // Replace all users of the old function with the new function or new
5763    // instructions. make_early_inc_range is required because
5764    // UpgradeIntrinsicCall may delete the call being visited.
5764    for (User *U : make_early_inc_range(F->users()))
5765      if (CallBase *CB = dyn_cast<CallBase>(U))
5766        UpgradeIntrinsicCall(CB, NewFn);
5767
5768    // Remove old function, no longer used, from the module.
5769    if (F != NewFn)
5770      F->eraseFromParent();
5771  }
5772}
5773
5775  const unsigned NumOperands = MD.getNumOperands();
5776  if (NumOperands == 0)
5777    return &MD; // Invalid, punt to a verifier error.
5778
5779  // Check if the tag uses struct-path aware TBAA format.
  // In that format the first operand is itself an MDNode (the base type node)
  // rather than the old scalar format's leading MDString.
5780  if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5781    return &MD;
5782
5783  auto &Context = MD.getContext();
  // Upgrade old scalar TBAA: wrap the name/parent pair into a scalar type
  // node and re-use it as both base and access type of a struct-path tag.
5784  if (NumOperands == 3) {
5785    Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5786    MDNode *ScalarType = MDNode::get(Context, Elts);
5787    // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5788    Metadata *Elts2[] = {ScalarType, ScalarType,
5791                         MD.getOperand(2)};
5792    return MDNode::get(Context, Elts2);
5793  }
5794  // Create a MDNode <MD, MD, offset 0>
5796                                              Type::getInt64Ty(Context)))};
5797  return MDNode::get(Context, Elts);
5799
5801                                  Instruction *&Temp) {
  // Only bitcasts need upgrading here; anything else is left to the caller.
5802  if (Opc != Instruction::BitCast)
5803    return nullptr;
5804
5805  Temp = nullptr;
5806  Type *SrcTy = V->getType();
  // A bitcast between pointers of different address spaces is no longer
  // valid; lower it to ptrtoint + inttoptr. \p Temp receives the intermediate
  // ptrtoint instruction so the caller can insert it too.
5807  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5808      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5809    LLVMContext &Context = V->getContext();
5810
5811    // We have no information about target data layout, so we assume that
5812    // the maximum pointer size is 64bit.
5813    Type *MidTy = Type::getInt64Ty(Context);
5814    Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5815
5816    return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5817  }
5818
5819  return nullptr;
5820}
5821
  // Constant-expression analogue of the bitcast upgrade above.
  if (Opc != Instruction::BitCast)
    return nullptr;

  Type *SrcTy = C->getType();
  // An address-space-changing pointer bitcast is not valid IR; replace it
  // with a ptrtoint/inttoptr constant-expression pair.
  if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
      SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
    LLVMContext &Context = C->getContext();

    // We have no information about target data layout, so we assume that
    // the maximum pointer size is 64bit.
    Type *MidTy = Type::getInt64Ty(Context);

                                       DestTy);
  }

  return nullptr;
}
5841
/// Check the debug info version number, if it is out-dated, drop the debug
/// info. Return true if module is modified.
    return false;

  llvm::TimeTraceScope timeScope("Upgrade debug info");
  // We need to get metadata before the module is verified (i.e., getModuleFlag
  // makes assumptions that we haven't verified yet). Carefully extract the flag
  // from the metadata.
  unsigned Version = 0;
  if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
    // Locate the "Debug Info Version" module flag by hand; the module-flags
    // layout has not been verified at this point.
    auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
      if (Flag->getNumOperands() < 3)
        return false;
      if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
        return K->getString() == "Debug Info Version";
      return false;
    });
    if (OpIt != ModFlags->op_end()) {
      const MDOperand &ValOp = (*OpIt)->getOperand(2);
      if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
        Version = CI->getZExtValue();
    }
  }

    // Current version: verify the module and only strip when the debug info
    // itself is malformed.
    bool BrokenDebugInfo = false;
    if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
      report_fatal_error("Broken module found, compilation aborted!");
    if (!BrokenDebugInfo)
      // Everything is ok.
      return false;
    else {
      // Diagnose malformed debug info.
      M.getContext().diagnose(Diag);
    }
  }
  // Out-of-date (or malformed) debug info: strip it from the module.
  bool Modified = StripDebugInfo(M);
    // Diagnose a version mismatch.
    M.getContext().diagnose(DiagVersion);
  }
  return Modified;
}
5889
5890static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5891 GlobalValue *GV, const Metadata *V) {
5892 Function *F = cast<Function>(GV);
5893
5894 constexpr StringLiteral DefaultValue = "1";
5895 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5896 unsigned Length = 0;
5897
5898 if (F->hasFnAttribute(Attr)) {
5899 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5900 // parse these elements placing them into Vect3
5901 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5902 for (; Length < 3 && !S.empty(); Length++) {
5903 auto [Part, Rest] = S.split(',');
5904 Vect3[Length] = Part.trim();
5905 S = Rest;
5906 }
5907 }
5908
5909 const unsigned Dim = DimC - 'x';
5910 assert(Dim < 3 && "Unexpected dim char");
5911
5912 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5913
5914 // local variable required for StringRef in Vect3 to point to.
5915 const std::string VStr = llvm::utostr(VInt);
5916 Vect3[Dim] = VStr;
5917 Length = std::max(Length, Dim + 1);
5918
5919 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5920 F->addFnAttr(Attr, NewAttr);
5921}
5922
5923static inline bool isXYZ(StringRef S) {
5924 return S == "x" || S == "y" || S == "z";
5925}
5926
                                       const Metadata *V) {
  // Translate a single legacy !nvvm.annotations key/value pair attached to
  // GV into its modern representation (calling convention or attributes).
  // Returns true when the annotation was consumed and can be dropped.
  if (K == "kernel") {
    cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
    return true;
  }
  if (K == "align") {
    // V is a bitfield specifying two 16-bit values. The alignment value is
    // specified in low 16-bits, The index is specified in the high bits. For
    // the index, 0 indicates the return value while higher values correspond
    // to each parameter (idx = param + 1).
    const uint64_t AlignIdxValuePair =
        mdconst::extract<ConstantInt>(V)->getZExtValue();
    const unsigned Idx = (AlignIdxValuePair >> 16);
    const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
    cast<Function>(GV)->addAttributeAtIndex(
        Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
    return true;
  }
  if (K == "maxclusterrank" || K == "cluster_max_blocks") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    return true;
  }
  if (K == "minctasm") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    cast<Function>(GV)->addFnAttr(NVVMAttr::MinCTASm, llvm::utostr(CV));
    return true;
  }
  if (K == "maxnreg") {
    const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
    cast<Function>(GV)->addFnAttr(NVVMAttr::MaxNReg, llvm::utostr(CV));
    return true;
  }
  // Vector-valued launch-bound annotations: the trailing character ('x',
  // 'y' or 'z') selects which component of the attribute V replaces.
  if (K.consume_front("maxntid") && isXYZ(K)) {
    return true;
  }
  if (K.consume_front("reqntid") && isXYZ(K)) {
    return true;
  }
  if (K.consume_front("cluster_dim_") && isXYZ(K)) {
    return true;
  }
  if (K == "grid_constant") {
    const auto Attr = Attribute::get(GV->getContext(), NVVMAttr::GridConstant);
    for (const auto &Op : cast<MDNode>(V)->operands()) {
      // For some reason, the index is 1-based in the metadata. Good thing we're
      // able to auto-upgrade it!
      const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
      cast<Function>(GV)->addParamAttr(Index, Attr);
    }
    return true;
  }

  // Unrecognized key: leave the annotation in place.
  return false;
}
5987
  NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
  if (!NamedMD)
    return;

  // Rebuild the nvvm.annotations list, dropping every key/value pair that
  // was successfully upgraded and deduplicating identical nodes.
  SmallVector<MDNode *, 8> NewNodes;
  for (MDNode *MD : NamedMD->operands()) {
    // Skip nodes we have already processed.
    if (!SeenNodes.insert(MD).second)
      continue;

    auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
    if (!GV)
      continue;

    assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");

    SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
    // Each nvvm.annotations metadata entry will be of the following form:
    //   !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
    // start index = 1, to skip the global variable key
    // increment = 2, to skip the value for each property-value pairs
    for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
      MDString *K = cast<MDString>(MD->getOperand(j));
      const MDOperand &V = MD->getOperand(j + 1);
      bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
      if (!Upgraded)
        NewOperands.append({K, V});
    }

    // Keep the node only if un-upgraded key/value pairs remain (size 1 means
    // only the global itself is left).
    if (NewOperands.size() > 1)
      NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
  }

  NamedMD->clearOperands();
  for (MDNode *N : NewNodes)
    NamedMD->addOperand(N);
}
6026
/// This checks for objc retain release marker which should be upgraded. It
/// returns true if module is modified.
  bool Changed = false;
  const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
  NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
  if (ModRetainReleaseMarker) {
    MDNode *Op = ModRetainReleaseMarker->getOperand(0);
    if (Op) {
      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
      if (ID) {
        SmallVector<StringRef, 4> ValueComp;
        // Old-style markers separated the two halves with '#'; the current
        // form uses ';'.
        ID->getString().split(ValueComp, "#");
        if (ValueComp.size() == 2) {
          std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
          ID = MDString::get(M.getContext(), NewValue);
        }
        // Move the marker from named metadata into a module flag.
        M.addModuleFlag(Module::Error, MarkerKey, ID);
        M.eraseNamedMetadata(ModRetainReleaseMarker);
        Changed = true;
      }
    }
  }
  return Changed;
}
6052
  // This lambda converts normal function calls to ARC runtime functions to
  // intrinsic calls.
  auto UpgradeToIntrinsic = [&](const char *OldFunc,
                                llvm::Intrinsic::ID IntrinsicFunc) {
    Function *Fn = M.getFunction(OldFunc);

    if (!Fn)
      return;

    Function *NewFn =
        llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);

    // Early-increment iteration: calls are erased while we walk the users.
    for (User *U : make_early_inc_range(Fn->users())) {
      if (!CI || CI->getCalledFunction() != Fn)
        continue;

      IRBuilder<> Builder(CI->getParent(), CI->getIterator());
      FunctionType *NewFuncTy = NewFn->getFunctionType();

      // Don't upgrade the intrinsic if it's not valid to bitcast the return
      // value to the return type of the old function.
      if (NewFuncTy->getReturnType() != CI->getType() &&
          !CastInst::castIsValid(Instruction::BitCast, CI,
                                 NewFuncTy->getReturnType()))
        continue;

      bool InvalidCast = false;

      for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
        Value *Arg = CI->getArgOperand(I);

        // Bitcast argument to the parameter type of the new function if it's
        // not a variadic argument.
        if (I < NewFuncTy->getNumParams()) {
          // Don't upgrade the intrinsic if it's not valid to bitcast the
          // argument to the parameter type of the new function.
          if (!CastInst::castIsValid(Instruction::BitCast, Arg,
                                     NewFuncTy->getParamType(I))) {
            InvalidCast = true;
            break;
          }
          Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
        }
        Args.push_back(Arg);
      }

      if (InvalidCast)
        continue;

      // Create a call instruction that calls the new function.
      CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
      NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
      NewCall->takeName(CI);

      // Bitcast the return value back to the type of the old call.
      Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());

      if (!CI->use_empty())
        CI->replaceAllUsesWith(NewRetVal);
      CI->eraseFromParent();
    }

    // Drop the now-unreferenced runtime function declaration.
    if (Fn->use_empty())
      Fn->eraseFromParent();
  };

  // Unconditionally convert a call to "clang.arc.use" to a call to
  // "llvm.objc.clang.arc.use".
  UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);

  // Upgrade the retain release marker. If there is no need to upgrade
  // the marker, that means either the module is already new enough to contain
  // new intrinsics or it is not ARC. There is no need to upgrade runtime call.
    return;

  // Mapping from ARC runtime entry points to their intrinsic replacements.
  std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
      {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
      {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
      {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
      {"objc_autoreleaseReturnValue",
       llvm::Intrinsic::objc_autoreleaseReturnValue},
      {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
      {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
      {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
      {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
      {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
      {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
      {"objc_release", llvm::Intrinsic::objc_release},
      {"objc_retain", llvm::Intrinsic::objc_retain},
      {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
      {"objc_retainAutoreleaseReturnValue",
       llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
      {"objc_retainAutoreleasedReturnValue",
       llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
      {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
      {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
      {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
      {"objc_unsafeClaimAutoreleasedReturnValue",
       llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
      {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
      {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
      {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
      {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
      {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
      {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
      {"objc_arc_annotation_topdown_bbstart",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
      {"objc_arc_annotation_topdown_bbend",
       llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
      {"objc_arc_annotation_bottomup_bbstart",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
      {"objc_arc_annotation_bottomup_bbend",
       llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};

  for (auto &I : RuntimeFuncs)
    UpgradeToIntrinsic(I.first, I.second);
}
6174
  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (!ModFlags)
    return false;

  bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
  bool HasSwiftVersionFlag = false;
  uint8_t SwiftMajorVersion, SwiftMinorVersion;
  uint32_t SwiftABIVersion;
  auto Int8Ty = Type::getInt8Ty(M.getContext());
  auto Int32Ty = Type::getInt32Ty(M.getContext());

  for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
    MDNode *Op = ModFlags->getOperand(I);
    if (Op->getNumOperands() != 3)
      continue;
    MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
    if (!ID)
      continue;
    // Rewrites the current flag with merge behavior B, keeping its key and
    // value intact.
    auto SetBehavior = [&](Module::ModFlagBehavior B) {
      Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
                              Type::getInt32Ty(M.getContext()), B)),
                          MDString::get(M.getContext(), ID->getString()),
                          Op->getOperand(2)};
      ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
      Changed = true;
    };

    if (ID->getString() == "Objective-C Image Info Version")
      HasObjCFlag = true;
    if (ID->getString() == "Objective-C Class Properties")
      HasClassProperties = true;
    // Upgrade PIC from Error/Max to Min.
    if (ID->getString() == "PIC Level") {
      if (auto *Behavior =
        uint64_t V = Behavior->getLimitedValue();
        if (V == Module::Error || V == Module::Max)
          SetBehavior(Module::Min);
      }
    }
    // Upgrade "PIE Level" from Error to Max.
    if (ID->getString() == "PIE Level")
      if (auto *Behavior =
        if (Behavior->getLimitedValue() == Module::Error)
          SetBehavior(Module::Max);

    // Upgrade branch protection and return address signing module flags. The
    // module flag behavior for these fields were Error and now they are Min.
    if (ID->getString() == "branch-target-enforcement" ||
        ID->getString().starts_with("sign-return-address")) {
      if (auto *Behavior =
        if (Behavior->getLimitedValue() == Module::Error) {
          Type *Int32Ty = Type::getInt32Ty(M.getContext());
          Metadata *Ops[3] = {
              ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
              Op->getOperand(1), Op->getOperand(2)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }

    // Upgrade Objective-C Image Info Section. Removed the whitespace in the
    // section name so that llvm-lto will not complain about mismatching
    // module flags that is functionally the same.
    if (ID->getString() == "Objective-C Image Info Section") {
      if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
        SmallVector<StringRef, 4> ValueComp;
        Value->getString().split(ValueComp, " ");
        if (ValueComp.size() != 1) {
          // Re-join the components without any whitespace between them.
          std::string NewValue;
          for (auto &S : ValueComp)
            NewValue += S.str();
          Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
                              MDString::get(M.getContext(), NewValue)};
          ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
          Changed = true;
        }
      }
    }

    // IRUpgrader turns a i32 type "Objective-C Garbage Collection" into i8
    // value. If the higher bits are set, it adds new module flag for swift
    // info.
    if (ID->getString() == "Objective-C Garbage Collection") {
      auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
      if (Md) {
        assert(Md->getValue() && "Expected non-empty metadata");
        auto Type = Md->getValue()->getType();
        if (Type == Int8Ty)
          continue;
        unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
        // Bits above the low byte encode the Swift ABI/major/minor versions;
        // extract them and emit separate module flags below.
        if ((Val & 0xff) != Val) {
          HasSwiftVersionFlag = true;
          SwiftABIVersion = (Val & 0xff00) >> 8;
          SwiftMajorVersion = (Val & 0xff000000) >> 24;
          SwiftMinorVersion = (Val & 0xff0000) >> 16;
        }
        Metadata *Ops[3] = {
            Op->getOperand(1),
            ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
        ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
        Changed = true;
      }
    }

    // Rename the AMDGPU code-object-version flag to its current spelling.
    if (ID->getString() == "amdgpu_code_object_version") {
      Metadata *Ops[3] = {
          Op->getOperand(0),
          MDString::get(M.getContext(), "amdhsa_code_object_version"),
          Op->getOperand(2)};
      ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
      Changed = true;
    }
  }

  // "Objective-C Class Properties" is recently added for Objective-C. We
  // upgrade ObjC bitcodes to contain a "Objective-C Class Properties" module
  // flag of value 0, so we can correctly downgrade this flag when trying to
  // link an ObjC bitcode without this module flag with an ObjC bitcode with
  // this module flag.
  if (HasObjCFlag && !HasClassProperties) {
    M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
                    (uint32_t)0);
    Changed = true;
  }

  if (HasSwiftVersionFlag) {
    M.addModuleFlag(Module::Error, "Swift ABI Version",
                    SwiftABIVersion);
    M.addModuleFlag(Module::Error, "Swift Major Version",
                    ConstantInt::get(Int8Ty, SwiftMajorVersion));
    M.addModuleFlag(Module::Error, "Swift Minor Version",
                    ConstantInt::get(Int8Ty, SwiftMinorVersion));
    Changed = true;
  }

  return Changed;
}
6317
  // Normalize whitespace in __objc_catlist section specifiers so that
  // differently-spelled but equivalent sections compare equal.
  auto TrimSpaces = [](StringRef Section) -> std::string {
    SmallVector<StringRef, 5> Components;
    Section.split(Components, ',');

    SmallString<32> Buffer;
    raw_svector_ostream OS(Buffer);

    for (auto Component : Components)
      OS << ',' << Component.trim();

    // Drop the leading comma emitted by the loop above.
    return std::string(OS.str().substr(1));
  };

  for (auto &GV : M.globals()) {
    if (!GV.hasSection())
      continue;

    StringRef Section = GV.getSection();

    if (!Section.starts_with("__DATA, __objc_catlist"))
      continue;

    // __DATA, __objc_catlist, regular, no_dead_strip
    // __DATA,__objc_catlist,regular,no_dead_strip
    GV.setSection(TrimSpaces(Section));
  }
}
6346
namespace {
// Prior to LLVM 10.0, the strictfp attribute could be used on individual
// callsites within a function that did not also have the strictfp attribute.
// Since 10.0, if strict FP semantics are needed within a function, the
// function must have the strictfp attribute and all calls within the function
// must also have the strictfp attribute. This latter restriction is
// necessary to prevent unwanted libcall simplification when a function is
// being cloned (such as for inlining).
//
// The "dangling" strictfp attribute usage was only used to prevent constant
// folding and other libcall simplification. The nobuiltin attribute on the
// callsite has the same effect.
struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
  StrictFPUpgradeVisitor() = default;

  // Rewrites a callsite-only strictfp attribute into nobuiltin.
  void visitCallBase(CallBase &Call) {
    if (!Call.isStrictFP())
      return;
      return;
    // If we get here, the caller doesn't have the strictfp attribute
    // but this callsite does. Replace the strictfp attribute with nobuiltin.
    Call.removeFnAttr(Attribute::StrictFP);
    Call.addFnAttr(Attribute::NoBuiltin);
  }
};
6373
6374/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6375struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6376 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6377 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6378
6379 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6380 if (!RMW.isFloatingPointOperation())
6381 return;
6382
6383 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6384 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6385 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6386 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6387 }
6388};
6389} // namespace
6390
  // If a function definition doesn't have the strictfp attribute,
  // convert any callsite strictfp attributes to nobuiltin.
  if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
    StrictFPUpgradeVisitor SFPV;
    SFPV.visit(F);
  }

  // Remove all incompatibile attributes from function.
  F.removeRetAttrs(AttributeFuncs::typeIncompatible(
      F.getReturnType(), F.getAttributes().getRetAttrs()));
  for (auto &Arg : F.args())
    Arg.removeAttrs(
        AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));

  // Batch all attribute additions/removals so the attribute list is rebuilt
  // at most twice at the end of this function.
  bool AddingAttrs = false, RemovingAttrs = false;
  AttrBuilder AttrsToAdd(F.getContext());
  AttributeMask AttrsToRemove;

  // Older versions of LLVM treated an "implicit-section-name" attribute
  // similarly to directly setting the section on a Function.
  if (Attribute A = F.getFnAttribute("implicit-section-name");
      A.isValid() && A.isStringAttribute()) {
    F.setSection(A.getValueAsString());
    AttrsToRemove.addAttribute("implicit-section-name");
    RemovingAttrs = true;
  }

  // The string attribute "nooutline" is replaced by the NoOutline enum
  // attribute.
  if (Attribute A = F.getFnAttribute("nooutline");
      A.isValid() && A.isStringAttribute()) {
    AttrsToRemove.addAttribute("nooutline");
    AttrsToAdd.addAttribute(Attribute::NoOutline);
    AddingAttrs = RemovingAttrs = true;
  }

  // "uniform-work-group-size"="false" is dropped entirely; "true" becomes a
  // valueless attribute.
  if (Attribute A = F.getFnAttribute("uniform-work-group-size");
      A.isValid() && A.isStringAttribute() && !A.getValueAsString().empty()) {
    AttrsToRemove.addAttribute("uniform-work-group-size");
    RemovingAttrs = true;
    if (A.getValueAsString() == "true") {
      AttrsToAdd.addAttribute("uniform-work-group-size");
      AddingAttrs = true;
    }
  }

  if (!F.empty()) {
    // For some reason this is called twice, and the first time is before any
    // instructions are loaded into the body.

    if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
        A.isValid()) {

      if (A.getValueAsBool()) {
        AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
        Visitor.visit(F);
      }

      // We will leave behind dead attribute uses on external declarations, but
      // clang never added these to declarations anyway.
      AttrsToRemove.addAttribute("amdgpu-unsafe-fp-atomics");
      RemovingAttrs = true;
    }
  }

  DenormalMode DenormalFPMath = DenormalMode::getIEEE();
  DenormalMode DenormalFPMathF32 = DenormalMode::getInvalid();

  bool HandleDenormalMode = false;

  if (Attribute Attr = F.getFnAttribute("denormal-fp-math"); Attr.isValid()) {
    DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
    if (ParsedMode.isValid()) {
      DenormalFPMath = ParsedMode;
      AttrsToRemove.addAttribute("denormal-fp-math");
      AddingAttrs = RemovingAttrs = true;
      HandleDenormalMode = true;
    }
  }

  if (Attribute Attr = F.getFnAttribute("denormal-fp-math-f32");
      Attr.isValid()) {
    DenormalMode ParsedMode = parseDenormalFPAttribute(Attr.getValueAsString());
    if (ParsedMode.isValid()) {
      DenormalFPMathF32 = ParsedMode;
      AttrsToRemove.addAttribute("denormal-fp-math-f32");
      AddingAttrs = RemovingAttrs = true;
      HandleDenormalMode = true;
    }
  }

  // Merge the two legacy string denormal attributes into the combined
  // denormal FP environment attribute.
  if (HandleDenormalMode)
    AttrsToAdd.addDenormalFPEnvAttr(
        DenormalFPEnv(DenormalFPMath, DenormalFPMathF32));

  if (RemovingAttrs)
    F.removeFnAttrs(AttrsToRemove);

  if (AddingAttrs)
    F.addFnAttrs(AttrsToAdd);
}
6491
// Check if the function attribute is not present and set it.
// An attribute that is already present (with any value) is left untouched.
                               StringRef Value) {
  if (!F.hasFnAttribute(FnAttrName))
    F.addFnAttr(FnAttrName, Value);
}
6498
6499// Check if the function attribute is not present and set it if needed.
6500// If the attribute is "false" then removes it.
6501// If the attribute is "true" resets it to a valueless attribute.
6502static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6503 if (!F.hasFnAttribute(FnAttrName)) {
6504 if (Set)
6505 F.addFnAttr(FnAttrName);
6506 } else {
6507 auto A = F.getFnAttribute(FnAttrName);
6508 if ("false" == A.getValueAsString())
6509 F.removeFnAttr(FnAttrName);
6510 else if ("true" == A.getValueAsString()) {
6511 F.removeFnAttr(FnAttrName);
6512 F.addFnAttr(FnAttrName);
6513 }
6514 }
6515}
6516
  // These branch-protection module flags only apply to ARM/Thumb/AArch64.
  Triple T(M.getTargetTriple());
  if (!T.isThumb() && !T.isARM() && !T.isAArch64())
    return;

  uint64_t BTEValue = 0;
  uint64_t BPPLRValue = 0;
  uint64_t GCSValue = 0;
  uint64_t SRAValue = 0;
  uint64_t SRAALLValue = 0;
  uint64_t SRABKeyValue = 0;

  NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
  if (ModFlags) {
    for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
      MDNode *Op = ModFlags->getOperand(I);
      if (Op->getNumOperands() != 3)
        continue;

      MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
      auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
      if (!ID || !CI)
        continue;

      // Map the flag name to the local it records; ignore unrelated flags.
      StringRef IDStr = ID->getString();
      uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
                         : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
                         : IDStr == "guarded-control-stack" ? &GCSValue
                         : IDStr == "sign-return-address" ? &SRAValue
                         : IDStr == "sign-return-address-all" ? &SRAALLValue
                         : IDStr == "sign-return-address-with-bkey"
                             ? &SRABKeyValue
                             : nullptr;
      if (!ValPtr)
        continue;

      *ValPtr = CI->getZExtValue();
      // A value of 2 marks the already-upgraded encoding; nothing to do.
      if (*ValPtr == 2)
        return;
    }
  }

  bool BTE = BTEValue == 1;
  bool BPPLR = BPPLRValue == 1;
  bool GCS = GCSValue == 1;
  bool SRA = SRAValue == 1;

  StringRef SignTypeValue = "non-leaf";
  if (SRA && SRAALLValue == 1)
    SignTypeValue = "all";

  StringRef SignKeyValue = "a_key";
  if (SRA && SRABKeyValue == 1)
    SignKeyValue = "b_key";

  // Push the module-level settings down onto every function definition.
  for (Function &F : M.getFunctionList()) {
    if (F.isDeclaration())
      continue;

    if (SRA) {
      setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
      setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
    } else {
      // Drop an explicit "none" and its key so the function carries no
      // redundant signing attributes.
      if (auto A = F.getFnAttribute("sign-return-address");
          A.isValid() && "none" == A.getValueAsString()) {
        F.removeFnAttr("sign-return-address");
        F.removeFnAttr("sign-return-address-key");
      }
    }
    ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
    ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
    ConvertFunctionAttr(F, GCS, "guarded-control-stack");
  }

  // Re-encode the enabled module flags with Min behavior and value 2 — the
  // upgraded form recognized by the early-exit above.
  if (BTE)
    M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
  if (BPPLR)
    M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
  if (GCS)
    M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
  if (SRA) {
    M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
    if (SRAALLValue == 1)
      M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
    if (SRABKeyValue == 1)
      M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
  }
}
6605
6606static bool isOldLoopArgument(Metadata *MD) {
6607 auto *T = dyn_cast_or_null<MDTuple>(MD);
6608 if (!T)
6609 return false;
6610 if (T->getNumOperands() < 1)
6611 return false;
6612 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6613 if (!S)
6614 return false;
6615 return S->getString().starts_with("llvm.vectorizer.");
6616}
6617
  StringRef OldPrefix = "llvm.vectorizer.";
  assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");

  // "unroll" was renamed outright rather than just re-prefixed.
  if (OldTag == "llvm.vectorizer.unroll")
    return MDString::get(C, "llvm.loop.interleave.count");

  // Everything else maps "llvm.vectorizer.X" -> "llvm.loop.vectorize.X".
  return MDString::get(
      C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
             .str());
}
6629
  // Upgrade a single loop-metadata operand; anything that is not an
  // old-style "llvm.vectorizer.*" tuple is returned unchanged.
  auto *T = dyn_cast_or_null<MDTuple>(MD);
  if (!T)
    return MD;
  if (T->getNumOperands() < 1)
    return MD;
  auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
  if (!OldTag)
    return MD;
  if (!OldTag->getString().starts_with("llvm.vectorizer."))
    return MD;

  // This has an old tag. Upgrade it.
  Ops.reserve(T->getNumOperands());
  Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
  for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
    Ops.push_back(T->getOperand(I));

  return MDTuple::get(T->getContext(), Ops);
}
6651
  // Upgrade a whole loop-metadata node. Returns the input node unchanged
  // when no operand uses the old "llvm.vectorizer." naming.
  auto *T = dyn_cast<MDTuple>(&N);
  if (!T)
    return &N;

  if (none_of(T->operands(), isOldLoopArgument))
    return &N;

  // At least one operand is old-style: rebuild the tuple, upgrading each
  // operand individually.
  Ops.reserve(T->getNumOperands());
  for (Metadata *MD : T->operands())
    Ops.push_back(upgradeLoopArgument(MD));

  return MDTuple::get(T->getContext(), Ops);
}
6667
6669 Triple T(TT);
6670 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6671 // the address space of globals to 1. This does not apply to SPIRV Logical.
6672 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6673 !DL.contains("-G") && !DL.starts_with("G")) {
6674 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6675 }
6676
6677 if (T.isLoongArch64() || T.isRISCV64()) {
6678 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6679 auto I = DL.find("-n64-");
6680 if (I != StringRef::npos)
6681 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6682 return DL.str();
6683 }
6684
6685 // AMDGPU data layout upgrades.
6686 std::string Res = DL.str();
6687 if (T.isAMDGPU()) {
6688 // Define address spaces for constants.
6689 if (!DL.contains("-G") && !DL.starts_with("G"))
6690 Res.append(Res.empty() ? "G1" : "-G1");
6691
6692 // AMDGCN data layout upgrades.
6693 if (T.isAMDGCN()) {
6694
6695 // Add missing non-integral declarations.
6696 // This goes before adding new address spaces to prevent incoherent string
6697 // values.
6698 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6699 Res.append("-ni:7:8:9");
6700 // Update ni:7 to ni:7:8:9.
6701 if (DL.ends_with("ni:7"))
6702 Res.append(":8:9");
6703 if (DL.ends_with("ni:7:8"))
6704 Res.append(":9");
6705
6706 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6707 // resources) An empty data layout has already been upgraded to G1 by now.
6708 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6709 Res.append("-p7:160:256:256:32");
6710 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6711 Res.append("-p8:128:128:128:48");
6712 constexpr StringRef OldP8("-p8:128:128-");
6713 if (DL.contains(OldP8))
6714 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6715 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6716 Res.append("-p9:192:256:256:32");
6717 }
6718
6719 // Upgrade the ELF mangling mode.
6720 if (!DL.contains("m:e"))
6721 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6722
6723 return Res;
6724 }
6725
6726 if (T.isSystemZ() && !DL.empty()) {
6727 // Make sure the stack alignment is present.
6728 if (!DL.contains("-S64"))
6729 return "E-S64" + DL.drop_front(1).str();
6730 return DL.str();
6731 }
6732
6733 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6734 // If the datalayout matches the expected format, add pointer size address
6735 // spaces to the datalayout.
6736 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6737 if (!DL.contains(AddrSpaces)) {
6739 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6740 if (R.match(Res, &Groups))
6741 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6742 }
6743 };
6744
6745 // AArch64 data layout upgrades.
6746 if (T.isAArch64()) {
6747 // Add "-Fn32"
6748 if (!DL.empty() && !DL.contains("-Fn32"))
6749 Res.append("-Fn32");
6750 AddPtr32Ptr64AddrSpaces();
6751 return Res;
6752 }
6753
6754 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6755 T.isWasm()) {
6756 // Mips64 with o32 ABI did not add "-i128:128".
6757 // Add "-i128:128"
6758 std::string I64 = "-i64:64";
6759 std::string I128 = "-i128:128";
6760 if (!StringRef(Res).contains(I128)) {
6761 size_t Pos = Res.find(I64);
6762 if (Pos != size_t(-1))
6763 Res.insert(Pos + I64.size(), I128);
6764 }
6765 }
6766
6767 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6768 size_t Pos = Res.find("-S128");
6769 if (Pos == StringRef::npos)
6770 Pos = Res.size();
6771 Res.insert(Pos, "-f64:32:64");
6772 }
6773
6774 if (!T.isX86())
6775 return Res;
6776
6777 AddPtr32Ptr64AddrSpaces();
6778
6779 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6780 // for i128 operations prior to this being reflected in the data layout, and
6781 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6782 // boundaries, so although this is a breaking change, the upgrade is expected
6783 // to fix more IR than it breaks.
6784 // Intel MCU is an exception and uses 4-byte-alignment.
6785 if (!T.isOSIAMCU()) {
6786 std::string I128 = "-i128:128";
6787 if (StringRef Ref = Res; !Ref.contains(I128)) {
6789 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6790 if (R.match(Res, &Groups))
6791 Res = (Groups[1] + I128 + Groups[3]).str();
6792 }
6793 }
6794
6795 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6796 // Raising the alignment is safe because Clang did not produce f80 values in
6797 // the MSVC environment before this upgrade was added.
6798 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6799 StringRef Ref = Res;
6800 auto I = Ref.find("-f80:32-");
6801 if (I != StringRef::npos)
6802 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6803 }
6804
6805 return Res;
6806}
6807
6808void llvm::UpgradeAttributes(AttrBuilder &B) {
6809 StringRef FramePointer;
6810 Attribute A = B.getAttribute("no-frame-pointer-elim");
6811 if (A.isValid()) {
6812 // The value can be "true" or "false".
6813 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6814 B.removeAttribute("no-frame-pointer-elim");
6815 }
6816 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6817 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6818 if (FramePointer != "all")
6819 FramePointer = "non-leaf";
6820 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6821 }
6822 if (!FramePointer.empty())
6823 B.addAttribute("frame-pointer", FramePointer);
6824
6825 A = B.getAttribute("null-pointer-is-valid");
6826 if (A.isValid()) {
6827 // The value can be "true" or "false".
6828 bool NullPointerIsValid = A.getValueAsString() == "true";
6829 B.removeAttribute("null-pointer-is-valid");
6830 if (NullPointerIsValid)
6831 B.addAttribute(Attribute::NullPointerIsValid);
6832 }
6833
6834 A = B.getAttribute("uniform-work-group-size");
6835 if (A.isValid()) {
6836 StringRef Val = A.getValueAsString();
6837 if (!Val.empty()) {
6838 bool IsTrue = Val == "true";
6839 B.removeAttribute("uniform-work-group-size");
6840 if (IsTrue)
6841 B.addAttribute("uniform-work-group-size");
6842 }
6843 }
6844}
6845
6846void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6847 // clang.arc.attachedcall bundles are now required to have an operand.
6848 // If they don't, it's okay to drop them entirely: when there is an operand,
6849 // the "attachedcall" is meaningful and required, but without an operand,
6850 // it's just a marker NOP. Dropping it merely prevents an optimization.
6851 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6852 return OBD.getTag() == "clang.arc.attachedcall" &&
6853 OBD.inputs().empty();
6854 });
6855}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU address space definition.
AMDGPU Register Bank Select
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
This file contains the simple types necessary to represent the attributes associated with functions a...
static Value * upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI, bool ZeroMask, bool IndexForm)
static Metadata * upgradeLoopArgument(Metadata *MD)
static bool isXYZ(StringRef S)
static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords)
static Value * upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F, StringRef Name)
static bool upgradeRetainReleaseMarker(Module &M)
This checks for objc retain release marker which should be upgraded.
static Value * upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm, bool IsSigned)
static Value * upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI)
static bool convertIntrinsicValidType(StringRef Name, const FunctionType *FuncTy)
static Value * upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI, bool IsRotateRight)
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID, Function *&NewFn)
static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName, StringRef Value)
static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name)
static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K, const Metadata *V)
static MDNode * unwrapMAVOp(CallBase *CI, unsigned Op)
Helper to unwrap intrinsic call MetadataAsValue operands.
static MDString * upgradeLoopTag(LLVMContext &C, StringRef OldTag)
static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC, GlobalValue *GV, const Metadata *V)
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0, Value *Op1, Value *Shift, Value *Passthru, Value *Mask, bool IsVALIGN)
static Value * upgradeAbs(IRBuilder<> &Builder, CallBase &CI)
static Value * emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI)
static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name, Function *&NewFn)
static Value * applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec, Value *Mask)
static bool consumeNVVMPtrAddrSpace(StringRef &Name)
static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name)
static Value * upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op, unsigned Shift)
static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F, StringRef Name)
static bool isOldLoopArgument(Metadata *MD)
static Value * upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID, Function *&NewFn)
static Value * upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder)
static Value * upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr, Value *Passthru, Value *Mask, bool Aligned)
static Metadata * unwrapMAVMetadataOp(CallBase *CI, unsigned Op)
Helper to unwrap Metadata MetadataAsValue operands, such as the Value field.
static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F, StringRef Name, Function *&NewFn)
static Value * getX86MaskVec(IRBuilder<> &Builder, Value *Mask, unsigned NumElts)
static Value * emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0, Value *Op1)
static Value * upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI, bool IsShiftRight, bool ZeroMask)
static void rename(GlobalValue *GV)
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID, Function *&NewFn)
static cl::opt< bool > DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info", cl::desc("Disable autoupgrade of debug info"))
static Value * upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI, unsigned CC, bool Signed)
static Value * upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static Value * upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static Value * upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI, Intrinsic::ID IID)
static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder, CallBase &CI, Value *&Rep)
static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI)
Convert debug intrinsic calls to non-instruction debug records.
static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName)
static Value * upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned)
static void reportFatalUsageErrorWithCI(StringRef reason, CallBase *CI)
static Value * upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data, Value *Mask, bool Aligned)
static Value * upgradeConvertIntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID, Function *&NewFn)
static MDNode * getDebugLocSafe(const Instruction *I)
static Value * upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F, IRBuilder<> &Builder)
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
This file contains the declarations for the subclasses of Constant, which represent the different fla...
This file contains constants used for implementing Dwarf debug support.
Module.h This file contains the declarations for the Module class.
const AbstractManglingParser< Derived, Alloc >::OperatorInfo AbstractManglingParser< Derived, Alloc >::Ops[]
static bool isZero(Value *V, const DataLayout &DL, DominatorTree *DT, AssumptionCache *AC)
Definition Lint.cpp:539
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
#define R2(n)
This file contains the declarations for metadata subclasses.
#define T
#define T1
NVPTX address space definition.
uint64_t High
static unsigned getNumElements(Type *Ty)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file contains some functions that are useful when dealing with strings.
This file implements the StringSwitch template, which mimics a switch() statement whose cases are str...
static SymbolRef::Type getType(const Symbol *Sym)
Definition TapiFile.cpp:39
LocallyHashedType DenseMapInfo< LocallyHashedType >::Empty
static const X86InstrFMA3Group Groups[]
Value * RHS
Value * LHS
Class for arbitrary precision integers.
Definition APInt.h:78
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
Class to represent array types.
static LLVM_ABI ArrayType * get(Type *ElementType, uint64_t NumElements)
This static method is the primary way to construct an ArrayType.
Type * getElementType() const
an instruction that atomically reads a memory location, combines it with another value,...
void setVolatile(bool V)
Specify whether this is a volatile RMW or not.
BinOp
This enumeration lists the possible modifications atomicrmw can make.
@ FAdd
*p = old + v
@ USubCond
Subtract only if no unsigned overflow.
@ USubSat
*p = usub.sat(old, v) usub.sat matches the behavior of llvm.usub.sat.
@ UIncWrap
Increment one up to a maximum value.
@ FMin
*p = minnum(old, v) minnum matches the behavior of llvm.minnum.
@ FMax
*p = maxnum(old, v) maxnum matches the behavior of llvm.maxnum.
@ UDecWrap
Decrement one until a minimum value or zero.
bool isFloatingPointOperation() const
This class stores enough information to efficiently remove some attributes from an existing AttrBuild...
AttributeMask & addAttribute(Attribute::AttrKind Val)
Add an attribute to the mask.
Functions, function parameters, and return types can have attributes to indicate how they should be t...
Definition Attributes.h:105
static LLVM_ABI Attribute getWithStackAlignment(LLVMContext &Context, Align Alignment)
static LLVM_ABI Attribute get(LLVMContext &Context, AttrKind Kind, uint64_t Val=0)
Return a uniquified Attribute object.
Base class for all callable instructions (InvokeInst and CallInst) Holds everything related to callin...
LLVM_ABI void getOperandBundlesAsDefs(SmallVectorImpl< OperandBundleDef > &Defs) const
Return the list of operand bundles attached to this instruction as a vector of OperandBundleDefs.
Function * getCalledFunction() const
Returns the function called, or null if this is an indirect function invocation or the function signa...
CallingConv::ID getCallingConv() const
Value * getCalledOperand() const
void setAttributes(AttributeList A)
Set the attributes for this call.
Value * getArgOperand(unsigned i) const
FunctionType * getFunctionType() const
LLVM_ABI Intrinsic::ID getIntrinsicID() const
Returns the intrinsic ID of the intrinsic called or Intrinsic::not_intrinsic if the called function i...
iterator_range< User::op_iterator > args()
Iteration adapter for range-for loops.
void setCalledOperand(Value *V)
unsigned arg_size() const
AttributeList getAttributes() const
Return the attributes for this call.
void setCalledFunction(Function *Fn)
Sets the function called, including updating the function type.
This class represents a function call, abstracting a target machine's calling convention.
void setTailCallKind(TailCallKind TCK)
static LLVM_ABI CastInst * Create(Instruction::CastOps, Value *S, Type *Ty, const Twine &Name="", InsertPosition InsertBefore=nullptr)
Provides a way to construct any of the CastInst subclasses using an opcode instead of the subclass's ...
static LLVM_ABI bool castIsValid(Instruction::CastOps op, Type *SrcTy, Type *DstTy)
This method can be used to determine if a cast from SrcTy to DstTy using Opcode op is valid or not.
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_SLT
signed less than
Definition InstrTypes.h:705
@ ICMP_SLE
signed less or equal
Definition InstrTypes.h:706
@ ICMP_UGE
unsigned greater or equal
Definition InstrTypes.h:700
@ ICMP_UGT
unsigned greater than
Definition InstrTypes.h:699
@ ICMP_SGT
signed greater than
Definition InstrTypes.h:703
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
@ ICMP_NE
not equal
Definition InstrTypes.h:698
@ ICMP_SGE
signed greater or equal
Definition InstrTypes.h:704
@ ICMP_ULE
unsigned less or equal
Definition InstrTypes.h:702
static LLVM_ABI ConstantAggregateZero * get(Type *Ty)
static LLVM_ABI Constant * get(ArrayType *T, ArrayRef< Constant * > V)
static ConstantAsMetadata * get(Constant *C)
Definition Metadata.h:537
static LLVM_ABI Constant * getIntToPtr(Constant *C, Type *Ty, bool OnlyIfReduced=false)
static LLVM_ABI Constant * getPointerCast(Constant *C, Type *Ty)
Create a BitCast, AddrSpaceCast, or a PtrToInt cast constant expression.
static LLVM_ABI Constant * getPtrToInt(Constant *C, Type *Ty, bool OnlyIfReduced=false)
This is the shared class of boolean and integer constants.
Definition Constants.h:87
bool isZero() const
This is just a convenience method to make client code smaller for a common code.
Definition Constants.h:219
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
static LLVM_ABI ConstantPointerNull * get(PointerType *T)
Static factory methods - Return objects of the specified value.
static LLVM_ABI Constant * get(StructType *T, ArrayRef< Constant * > V)
static LLVM_ABI ConstantTokenNone * get(LLVMContext &Context)
Return the ConstantTokenNone.
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getAllOnesValue(Type *Ty)
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
DWARF expression.
static LLVM_ABI DIExpression * append(const DIExpression *Expr, ArrayRef< uint64_t > Ops)
Append the opcodes Ops to DIExpr.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static LLVM_ABI DbgLabelRecord * createUnresolvedDbgLabelRecord(MDNode *Label, MDNode *DL)
For use during parsing; creates a DbgLabelRecord from as-of-yet unresolved MDNodes.
Base class for non-instruction debug metadata records that have positions within IR.
static LLVM_ABI DbgVariableRecord * createUnresolvedDbgVariableRecord(LocationType Type, Metadata *Val, MDNode *Variable, MDNode *Expression, MDNode *AssignID, Metadata *Address, MDNode *AddressExpression, MDNode *DI)
Used to create DbgVariableRecords during parsing, where some metadata references may still be unresol...
Diagnostic information for debug metadata version reporting.
Diagnostic information for stripping invalid debug metadata.
Convenience struct for specifying and reasoning about fast-math flags.
Definition FMF.h:23
void setApproxFunc(bool B=true)
Definition FMF.h:96
static LLVM_ABI FixedVectorType * get(Type *ElementType, unsigned NumElts)
Definition Type.cpp:873
Class to represent function types.
Type * getParamType(unsigned i) const
Parameter type accessors.
Type * getReturnType() const
static LLVM_ABI FunctionType * get(Type *Result, ArrayRef< Type * > Params, bool isVarArg)
This static method is the primary way of constructing a FunctionType.
static Function * Create(FunctionType *Ty, LinkageTypes Linkage, unsigned AddrSpace, const Twine &N="", Module *M=nullptr)
Definition Function.h:168
FunctionType * getFunctionType() const
Returns the FunctionType for me.
Definition Function.h:211
Intrinsic::ID getIntrinsicID() const LLVM_READONLY
getIntrinsicID - This method returns the ID number of the specified function, or Intrinsic::not_intri...
Definition Function.h:246
const Function & getFunction() const
Definition Function.h:166
void eraseFromParent()
eraseFromParent - This method unlinks 'this' from the containing module and deletes it.
Definition Function.cpp:449
size_t arg_size() const
Definition Function.h:901
Type * getReturnType() const
Returns the type of the ret val.
Definition Function.h:216
Argument * getArg(unsigned i) const
Definition Function.h:886
LinkageTypes getLinkage() const
Type * getValueType() const
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
PointerType * getPtrTy(unsigned AddrSpace=0)
Fetch the type representing a pointer.
Definition IRBuilder.h:629
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2847
Base class for instruction visitors.
Definition InstVisitor.h:78
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI void setMetadata(unsigned KindID, MDNode *Node)
Set the metadata of the specified kind to the specified node.
LLVM_ABI FastMathFlags getFastMathFlags() const LLVM_READONLY
Convenience function for getting all the fast-math flags, which must be an operator which supports th...
LLVM_ABI void copyMetadata(const Instruction &SrcInst, ArrayRef< unsigned > WL=ArrayRef< unsigned >())
Copy metadata from SrcInst to this instruction.
LLVM_ABI const DataLayout & getDataLayout() const
Get the data layout of the module this instruction belongs to.
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
LLVM_ABI MDNode * createRange(const APInt &Lo, const APInt &Hi)
Return metadata describing the range [Lo, Hi).
Definition MDBuilder.cpp:96
Metadata node.
Definition Metadata.h:1080
const MDOperand & getOperand(unsigned I) const
Definition Metadata.h:1444
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1572
unsigned getNumOperands() const
Return number of MDNode operands.
Definition Metadata.h:1450
LLVMContext & getContext() const
Definition Metadata.h:1244
Tracking metadata reference owned by Metadata.
Definition Metadata.h:902
A single uniqued string.
Definition Metadata.h:722
static LLVM_ABI MDString * get(LLVMContext &Context, StringRef Str)
Definition Metadata.cpp:614
static MDTuple * get(LLVMContext &Context, ArrayRef< Metadata * > MDs)
Definition Metadata.h:1529
Metadata wrapper in the Value hierarchy.
Definition Metadata.h:184
static LLVM_ABI MetadataAsValue * get(LLVMContext &Context, Metadata *MD)
Definition Metadata.cpp:110
Root of the metadata hierarchy.
Definition Metadata.h:64
A Module instance is used to store all the information related to an LLVM module.
Definition Module.h:67
ModFlagBehavior
This enumeration defines the supported behaviors of module flags.
Definition Module.h:117
@ Override
Uses the specified value, regardless of the behavior or value of the other module.
Definition Module.h:138
@ Error
Emits an error if two values disagree, otherwise the resulting value is that of the operands.
Definition Module.h:120
@ Min
Takes the min of the two values, which are required to be integers.
Definition Module.h:152
@ Max
Takes the max of the two values, which are required to be integers.
Definition Module.h:149
A tuple of MDNodes.
Definition Metadata.h:1760
LLVM_ABI void setOperand(unsigned I, MDNode *New)
LLVM_ABI MDNode * getOperand(unsigned i) const
LLVM_ABI unsigned getNumOperands() const
LLVM_ABI void clearOperands()
Drop all references to this node's operands.
iterator_range< op_iterator > operands()
Definition Metadata.h:1856
LLVM_ABI void addOperand(MDNode *M)
ArrayRef< InputTy > inputs() const
StringRef getTag() const
static LLVM_ABI PoisonValue * get(Type *T)
Static factory methods - Return an 'poison' object of the specified type.
LLVM_ABI bool match(StringRef String, SmallVectorImpl< StringRef > *Matches=nullptr, std::string *Error=nullptr) const
matches - Match the regex against a given String.
Definition Regex.cpp:83
static LLVM_ABI ScalableVectorType * get(Type *ElementType, unsigned MinNumElts)
Definition Type.cpp:895
ArrayRef< int > getShuffleMask() const
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
SmallString - A SmallString is just a SmallVector with methods and accessors that make it work better...
Definition SmallString.h:26
void append(ItTy in_start, ItTy in_end)
Add the specified range to the end of the SmallVector.
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
A wrapper around a string literal that serves as a proxy for constructing global tables of StringRefs...
Definition StringRef.h:882
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
std::pair< StringRef, StringRef > split(char Separator) const
Split into two substrings around the first occurrence of a separator character.
Definition StringRef.h:730
static constexpr size_t npos
Definition StringRef.h:57
constexpr StringRef substr(size_t Start, size_t N=npos) const
Return a reference to the substring from [Start, Start + N).
Definition StringRef.h:591
bool starts_with(StringRef Prefix) const
Check if this string starts with the given Prefix.
Definition StringRef.h:258
constexpr bool empty() const
empty - Check if the string is empty.
Definition StringRef.h:140
StringRef drop_front(size_t N=1) const
Return a StringRef equal to 'this' but with the first N elements dropped.
Definition StringRef.h:629
constexpr size_t size() const
size - Get the string size.
Definition StringRef.h:143
StringRef trim(char Char) const
Return string with consecutive Char characters starting from the left and right removed.
Definition StringRef.h:844
A switch()-like statement whose cases are string literals.
StringSwitch & Case(StringLiteral S, T Value)
StringSwitch & StartsWith(StringLiteral S, T Value)
StringSwitch & Cases(std::initializer_list< StringLiteral > CaseStrings, T Value)
Class to represent struct types.
static LLVM_ABI StructType * get(LLVMContext &Context, ArrayRef< Type * > Elements, bool isPacked=false)
This static method is the primary way to create a literal StructType.
Definition Type.cpp:483
unsigned getNumElements() const
Random access to the elements.
Type * getElementType(unsigned N) const
The TimeTraceScope is a helper class to call the begin and end functions of the time trace profiler.
Triple - Helper class for working with autoconf configuration names.
Definition Triple.h:47
Twine - A lightweight data structure for efficiently representing the concatenation of temporary valu...
Definition Twine.h:82
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
static LLVM_ABI IntegerType * getInt64Ty(LLVMContext &C)
Definition Type.cpp:314
bool isVectorTy() const
True if this is an instance of VectorType.
Definition Type.h:290
static LLVM_ABI IntegerType * getInt32Ty(LLVMContext &C)
Definition Type.cpp:313
bool isFloatTy() const
Return true if this is 'float', a 32-bit IEEE fp type.
Definition Type.h:155
bool isBFloatTy() const
Return true if this is 'bfloat', a 16-bit bfloat type.
Definition Type.h:147
LLVM_ABI unsigned getPointerAddressSpace() const
Get the address space of this pointer or pointer vector type.
static LLVM_ABI IntegerType * getInt8Ty(LLVMContext &C)
Definition Type.cpp:311
Type * getScalarType() const
If this is a vector type, return the element type, otherwise return 'this'.
Definition Type.h:370
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:201
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:236
bool isPtrOrPtrVectorTy() const
Return true if this is a pointer type or a vector of pointer types.
Definition Type.h:287
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:257
bool isFPOrFPVectorTy() const
Return true if this is a FP type or a vector of FP.
Definition Type.h:227
static LLVM_ABI Type * getFloatTy(LLVMContext &C)
Definition Type.cpp:290
static LLVM_ABI Type * getBFloatTy(LLVMContext &C)
Definition Type.cpp:289
static LLVM_ABI Type * getHalfTy(LLVMContext &C)
Definition Type.cpp:288
Value * getOperand(unsigned i) const
Definition User.h:207
unsigned getNumOperands() const
Definition User.h:229
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:255
LLVM_ABI void print(raw_ostream &O, bool IsForDebug=false) const
Implement operator<< on Value.
LLVM_ABI void setName(const Twine &Name)
Change the name of the value.
Definition Value.cpp:393
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:549
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:258
iterator_range< user_iterator > users()
Definition Value.h:426
LLVM_ABI const Value * stripPointerCasts() const
Strip off pointer casts, all-zero GEPs and address space casts.
Definition Value.cpp:709
bool use_empty() const
Definition Value.h:346
bool hasName() const
Definition Value.h:261
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:318
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:399
Base class of all SIMD vector types.
static VectorType * getInteger(VectorType *VTy)
This static method gets a VectorType with the same number of elements as the input type,...
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
constexpr ScalarTy getFixedValue() const
Definition TypeSize.h:200
const ParentTy * getParent() const
Definition ilist_node.h:34
self_iterator getIterator()
Definition ilist_node.h:123
A raw_ostream that writes to an SmallVector or SmallString.
StringRef str() const
Return a StringRef for the vector contents.
CallInst * Call
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ LOCAL_ADDRESS
Address space for local memory.
@ FLAT_ADDRESS
Address space for flat memory.
@ PRIVATE_ADDRESS
Address space for private memory.
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
@ PTX_Kernel
Call to a PTX kernel. Passes all arguments in parameter space.
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
LLVM_ABI void getIntrinsicInfoTableEntries(ID id, SmallVectorImpl< IITDescriptor > &T)
Return the IIT table descriptor for the specified intrinsic into an array of IITDescriptors.
LLVM_ABI std::optional< Function * > remangleIntrinsicFunction(Function *F)
LLVM_ABI Function * getOrInsertDeclaration(Module *M, ID id, ArrayRef< Type * > OverloadTys={})
Look up the Function declaration of the intrinsic id in the Module M.
LLVM_ABI AttributeList getAttributes(LLVMContext &C, ID id, FunctionType *FT)
Return the attributes for an intrinsic.
LLVM_ABI bool getIntrinsicSignature(Intrinsic::ID, FunctionType *FT, SmallVectorImpl< Type * > &OverloadTys)
Gets the overload types of an intrinsic call by matching type contraints specified by the ....
constexpr StringLiteral GridConstant("nvvm.grid_constant")
constexpr StringLiteral MaxNTID("nvvm.maxntid")
constexpr StringLiteral MaxNReg("nvvm.maxnreg")
constexpr StringLiteral MinCTASm("nvvm.minctasm")
constexpr StringLiteral ReqNTID("nvvm.reqntid")
constexpr StringLiteral MaxClusterRank("nvvm.maxclusterrank")
constexpr StringLiteral ClusterDim("nvvm.cluster_dim")
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract_or_null(Y &&MD)
Extract a Value from Metadata, if any, allowing null.
Definition Metadata.h:709
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > dyn_extract(Y &&MD)
Extract a Value from Metadata, if any.
Definition Metadata.h:696
std::enable_if_t< detail::IsValidPointer< X, Y >::value, X * > extract(Y &&MD)
Extract a Value from Metadata.
Definition Metadata.h:668
This is an optimization pass for GlobalISel generic memory operations.
@ Offset
Definition DWP.cpp:557
@ Length
Definition DWP.cpp:557
LLVM_ABI void UpgradeIntrinsicCall(CallBase *CB, Function *NewFn)
This is the complement to the above, replacing a specific call to an intrinsic function with a call t...
LLVM_ABI void UpgradeSectionAttributes(Module &M)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
LLVM_ABI void UpgradeInlineAsmString(std::string *AsmStr)
Upgrade comment in call to inline asm that represents an objc retain release marker.
bool isValidAtomicOrdering(Int I)
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
FunctionAddr VTableAddr uintptr_t uintptr_t Int32Ty
Definition InstrProf.h:328
LLVM_ABI bool UpgradeIntrinsicFunction(Function *F, Function *&NewFn, bool CanUpgradeDebugIntrinsicsToRecords=true)
This is a more granular function that simply checks an intrinsic function for upgrading,...
LLVM_ABI MDNode * upgradeInstructionLoopAttachment(MDNode &N)
Upgrade the loop attachment metadata node.
auto dyn_cast_if_present(const Y &Val)
dyn_cast_if_present<X> - Functionally identical to dyn_cast, except that a null (or none in the case ...
Definition Casting.h:732
LLVM_ABI void UpgradeAttributes(AttrBuilder &B)
Upgrade attributes that changed format or kind.
LLVM_ABI void UpgradeCallsToIntrinsic(Function *F)
This is an auto-upgrade hook for any old intrinsic function syntaxes which need to have both the func...
LLVM_ABI void UpgradeNVVMAnnotations(Module &M)
Convert legacy nvvm.annotations metadata to appropriate function attributes.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:633
LLVM_ABI bool UpgradeModuleFlags(Module &M)
This checks for module flags which should be upgraded.
std::string utostr(uint64_t X, bool isNeg=false)
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64 bit edition.)
Definition MathExtras.h:284
void copyModuleAttrToFunctions(Module &M)
Copies module attributes to the functions in the module.
Op::Description Desc
LLVM_ABI void UpgradeOperandBundles(std::vector< OperandBundleDef > &OperandBundles)
Upgrade operand bundles (without knowing about their user instruction).
LLVM_ABI Constant * UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy)
This is an auto-upgrade for bitcast constant expression between pointers with different address space...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
FunctionAddr VTableAddr uintptr_t uintptr_t Version
Definition InstrProf.h:334
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
LLVM_ABI std::string UpgradeDataLayoutString(StringRef DL, StringRef Triple)
Upgrade the datalayout string by adding a section for address space pointers.
bool none_of(R &&Range, UnaryPredicate P)
Provide wrappers to std::none_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1752
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
LLVM_ABI GlobalVariable * UpgradeGlobalVariable(GlobalVariable *GV)
This checks for global variables which should be upgraded.
LLVM_ABI raw_fd_ostream & errs()
This returns a reference to a raw_ostream for standard error.
LLVM_ABI bool StripDebugInfo(Module &M)
Strip debug info in the module if it exists.
AtomicOrdering
Atomic ordering for LLVM's memory model.
@ Ref
The access may reference the value stored in memory.
Definition ModRef.h:32
std::string join(IteratorT Begin, IteratorT End, StringRef Separator)
Joins the strings in the range [Begin, End), adding Separator between the elements.
FunctionAddr VTableAddr uintptr_t uintptr_t Data
Definition InstrProf.h:221
OperandBundleDefT< Value * > OperandBundleDef
Definition AutoUpgrade.h:34
LLVM_ABI Instruction * UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, Instruction *&Temp)
This is an auto-upgrade for bitcast between pointers with different address spaces: the instruction i...
DWARFExpression::Operation Op
@ Dynamic
Denotes mode unknown at compile time.
ArrayRef(const T &OneElt) -> ArrayRef< T >
DenormalMode parseDenormalFPAttribute(StringRef Str)
Returns the denormal mode to use for inputs and outputs.
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
auto find_if(R &&Range, UnaryPredicate P)
Provide wrappers to std::find_if which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1771
void erase_if(Container &C, UnaryPredicate P)
Provide a container algorithm similar to C++ Library Fundamentals v2's erase_if which is equivalent t...
Definition STLExtras.h:2191
LLVM_ABI bool UpgradeDebugInfo(Module &M)
Check the debug info version number, if it is out-dated, drop the debug info.
LLVM_ABI void UpgradeFunctionAttributes(Function &F)
Correct any IR that is relying on old function attribute behavior.
LLVM_ABI MDNode * UpgradeTBAANode(MDNode &TBAANode)
If the given TBAA tag uses the scalar TBAA format, create a new node corresponding to the upgrade to ...
LLVM_ABI void UpgradeARCRuntime(Module &M)
Convert calls to ARC runtime functions to intrinsic calls and upgrade the old retain release marker t...
@ DEBUG_METADATA_VERSION
Definition Metadata.h:54
@ Default
The result value is uniform if and only if all operands are uniform.
Definition Uniformity.h:20
LLVM_ABI bool verifyModule(const Module &M, raw_ostream *OS=nullptr, bool *BrokenDebugInfo=nullptr)
Check a module for errors.
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition Alignment.h:39
Represents the full denormal controls for a function, including the default mode and the f32 specific...
Represent subnormal handling kind for floating point instruction inputs and outputs.
static constexpr DenormalMode getInvalid()
constexpr bool isValid() const
static constexpr DenormalMode getIEEE()
This struct is a compact representation of a valid (power of two) or undefined (0) alignment.
Definition Alignment.h:106