//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the auto-upgrade helper functions.
// This is where deprecated IR intrinsics and other IR features are updated to
// current specifications.
//
//===----------------------------------------------------------------------===//

#include "llvm/IR/AutoUpgrade.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfo.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsAArch64.h"
#include "llvm/IR/IntrinsicsAMDGPU.h"
#include "llvm/IR/IntrinsicsARM.h"
#include "llvm/IR/IntrinsicsNVPTX.h"
#include "llvm/IR/IntrinsicsRISCV.h"
#include "llvm/IR/IntrinsicsWebAssembly.h"
#include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/NVPTXAddrSpace.h"
#include "llvm/Support/Regex.h"
#include <cstdint>
#include <cstring>
#include <numeric>

using namespace llvm;

static cl::opt<bool>
    DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
                                cl::desc("Disable autoupgrade of debug info"));

static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }

// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
// changed their type from v4f32 to v2i64.
static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
                                  Function *&NewFn) {
  // Check whether this is an old version of the function, which received
  // v4f32 arguments.
  Type *Arg0Type = F->getFunctionType()->getParamType(0);
  if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
    return false;

  // Yes, it's old, replace it with new version.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
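
// Illustrative example (assumed bitcode, not from the original source): an old
// module might contain
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// The declaration is renamed to "llvm.x86.sse41.ptestc.old" and a fresh
// declaration taking <2 x i64> operands is created; the call sites are
// rewritten later when the upgraded intrinsic call is materialized.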

// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
// arguments have changed their type from i32 to i8.
static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
                                             Function *&NewFn) {
  // Check that the last argument is an i32.
  Type *LastArgType = F->getFunctionType()->getParamType(
      F->getFunctionType()->getNumParams() - 1);
  if (!LastArgType->isIntegerTy(32))
    return false;

  // Move this function aside and map down.
  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
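
// Illustrative example (assumed): the trailing immediate of
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// is i8 in the current definition, so the old i32 form above is renamed and
// remapped onto the i8 declaration.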

// Upgrade the declaration of fp compare intrinsics that change return type
// from scalar to vXi1 mask.
static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  // Check if the return type is a vector.
  if (F->getReturnType()->isVectorTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
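
// Illustrative example (assumed): the old avx512.mask.cmp.ps.512 returned a
// scalar i16 mask, while the current x86.avx512.mask.cmp.ps.512 returns
// <16 x i1>; the upgraded call site converts the vXi1 result back to the
// scalar mask type the old callers expect.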

// Upgrade the declaration of multiply and add bytes intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i8.
static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are vectors of i8.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
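
// Illustrative example (assumed): the old dot-product declaration
//   declare <16 x i32> @llvm.x86.avx512.vpdpbusd.512(<16 x i32>, <16 x i32>,
//                                                    <16 x i32>)
// now takes <64 x i8> for the two multiplicand operands, so the i32 form is
// renamed and mapped onto the new declaration.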

// Upgrade the declaration of multiply and add words intrinsics whose input
// arguments' types have changed from vectors of i32 to vectors of i16.
static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID,
                                       Function *&NewFn) {
  // Check if the input argument types are vectors of i16.
  Type *Arg1Type = F->getFunctionType()->getParamType(1);
  Type *Arg2Type = F->getFunctionType()->getParamType(2);
  if (Arg1Type->isVectorTy() &&
      cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
      Arg2Type->isVectorTy() &&
      cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
                                    Function *&NewFn) {
  if (F->getReturnType()->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}

static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
                                      Function *&NewFn) {
  if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
    return false;

  rename(F);
  NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
  return true;
}
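
// Illustrative example (assumed): BF16 values were originally modeled as i16,
// e.g.
//   declare <8 x i16> @llvm.x86.avx512bf16.cvtneps2bf16.256(<8 x float>)
// The current declarations use the bfloat type instead, so the two helpers
// above detect the old integer form by inspecting the return or operand type.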

static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
  // All of the intrinsic matches below should be marked with which llvm
  // version started autoupgrading them. At some point in the future we would
  // like to use this information to remove upgrade code for some older
  // intrinsics. It is currently undecided how we will determine that future
  // point.
  if (Name.consume_front("avx."))
    return (Name.starts_with("blend.p") || // Added in 3.7
            Name == "cvt.ps2.pd.256" || // Added in 3.9
            Name == "cvtdq2.pd.256" || // Added in 3.9
            Name == "cvtdq2.ps.256" || // Added in 7.0
            Name.starts_with("movnt.") || // Added in 3.2
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 3.5
            Name.starts_with("vbroadcastf128") || // Added in 4.0
            Name.starts_with("vextractf128.") || // Added in 3.7
            Name.starts_with("vinsertf128.") || // Added in 3.7
            Name.starts_with("vperm2f128.") || // Added in 6.0
            Name.starts_with("vpermil.")); // Added in 3.1

  if (Name.consume_front("avx2."))
    return (Name == "movntdqa" || // Added in 5.0
            Name.starts_with("pabs.") || // Added in 6.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pblendd.") || // Added in 3.7
            Name == "pblendw" || // Added in 3.7
            Name.starts_with("pbroadcast") || // Added in 3.8
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name.starts_with("pmax") || // Added in 3.9
            Name.starts_with("pmin") || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.9
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmul.dq" || // Added in 7.0
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("vbroadcast") || // Added in 3.8
            Name == "vbroadcasti128" || // Added in 3.7
            Name == "vextracti128" || // Added in 3.7
            Name == "vinserti128" || // Added in 3.7
            Name == "vperm2i128"); // Added in 6.0

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask."))
      // 'avx512.mask.*'
      return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("and.") || // Added in 3.9
              Name.starts_with("andn.") || // Added in 3.9
              Name.starts_with("broadcast.s") || // Added in 3.9
              Name.starts_with("broadcastf32x4.") || // Added in 6.0
              Name.starts_with("broadcastf32x8.") || // Added in 6.0
              Name.starts_with("broadcastf64x2.") || // Added in 6.0
              Name.starts_with("broadcastf64x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x4.") || // Added in 6.0
              Name.starts_with("broadcasti32x8.") || // Added in 6.0
              Name.starts_with("broadcasti64x2.") || // Added in 6.0
              Name.starts_with("broadcasti64x4.") || // Added in 6.0
              Name.starts_with("cmp.b") || // Added in 5.0
              Name.starts_with("cmp.d") || // Added in 5.0
              Name.starts_with("cmp.q") || // Added in 5.0
              Name.starts_with("cmp.w") || // Added in 5.0
              Name.starts_with("compress.b") || // Added in 9.0
              Name.starts_with("compress.d") || // Added in 9.0
              Name.starts_with("compress.p") || // Added in 9.0
              Name.starts_with("compress.q") || // Added in 9.0
              Name.starts_with("compress.store.") || // Added in 7.0
              Name.starts_with("compress.w") || // Added in 9.0
              Name.starts_with("conflict.") || // Added in 9.0
              Name.starts_with("cvtdq2pd.") || // Added in 4.0
              Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
              Name == "cvtpd2dq.256" || // Added in 7.0
              Name == "cvtpd2ps.256" || // Added in 7.0
              Name == "cvtps2pd.128" || // Added in 7.0
              Name == "cvtps2pd.256" || // Added in 7.0
              Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtqq2ps.256" || // Added in 9.0
              Name == "cvtqq2ps.512" || // Added in 9.0
              Name == "cvttpd2dq.256" || // Added in 7.0
              Name == "cvttps2dq.128" || // Added in 7.0
              Name == "cvttps2dq.256" || // Added in 7.0
              Name.starts_with("cvtudq2pd.") || // Added in 4.0
              Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
              Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
              Name == "cvtuqq2ps.256" || // Added in 9.0
              Name == "cvtuqq2ps.512" || // Added in 9.0
              Name.starts_with("dbpsadbw.") || // Added in 7.0
              Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("expand.b") || // Added in 9.0
              Name.starts_with("expand.d") || // Added in 9.0
              Name.starts_with("expand.load.") || // Added in 7.0
              Name.starts_with("expand.p") || // Added in 9.0
              Name.starts_with("expand.q") || // Added in 9.0
              Name.starts_with("expand.w") || // Added in 9.0
              Name.starts_with("fpclass.p") || // Added in 7.0
              Name.starts_with("insert") || // Added in 4.0
              Name.starts_with("load.") || // Added in 3.9
              Name.starts_with("loadu.") || // Added in 3.9
              Name.starts_with("lzcnt.") || // Added in 5.0
              Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
              Name.starts_with("movddup") || // Added in 3.9
              Name.starts_with("move.s") || // Added in 4.0
              Name.starts_with("movshdup") || // Added in 3.9
              Name.starts_with("movsldup") || // Added in 3.9
              Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("or.") || // Added in 3.9
              Name.starts_with("pabs.") || // Added in 6.0
              Name.starts_with("packssdw.") || // Added in 5.0
              Name.starts_with("packsswb.") || // Added in 5.0
              Name.starts_with("packusdw.") || // Added in 5.0
              Name.starts_with("packuswb.") || // Added in 5.0
              Name.starts_with("padd.") || // Added in 4.0
              Name.starts_with("padds.") || // Added in 8.0
              Name.starts_with("paddus.") || // Added in 8.0
              Name.starts_with("palignr.") || // Added in 3.9
              Name.starts_with("pand.") || // Added in 3.9
              Name.starts_with("pandn.") || // Added in 3.9
              Name.starts_with("pavg") || // Added in 6.0
              Name.starts_with("pbroadcast") || // Added in 6.0
              Name.starts_with("pcmpeq.") || // Added in 3.9
              Name.starts_with("pcmpgt.") || // Added in 3.9
              Name.starts_with("perm.df.") || // Added in 3.9
              Name.starts_with("perm.di.") || // Added in 3.9
              Name.starts_with("permvar.") || // Added in 7.0
              Name.starts_with("pmaddubs.w.") || // Added in 7.0
              Name.starts_with("pmaddw.d.") || // Added in 7.0
              Name.starts_with("pmax") || // Added in 4.0
              Name.starts_with("pmin") || // Added in 4.0
              Name == "pmov.qd.256" || // Added in 9.0
              Name == "pmov.qd.512" || // Added in 9.0
              Name == "pmov.wb.256" || // Added in 9.0
              Name == "pmov.wb.512" || // Added in 9.0
              Name.starts_with("pmovsx") || // Added in 4.0
              Name.starts_with("pmovzx") || // Added in 4.0
              Name.starts_with("pmul.dq.") || // Added in 4.0
              Name.starts_with("pmul.hr.sw.") || // Added in 7.0
              Name.starts_with("pmulh.w.") || // Added in 7.0
              Name.starts_with("pmulhu.w.") || // Added in 7.0
              Name.starts_with("pmull.") || // Added in 4.0
              Name.starts_with("pmultishift.qb.") || // Added in 8.0
              Name.starts_with("pmulu.dq.") || // Added in 4.0
              Name.starts_with("por.") || // Added in 3.9
              Name.starts_with("prol.") || // Added in 8.0
              Name.starts_with("prolv.") || // Added in 8.0
              Name.starts_with("pror.") || // Added in 8.0
              Name.starts_with("prorv.") || // Added in 8.0
              Name.starts_with("pshuf.b.") || // Added in 4.0
              Name.starts_with("pshuf.d.") || // Added in 3.9
              Name.starts_with("pshufh.w.") || // Added in 3.9
              Name.starts_with("pshufl.w.") || // Added in 3.9
              Name.starts_with("psll.d") || // Added in 4.0
              Name.starts_with("psll.q") || // Added in 4.0
              Name.starts_with("psll.w") || // Added in 4.0
              Name.starts_with("pslli") || // Added in 4.0
              Name.starts_with("psllv") || // Added in 4.0
              Name.starts_with("psra.d") || // Added in 4.0
              Name.starts_with("psra.q") || // Added in 4.0
              Name.starts_with("psra.w") || // Added in 4.0
              Name.starts_with("psrai") || // Added in 4.0
              Name.starts_with("psrav") || // Added in 4.0
              Name.starts_with("psrl.d") || // Added in 4.0
              Name.starts_with("psrl.q") || // Added in 4.0
              Name.starts_with("psrl.w") || // Added in 4.0
              Name.starts_with("psrli") || // Added in 4.0
              Name.starts_with("psrlv") || // Added in 4.0
              Name.starts_with("psub.") || // Added in 4.0
              Name.starts_with("psubs.") || // Added in 8.0
              Name.starts_with("psubus.") || // Added in 8.0
              Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("punpckh") || // Added in 3.9
              Name.starts_with("punpckl") || // Added in 3.9
              Name.starts_with("pxor.") || // Added in 3.9
              Name.starts_with("shuf.f") || // Added in 6.0
              Name.starts_with("shuf.i") || // Added in 6.0
              Name.starts_with("shuf.p") || // Added in 4.0
              Name.starts_with("sqrt.p") || // Added in 7.0
              Name.starts_with("store.b.") || // Added in 3.9
              Name.starts_with("store.d.") || // Added in 3.9
              Name.starts_with("store.p") || // Added in 3.9
              Name.starts_with("store.q.") || // Added in 3.9
              Name.starts_with("store.w.") || // Added in 3.9
              Name == "store.ss" || // Added in 7.0
              Name.starts_with("storeu.") || // Added in 3.9
              Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
              Name.starts_with("ucmp.") || // Added in 5.0
              Name.starts_with("unpckh.") || // Added in 3.9
              Name.starts_with("unpckl.") || // Added in 3.9
              Name.starts_with("valign.") || // Added in 4.0
              Name == "vcvtph2ps.128" || // Added in 11.0
              Name == "vcvtph2ps.256" || // Added in 11.0
              Name.starts_with("vextract") || // Added in 4.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfnmadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermi2var.") || // Added in 7.0
              Name.starts_with("vpermil.p") || // Added in 3.9
              Name.starts_with("vpermilvar.") || // Added in 4.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshld.") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrd.") || // Added in 7.0
              Name.starts_with("vpshrdv.") || // Added in 8.0
              Name.starts_with("vpshufbitqmb.") || // Added in 8.0
              Name.starts_with("xor.")); // Added in 3.9

    if (Name.consume_front("mask3."))
      // 'avx512.mask3.*'
      return (Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vfmsub.") || // Added in 7.0
              Name.starts_with("vfmsubadd.") || // Added in 7.0
              Name.starts_with("vfnmsub.")); // Added in 7.0

    if (Name.consume_front("maskz."))
      // 'avx512.maskz.*'
      return (Name.starts_with("pternlog.") || // Added in 7.0
              Name.starts_with("vfmadd.") || // Added in 7.0
              Name.starts_with("vfmaddsub.") || // Added in 7.0
              Name.starts_with("vpdpbusd.") || // Added in 7.0
              Name.starts_with("vpdpbusds.") || // Added in 7.0
              Name.starts_with("vpdpwssd.") || // Added in 7.0
              Name.starts_with("vpdpwssds.") || // Added in 7.0
              Name.starts_with("vpermt2var.") || // Added in 7.0
              Name.starts_with("vpmadd52") || // Added in 7.0
              Name.starts_with("vpshldv.") || // Added in 8.0
              Name.starts_with("vpshrdv.")); // Added in 8.0

    // 'avx512.*'
    return (Name == "movntdqa" || // Added in 5.0
            Name == "pmul.dq.512" || // Added in 7.0
            Name == "pmulu.dq.512" || // Added in 7.0
            Name.starts_with("broadcastm") || // Added in 6.0
            Name.starts_with("cmp.p") || // Added in 12.0
            Name.starts_with("cvtb2mask.") || // Added in 7.0
            Name.starts_with("cvtd2mask.") || // Added in 7.0
            Name.starts_with("cvtmask2") || // Added in 5.0
            Name.starts_with("cvtq2mask.") || // Added in 7.0
            Name == "cvtusi2sd" || // Added in 7.0
            Name.starts_with("cvtw2mask.") || // Added in 7.0
            Name == "kand.w" || // Added in 7.0
            Name == "kandn.w" || // Added in 7.0
            Name == "knot.w" || // Added in 7.0
            Name == "kor.w" || // Added in 7.0
            Name == "kortestc.w" || // Added in 7.0
            Name == "kortestz.w" || // Added in 7.0
            Name.starts_with("kunpck") || // Added in 6.0
            Name == "kxnor.w" || // Added in 7.0
            Name == "kxor.w" || // Added in 7.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("pbroadcast") || // Added in 3.9
            Name.starts_with("prol") || // Added in 8.0
            Name.starts_with("pror") || // Added in 8.0
            Name.starts_with("psll.dq") || // Added in 3.9
            Name.starts_with("psrl.dq") || // Added in 3.9
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("ptestm") || // Added in 6.0
            Name.starts_with("ptestnm") || // Added in 6.0
            Name.starts_with("storent.") || // Added in 3.9
            Name.starts_with("vbroadcast.s") || // Added in 7.0
            Name.starts_with("vpshld.") || // Added in 8.0
            Name.starts_with("vpshrd.")); // Added in 8.0
  }

  if (Name.consume_front("fma."))
    return (Name.starts_with("vfmadd.") || // Added in 7.0
            Name.starts_with("vfmsub.") || // Added in 7.0
            Name.starts_with("vfmsubadd.") || // Added in 7.0
            Name.starts_with("vfnmadd.") || // Added in 7.0
            Name.starts_with("vfnmsub.")); // Added in 7.0

  if (Name.consume_front("fma4."))
    return Name.starts_with("vfmadd.s"); // Added in 7.0

  if (Name.consume_front("sse."))
    return (Name == "add.ss" || // Added in 4.0
            Name == "cvtsi2ss" || // Added in 7.0
            Name == "cvtsi642ss" || // Added in 7.0
            Name == "div.ss" || // Added in 4.0
            Name == "mul.ss" || // Added in 4.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.ss" || // Added in 7.0
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.ss"); // Added in 4.0

  if (Name.consume_front("sse2."))
    return (Name == "add.sd" || // Added in 4.0
            Name == "cvtdq2pd" || // Added in 3.9
            Name == "cvtdq2ps" || // Added in 7.0
            Name == "cvtps2pd" || // Added in 3.9
            Name == "cvtsi2sd" || // Added in 7.0
            Name == "cvtsi642sd" || // Added in 7.0
            Name == "cvtss2sd" || // Added in 7.0
            Name == "div.sd" || // Added in 4.0
            Name == "mul.sd" || // Added in 4.0
            Name.starts_with("padds.") || // Added in 8.0
            Name.starts_with("paddus.") || // Added in 8.0
            Name.starts_with("pcmpeq.") || // Added in 3.1
            Name.starts_with("pcmpgt.") || // Added in 3.1
            Name == "pmaxs.w" || // Added in 3.9
            Name == "pmaxu.b" || // Added in 3.9
            Name == "pmins.w" || // Added in 3.9
            Name == "pminu.b" || // Added in 3.9
            Name == "pmulu.dq" || // Added in 7.0
            Name.starts_with("pshuf") || // Added in 3.9
            Name.starts_with("psll.dq") || // Added in 3.7
            Name.starts_with("psrl.dq") || // Added in 3.7
            Name.starts_with("psubs.") || // Added in 8.0
            Name.starts_with("psubus.") || // Added in 8.0
            Name.starts_with("sqrt.p") || // Added in 7.0
            Name == "sqrt.sd" || // Added in 7.0
            Name == "storel.dq" || // Added in 3.9
            Name.starts_with("storeu.") || // Added in 3.9
            Name == "sub.sd"); // Added in 4.0

  if (Name.consume_front("sse41."))
    return (Name.starts_with("blendp") || // Added in 3.7
            Name == "movntdqa" || // Added in 5.0
            Name == "pblendw" || // Added in 3.7
            Name == "pmaxsb" || // Added in 3.9
            Name == "pmaxsd" || // Added in 3.9
            Name == "pmaxud" || // Added in 3.9
            Name == "pmaxuw" || // Added in 3.9
            Name == "pminsb" || // Added in 3.9
            Name == "pminsd" || // Added in 3.9
            Name == "pminud" || // Added in 3.9
            Name == "pminuw" || // Added in 3.9
            Name.starts_with("pmovsx") || // Added in 3.8
            Name.starts_with("pmovzx") || // Added in 3.9
            Name == "pmuldq"); // Added in 7.0

  if (Name.consume_front("sse42."))
    return Name == "crc32.64.8"; // Added in 3.4

  if (Name.consume_front("sse4a."))
    return Name.starts_with("movnt."); // Added in 3.9

  if (Name.consume_front("ssse3."))
    return (Name == "pabs.b.128" || // Added in 6.0
            Name == "pabs.d.128" || // Added in 6.0
            Name == "pabs.w.128"); // Added in 6.0

  if (Name.consume_front("xop."))
    return (Name == "vpcmov" || // Added in 3.8
            Name == "vpcmov.256" || // Added in 5.0
            Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
            Name.starts_with("vprot")); // Added in 8.0

  return (Name == "addcarry.u32" || // Added in 8.0
          Name == "addcarry.u64" || // Added in 8.0
          Name == "addcarryx.u32" || // Added in 8.0
          Name == "addcarryx.u64" || // Added in 8.0
          Name == "subborrow.u32" || // Added in 8.0
          Name == "subborrow.u64" || // Added in 8.0
          Name.starts_with("vcvtph2ps.")); // Added in 11.0
}
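
// Illustrative usage (assumed): for a module containing
//   declare <8 x float> @llvm.x86.avx.sqrt.ps.256(<8 x float>)
// this predicate returns true, and upgradeX86IntrinsicFunction below then
// reports the intrinsic as upgradeable with NewFn == nullptr, meaning calls
// are expanded inline rather than remapped to a replacement declaration.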

static bool upgradeX86IntrinsicFunction(Function *F, StringRef Name,
                                        Function *&NewFn) {
  // Only handle intrinsics that start with "x86.".
  if (!Name.consume_front("x86."))
    return false;

  if (shouldUpgradeX86Intrinsic(F, Name)) {
    NewFn = nullptr;
    return true;
  }

  if (Name == "rdtscp") { // Added in 8.0
    // If this intrinsic has 0 operands, it's the new version.
    if (F->getFunctionType()->getNumParams() == 0)
      return false;

    rename(F);
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::x86_rdtscp);
    return true;
  }

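// Illustrative example (assumed): the old form was declared roughly as
//   declare i64 @llvm.x86.rdtscp(ptr)
// writing the TSC_AUX value through the pointer, while the zero-operand
// replacement returns both values directly in an aggregate.
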
  Intrinsic::ID ID;

  // SSE4.1 ptest functions may have an old signature.
  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("c", Intrinsic::x86_sse41_ptestc)
             .Case("z", Intrinsic::x86_sse41_ptestz)
             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradePTESTIntrinsic(F, ID, NewFn);

    return false;
  }

  // Several blend and other instructions with masks used the wrong number of
  // bits.

  // Added in 3.6
  ID = StringSwitch<Intrinsic::ID>(Name)
           .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
           .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
           .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
           .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
           .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
           .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
           .Default(Intrinsic::not_intrinsic);
  if (ID != Intrinsic::not_intrinsic)
    return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);

  if (Name.consume_front("avx512.")) {
    if (Name.consume_front("mask.cmp.")) {
      // Added in 7.0
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
               .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
               .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
               .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
               .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
               .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MaskedFPCompare(F, ID, NewFn);
    } else if (Name.starts_with("vpdpbusd.") ||
               Name.starts_with("vpdpbusds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
               .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
               .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
               .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
               .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
               .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.starts_with("vpdpwssd.") ||
               Name.starts_with("vpdpwssds.")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
               .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
               .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
               .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
               .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
               .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx512.*'.
  }

  if (Name.consume_front("avx2.")) {
    if (Name.consume_front("vpdpb")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
               .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
               .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
               .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.consume_front("vpdpw")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
               .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
               .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
               .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
               .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
               .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
               .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
               .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
               .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
               .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
               .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
               .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx2.*'
  }

  if (Name.consume_front("avx10.")) {
    if (Name.consume_front("vpdpb")) {
      // Added in 21.1
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
               .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddBytes(F, ID, NewFn);
    } else if (Name.consume_front("vpdpw")) {
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
               .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
               .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
               .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
               .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
               .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
               .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic)
        return upgradeX86MultiplyAddWords(F, ID, NewFn);
    }
    return false; // No other 'x86.avx10.*'
  }

  if (Name.consume_front("avx512bf16.")) {
    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("cvtne2ps2bf16.128",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
             .Case("cvtne2ps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
             .Case("cvtne2ps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
             .Case("mask.cvtneps2bf16.128",
                   Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
             .Case("cvtneps2bf16.256",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
             .Case("cvtneps2bf16.512",
                   Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16Intrinsic(F, ID, NewFn);

    // Added in 9.0
    ID = StringSwitch<Intrinsic::ID>(Name)
             .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
             .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
             .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
             .Default(Intrinsic::not_intrinsic);
    if (ID != Intrinsic::not_intrinsic)
      return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
    return false; // No other 'x86.avx512bf16.*'.
  }

  if (Name.consume_front("xop.")) {
    Intrinsic::ID ID = Intrinsic::not_intrinsic;
    if (Name.starts_with("vpermil2")) { // Added in 3.9
      // Upgrade any XOP PERMIL2 index operand still using a float/double
      // vector.
      auto Idx = F->getFunctionType()->getParamType(2);
      if (Idx->isFPOrFPVectorTy()) {
        unsigned IdxSize = Idx->getPrimitiveSizeInBits();
        unsigned EltSize = Idx->getScalarSizeInBits();
        if (EltSize == 64 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2pd;
        else if (EltSize == 32 && IdxSize == 128)
          ID = Intrinsic::x86_xop_vpermil2ps;
        else if (EltSize == 64 && IdxSize == 256)
          ID = Intrinsic::x86_xop_vpermil2pd_256;
        else
          ID = Intrinsic::x86_xop_vpermil2ps_256;
      }
    } else if (F->arg_size() == 2)
      // frcz.ss/sd may need to have an argument dropped. Added in 3.2
      ID = StringSwitch<Intrinsic::ID>(Name)
               .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
               .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
               .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
      return true;
    }
    return false; // No other 'x86.xop.*'
  }

  if (Name == "seh.recoverfp") {
    NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                              Intrinsic::eh_recoverfp);
    return true;
  }

  return false;
}

// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic functions. Return true iff
// so. IsArm: 'arm.*', !IsArm: 'aarch64.*'.
static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
                                                 StringRef Name,
                                                 Function *&NewFn) {
  if (Name.starts_with("rbit")) {
    // '(arm|aarch64).rbit'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
    return true;
  }

  if (Name == "thread.pointer") {
    // '(arm|aarch64).thread.pointer'.
    NewFn = Intrinsic::getOrInsertDeclaration(
        F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
    return true;
  }

  bool Neon = Name.consume_front("neon.");
  if (Neon) {
    // '(arm|aarch64).neon.*'.
    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
    // v16i8 respectively.
    if (Name.consume_front("bfdot.")) {
      // '(arm|aarch64).neon.bfdot.*'.
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              .Cases({"v2f32.v8i8", "v4f32.v16i8"},
                     IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
                           : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
        assert((OperandWidth == 64 || OperandWidth == 128) &&
               "Unexpected operand width");
        LLVMContext &Ctx = F->getParent()->getContext();
        std::array<Type *, 2> Tys{
            {F->getReturnType(),
             FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }
      return false; // No other '(arm|aarch64).neon.bfdot.*'.
    }

    // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
    // anymore and accept v8bf16 instead of v16i8.
    if (Name.consume_front("bfm")) {
      // '(arm|aarch64).neon.bfm*'.
      if (Name.consume_back(".v4f32.v16i8")) {
        // '(arm|aarch64).neon.bfm*.v4f32.v16i8'.
        Intrinsic::ID ID =
            StringSwitch<Intrinsic::ID>(Name)
                .Case("mla",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
                .Case("lalb",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
                .Case("lalt",
                      IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
                            : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
                .Default(Intrinsic::not_intrinsic);
        if (ID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
          return true;
        }
        return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
      }
      return false; // No other '(arm|aarch64).neon.bfm*'.
    }
    // Continue on to AArch64 Neon or Arm Neon.
  }
  // Continue on to Arm or AArch64.

  if (IsArm) {
    // 'arm.*'.
    if (Neon) {
      // 'arm.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("vclz.", Intrinsic::ctlz)
                             .StartsWith("vcnt.", Intrinsic::ctpop)
                             .StartsWith("vqadds.", Intrinsic::sadd_sat)
                             .StartsWith("vqaddu.", Intrinsic::uadd_sat)
                             .StartsWith("vqsubs.", Intrinsic::ssub_sat)
                             .StartsWith("vqsubu.", Intrinsic::usub_sat)
                             .StartsWith("vrinta.", Intrinsic::round)
                             .StartsWith("vrintn.", Intrinsic::roundeven)
                             .StartsWith("vrintm.", Intrinsic::floor)
                             .StartsWith("vrintp.", Intrinsic::ceil)
                             .StartsWith("vrintx.", Intrinsic::rint)
                             .StartsWith("vrintz.", Intrinsic::trunc)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.consume_front("vst")) {
        // 'arm.neon.vst*'.
        static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
        SmallVector<StringRef, 2> Groups;
        if (vstRegex.match(Name, &Groups)) {
          static const Intrinsic::ID StoreInts[] = {
              Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
              Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};

          static const Intrinsic::ID StoreLaneInts[] = {
              Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
              Intrinsic::arm_neon_vst4lane};

          auto fArgs = F->getFunctionType()->params();
          Type *Tys[] = {fArgs[0], fArgs[1]};
          if (Groups[1].size() == 1)
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreInts[fArgs.size() - 3], Tys);
          else
            NewFn = Intrinsic::getOrInsertDeclaration(
                F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
          return true;
        }
        return false; // No other 'arm.neon.vst*'.
      }

      return false; // No other 'arm.neon.*'.
    }

    if (Name.consume_front("mve.")) {
      // 'arm.mve.*'.
      if (Name == "vctp64") {
        if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
          // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
          // the function and deal with it below in UpgradeIntrinsicCall.
          rename(F);
          return true;
        }
        return false; // Not 'arm.mve.vctp64'.
      }

      if (Name.starts_with("vrintn.v")) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
        return true;
      }

      // These too are changed to accept a v2i1 instead of the old v4i1.
      if (Name.consume_back(".v4i1")) {
        // 'arm.mve.*.v4i1'.
        if (Name.consume_back(".predicated.v2i64.v4i32"))
          // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'.
          return Name == "mull.int" || Name == "vqdmull";

        if (Name.consume_back(".v2i64")) {
          // 'arm.mve.*.v2i64.v4i1'.
          bool IsGather = Name.consume_front("vldr.gather.");
          if (IsGather || Name.consume_front("vstr.scatter.")) {
            if (Name.consume_front("base.")) {
              // Optional 'wb.' prefix.
              Name.consume_front("wb.");
              // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
              // predicated.v2i64.v2i64.v4i1'.
              return Name == "predicated.v2i64";
            }

            if (Name.consume_front("offset.predicated."))
              return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
                     Name == (IsGather ? "v2i64.p0" : "p0.v2i64");

            // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
            return false;
          }

          return false; // No other 'arm.mve.*.v2i64.v4i1'.
        }
        return false; // No other 'arm.mve.*.v4i1'.
      }
      return false; // No other 'arm.mve.*'.
    }

    if (Name.consume_front("cde.vcx")) {
      // 'arm.cde.vcx*'.
      if (Name.consume_back(".predicated.v2i64.v4i1"))
        // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
        return Name == "1q" || Name == "1qa" || Name == "2q" ||
               Name == "2qa" || Name == "3q" || Name == "3qa";

      return false; // No other 'arm.cde.vcx*'.
    }
  } else {
    // 'aarch64.*'.
    if (Neon) {
      // 'aarch64.neon.*'.
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("frintn", Intrinsic::roundeven)
                             .StartsWith("rbit", Intrinsic::bitreverse)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }

      if (Name.starts_with("addp")) {
        // 'aarch64.neon.addp*'.
        if (F->arg_size() != 2)
          return false; // Invalid IR.
        VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
        if (Ty && Ty->getElementType()->isFloatingPointTy()) {
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
          return true;
        }
      }

      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
      if (Name.starts_with("bfcvt")) {
        NewFn = nullptr;
        return true;
      }

      return false; // No other 'aarch64.neon.*'.
    }
    if (Name.consume_front("sve.")) {
      // 'aarch64.sve.*'.
      if (Name.consume_front("bf")) {
        if (Name.consume_back(".lane")) {
          // 'aarch64.sve.bf*.lane'.
          Intrinsic::ID ID =
              StringSwitch<Intrinsic::ID>(Name)
                  .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
                  .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
                  .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
                  .Default(Intrinsic::not_intrinsic);
          if (ID != Intrinsic::not_intrinsic) {
            NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
            return true;
          }
          return false; // No other 'aarch64.sve.bf*.lane'.
        }
        return false; // No other 'aarch64.sve.bf*'.
      }

      // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
      if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("addqv")) {
        // 'aarch64.sve.addqv'.
        if (!F->getReturnType()->isFPOrFPVectorTy())
          return false;

        auto Args = F->getFunctionType()->params();
        Type *Tys[] = {F->getReturnType(), Args[1]};
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
        return true;
      }

      if (Name.consume_front("ld")) {
        // 'aarch64.sve.ld*'.
        static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
        if (LdRegex.match(Name)) {
          Type *ScalarTy =
              cast<VectorType>(F->getReturnType())->getElementType();
          ElementCount EC =
              cast<VectorType>(F->arg_begin()->getType())->getElementCount();
          Type *Ty = VectorType::get(ScalarTy, EC);
          static const Intrinsic::ID LoadIDs[] = {
              Intrinsic::aarch64_sve_ld2_sret,
              Intrinsic::aarch64_sve_ld3_sret,
              Intrinsic::aarch64_sve_ld4_sret,
          };
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                    LoadIDs[Name[0] - '2'], Ty);
          return true;
        }
        return false; // No other 'aarch64.sve.ld*'.
      }

      if (Name.consume_front("tuple.")) {
        // 'aarch64.sve.tuple.*'.
        if (Name.starts_with("get")) {
          // 'aarch64.sve.tuple.get*'.
          Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_extract, Tys);
          return true;
        }

        if (Name.starts_with("set")) {
          // 'aarch64.sve.tuple.set*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {Args[0], Args[2], Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }

        static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
        if (CreateTupleRegex.match(Name)) {
          // 'aarch64.sve.tuple.create*'.
          auto Args = F->getFunctionType()->params();
          Type *Tys[] = {F->getReturnType(), Args[1]};
          NewFn = Intrinsic::getOrInsertDeclaration(
              F->getParent(), Intrinsic::vector_insert, Tys);
          return true;
        }
        return false; // No other 'aarch64.sve.tuple.*'.
      }

      if (Name.starts_with("rev.nxv")) {
        // 'aarch64.sve.rev.<Ty>'.
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
        return true;
      }

      return false; // No other 'aarch64.sve.*'.
    }
  }
  return false; // No other 'arm.*', 'aarch64.*'.
}
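
// Illustrative example (assumed): an old module calling
//   declare <4 x i32> @llvm.arm.neon.vclz.v4i32(<4 x i32>)
// is remapped to the target-independent @llvm.ctlz.v4i32 declaration; the
// call itself is fixed up later, since ctlz takes an extra i1
// 'is_zero_poison' operand.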

static Intrinsic::ID shouldUpgradeNVPTXTMAG2SIntrinsics(Function *F,
                                                        StringRef Name) {
  if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("im2col.3d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
            .Case("im2col.4d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
            .Case("im2col.5d",
                  Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
            .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
            .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
            .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
            .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
            .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
            .Default(Intrinsic::not_intrinsic);

    if (ID == Intrinsic::not_intrinsic)
      return ID;

    // These intrinsics may need upgrade for two reasons:
    // (1) When the address-space of the first argument is shared[AS=3]
    // (and we upgrade it to use shared_cluster address-space[AS=7])
    if (F->getArg(0)->getType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return ID;

    // (2) When there are only two boolean flag arguments at the end:
    //
    // The last three parameters of the older version of these
    // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
    //
    // The newer version reads as:
    // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
    //
    // So, when the type of the [N-3]rd argument is "not i1", then
    // it is the older version and we need to upgrade.
    size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
    Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
    if (!ArgType->isIntegerTy(1))
      return ID;
  }

  return Intrinsic::not_intrinsic;
}
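
// Illustrative sketch (assumed operand layout): an older G2S declaration like
//   declare void @llvm.nvvm.cp.async.bulk.tensor.g2s.tile.1d(
//       ptr addrspace(3), ptr addrspace(3), ptr, i32, ..., i64, i1, i1)
// trips both checks above: the destination is still in the shared address
// space (3) rather than shared_cluster (7), and the trailing i32
// cta_group flag of the newer form is absent.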

static Intrinsic::ID shouldUpgradeNVPTXSharedClusterIntrinsic(Function *F,
                                                              StringRef Name) {
  if (Name.consume_front("mapa.shared.cluster"))
    if (F->getReturnType()->getPointerAddressSpace() ==
        NVPTXAS::ADDRESS_SPACE_SHARED)
      return Intrinsic::nvvm_mapa_shared_cluster;

  if (Name.consume_front("cp.async.bulk.")) {
    Intrinsic::ID ID =
        StringSwitch<Intrinsic::ID>(Name)
            .Case("global.to.shared.cluster",
                  Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
            .Case("shared.cta.to.cluster",
                  Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
            .Default(Intrinsic::not_intrinsic);

    if (ID != Intrinsic::not_intrinsic)
      if (F->getArg(0)->getType()->getPointerAddressSpace() ==
          NVPTXAS::ADDRESS_SPACE_SHARED)
        return ID;
  }

  return Intrinsic::not_intrinsic;
}
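
// Illustrative usage (assumed): both NVPTX helpers above return
// Intrinsic::not_intrinsic to mean "leave the function alone"; any other ID
// tells upgradeIntrinsicFunction1 below to rename the old declaration and
// re-create it, after which the call sites are rewritten to use the
// shared_cluster (addrspace 7) pointer type.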

static Intrinsic::ID shouldUpgradeNVPTXBF16Intrinsic(StringRef Name) {
  if (Name.consume_front("fma.rn."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
        .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
        .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmax."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmax_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("fmin."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_fmin_bf16)
        .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
        .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
        .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
        .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
        .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
        .Case("ftz.nan.xorsign.abs.bf16",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
        .Case("ftz.nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
        .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
        .Case("ftz.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
        .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
        .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
        .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
        .Case("nan.xorsign.abs.bf16x2",
              Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
        .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
        .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  if (Name.consume_front("neg."))
    return StringSwitch<Intrinsic::ID>(Name)
        .Case("bf16", Intrinsic::nvvm_neg_bf16)
        .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
        .Default(Intrinsic::not_intrinsic);

  return Intrinsic::not_intrinsic;
}
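
// Illustrative example (assumed): before bfloat support, these declarations
// modeled BF16 values as integers, e.g.
//   declare i16 @llvm.nvvm.neg.bf16(i16)
// The check in upgradeIntrinsicFunction1 below keys off the return type: if
// it is not bfloat-based, the function is the old integer form and gets
// upgraded.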

static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
  return Name.consume_front("local") || Name.consume_front("shared") ||
         Name.consume_front("global") || Name.consume_front("constant") ||
         Name.consume_front("param");
}
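
// Illustrative usage (assumed): for Name == "shared.to.gen" this consumes
// "shared" and returns true, leaving Name == ".to.gen" for the caller's
// follow-up starts_with(".to.gen") check.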

static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
                                      bool CanUpgradeDebugIntrinsicsToRecords) {
  assert(F && "Illegal to upgrade a non-existent Function.");

  StringRef Name = F->getName();

  // Quickly eliminate it, if it's not a candidate.
  if (!Name.consume_front("llvm.") || Name.empty())
    return false;

  switch (Name[0]) {
  default: break;
  case 'a': {
    bool IsArm = Name.consume_front("arm.");
    if (IsArm || Name.consume_front("aarch64.")) {
      if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
        return true;
      break;
    }

    if (Name.consume_front("amdgcn.")) {
      if (Name == "alignbit") {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::fshr, {F->getReturnType()});
        return true;
      }

      if (Name.consume_front("atomic.")) {
        if (Name.starts_with("inc") || Name.starts_with("dec") ||
            Name.starts_with("cond.sub") || Name.starts_with("csub")) {
          // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
          // and usub_sat so there's no new declaration.
          NewFn = nullptr;
          return true;
        }
        break; // No other 'amdgcn.atomic.*'
      }

      // Legacy wmma iu intrinsics without the optional clamp operand.
      if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
          F->arg_size() == 7) {
        NewFn = nullptr;
        return true;
      }
      if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
          F->arg_size() == 8) {
        NewFn = nullptr;
        return true;
      }

      if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
          Name.consume_front("flat.atomic.")) {
        if (Name.starts_with("fadd") ||
            // FIXME: We should also remove fmin.num and fmax.num intrinsics.
            (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
            (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
          // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
          // declaration.
          NewFn = nullptr;
          return true;
        }
      }

      if (Name.starts_with("ldexp.")) {
        // Target specific intrinsic became redundant
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), Intrinsic::ldexp,
            {F->getReturnType(), F->getArg(1)->getType()});
        return true;
      }
      break; // No other 'amdgcn.*'
    }

    break;
  }
  case 'c': {
    if (F->arg_size() == 1) {
      Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
                             .StartsWith("ctlz.", Intrinsic::ctlz)
                             .StartsWith("cttz.", Intrinsic::cttz)
                             .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                  F->arg_begin()->getType());
        return true;
      }
    }

    if (F->arg_size() == 2 && Name == "coro.end") {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::coro_end);
      return true;
    }

    break;
  }
  case 'd':
    if (Name.consume_front("dbg.")) {
      // Mark debug intrinsics for upgrade to new debug format.
      if (CanUpgradeDebugIntrinsicsToRecords) {
        if (Name == "addr" || Name == "value" || Name == "assign" ||
            Name == "declare" || Name == "label") {
          // There's no function to replace these with.
          NewFn = nullptr;
          // But we do want these to get upgraded.
          return true;
        }
      }
      // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
      // converted to DbgVariableRecords later.
      if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                  Intrinsic::dbg_value);
        return true;
      }
      break; // No other 'dbg.*'.
    }
    break;
  case 'e':
    if (Name.consume_front("experimental.vector.")) {
      Intrinsic::ID ID =
          StringSwitch<Intrinsic::ID>(Name)
              // Skip over extract.last.active, otherwise it will be 'upgraded'
              // to a regular vector extract which is a different operation.
              .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
              .StartsWith("extract.", Intrinsic::vector_extract)
              .StartsWith("insert.", Intrinsic::vector_insert)
              .StartsWith("reverse.", Intrinsic::vector_reverse)
              .StartsWith("interleave2.", Intrinsic::vector_interleave2)
              .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
              .StartsWith("partial.reduce.add",
                          Intrinsic::vector_partial_reduce_add)
              .Default(Intrinsic::not_intrinsic);
      if (ID != Intrinsic::not_intrinsic) {
        const auto *FT = F->getFunctionType();
        SmallVector<Type *, 2> Tys;
        if (ID == Intrinsic::vector_extract ||
            ID == Intrinsic::vector_interleave2)
          // Extracting overloads the return type.
          Tys.push_back(FT->getReturnType());
        if (ID != Intrinsic::vector_interleave2)
          Tys.push_back(FT->getParamType(0));
        if (ID == Intrinsic::vector_insert ||
            ID == Intrinsic::vector_partial_reduce_add)
          // Inserting overloads the inserted type.
          Tys.push_back(FT->getParamType(1));
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
        return true;
      }

      if (Name.consume_front("reduce.")) {
        SmallVector<StringRef, 2> Groups;
        static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
        if (R.match(Name, &Groups))
          ID = StringSwitch<Intrinsic::ID>(Groups[1])
                   .Case("add", Intrinsic::vector_reduce_add)
                   .Case("mul", Intrinsic::vector_reduce_mul)
                   .Case("and", Intrinsic::vector_reduce_and)
                   .Case("or", Intrinsic::vector_reduce_or)
                   .Case("xor", Intrinsic::vector_reduce_xor)
                   .Case("smax", Intrinsic::vector_reduce_smax)
                   .Case("smin", Intrinsic::vector_reduce_smin)
                   .Case("umax", Intrinsic::vector_reduce_umax)
                   .Case("umin", Intrinsic::vector_reduce_umin)
                   .Case("fmax", Intrinsic::vector_reduce_fmax)
                   .Case("fmin", Intrinsic::vector_reduce_fmin)
                   .Default(Intrinsic::not_intrinsic);

        bool V2 = false;
        if (ID == Intrinsic::not_intrinsic) {
          static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
          Groups.clear();
          V2 = true;
          if (R2.match(Name, &Groups))
            ID = StringSwitch<Intrinsic::ID>(Groups[1])
                     .Case("fadd", Intrinsic::vector_reduce_fadd)
                     .Case("fmul", Intrinsic::vector_reduce_fmul)
                     .Default(Intrinsic::not_intrinsic);
        }
        if (ID != Intrinsic::not_intrinsic) {
          rename(F);
          auto Args = F->getFunctionType()->params();
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
                                                    {Args[V2 ? 1 : 0]});
          return true;
        }
        break; // No other 'experimental.vector.reduce.*'.
      }

      if (Name.consume_front("splice"))
        return true;
      break; // No other 'experimental.vector.*'.
    }
    if (Name.consume_front("experimental.stepvector.")) {
      Intrinsic::ID ID = Intrinsic::stepvector;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), ID, F->getFunctionType()->getReturnType());
      return true;
    }
    break; // No other 'e*'.
  case 'f':
    if (Name.starts_with("flt.rounds")) {
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::get_rounding);
      return true;
    }
    break;
  case 'i':
    if (Name.starts_with("invariant.group.barrier")) {
      // Rename invariant.group.barrier to launder.invariant.group.
      auto Args = F->getFunctionType()->params();
      Type *ObjectPtr[1] = {Args[0]};
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
      return true;
    }
    break;
  case 'l':
    if ((Name.starts_with("lifetime.start") ||
         Name.starts_with("lifetime.end")) &&
        F->arg_size() == 2) {
      Intrinsic::ID IID = Name.starts_with("lifetime.start")
                              ? Intrinsic::lifetime_start
                              : Intrinsic::lifetime_end;
      rename(F);
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                F->getArg(0)->getType());
      return true;
    }
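
    // Illustrative example (assumed): the old two-operand form
    //   call void @llvm.lifetime.start.p0(i64 16, ptr %slot)
    // is upgraded to the current declaration without the explicit size
    // operand.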
    break;
  case 'm': {
    // Update the memory intrinsics (memcpy/memmove/memset) that have an
    // alignment parameter to embed the alignment as an attribute of
    // the pointer args.
    if (unsigned ID = StringSwitch<unsigned>(Name)
                          .StartsWith("memcpy.", Intrinsic::memcpy)
                          .StartsWith("memmove.", Intrinsic::memmove)
                          .Default(0)) {
      if (F->arg_size() == 5) {
        rename(F);
        // Get the types of dest, src, and len.
        ArrayRef<Type *> ParamTypes =
            F->getFunctionType()->params().slice(0, 3);
        NewFn =
            Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
        return true;
      }
    }
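
    // Illustrative example (assumed): the old five-operand form
    //   call void @llvm.memcpy.p0i8.p0i8.i64(ptr %d, ptr %s, i64 %n,
    //                                        i32 4, i1 false)
    // loses the explicit i32 alignment operand; the rewritten call instead
    // carries align attributes on the two pointer arguments.
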
    if (Name.starts_with("memset.") && F->arg_size() == 5) {
      rename(F);
      // Get the types of dest and len.
      const auto *FT = F->getFunctionType();
      Type *ParamTypes[2] = {
          FT->getParamType(0), // Dest
          FT->getParamType(2)  // len
      };
      NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
                                                Intrinsic::memset, ParamTypes);
      return true;
    }

    unsigned MaskedID =
        StringSwitch<unsigned>(Name)
            .StartsWith("masked.load", Intrinsic::masked_load)
            .StartsWith("masked.gather", Intrinsic::masked_gather)
            .StartsWith("masked.store", Intrinsic::masked_store)
            .StartsWith("masked.scatter", Intrinsic::masked_scatter)
            .Default(0);
    if (MaskedID && F->arg_size() == 4) {
      rename(F);
      if (MaskedID == Intrinsic::masked_load ||
          MaskedID == Intrinsic::masked_gather) {
        NewFn = Intrinsic::getOrInsertDeclaration(
            F->getParent(), MaskedID,
            {F->getReturnType(), F->getArg(0)->getType()});
        return true;
      }
      NewFn = Intrinsic::getOrInsertDeclaration(
          F->getParent(), MaskedID,
          {F->getArg(0)->getType(), F->getArg(1)->getType()});
      return true;
    }
    break;
  }
  case 'n': {
    if (Name.consume_front("nvvm.")) {
      // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
      if (F->arg_size() == 1) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
                .Case("clz.i", Intrinsic::ctlz)
                .Case("popc.i", Intrinsic::ctpop)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      } else if (F->arg_size() == 2) {
        Intrinsic::ID IID =
            StringSwitch<Intrinsic::ID>(Name)
                .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
                .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
                .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
                .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
                .Default(Intrinsic::not_intrinsic);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
                                                    {F->getReturnType()});
          return true;
        }
      }

      // Check for nvvm intrinsics that need a return type adjustment.
      if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
        Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
        if (IID != Intrinsic::not_intrinsic) {
          NewFn = nullptr;
          return true;
        }
      }

      // Upgrade Distributed Shared Memory Intrinsics
      Intrinsic::ID IID = shouldUpgradeNVPTXSharedClusterIntrinsic(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // Upgrade TMA copy G2S Intrinsics
      IID = shouldUpgradeNVPTXTMAG2SIntrinsics(F, Name);
      if (IID != Intrinsic::not_intrinsic) {
        rename(F);
        NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
        return true;
      }

      // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
      // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
      //
      // TODO: We could add lohi.i2d.
      bool Expand = false;
      if (Name.consume_front("abs."))
        // nvvm.abs.{i,ll,bf16,bf16x2}
        Expand =
            Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
      else if (Name.consume_front("fabs."))
        // nvvm.fabs.{f,ftz.f,d}
        Expand = Name == "f" || Name == "ftz.f" || Name == "d";
      else if (Name.consume_front("ex2.approx."))
        // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
        Expand =
            Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
      else if (Name.consume_front("atomic.load."))
        // nvvm.atomic.load.add.{f32,f64}.p
        // nvvm.atomic.load.{inc,dec}.32.p
        Expand = StringSwitch<bool>(Name)
                     .StartsWith("add.f32.p", true)
                     .StartsWith("add.f64.p", true)
                     .StartsWith("inc.32.p", true)
                     .StartsWith("dec.32.p", true)
                     .Default(false);
      else if (Name.consume_front("bitcast."))
        // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
        Expand =
            Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
      else if (Name.consume_front("rotate."))
        // nvvm.rotate.{b32,b64,right.b64}
        Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
      else if (Name.consume_front("ptr.gen.to."))
        // nvvm.ptr.gen.to.{local,shared,global,constant,param}
        Expand = consumeNVVMPtrAddrSpace(Name);
      else if (Name.consume_front("ptr."))
        // nvvm.ptr.{local,shared,global,constant,param}.to.gen
        Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
      else if (Name.consume_front("ldg.global."))
        // nvvm.ldg.global.{i,p,f}
        Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
                  Name.starts_with("p."));
      else
        Expand = StringSwitch<bool>(Name)
                     .Case("barrier0", true)
                     .Case("barrier.n", true)
                     .Case("barrier.sync.cnt", true)
                     .Case("barrier.sync", true)
                     .Case("barrier", true)
                     .Case("bar.sync", true)
                     .Case("barrier0.popc", true)
                     .Case("barrier0.and", true)
                     .Case("barrier0.or", true)
                     .Case("clz.ll", true)
                     .Case("popc.ll", true)
                     .Case("h2f", true)
                     .Case("swap.lo.hi.b64", true)
                     .Case("tanh.approx.f32", true)
                     .Default(false);

      if (Expand) {
        NewFn = nullptr;
        return true;
      }
      break; // No other 'nvvm.*'.
    }
    break;
  }
1656 case 'o':
1657 if (Name.starts_with("objectsize.")) {
1658 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1659 if (F->arg_size() == 2 || F->arg_size() == 3) {
1660 rename(F);
1661 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1662 Intrinsic::objectsize, Tys);
1663 return true;
1664 }
1665 }
1666 break;
1667
1668 case 'p':
1669 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1670 rename(F);
1671 NewFn = Intrinsic::getOrInsertDeclaration(
1672 F->getParent(), Intrinsic::ptr_annotation,
1673 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1674 return true;
1675 }
1676 break;
1677
1678 case 'r': {
1679 if (Name.consume_front("riscv.")) {
1680 Intrinsic::ID ID;
1681 ID = StringSwitch<Intrinsic::ID>(Name)
1682 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1683 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1684 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1685 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1686 .Default(Intrinsic::not_intrinsic);
1687 if (ID != Intrinsic::not_intrinsic) {
1688 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1689 rename(F);
1690 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1691 return true;
1692 }
1693 break; // No other applicable upgrades.
1694 }
1695
1696 ID = StringSwitch<Intrinsic::ID>(Name)
1697 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1698 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1699 .Default(Intrinsic::not_intrinsic);
1700 if (ID != Intrinsic::not_intrinsic) {
1701 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1702 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1703 rename(F);
1704 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1705 return true;
1706 }
1707 break; // No other applicable upgrades.
1708 }
1709
1710 ID = StringSwitch<Intrinsic::ID>(Name)
1711 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1712 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1713 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1714 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1715 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1716 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1717 .Default(Intrinsic::not_intrinsic);
1718 if (ID != Intrinsic::not_intrinsic) {
1719 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1720 rename(F);
1721 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1722 return true;
1723 }
1724 break; // No other applicable upgrades.
1725 }
1726 break; // No other 'riscv.*' intrinsics
1727 }
1728 } break;
1729
1730 case 's':
1731 if (Name == "stackprotectorcheck") {
1732 NewFn = nullptr;
1733 return true;
1734 }
1735 break;
1736
1737 case 't':
1738 if (Name == "thread.pointer") {
1739 NewFn = Intrinsic::getOrInsertDeclaration(
1740 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1741 return true;
1742 }
1743 break;
1744
1745 case 'v': {
1746 if (Name == "var.annotation" && F->arg_size() == 4) {
1747 rename(F);
1748 NewFn = Intrinsic::getOrInsertDeclaration(
1749 F->getParent(), Intrinsic::var_annotation,
1750 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1751 return true;
1752 }
1753 if (Name.consume_front("vector.splice")) {
1754 if (Name.starts_with(".left") || Name.starts_with(".right"))
1755 break;
1756 return true;
1757 }
1758 break;
1759 }
1760
1761 case 'w':
1762 if (Name.consume_front("wasm.")) {
1763 Intrinsic::ID ID =
1764 StringSwitch<Intrinsic::ID>(Name)
1765 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1766 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1767 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1768 .Default(Intrinsic::not_intrinsic);
1769 if (ID != Intrinsic::not_intrinsic) {
1770 rename(F);
1771 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1772 F->getReturnType());
1773 return true;
1774 }
1775
1776 if (Name.consume_front("dot.i8x16.i7x16.")) {
1777 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1778 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1779 .Case("add.signed",
1780 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1781 .Default(Intrinsic::not_intrinsic);
1782 if (ID != Intrinsic::not_intrinsic) {
1783 rename(F);
1784 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1785 return true;
1786 }
1787 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1788 }
1789 break; // No other 'wasm.*'.
1790 }
1791 break;
1792
1793 case 'x':
1794 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1795 return true;
1796 }
1797
1798 auto *ST = dyn_cast<StructType>(F->getReturnType());
1799 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1800 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1801 // Replace return type with literal non-packed struct. Only do this for
1802 // intrinsics declared to return a struct, not for intrinsics with
1803 // overloaded return type, in which case the exact struct type will be
1804 // mangled into the name.
1805 SmallVector<Intrinsic::IITDescriptor> Desc;
1806 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1807 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1808 auto *FT = F->getFunctionType();
1809 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1810 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1811 std::string Name = F->getName().str();
1812 rename(F);
1813 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1814 Name, F->getParent());
1815
1816 // The new function may also need remangling.
1817 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1818 NewFn = *Result;
1819 return true;
1820 }
1821 }
1822
1823 // Remangle our intrinsic since we upgrade the mangling
1824 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1825 if (Result != std::nullopt) {
1826 NewFn = *Result;
1827 return true;
1828 }
1829
1830 // This may not belong here. This function is effectively being overloaded
1831 // to both detect an intrinsic which needs upgrading, and to provide the
1832 // upgraded form of the intrinsic. We should perhaps have two separate
1833 // functions for this.
1834 return false;
1835}
1836
1837 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1838 bool CanUpgradeDebugIntrinsicsToRecords) {
1839 NewFn = nullptr;
1840 bool Upgraded =
1841 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1842
1843 // Upgrade intrinsic attributes. This does not change the function.
1844 if (NewFn)
1845 F = NewFn;
1846 if (Intrinsic::ID id = F->getIntrinsicID()) {
1847 // Only do this if the intrinsic signature is valid.
1848 SmallVector<Type *> OverloadTys;
1849 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1850 F->setAttributes(
1851 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1852 }
1853 return Upgraded;
1854}
1855
1856 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1857 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1858 GV->getName() == "llvm.global_dtors")) ||
1859 !GV->hasInitializer())
1860 return nullptr;
1861 ArrayType *ATy = dyn_cast<ArrayType>(GV->getValueType());
1862 if (!ATy)
1863 return nullptr;
1864 StructType *STy = dyn_cast<StructType>(ATy->getElementType());
1865 if (!STy || STy->getNumElements() != 2)
1866 return nullptr;
1867
1868 LLVMContext &C = GV->getContext();
1869 IRBuilder<> IRB(C);
1870 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1871 IRB.getPtrTy());
1872 Constant *Init = GV->getInitializer();
1873 unsigned N = Init->getNumOperands();
1874 std::vector<Constant *> NewCtors(N);
1875 for (unsigned i = 0; i != N; ++i) {
1876 auto Ctor = cast<Constant>(Init->getOperand(i));
1877 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1878 Ctor->getAggregateElement(1),
1879 Constant::getNullValue(IRB.getPtrTy()));
1880 }
1881 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1882
1883 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1884 NewInit, GV->getName());
1885}
1886
1887// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1888// to byte shuffles.
1889 static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1890 unsigned Shift) {
1891 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1892 unsigned NumElts = ResultTy->getNumElements() * 8;
1893
1894 // Bitcast from a 64-bit element type to a byte element type.
1895 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1896 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1897
1898 // We'll be shuffling in zeroes.
1899 Value *Res = Constant::getNullValue(VecTy);
1900
1901 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1902 // we'll just return the zero vector.
1903 if (Shift < 16) {
1904 int Idxs[64];
1905 // 256/512-bit version is split into 2/4 16-byte lanes.
1906 for (unsigned l = 0; l != NumElts; l += 16)
1907 for (unsigned i = 0; i != 16; ++i) {
1908 unsigned Idx = NumElts + i - Shift;
1909 if (Idx < NumElts)
1910 Idx -= NumElts - 16; // end of lane, switch operand.
1911 Idxs[l + i] = Idx + l;
1912 }
1913
1914 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1915 }
1916
1917 // Bitcast back to a 64-bit element type.
1918 return Builder.CreateBitCast(Res, ResultTy, "cast");
1919}
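// As a worked example of the mapping above: for a 128-bit PSLLDQ with
// Shift == 4, NumElts is 16 and the loop produces Idxs = {12..15, 16..27}.
// Indices 0..15 select the zero vector and 16..31 select Op, so the emitted
// shuffle is, in IR (a sketch with illustrative value names):
//   %r = shufflevector <16 x i8> zeroinitializer, <16 x i8> %op,
//        <16 x i32> <i32 12, i32 13, ..., i32 27>
// which yields four zero bytes followed by bytes 0..11 of %op.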
1920
1921// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1922// to byte shuffles.
1923 static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1924 unsigned Shift) {
1925 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1926 unsigned NumElts = ResultTy->getNumElements() * 8;
1927
1928 // Bitcast from a 64-bit element type to a byte element type.
1929 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1930 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1931
1932 // We'll be shuffling in zeroes.
1933 Value *Res = Constant::getNullValue(VecTy);
1934
1935 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1936 // we'll just return the zero vector.
1937 if (Shift < 16) {
1938 int Idxs[64];
1939 // 256/512-bit version is split into 2/4 16-byte lanes.
1940 for (unsigned l = 0; l != NumElts; l += 16)
1941 for (unsigned i = 0; i != 16; ++i) {
1942 unsigned Idx = i + Shift;
1943 if (Idx >= 16)
1944 Idx += NumElts - 16; // end of lane, switch operand.
1945 Idxs[l + i] = Idx + l;
1946 }
1947
1948 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1949 }
1950
1951 // Bitcast back to a 64-bit element type.
1952 return Builder.CreateBitCast(Res, ResultTy, "cast");
1953}
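// The PSRLDQ mapping mirrors PSLLDQ: with Shift == 4 on a 128-bit vector the
// indices are {4..19}, selecting bytes 4..15 of Op followed by four zeroes
// shuffled in from the null vector.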
1954
1955static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1956 unsigned NumElts) {
1957 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1958 llvm::VectorType *MaskTy = llvm::FixedVectorType::get(
1959 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1960 Mask = Builder.CreateBitCast(Mask, MaskTy);
1961
1962 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1963 // i8 and we need to extract down to the right number of elements.
1964 if (NumElts <= 4) {
1965 int Indices[4];
1966 for (unsigned i = 0; i != NumElts; ++i)
1967 Indices[i] = i;
1968 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1969 "extract");
1970 }
1971
1972 return Mask;
1973}
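// For example, an i8 mask used with NumElts == 4 becomes (a sketch with
// illustrative value names):
//   %v = bitcast i8 %mask to <8 x i1>
//   %m = shufflevector <8 x i1> %v, <8 x i1> %v,
//        <4 x i32> <i32 0, i32 1, i32 2, i32 3>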
1974
1975static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1976 Value *Op1) {
1977 // If the mask is all ones just emit the first operation.
1978 if (const auto *C = dyn_cast<Constant>(Mask))
1979 if (C->isAllOnesValue())
1980 return Op0;
1981
1982 Mask = getX86MaskVec(Builder, Mask,
1983 cast<FixedVectorType>(Op0->getType())->getNumElements());
1984 return Builder.CreateSelect(Mask, Op0, Op1);
1985}
1986
1987static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
1988 Value *Op1) {
1989 // If the mask is all ones just emit the first operation.
1990 if (const auto *C = dyn_cast<Constant>(Mask))
1991 if (C->isAllOnesValue())
1992 return Op0;
1993
1994 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
1995 Mask->getType()->getIntegerBitWidth());
1996 Mask = Builder.CreateBitCast(Mask, MaskTy);
1997 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
1998 return Builder.CreateSelect(Mask, Op0, Op1);
1999}
2000
2001// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2002 // PALIGNR handles large immediates by shifting, while VALIGN masks the immediate,
2003 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
2004 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
2005 Value *Op1, Value *Shift,
2006 Value *Passthru, Value *Mask,
2007 bool IsVALIGN) {
2008 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2009
2010 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2011 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2012 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2013 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2014
2015 // Mask the immediate for VALIGN.
2016 if (IsVALIGN)
2017 ShiftVal &= (NumElts - 1);
2018
2019 // If palignr is shifting the pair of vectors more than the size of two
2020 // lanes, emit zero.
2021 if (ShiftVal >= 32)
2022 return llvm::Constant::getNullValue(Op0->getType());
2023
2024 // If palignr is shifting the pair of input vectors more than one lane,
2025 // but less than two lanes, convert to shifting in zeroes.
2026 if (ShiftVal > 16) {
2027 ShiftVal -= 16;
2028 Op1 = Op0;
2029 Op0 = llvm::Constant::getNullValue(Op0->getType());
2030 }
2031
2032 int Indices[64];
2033 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2034 for (unsigned l = 0; l < NumElts; l += 16) {
2035 for (unsigned i = 0; i != 16; ++i) {
2036 unsigned Idx = ShiftVal + i;
2037 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2038 Idx += NumElts - 16; // End of lane, switch operand.
2039 Indices[l + i] = Idx + l;
2040 }
2041 }
2042
2043 Value *Align = Builder.CreateShuffleVector(
2044 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2045
2046 return emitX86Select(Builder, Mask, Align, Passthru);
2047}
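// Example: a 128-bit PALIGNR with ShiftVal == 4 produces Indices = {4..19};
// lanes 0..15 select Op1 and 16..31 select Op0, so the result is bytes
// {Op1[4..15], Op0[0..3]}, i.e. the concatenation Op0:Op1 shifted right by
// four bytes, as the hardware instruction defines it.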
2048
2049 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
2050 bool ZeroMask, bool IndexForm) {
2051 Type *Ty = CI.getType();
2052 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2053 unsigned EltWidth = Ty->getScalarSizeInBits();
2054 bool IsFloat = Ty->isFPOrFPVectorTy();
2055 Intrinsic::ID IID;
2056 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2057 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2058 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2059 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2060 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2061 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2062 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2063 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2064 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2065 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2066 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2067 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2068 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2069 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2070 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2071 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2072 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2073 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2074 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2075 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2076 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2077 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2078 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2079 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2080 else if (VecWidth == 128 && EltWidth == 16)
2081 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2082 else if (VecWidth == 256 && EltWidth == 16)
2083 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2084 else if (VecWidth == 512 && EltWidth == 16)
2085 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2086 else if (VecWidth == 128 && EltWidth == 8)
2087 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2088 else if (VecWidth == 256 && EltWidth == 8)
2089 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2090 else if (VecWidth == 512 && EltWidth == 8)
2091 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2092 else
2093 llvm_unreachable("Unexpected intrinsic");
2094
2095 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2096 CI.getArgOperand(2) };
2097
2098 // If this isn't index form we need to swap operand 0 and 1.
2099 if (!IndexForm)
2100 std::swap(Args[0], Args[1]);
2101
2102 Value *V = Builder.CreateIntrinsic(IID, Args);
2103 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2104 : Builder.CreateBitCast(CI.getArgOperand(1),
2105 Ty);
2106 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2107}
2108
2109 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2110 Intrinsic::ID IID) {
2111 Type *Ty = CI.getType();
2112 Value *Op0 = CI.getOperand(0);
2113 Value *Op1 = CI.getOperand(1);
2114 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2115
2116 if (CI.arg_size() == 4) { // For masked intrinsics.
2117 Value *VecSrc = CI.getOperand(2);
2118 Value *Mask = CI.getOperand(3);
2119 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2120 }
2121 return Res;
2122}
2123
2124 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2125 bool IsRotateRight) {
2126 Type *Ty = CI.getType();
2127 Value *Src = CI.getArgOperand(0);
2128 Value *Amt = CI.getArgOperand(1);
2129
2130 // The amount may be a scalar immediate, in which case we create a splat vector.
2131 // Funnel-shift amounts are treated as modulo, and the types are all power-of-2,
2132 // so we only care about the lowest log2 bits anyway.
2133 if (Amt->getType() != Ty) {
2134 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2135 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2136 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2137 }
2138
2139 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2140 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2141
2142 if (CI.arg_size() == 4) { // For masked intrinsics.
2143 Value *VecSrc = CI.getOperand(2);
2144 Value *Mask = CI.getOperand(3);
2145 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2146 }
2147 return Res;
2148}
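// A rotate is just a funnel shift with both inputs tied together; e.g. a
// packed left-rotate becomes (a sketch):
//   %r = call <4 x i32> @llvm.fshl.v4i32(<4 x i32> %src, <4 x i32> %src,
//                                        <4 x i32> %amt)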
2149
2150static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2151 bool IsSigned) {
2152 Type *Ty = CI.getType();
2153 Value *LHS = CI.getArgOperand(0);
2154 Value *RHS = CI.getArgOperand(1);
2155
2156 CmpInst::Predicate Pred;
2157 switch (Imm) {
2158 case 0x0:
2159 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2160 break;
2161 case 0x1:
2162 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2163 break;
2164 case 0x2:
2165 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2166 break;
2167 case 0x3:
2168 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2169 break;
2170 case 0x4:
2171 Pred = ICmpInst::ICMP_EQ;
2172 break;
2173 case 0x5:
2174 Pred = ICmpInst::ICMP_NE;
2175 break;
2176 case 0x6:
2177 return Constant::getNullValue(Ty); // FALSE
2178 case 0x7:
2179 return Constant::getAllOnesValue(Ty); // TRUE
2180 default:
2181 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2182 }
2183
2184 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2185 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2186 return Ext;
2187}
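// For instance, a signed greater-than form (Imm == 0x2) on <16 x i8> lowers
// to a compare plus sign-extension (a sketch):
//   %c = icmp sgt <16 x i8> %lhs, %rhs
//   %r = sext <16 x i1> %c to <16 x i8>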
2188
2189 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2190 bool IsShiftRight, bool ZeroMask) {
2191 Type *Ty = CI.getType();
2192 Value *Op0 = CI.getArgOperand(0);
2193 Value *Op1 = CI.getArgOperand(1);
2194 Value *Amt = CI.getArgOperand(2);
2195
2196 if (IsShiftRight)
2197 std::swap(Op0, Op1);
2198
2199 // The amount may be a scalar immediate, in which case we create a splat vector.
2200 // Funnel-shift amounts are treated as modulo, and the types are all power-of-2,
2201 // so we only care about the lowest log2 bits anyway.
2202 if (Amt->getType() != Ty) {
2203 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2204 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2205 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2206 }
2207
2208 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2209 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2210
2211 unsigned NumArgs = CI.arg_size();
2212 if (NumArgs >= 4) { // For masked intrinsics.
2213 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2214 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2215 CI.getArgOperand(0);
2216 Value *Mask = CI.getOperand(NumArgs - 1);
2217 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2218 }
2219 return Res;
2220}
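// Note on the swap above: llvm.fshr, like llvm.fshl, takes the high half of
// the double-wide value first, so the right-shift form exchanges Op0 and Op1
// to line the x86 operand order up with the funnel-shift definition.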
2221
2222 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2223 Value *Mask, bool Aligned) {
2224 const Align Alignment =
2225 Aligned
2226 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2227 : Align(1);
2228
2229 // If the mask is all ones just emit a regular store.
2230 if (const auto *C = dyn_cast<Constant>(Mask))
2231 if (C->isAllOnesValue())
2232 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2233
2234 // Convert the mask from an integer type to a vector of i1.
2235 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2236 Mask = getX86MaskVec(Builder, Mask, NumElts);
2237 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2238}
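// For a 512-bit aligned store this boils down to (a sketch with illustrative
// types and names):
//   call void @llvm.masked.store.v16f32.p0(<16 x float> %data, ptr %ptr,
//                                          i32 64, <16 x i1> %mask)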
2239
2240 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2241 Value *Passthru, Value *Mask, bool Aligned) {
2242 Type *ValTy = Passthru->getType();
2243 const Align Alignment =
2244 Aligned
2245 ? Align(
2246 ValTy->getPrimitiveSizeInBits().getFixedValue() /
2247 8)
2248 : Align(1);
2249
2250 // If the mask is all ones just emit a regular load.
2251 if (const auto *C = dyn_cast<Constant>(Mask))
2252 if (C->isAllOnesValue())
2253 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2254
2255 // Convert the mask from an integer type to a vector of i1.
2256 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2257 Mask = getX86MaskVec(Builder, Mask, NumElts);
2258 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2259}
2260
2261static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2262 Type *Ty = CI.getType();
2263 Value *Op0 = CI.getArgOperand(0);
2264 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2265 {Op0, Builder.getInt1(false)});
2266 if (CI.arg_size() == 3)
2267 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2268 return Res;
2269}
2270
2271static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2272 Type *Ty = CI.getType();
2273
2274 // Arguments have a vXi32 type so cast to vXi64.
2275 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2276 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2277
2278 if (IsSigned) {
2279 // Shift left then arithmetic shift right.
2280 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2281 LHS = Builder.CreateShl(LHS, ShiftAmt);
2282 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2283 RHS = Builder.CreateShl(RHS, ShiftAmt);
2284 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2285 } else {
2286 // Clear the upper bits.
2287 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2288 LHS = Builder.CreateAnd(LHS, Mask);
2289 RHS = Builder.CreateAnd(RHS, Mask);
2290 }
2291
2292 Value *Res = Builder.CreateMul(LHS, RHS);
2293
2294 if (CI.arg_size() == 4)
2295 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2296
2297 return Res;
2298}
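// Worked example for the signed case: shifting a 64-bit lane left by 32 and
// arithmetic-shifting it back sign-extends the low 32 bits, e.g.
// 0x00000000FFFFFFFF becomes 0xFFFFFFFFFFFFFFFF (-1), so the plain 64-bit
// multiply reproduces PMULDQ's i32-by-i32-to-i64 semantics.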
2299
2300 // Apply a mask to a vector of i1s, ensuring the result is at least 8 bits wide.
2301 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2302 Value *Mask) {
2303 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2304 if (Mask) {
2305 const auto *C = dyn_cast<Constant>(Mask);
2306 if (!C || !C->isAllOnesValue())
2307 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2308 }
2309
2310 if (NumElts < 8) {
2311 int Indices[8];
2312 for (unsigned i = 0; i != NumElts; ++i)
2313 Indices[i] = i;
2314 for (unsigned i = NumElts; i != 8; ++i)
2315 Indices[i] = NumElts + i % NumElts;
2316 Vec = Builder.CreateShuffleVector(Vec,
2317 Constant::getNullValue(Vec->getType()),
2318 Indices);
2319 }
2320 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2321}
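// For example, a <4 x i1> compare result is padded with zero lanes to
// <8 x i1> and bitcast, so callers receive an i8 whose low four bits hold
// the mask.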
2322
2323 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2324 unsigned CC, bool Signed) {
2325 Value *Op0 = CI.getArgOperand(0);
2326 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2327
2328 Value *Cmp;
2329 if (CC == 3) {
2330 Cmp = Constant::getNullValue(
2331 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2332 } else if (CC == 7) {
2333 Cmp = Constant::getAllOnesValue(
2334 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2335 } else {
2336 ICmpInst::Predicate Pred;
2337 switch (CC) {
2338 default: llvm_unreachable("Unknown condition code");
2339 case 0: Pred = ICmpInst::ICMP_EQ; break;
2340 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2341 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2342 case 4: Pred = ICmpInst::ICMP_NE; break;
2343 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2344 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2345 }
2346 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2347 }
2348
2349 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2350
2351 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2352}
2353
2354// Replace a masked intrinsic with an older unmasked intrinsic.
2355 static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2356 Intrinsic::ID IID) {
2357 Value *Rep =
2358 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2359 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2360}
2361
2362 static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2363 Value* A = CI.getArgOperand(0);
2364 Value* B = CI.getArgOperand(1);
2365 Value* Src = CI.getArgOperand(2);
2366 Value* Mask = CI.getArgOperand(3);
2367
2368 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2369 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2370 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2371 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2372 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2373 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2374}
2375
2376 static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2377 Value* Op = CI.getArgOperand(0);
2378 Type* ReturnOp = CI.getType();
2379 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2380 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2381 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2382}
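// E.g. for a 128-bit vpmovm2b this emits (a sketch):
//   %m = bitcast i16 %k to <16 x i1>
//   %r = sext <16 x i1> %m to <16 x i8>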
2383
2384// Replace intrinsic with unmasked version and a select.
2385 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2386 CallBase &CI, Value *&Rep) {
2387 Name = Name.substr(12); // Remove avx512.mask.
2388
2389 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2390 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2391 Intrinsic::ID IID;
2392 if (Name.starts_with("max.p")) {
2393 if (VecWidth == 128 && EltWidth == 32)
2394 IID = Intrinsic::x86_sse_max_ps;
2395 else if (VecWidth == 128 && EltWidth == 64)
2396 IID = Intrinsic::x86_sse2_max_pd;
2397 else if (VecWidth == 256 && EltWidth == 32)
2398 IID = Intrinsic::x86_avx_max_ps_256;
2399 else if (VecWidth == 256 && EltWidth == 64)
2400 IID = Intrinsic::x86_avx_max_pd_256;
2401 else
2402 llvm_unreachable("Unexpected intrinsic");
2403 } else if (Name.starts_with("min.p")) {
2404 if (VecWidth == 128 && EltWidth == 32)
2405 IID = Intrinsic::x86_sse_min_ps;
2406 else if (VecWidth == 128 && EltWidth == 64)
2407 IID = Intrinsic::x86_sse2_min_pd;
2408 else if (VecWidth == 256 && EltWidth == 32)
2409 IID = Intrinsic::x86_avx_min_ps_256;
2410 else if (VecWidth == 256 && EltWidth == 64)
2411 IID = Intrinsic::x86_avx_min_pd_256;
2412 else
2413 llvm_unreachable("Unexpected intrinsic");
2414 } else if (Name.starts_with("pshuf.b.")) {
2415 if (VecWidth == 128)
2416 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2417 else if (VecWidth == 256)
2418 IID = Intrinsic::x86_avx2_pshuf_b;
2419 else if (VecWidth == 512)
2420 IID = Intrinsic::x86_avx512_pshuf_b_512;
2421 else
2422 llvm_unreachable("Unexpected intrinsic");
2423 } else if (Name.starts_with("pmul.hr.sw.")) {
2424 if (VecWidth == 128)
2425 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2426 else if (VecWidth == 256)
2427 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2428 else if (VecWidth == 512)
2429 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2430 else
2431 llvm_unreachable("Unexpected intrinsic");
2432 } else if (Name.starts_with("pmulh.w.")) {
2433 if (VecWidth == 128)
2434 IID = Intrinsic::x86_sse2_pmulh_w;
2435 else if (VecWidth == 256)
2436 IID = Intrinsic::x86_avx2_pmulh_w;
2437 else if (VecWidth == 512)
2438 IID = Intrinsic::x86_avx512_pmulh_w_512;
2439 else
2440 llvm_unreachable("Unexpected intrinsic");
2441 } else if (Name.starts_with("pmulhu.w.")) {
2442 if (VecWidth == 128)
2443 IID = Intrinsic::x86_sse2_pmulhu_w;
2444 else if (VecWidth == 256)
2445 IID = Intrinsic::x86_avx2_pmulhu_w;
2446 else if (VecWidth == 512)
2447 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2448 else
2449 llvm_unreachable("Unexpected intrinsic");
2450 } else if (Name.starts_with("pmaddw.d.")) {
2451 if (VecWidth == 128)
2452 IID = Intrinsic::x86_sse2_pmadd_wd;
2453 else if (VecWidth == 256)
2454 IID = Intrinsic::x86_avx2_pmadd_wd;
2455 else if (VecWidth == 512)
2456 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2457 else
2458 llvm_unreachable("Unexpected intrinsic");
2459 } else if (Name.starts_with("pmaddubs.w.")) {
2460 if (VecWidth == 128)
2461 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2462 else if (VecWidth == 256)
2463 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2464 else if (VecWidth == 512)
2465 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2466 else
2467 llvm_unreachable("Unexpected intrinsic");
2468 } else if (Name.starts_with("packsswb.")) {
2469 if (VecWidth == 128)
2470 IID = Intrinsic::x86_sse2_packsswb_128;
2471 else if (VecWidth == 256)
2472 IID = Intrinsic::x86_avx2_packsswb;
2473 else if (VecWidth == 512)
2474 IID = Intrinsic::x86_avx512_packsswb_512;
2475 else
2476 llvm_unreachable("Unexpected intrinsic");
2477 } else if (Name.starts_with("packssdw.")) {
2478 if (VecWidth == 128)
2479 IID = Intrinsic::x86_sse2_packssdw_128;
2480 else if (VecWidth == 256)
2481 IID = Intrinsic::x86_avx2_packssdw;
2482 else if (VecWidth == 512)
2483 IID = Intrinsic::x86_avx512_packssdw_512;
2484 else
2485 llvm_unreachable("Unexpected intrinsic");
2486 } else if (Name.starts_with("packuswb.")) {
2487 if (VecWidth == 128)
2488 IID = Intrinsic::x86_sse2_packuswb_128;
2489 else if (VecWidth == 256)
2490 IID = Intrinsic::x86_avx2_packuswb;
2491 else if (VecWidth == 512)
2492 IID = Intrinsic::x86_avx512_packuswb_512;
2493 else
2494 llvm_unreachable("Unexpected intrinsic");
2495 } else if (Name.starts_with("packusdw.")) {
2496 if (VecWidth == 128)
2497 IID = Intrinsic::x86_sse41_packusdw;
2498 else if (VecWidth == 256)
2499 IID = Intrinsic::x86_avx2_packusdw;
2500 else if (VecWidth == 512)
2501 IID = Intrinsic::x86_avx512_packusdw_512;
2502 else
2503 llvm_unreachable("Unexpected intrinsic");
2504 } else if (Name.starts_with("vpermilvar.")) {
2505 if (VecWidth == 128 && EltWidth == 32)
2506 IID = Intrinsic::x86_avx_vpermilvar_ps;
2507 else if (VecWidth == 128 && EltWidth == 64)
2508 IID = Intrinsic::x86_avx_vpermilvar_pd;
2509 else if (VecWidth == 256 && EltWidth == 32)
2510 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2511 else if (VecWidth == 256 && EltWidth == 64)
2512 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2513 else if (VecWidth == 512 && EltWidth == 32)
2514 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2515 else if (VecWidth == 512 && EltWidth == 64)
2516 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2517 else
2518 llvm_unreachable("Unexpected intrinsic");
2519 } else if (Name == "cvtpd2dq.256") {
2520 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2521 } else if (Name == "cvtpd2ps.256") {
2522 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2523 } else if (Name == "cvttpd2dq.256") {
2524 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2525 } else if (Name == "cvttps2dq.128") {
2526 IID = Intrinsic::x86_sse2_cvttps2dq;
2527 } else if (Name == "cvttps2dq.256") {
2528 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2529 } else if (Name.starts_with("permvar.")) {
2530 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2531 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2532 IID = Intrinsic::x86_avx2_permps;
2533 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2534 IID = Intrinsic::x86_avx2_permd;
2535 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2536 IID = Intrinsic::x86_avx512_permvar_df_256;
2537 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2538 IID = Intrinsic::x86_avx512_permvar_di_256;
2539 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2540 IID = Intrinsic::x86_avx512_permvar_sf_512;
2541 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2542 IID = Intrinsic::x86_avx512_permvar_si_512;
2543 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2544 IID = Intrinsic::x86_avx512_permvar_df_512;
2545 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2546 IID = Intrinsic::x86_avx512_permvar_di_512;
2547 else if (VecWidth == 128 && EltWidth == 16)
2548 IID = Intrinsic::x86_avx512_permvar_hi_128;
2549 else if (VecWidth == 256 && EltWidth == 16)
2550 IID = Intrinsic::x86_avx512_permvar_hi_256;
2551 else if (VecWidth == 512 && EltWidth == 16)
2552 IID = Intrinsic::x86_avx512_permvar_hi_512;
2553 else if (VecWidth == 128 && EltWidth == 8)
2554 IID = Intrinsic::x86_avx512_permvar_qi_128;
2555 else if (VecWidth == 256 && EltWidth == 8)
2556 IID = Intrinsic::x86_avx512_permvar_qi_256;
2557 else if (VecWidth == 512 && EltWidth == 8)
2558 IID = Intrinsic::x86_avx512_permvar_qi_512;
2559 else
2560 llvm_unreachable("Unexpected intrinsic");
2561 } else if (Name.starts_with("dbpsadbw.")) {
2562 if (VecWidth == 128)
2563 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2564 else if (VecWidth == 256)
2565 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2566 else if (VecWidth == 512)
2567 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2568 else
2569 llvm_unreachable("Unexpected intrinsic");
2570 } else if (Name.starts_with("pmultishift.qb.")) {
2571 if (VecWidth == 128)
2572 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2573 else if (VecWidth == 256)
2574 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2575 else if (VecWidth == 512)
2576 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2577 else
2578 llvm_unreachable("Unexpected intrinsic");
2579 } else if (Name.starts_with("conflict.")) {
2580 if (Name[9] == 'd' && VecWidth == 128)
2581 IID = Intrinsic::x86_avx512_conflict_d_128;
2582 else if (Name[9] == 'd' && VecWidth == 256)
2583 IID = Intrinsic::x86_avx512_conflict_d_256;
2584 else if (Name[9] == 'd' && VecWidth == 512)
2585 IID = Intrinsic::x86_avx512_conflict_d_512;
2586 else if (Name[9] == 'q' && VecWidth == 128)
2587 IID = Intrinsic::x86_avx512_conflict_q_128;
2588 else if (Name[9] == 'q' && VecWidth == 256)
2589 IID = Intrinsic::x86_avx512_conflict_q_256;
2590 else if (Name[9] == 'q' && VecWidth == 512)
2591 IID = Intrinsic::x86_avx512_conflict_q_512;
2592 else
2593 llvm_unreachable("Unexpected intrinsic");
2594 } else if (Name.starts_with("pavg.")) {
2595 if (Name[5] == 'b' && VecWidth == 128)
2596 IID = Intrinsic::x86_sse2_pavg_b;
2597 else if (Name[5] == 'b' && VecWidth == 256)
2598 IID = Intrinsic::x86_avx2_pavg_b;
2599 else if (Name[5] == 'b' && VecWidth == 512)
2600 IID = Intrinsic::x86_avx512_pavg_b_512;
2601 else if (Name[5] == 'w' && VecWidth == 128)
2602 IID = Intrinsic::x86_sse2_pavg_w;
2603 else if (Name[5] == 'w' && VecWidth == 256)
2604 IID = Intrinsic::x86_avx2_pavg_w;
2605 else if (Name[5] == 'w' && VecWidth == 512)
2606 IID = Intrinsic::x86_avx512_pavg_w_512;
2607 else
2608 llvm_unreachable("Unexpected intrinsic");
2609 } else
2610 return false;
2611
2612 SmallVector<Value *, 4> Args(CI.args());
2613 Args.pop_back();
2614 Args.pop_back();
2615 Rep = Builder.CreateIntrinsic(IID, Args);
2616 unsigned NumArgs = CI.arg_size();
2617 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2618 CI.getArgOperand(NumArgs - 2));
2619 return true;
2620}
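// The rewrite is always "unmasked intrinsic plus select"; e.g. a 128-bit
// masked pmulh.w becomes (a sketch):
//   %v = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a, <8 x i16> %b)
//   %r = select <8 x i1> %m, <8 x i16> %v, <8 x i16> %passthru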
2621
2622 /// Upgrade the comment in a call to inline asm that represents an objc
2623 /// retain/release marker.
2624void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2625 size_t Pos;
2626 if (AsmStr->find("mov\tfp") == 0 &&
2627 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2628 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2629 AsmStr->replace(Pos, 1, ";");
2630 }
2631}
2632
2633 static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2634 Function *F, IRBuilder<> &Builder) {
2635 Value *Rep = nullptr;
2636
2637 if (Name == "abs.i" || Name == "abs.ll") {
2638 Value *Arg = CI->getArgOperand(0);
2639 Value *Neg = Builder.CreateNeg(Arg, "neg");
2640 Value *Cmp = Builder.CreateICmpSGE(
2641 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2642 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2643 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2644 Type *Ty = (Name == "abs.bf16")
2645 ? Builder.getBFloatTy()
2646 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2647 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2648 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2649 Rep = Builder.CreateBitCast(Abs, CI->getType());
2650 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2651 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2652 : Intrinsic::nvvm_fabs;
2653 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2654 } else if (Name.consume_front("ex2.approx.")) {
2655 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2656 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2657 : Intrinsic::nvvm_ex2_approx;
2658 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2659 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2660 Name.starts_with("atomic.load.add.f64.p")) {
2661 Value *Ptr = CI->getArgOperand(0);
2662 Value *Val = CI->getArgOperand(1);
2663 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2664 AtomicOrdering::SequentiallyConsistent);
2665 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2666 Name.starts_with("atomic.load.dec.32.p")) {
2667 Value *Ptr = CI->getArgOperand(0);
2668 Value *Val = CI->getArgOperand(1);
2669 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2670 : AtomicRMWInst::UDecWrap;
2671 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2672 AtomicOrdering::SequentiallyConsistent);
2673 } else if (Name == "clz.ll") {
2674 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2675 Value *Arg = CI->getArgOperand(0);
2676 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2677 {Arg, Builder.getFalse()},
2678 /*FMFSource=*/nullptr, "ctlz");
2679 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2680 } else if (Name == "popc.ll") {
2681 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2682 // i64.
2683 Value *Arg = CI->getArgOperand(0);
2684 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2685 Arg, /*FMFSource=*/nullptr, "ctpop");
2686 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2687 } else if (Name == "h2f") {
2688 Rep = Builder.CreateIntrinsic(Intrinsic::convert_from_fp16,
2689 {Builder.getFloatTy()}, CI->getArgOperand(0),
2690 /*FMFSource=*/nullptr, "h2f");
2691 } else if (Name.consume_front("bitcast.") &&
2692 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2693 Name == "d2ll")) {
2694 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2695 } else if (Name == "rotate.b32") {
2696 Value *Arg = CI->getOperand(0);
2697 Value *ShiftAmt = CI->getOperand(1);
2698 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2699 {Arg, Arg, ShiftAmt});
2700 } else if (Name == "rotate.b64") {
2701 Type *Int64Ty = Builder.getInt64Ty();
2702 Value *Arg = CI->getOperand(0);
2703 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2704 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2705 {Arg, Arg, ZExtShiftAmt});
2706 } else if (Name == "rotate.right.b64") {
2707 Type *Int64Ty = Builder.getInt64Ty();
2708 Value *Arg = CI->getOperand(0);
2709 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2710 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2711 {Arg, Arg, ZExtShiftAmt});
2712 } else if (Name == "swap.lo.hi.b64") {
2713 Type *Int64Ty = Builder.getInt64Ty();
2714 Value *Arg = CI->getOperand(0);
2715 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2716 {Arg, Arg, Builder.getInt64(32)});
2717 } else if ((Name.consume_front("ptr.gen.to.") &&
2718 consumeNVVMPtrAddrSpace(Name)) ||
2719 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2720 Name.starts_with(".to.gen"))) {
2721 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2722 } else if (Name.consume_front("ldg.global")) {
2723 Value *Ptr = CI->getArgOperand(0);
2724 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2725 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2726 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2727 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2728 MDNode *MD = MDNode::get(Builder.getContext(), {});
2729 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2730 return LD;
2731 } else if (Name == "tanh.approx.f32") {
2732 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2733 FastMathFlags FMF;
2734 FMF.setApproxFunc();
2735 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2736 FMF);
2737 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2738 Value *Arg =
2739 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2740 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2741 {}, {Arg});
2742 } else if (Name == "barrier") {
2743 Rep = Builder.CreateIntrinsic(
2744 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2745 {CI->getArgOperand(0), CI->getArgOperand(1)});
2746 } else if (Name == "barrier.sync") {
2747 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2748 {CI->getArgOperand(0)});
2749 } else if (Name == "barrier.sync.cnt") {
2750 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2751 {CI->getArgOperand(0), CI->getArgOperand(1)});
2752 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2753 Name == "barrier0.or") {
2754 Value *C = CI->getArgOperand(0);
2755 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2756
2757 Intrinsic::ID IID =
2758 StringSwitch<Intrinsic::ID>(Name)
2759 .Case("barrier0.popc",
2760 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2761 .Case("barrier0.and",
2762 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2763 .Case("barrier0.or",
2764 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2765 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2766 Rep = Builder.CreateZExt(Bar, CI->getType());
2767 } else {
2768 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2769 if (IID != Intrinsic::not_intrinsic &&
2770 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2771 rename(F);
2772 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2773 SmallVector<Value *, 2> Args;
2774 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2775 Value *Arg = CI->getArgOperand(I);
2776 Type *OldType = Arg->getType();
2777 Type *NewType = NewFn->getArg(I)->getType();
2778 Args.push_back(
2779 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2780 ? Builder.CreateBitCast(Arg, NewType)
2781 : Arg);
2782 }
2783 Rep = Builder.CreateCall(NewFn, Args);
2784 if (F->getReturnType()->isIntegerTy())
2785 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2786 }
2787 }
2788
2789 return Rep;
2790}
2791
2792 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2793 IRBuilder<> &Builder) {
2794 LLVMContext &C = F->getContext();
2795 Value *Rep = nullptr;
2796
2797 if (Name.starts_with("sse4a.movnt.")) {
2798 SmallVector<Metadata *, 1> Elts;
2799 Elts.push_back(
2800 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2801 MDNode *Node = MDNode::get(C, Elts);
2802
2803 Value *Arg0 = CI->getArgOperand(0);
2804 Value *Arg1 = CI->getArgOperand(1);
2805
2806 // Nontemporal (unaligned) store of the 0'th element of the float/double
2807 // vector.
2808 Value *Extract =
2809 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2810
2811 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2812 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2813 } else if (Name.starts_with("avx.movnt.") ||
2814 Name.starts_with("avx512.storent.")) {
2815 SmallVector<Metadata *, 1> Elts;
2816 Elts.push_back(
2817 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2818 MDNode *Node = MDNode::get(C, Elts);
2819
2820 Value *Arg0 = CI->getArgOperand(0);
2821 Value *Arg1 = CI->getArgOperand(1);
2822
2823 StoreInst *SI = Builder.CreateAlignedStore(
2824 Arg1, Arg0,
2825 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2826 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2827 } else if (Name == "sse2.storel.dq") {
2828 Value *Arg0 = CI->getArgOperand(0);
2829 Value *Arg1 = CI->getArgOperand(1);
2830
2831 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2832 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2833 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2834 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2835 } else if (Name.starts_with("sse.storeu.") ||
2836 Name.starts_with("sse2.storeu.") ||
2837 Name.starts_with("avx.storeu.")) {
2838 Value *Arg0 = CI->getArgOperand(0);
2839 Value *Arg1 = CI->getArgOperand(1);
2840 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2841 } else if (Name == "avx512.mask.store.ss") {
2842 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2843 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2844 Mask, false);
2845 } else if (Name.starts_with("avx512.mask.store")) {
2846 // "avx512.mask.storeu." or "avx512.mask.store."
2847 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2848 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2849 CI->getArgOperand(2), Aligned);
2850 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2851 // Upgrade packed integer vector compare intrinsics to compare instructions.
2852 // "sse2.pcpmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2853 bool CmpEq = Name[9] == 'e';
2854 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2855 CI->getArgOperand(0), CI->getArgOperand(1));
2856 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2857 } else if (Name.starts_with("avx512.broadcastm")) {
2858 Type *ExtTy = Type::getInt32Ty(C);
2859 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2860 ExtTy = Type::getInt64Ty(C);
2861 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2862 ExtTy->getPrimitiveSizeInBits();
2863 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2864 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2865 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2866 Value *Vec = CI->getArgOperand(0);
2867 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2868 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2869 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2870 } else if (Name.starts_with("avx.sqrt.p") ||
2871 Name.starts_with("sse2.sqrt.p") ||
2872 Name.starts_with("sse.sqrt.p")) {
2873 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2874 {CI->getArgOperand(0)});
2875 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2876 if (CI->arg_size() == 4 &&
2877 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2878 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2879 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2880 : Intrinsic::x86_avx512_sqrt_pd_512;
2881
2882 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2883 Rep = Builder.CreateIntrinsic(IID, Args);
2884 } else {
2885 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2886 {CI->getArgOperand(0)});
2887 }
2888 Rep =
2889 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2890 } else if (Name.starts_with("avx512.ptestm") ||
2891 Name.starts_with("avx512.ptestnm")) {
2892 Value *Op0 = CI->getArgOperand(0);
2893 Value *Op1 = CI->getArgOperand(1);
2894 Value *Mask = CI->getArgOperand(2);
2895 Rep = Builder.CreateAnd(Op0, Op1);
2896 llvm::Type *Ty = Op0->getType();
2897 Constant *Zero = llvm::Constant::getNullValue(Ty);
2898 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2899 ? ICmpInst::ICMP_NE
2900 : ICmpInst::ICMP_EQ;
2901 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2902 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2903 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2904 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2905 ->getNumElements();
2906 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2907 Rep =
2908 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2909 } else if (Name.starts_with("avx512.kunpck")) {
2910 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2911 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2912 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2913 int Indices[64];
2914 for (unsigned i = 0; i != NumElts; ++i)
2915 Indices[i] = i;
2916
2917 // First extract half of each vector. This gives better codegen than
2918 // doing it in a single shuffle.
2919 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2920 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2921 // Concat the vectors.
2922 // NOTE: Operands have to be swapped to match intrinsic definition.
2923 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2924 Rep = Builder.CreateBitCast(Rep, CI->getType());
2925 } else if (Name == "avx512.kand.w") {
2926 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2927 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2928 Rep = Builder.CreateAnd(LHS, RHS);
2929 Rep = Builder.CreateBitCast(Rep, CI->getType());
2930 } else if (Name == "avx512.kandn.w") {
2931 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2932 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2933 LHS = Builder.CreateNot(LHS);
2934 Rep = Builder.CreateAnd(LHS, RHS);
2935 Rep = Builder.CreateBitCast(Rep, CI->getType());
2936 } else if (Name == "avx512.kor.w") {
2937 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2938 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2939 Rep = Builder.CreateOr(LHS, RHS);
2940 Rep = Builder.CreateBitCast(Rep, CI->getType());
2941 } else if (Name == "avx512.kxor.w") {
2942 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2943 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2944 Rep = Builder.CreateXor(LHS, RHS);
2945 Rep = Builder.CreateBitCast(Rep, CI->getType());
2946 } else if (Name == "avx512.kxnor.w") {
2947 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2948 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2949 LHS = Builder.CreateNot(LHS);
2950 Rep = Builder.CreateXor(LHS, RHS);
2951 Rep = Builder.CreateBitCast(Rep, CI->getType());
2952 } else if (Name == "avx512.knot.w") {
2953 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2954 Rep = Builder.CreateNot(Rep);
2955 Rep = Builder.CreateBitCast(Rep, CI->getType());
2956 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2957 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2958 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2959 Rep = Builder.CreateOr(LHS, RHS);
2960 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2961 Value *C;
2962 if (Name[14] == 'c')
2963 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2964 else
2965 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2966 Rep = Builder.CreateICmpEQ(Rep, C);
2967 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2968 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2969 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2970 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2971 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2972 Type *I32Ty = Type::getInt32Ty(C);
2973 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
2974 ConstantInt::get(I32Ty, 0));
2975 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
2976 ConstantInt::get(I32Ty, 0));
2977 Value *EltOp;
2978 if (Name.contains(".add."))
2979 EltOp = Builder.CreateFAdd(Elt0, Elt1);
2980 else if (Name.contains(".sub."))
2981 EltOp = Builder.CreateFSub(Elt0, Elt1);
2982 else if (Name.contains(".mul."))
2983 EltOp = Builder.CreateFMul(Elt0, Elt1);
2984 else
2985 EltOp = Builder.CreateFDiv(Elt0, Elt1);
2986 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
2987 ConstantInt::get(I32Ty, 0));
2988 } else if (Name.starts_with("avx512.mask.pcmp")) {
2989 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
2990 bool CmpEq = Name[16] == 'e';
2991 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
2992 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
2993 Type *OpTy = CI->getArgOperand(0)->getType();
2994 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
2995 Intrinsic::ID IID;
2996 switch (VecWidth) {
2997 default:
2998 llvm_unreachable("Unexpected intrinsic");
2999 case 128:
3000 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3001 break;
3002 case 256:
3003 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3004 break;
3005 case 512:
3006 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3007 break;
3008 }
3009
3010 Rep =
3011 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3012 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3013 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3014 Type *OpTy = CI->getArgOperand(0)->getType();
3015 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3016 unsigned EltWidth = OpTy->getScalarSizeInBits();
3017 Intrinsic::ID IID;
3018 if (VecWidth == 128 && EltWidth == 32)
3019 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3020 else if (VecWidth == 256 && EltWidth == 32)
3021 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3022 else if (VecWidth == 512 && EltWidth == 32)
3023 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3024 else if (VecWidth == 128 && EltWidth == 64)
3025 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3026 else if (VecWidth == 256 && EltWidth == 64)
3027 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3028 else if (VecWidth == 512 && EltWidth == 64)
3029 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3030 else
3031 llvm_unreachable("Unexpected intrinsic");
3032
3033 Rep =
3034 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3035 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3036 } else if (Name.starts_with("avx512.cmp.p")) {
3037 SmallVector<Value *, 4> Args(CI->args());
3038 Type *OpTy = Args[0]->getType();
3039 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3040 unsigned EltWidth = OpTy->getScalarSizeInBits();
3041 Intrinsic::ID IID;
3042 if (VecWidth == 128 && EltWidth == 32)
3043 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3044 else if (VecWidth == 256 && EltWidth == 32)
3045 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3046 else if (VecWidth == 512 && EltWidth == 32)
3047 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3048 else if (VecWidth == 128 && EltWidth == 64)
3049 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3050 else if (VecWidth == 256 && EltWidth == 64)
3051 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3052 else if (VecWidth == 512 && EltWidth == 64)
3053 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3054 else
3055 llvm_unreachable("Unexpected intrinsic");
3056
3057 Value *Mask = Builder.getAllOnesMask(ElementCount::getFixed(VecWidth / EltWidth));
3058 if (VecWidth == 512)
3059 std::swap(Mask, Args.back());
3060 Args.push_back(Mask);
3061
3062 Rep = Builder.CreateIntrinsic(IID, Args);
3063 } else if (Name.starts_with("avx512.mask.cmp.")) {
3064 // Integer compare intrinsics.
3065 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3066 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3067 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3068 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3069 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3070 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3071 Name.starts_with("avx512.cvtw2mask.") ||
3072 Name.starts_with("avx512.cvtd2mask.") ||
3073 Name.starts_with("avx512.cvtq2mask.")) {
3074 Value *Op = CI->getArgOperand(0);
3075 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3076 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3077 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3078 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3079 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3080 Name.starts_with("avx512.mask.pabs")) {
3081 Rep = upgradeAbs(Builder, *CI);
3082 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3083 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3084 Name.starts_with("avx512.mask.pmaxs")) {
3085 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3086 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3087 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3088 Name.starts_with("avx512.mask.pmaxu")) {
3089 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3090 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3091 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3092 Name.starts_with("avx512.mask.pmins")) {
3093 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3094 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3095 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3096 Name.starts_with("avx512.mask.pminu")) {
3097 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3098 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3099 Name == "avx512.pmulu.dq.512" ||
3100 Name.starts_with("avx512.mask.pmulu.dq.")) {
3101 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3102 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3103 Name == "avx512.pmul.dq.512" ||
3104 Name.starts_with("avx512.mask.pmul.dq.")) {
3105 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3106 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3107 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3108 Rep =
3109 Builder.CreateSIToFP(CI->getArgOperand(1),
3110 cast<VectorType>(CI->getType())->getElementType());
3111 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3112 } else if (Name == "avx512.cvtusi2sd") {
3113 Rep =
3114 Builder.CreateUIToFP(CI->getArgOperand(1),
3115 cast<VectorType>(CI->getType())->getElementType());
3116 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3117 } else if (Name == "sse2.cvtss2sd") {
3118 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3119 Rep = Builder.CreateFPExt(
3120 Rep, cast<VectorType>(CI->getType())->getElementType());
3121 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3122 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3123 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3124 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3125 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3126 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3127 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3128 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3129 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3130 Name == "avx512.mask.cvtqq2ps.256" ||
3131 Name == "avx512.mask.cvtqq2ps.512" ||
3132 Name == "avx512.mask.cvtuqq2ps.256" ||
3133 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3134 Name == "avx.cvt.ps2.pd.256" ||
3135 Name == "avx512.mask.cvtps2pd.128" ||
3136 Name == "avx512.mask.cvtps2pd.256") {
3137 auto *DstTy = cast<FixedVectorType>(CI->getType());
3138 Rep = CI->getArgOperand(0);
3139 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3140
3141 unsigned NumDstElts = DstTy->getNumElements();
3142 if (NumDstElts < SrcTy->getNumElements()) {
3143 assert(NumDstElts == 2 && "Unexpected vector size");
3144 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3145 }
3146
3147 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3148 bool IsUnsigned = Name.contains("cvtu");
3149 if (IsPS2PD)
3150 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3151 else if (CI->arg_size() == 4 &&
3152 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3153 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3154 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3155 : Intrinsic::x86_avx512_sitofp_round;
3156 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3157 {Rep, CI->getArgOperand(3)});
3158 } else {
3159 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3160 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3161 }
3162
3163 if (CI->arg_size() >= 3)
3164 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3165 CI->getArgOperand(1));
3166 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3167 Name.starts_with("vcvtph2ps.")) {
3168 auto *DstTy = cast<FixedVectorType>(CI->getType());
3169 Rep = CI->getArgOperand(0);
3170 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3171 unsigned NumDstElts = DstTy->getNumElements();
3172 if (NumDstElts != SrcTy->getNumElements()) {
3173 assert(NumDstElts == 4 && "Unexpected vector size");
3174 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3175 }
3176 Rep = Builder.CreateBitCast(
3177 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3178 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3179 if (CI->arg_size() >= 3)
3180 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3181 CI->getArgOperand(1));
3182 } else if (Name.starts_with("avx512.mask.load")) {
3183 // "avx512.mask.loadu." or "avx512.mask.load."
3184 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3185 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3186 CI->getArgOperand(2), Aligned);
3187 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3188 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3189 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3190 ResultTy->getNumElements());
3191
3192 Rep = Builder.CreateIntrinsic(
3193 Intrinsic::masked_expandload, ResultTy,
3194 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3195 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3196 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3197 Value *MaskVec =
3198 getX86MaskVec(Builder, CI->getArgOperand(2),
3199 cast<FixedVectorType>(ResultTy)->getNumElements());
3200
3201 Rep = Builder.CreateIntrinsic(
3202 Intrinsic::masked_compressstore, ResultTy,
3203 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3204 } else if (Name.starts_with("avx512.mask.compress.") ||
3205 Name.starts_with("avx512.mask.expand.")) {
3206 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3207
3208 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3209 ResultTy->getNumElements());
3210
3211 bool IsCompress = Name[12] == 'c';
3212 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3213 : Intrinsic::x86_avx512_mask_expand;
3214 Rep = Builder.CreateIntrinsic(
3215 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3216 } else if (Name.starts_with("xop.vpcom")) {
3217 bool IsSigned;
3218 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3219 Name.ends_with("uq"))
3220 IsSigned = false;
3221 else if (Name.ends_with("b") || Name.ends_with("w") ||
3222 Name.ends_with("d") || Name.ends_with("q"))
3223 IsSigned = true;
3224 else
3225 llvm_unreachable("Unknown suffix");
3226
3227 unsigned Imm;
3228 if (CI->arg_size() == 3) {
3229 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3230 } else {
3231 Name = Name.substr(9); // strip off "xop.vpcom"
3232 if (Name.starts_with("lt"))
3233 Imm = 0;
3234 else if (Name.starts_with("le"))
3235 Imm = 1;
3236 else if (Name.starts_with("gt"))
3237 Imm = 2;
3238 else if (Name.starts_with("ge"))
3239 Imm = 3;
3240 else if (Name.starts_with("eq"))
3241 Imm = 4;
3242 else if (Name.starts_with("ne"))
3243 Imm = 5;
3244 else if (Name.starts_with("false"))
3245 Imm = 6;
3246 else if (Name.starts_with("true"))
3247 Imm = 7;
3248 else
3249 llvm_unreachable("Unknown condition");
3250 }
3251
3252 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3253 } else if (Name.starts_with("xop.vpcmov")) {
3254 Value *Sel = CI->getArgOperand(2);
3255 Value *NotSel = Builder.CreateNot(Sel);
3256 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3257 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3258 Rep = Builder.CreateOr(Sel0, Sel1);
3259 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3260 Name.starts_with("avx512.mask.prol")) {
3261 Rep = upgradeX86Rotate(Builder, *CI, false);
3262 } else if (Name.starts_with("avx512.pror") ||
3263 Name.starts_with("avx512.mask.pror")) {
3264 Rep = upgradeX86Rotate(Builder, *CI, true);
3265 } else if (Name.starts_with("avx512.vpshld.") ||
3266 Name.starts_with("avx512.mask.vpshld") ||
3267 Name.starts_with("avx512.maskz.vpshld")) {
3268 bool ZeroMask = Name[11] == 'z';
3269 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3270 } else if (Name.starts_with("avx512.vpshrd.") ||
3271 Name.starts_with("avx512.mask.vpshrd") ||
3272 Name.starts_with("avx512.maskz.vpshrd")) {
3273 bool ZeroMask = Name[11] == 'z';
3274 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3275 } else if (Name == "sse42.crc32.64.8") {
3276 Value *Trunc0 =
3277 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3278 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3279 {Trunc0, CI->getArgOperand(1)});
3280 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3281 } else if (Name.starts_with("avx.vbroadcast.s") ||
3282 Name.starts_with("avx512.vbroadcast.s")) {
3283 // Replace broadcasts with a series of insertelements.
3284 auto *VecTy = cast<FixedVectorType>(CI->getType());
3285 Type *EltTy = VecTy->getElementType();
3286 unsigned EltNum = VecTy->getNumElements();
3287 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3288 Type *I32Ty = Type::getInt32Ty(C);
3289 Rep = PoisonValue::get(VecTy);
3290 for (unsigned I = 0; I < EltNum; ++I)
3291 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3292 } else if (Name.starts_with("sse41.pmovsx") ||
3293 Name.starts_with("sse41.pmovzx") ||
3294 Name.starts_with("avx2.pmovsx") ||
3295 Name.starts_with("avx2.pmovzx") ||
3296 Name.starts_with("avx512.mask.pmovsx") ||
3297 Name.starts_with("avx512.mask.pmovzx")) {
3298 auto *DstTy = cast<FixedVectorType>(CI->getType());
3299 unsigned NumDstElts = DstTy->getNumElements();
3300
3301 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3302 SmallVector<int, 8> ShuffleMask(NumDstElts);
3303 for (unsigned i = 0; i != NumDstElts; ++i)
3304 ShuffleMask[i] = i;
3305
3306 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3307
3308 bool DoSext = Name.contains("pmovsx");
3309 Rep =
3310 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3311 // If there are 3 arguments, it's a masked intrinsic, so we need a select.
3312 if (CI->arg_size() == 3)
3313 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3314 CI->getArgOperand(1));
3315 } else if (Name == "avx512.mask.pmov.qd.256" ||
3316 Name == "avx512.mask.pmov.qd.512" ||
3317 Name == "avx512.mask.pmov.wb.256" ||
3318 Name == "avx512.mask.pmov.wb.512") {
3319 Type *Ty = CI->getArgOperand(1)->getType();
3320 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3321 Rep =
3322 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3323 } else if (Name.starts_with("avx.vbroadcastf128") ||
3324 Name == "avx2.vbroadcasti128") {
3325 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3326 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3327 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3328 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3329 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3330 if (NumSrcElts == 2)
3331 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3332 else
3333 Rep = Builder.CreateShuffleVector(Load,
3334 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3335 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3336 Name.starts_with("avx512.mask.shuf.f")) {
3337 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3338 Type *VT = CI->getType();
3339 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3340 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3341 unsigned ControlBitsMask = NumLanes - 1;
3342 unsigned NumControlBits = NumLanes / 2;
3343 SmallVector<int, 8> ShuffleMask(0);
3344
3345 for (unsigned l = 0; l != NumLanes; ++l) {
3346 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3347 // We actually need the other source.
3348 if (l >= NumLanes / 2)
3349 LaneMask += NumLanes;
3350 for (unsigned i = 0; i != NumElementsInLane; ++i)
3351 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3352 }
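// E.g. for a 512-bit shuffle with four 128-bit lanes, Imm = 0xE4 (binary
// 11'10'01'00) keeps lanes <0, 1> from the first source and takes lanes
// <2, 3> from the second via the +NumLanes offset above.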
3353 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3354 CI->getArgOperand(1), ShuffleMask);
3355 Rep =
3356 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3357 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3358 Name.starts_with("avx512.mask.broadcasti")) {
3359 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3360 ->getNumElements();
3361 unsigned NumDstElts =
3362 cast<FixedVectorType>(CI->getType())->getNumElements();
3363
3364 SmallVector<int, 8> ShuffleMask(NumDstElts);
3365 for (unsigned i = 0; i != NumDstElts; ++i)
3366 ShuffleMask[i] = i % NumSrcElts;
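// E.g. broadcasting a v4i32 into a v16i32 builds the repeating mask
// <0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3>.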
3367
3368 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3369 CI->getArgOperand(0), ShuffleMask);
3370 Rep =
3371 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3372 } else if (Name.starts_with("avx2.pbroadcast") ||
3373 Name.starts_with("avx2.vbroadcast") ||
3374 Name.starts_with("avx512.pbroadcast") ||
3375 Name.starts_with("avx512.mask.broadcast.s")) {
3376 // Replace vp?broadcasts with a vector shuffle.
3377 Value *Op = CI->getArgOperand(0);
3378 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3379 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3380 SmallVector<int, 8> M;
3381 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3382 Rep = Builder.CreateShuffleVector(Op, M);
3383
3384 if (CI->arg_size() == 3)
3385 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3386 CI->getArgOperand(1));
3387 } else if (Name.starts_with("sse2.padds.") ||
3388 Name.starts_with("avx2.padds.") ||
3389 Name.starts_with("avx512.padds.") ||
3390 Name.starts_with("avx512.mask.padds.")) {
3391 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3392 } else if (Name.starts_with("sse2.psubs.") ||
3393 Name.starts_with("avx2.psubs.") ||
3394 Name.starts_with("avx512.psubs.") ||
3395 Name.starts_with("avx512.mask.psubs.")) {
3396 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3397 } else if (Name.starts_with("sse2.paddus.") ||
3398 Name.starts_with("avx2.paddus.") ||
3399 Name.starts_with("avx512.mask.paddus.")) {
3400 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3401 } else if (Name.starts_with("sse2.psubus.") ||
3402 Name.starts_with("avx2.psubus.") ||
3403 Name.starts_with("avx512.mask.psubus.")) {
3404 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3405 } else if (Name.starts_with("avx512.mask.palignr.")) {
3406 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3407 CI->getArgOperand(1), CI->getArgOperand(2),
3408 CI->getArgOperand(3), CI->getArgOperand(4),
3409 false);
3410 } else if (Name.starts_with("avx512.mask.valign.")) {
3411 Rep = upgradeX86ALIGNIntrinsics(
3412 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3413 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3414 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3415 // 128/256-bit shift left specified in bits.
3416 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3417 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3418 Shift / 8); // Shift is in bits.
3419 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3420 // 128/256-bit shift right specified in bits.
3421 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3422 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3423 Shift / 8); // Shift is in bits.
3424 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3425 Name == "avx512.psll.dq.512") {
3426 // 128/256/512-bit shift left specified in bytes.
3427 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3428 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3429 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3430 Name == "avx512.psrl.dq.512") {
3431 // 128/256/512-bit shift right specified in bytes.
3432 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3433 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3434 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3435 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3436 Name.starts_with("avx2.pblendd.")) {
3437 Value *Op0 = CI->getArgOperand(0);
3438 Value *Op1 = CI->getArgOperand(1);
3439 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3440 auto *VecTy = cast<FixedVectorType>(CI->getType());
3441 unsigned NumElts = VecTy->getNumElements();
3442
3443 SmallVector<int, 16> Idxs(NumElts);
3444 for (unsigned i = 0; i != NumElts; ++i)
3445 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
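// E.g. for v8i16 pblendw with Imm = 0x0F this builds
// <8, 9, 10, 11, 4, 5, 6, 7>: bit i of the immediate picks element i from
// Op1 when set and from Op0 when clear.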
3446
3447 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3448 } else if (Name.starts_with("avx.vinsertf128.") ||
3449 Name == "avx2.vinserti128" ||
3450 Name.starts_with("avx512.mask.insert")) {
3451 Value *Op0 = CI->getArgOperand(0);
3452 Value *Op1 = CI->getArgOperand(1);
3453 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3454 unsigned DstNumElts =
3455 cast<FixedVectorType>(CI->getType())->getNumElements();
3456 unsigned SrcNumElts =
3457 cast<FixedVectorType>(Op1->getType())->getNumElements();
3458 unsigned Scale = DstNumElts / SrcNumElts;
3459
3460 // Mask off the high bits of the immediate value; hardware ignores those.
3461 Imm = Imm % Scale;
3462
3463 // Extend the second operand into a vector the size of the destination.
3464 SmallVector<int, 8> Idxs(DstNumElts);
3465 for (unsigned i = 0; i != SrcNumElts; ++i)
3466 Idxs[i] = i;
3467 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3468 Idxs[i] = SrcNumElts;
3469 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3470
3471 // Insert the second operand into the first operand.
3472
3473 // Note that there is no guarantee that instruction lowering will actually
3474 // produce a vinsertf128 instruction for the created shuffles. In
3475 // particular, the 0 immediate case involves no lane changes, so it can
3476 // be handled as a blend.
3477
3478 // Example of shuffle mask for 32-bit elements:
3479 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3480 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7>
3481
3482 // First fill with the identity mask.
3483 for (unsigned i = 0; i != DstNumElts; ++i)
3484 Idxs[i] = i;
3485 // Then replace the elements where we need to insert.
3486 for (unsigned i = 0; i != SrcNumElts; ++i)
3487 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3488 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3489
3490 // If the intrinsic has a mask operand, handle that.
3491 if (CI->arg_size() == 5)
3492 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3493 CI->getArgOperand(3));
3494 } else if (Name.starts_with("avx.vextractf128.") ||
3495 Name == "avx2.vextracti128" ||
3496 Name.starts_with("avx512.mask.vextract")) {
3497 Value *Op0 = CI->getArgOperand(0);
3498 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3499 unsigned DstNumElts =
3500 cast<FixedVectorType>(CI->getType())->getNumElements();
3501 unsigned SrcNumElts =
3502 cast<FixedVectorType>(Op0->getType())->getNumElements();
3503 unsigned Scale = SrcNumElts / DstNumElts;
3504
3505 // Mask off the high bits of the immediate value; hardware ignores those.
3506 Imm = Imm % Scale;
3507
3508 // Get indexes for the subvector of the input vector.
3509 SmallVector<int, 8> Idxs(DstNumElts);
3510 for (unsigned i = 0; i != DstNumElts; ++i) {
3511 Idxs[i] = i + (Imm * DstNumElts);
3512 }
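// E.g. extracting the upper half (Imm = 1) of a v8f32 into a v4f32 yields
// the mask <4, 5, 6, 7>.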
3513 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3514
3515 // If the intrinsic has a mask operand, handle that.
3516 if (CI->arg_size() == 4)
3517 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3518 CI->getArgOperand(2));
3519 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3520 Name.starts_with("avx512.mask.perm.di.")) {
3521 Value *Op0 = CI->getArgOperand(0);
3522 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3523 auto *VecTy = cast<FixedVectorType>(CI->getType());
3524 unsigned NumElts = VecTy->getNumElements();
3525
3526 SmallVector<int, 8> Idxs(NumElts);
3527 for (unsigned i = 0; i != NumElts; ++i)
3528 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
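// Each 2-bit field of the immediate selects one of the four elements in a
// 256-bit group; e.g. Imm = 0x4E gives <2, 3, 0, 1> per group, i.e. the two
// 128-bit halves swapped.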
3529
3530 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3531
3532 if (CI->arg_size() == 4)
3533 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3534 CI->getArgOperand(2));
3535 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3536 // The immediate permute control byte looks like this:
3537 // [1:0] - select 128 bits from sources for low half of destination
3538 // [2] - ignore
3539 // [3] - zero low half of destination
3540 // [5:4] - select 128 bits from sources for high half of destination
3541 // [6] - ignore
3542 // [7] - zero high half of destination
3543
3544 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3545
3546 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3547 unsigned HalfSize = NumElts / 2;
3548 SmallVector<int, 8> ShuffleMask(NumElts);
3549
3550 // Determine which operand(s) are actually in use for this instruction.
3551 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3552 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3553
3554 // If needed, replace operands based on zero mask.
3555 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3556 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3557
3558 // Permute low half of result.
3559 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3560 for (unsigned i = 0; i < HalfSize; ++i)
3561 ShuffleMask[i] = StartIndex + i;
3562
3563 // Permute high half of result.
3564 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3565 for (unsigned i = 0; i < HalfSize; ++i)
3566 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
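// E.g. for v8f32 with Imm = 0x21 the mask becomes <4, 5, 6, 7, 8, 9, 10, 11>:
// the high lane of the first source followed by the low lane of the second.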
3567
3568 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3569
3570 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3571 Name.starts_with("avx512.mask.vpermil.p") ||
3572 Name.starts_with("avx512.mask.pshuf.d.")) {
3573 Value *Op0 = CI->getArgOperand(0);
3574 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3575 auto *VecTy = cast<FixedVectorType>(CI->getType());
3576 unsigned NumElts = VecTy->getNumElements();
3577 // Calculate the size of each index in the immediate.
3578 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3579 unsigned IdxMask = ((1 << IdxSize) - 1);
3580
3581 SmallVector<int, 8> Idxs(NumElts);
3582 // Look up the bits for this element, wrapping around the immediate every
3583 // 8 bits. Elements are grouped into sets of 2 or 4, so we need to offset
3584 // by the first index of each group.
3585 for (unsigned i = 0; i != NumElts; ++i)
3586 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3587
3588 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3589
3590 if (CI->arg_size() == 4)
3591 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3592 CI->getArgOperand(2));
3593 } else if (Name == "sse2.pshufl.w" ||
3594 Name.starts_with("avx512.mask.pshufl.w.")) {
3595 Value *Op0 = CI->getArgOperand(0);
3596 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3597 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3598
3599 SmallVector<int, 16> Idxs(NumElts);
3600 for (unsigned l = 0; l != NumElts; l += 8) {
3601 for (unsigned i = 0; i != 4; ++i)
3602 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3603 for (unsigned i = 4; i != 8; ++i)
3604 Idxs[i + l] = i + l;
3605 }
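// E.g. Imm = 0xB1 shuffles the low four words of each 128-bit lane as
// <1, 0, 3, 2> and leaves the high four words in place.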
3606
3607 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3608
3609 if (CI->arg_size() == 4)
3610 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3611 CI->getArgOperand(2));
3612 } else if (Name == "sse2.pshufh.w" ||
3613 Name.starts_with("avx512.mask.pshufh.w.")) {
3614 Value *Op0 = CI->getArgOperand(0);
3615 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3616 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3617
3618 SmallVector<int, 16> Idxs(NumElts);
3619 for (unsigned l = 0; l != NumElts; l += 8) {
3620 for (unsigned i = 0; i != 4; ++i)
3621 Idxs[i + l] = i + l;
3622 for (unsigned i = 0; i != 4; ++i)
3623 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3624 }
3625
3626 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3627
3628 if (CI->arg_size() == 4)
3629 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3630 CI->getArgOperand(2));
3631 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3632 Value *Op0 = CI->getArgOperand(0);
3633 Value *Op1 = CI->getArgOperand(1);
3634 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3635 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3636
3637 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3638 unsigned HalfLaneElts = NumLaneElts / 2;
3639
3640 SmallVector<int, 16> Idxs(NumElts);
3641 for (unsigned i = 0; i != NumElts; ++i) {
3642 // Base index is the starting element of the lane.
3643 Idxs[i] = i - (i % NumLaneElts);
3644 // If we are halfway through the lane, switch to the other source.
3645 if ((i % NumLaneElts) >= HalfLaneElts)
3646 Idxs[i] += NumElts;
3647 // Now select the specific element by adding HalfLaneElts bits from the
3648 // immediate, wrapping around the immediate every 8 bits.
3649 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3650 }
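// E.g. for a v4f32 shufps with Imm = 0x44 this produces <0, 1, 4, 5>: two
// elements selected from Op0 followed by two selected from Op1.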
3651
3652 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3653
3654 Rep =
3655 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3656 } else if (Name.starts_with("avx512.mask.movddup") ||
3657 Name.starts_with("avx512.mask.movshdup") ||
3658 Name.starts_with("avx512.mask.movsldup")) {
3659 Value *Op0 = CI->getArgOperand(0);
3660 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3661 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3662
3663 unsigned Offset = 0;
3664 if (Name.starts_with("avx512.mask.movshdup."))
3665 Offset = 1;
3666
3667 SmallVector<int, 16> Idxs(NumElts);
3668 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3669 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3670 Idxs[i + l + 0] = i + l + Offset;
3671 Idxs[i + l + 1] = i + l + Offset;
3672 }
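// E.g. movsldup on v4f32 duplicates the even elements (<0, 0, 2, 2>);
// movshdup (Offset = 1) duplicates the odd ones (<1, 1, 3, 3>).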
3673
3674 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3675
3676 Rep =
3677 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3678 } else if (Name.starts_with("avx512.mask.punpckl") ||
3679 Name.starts_with("avx512.mask.unpckl.")) {
3680 Value *Op0 = CI->getArgOperand(0);
3681 Value *Op1 = CI->getArgOperand(1);
3682 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3683 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3684
3685 SmallVector<int, 64> Idxs(NumElts);
3686 for (int l = 0; l != NumElts; l += NumLaneElts)
3687 for (int i = 0; i != NumLaneElts; ++i)
3688 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
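// E.g. punpcklwd on v8i16 interleaves the low half of each 128-bit lane
// from the two sources: <0, 8, 1, 9, 2, 10, 3, 11>.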
3689
3690 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3691
3692 Rep =
3693 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3694 } else if (Name.starts_with("avx512.mask.punpckh") ||
3695 Name.starts_with("avx512.mask.unpckh.")) {
3696 Value *Op0 = CI->getArgOperand(0);
3697 Value *Op1 = CI->getArgOperand(1);
3698 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3699 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3700
3701 SmallVector<int, 64> Idxs(NumElts);
3702 for (int l = 0; l != NumElts; l += NumLaneElts)
3703 for (int i = 0; i != NumLaneElts; ++i)
3704 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3705
3706 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3707
3708 Rep =
3709 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3710 } else if (Name.starts_with("avx512.mask.and.") ||
3711 Name.starts_with("avx512.mask.pand.")) {
3712 VectorType *FTy = cast<VectorType>(CI->getType());
3713 VectorType *ITy = VectorType::getInteger(FTy);
3714 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3715 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3716 Rep = Builder.CreateBitCast(Rep, FTy);
3717 Rep =
3718 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3719 } else if (Name.starts_with("avx512.mask.andn.") ||
3720 Name.starts_with("avx512.mask.pandn.")) {
3721 VectorType *FTy = cast<VectorType>(CI->getType());
3722 VectorType *ITy = VectorType::getInteger(FTy);
3723 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3724 Rep = Builder.CreateAnd(Rep,
3725 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3726 Rep = Builder.CreateBitCast(Rep, FTy);
3727 Rep =
3728 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3729 } else if (Name.starts_with("avx512.mask.or.") ||
3730 Name.starts_with("avx512.mask.por.")) {
3731 VectorType *FTy = cast<VectorType>(CI->getType());
3732 VectorType *ITy = VectorType::getInteger(FTy);
3733 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3734 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3735 Rep = Builder.CreateBitCast(Rep, FTy);
3736 Rep =
3737 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3738 } else if (Name.starts_with("avx512.mask.xor.") ||
3739 Name.starts_with("avx512.mask.pxor.")) {
3740 VectorType *FTy = cast<VectorType>(CI->getType());
3741 VectorType *ITy = VectorType::getInteger(FTy);
3742 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3743 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3744 Rep = Builder.CreateBitCast(Rep, FTy);
3745 Rep =
3746 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3747 } else if (Name.starts_with("avx512.mask.padd.")) {
3748 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3749 Rep =
3750 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3751 } else if (Name.starts_with("avx512.mask.psub.")) {
3752 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3753 Rep =
3754 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3755 } else if (Name.starts_with("avx512.mask.pmull.")) {
3756 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3757 Rep =
3758 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3759 } else if (Name.starts_with("avx512.mask.add.p")) {
3760 if (Name.ends_with(".512")) {
3761 Intrinsic::ID IID;
3762 if (Name[17] == 's')
3763 IID = Intrinsic::x86_avx512_add_ps_512;
3764 else
3765 IID = Intrinsic::x86_avx512_add_pd_512;
3766
3767 Rep = Builder.CreateIntrinsic(
3768 IID,
3769 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3770 } else {
3771 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3772 }
3773 Rep =
3774 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3775 } else if (Name.starts_with("avx512.mask.div.p")) {
3776 if (Name.ends_with(".512")) {
3777 Intrinsic::ID IID;
3778 if (Name[17] == 's')
3779 IID = Intrinsic::x86_avx512_div_ps_512;
3780 else
3781 IID = Intrinsic::x86_avx512_div_pd_512;
3782
3783 Rep = Builder.CreateIntrinsic(
3784 IID,
3785 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3786 } else {
3787 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3788 }
3789 Rep =
3790 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3791 } else if (Name.starts_with("avx512.mask.mul.p")) {
3792 if (Name.ends_with(".512")) {
3793 Intrinsic::ID IID;
3794 if (Name[17] == 's')
3795 IID = Intrinsic::x86_avx512_mul_ps_512;
3796 else
3797 IID = Intrinsic::x86_avx512_mul_pd_512;
3798
3799 Rep = Builder.CreateIntrinsic(
3800 IID,
3801 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3802 } else {
3803 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3804 }
3805 Rep =
3806 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3807 } else if (Name.starts_with("avx512.mask.sub.p")) {
3808 if (Name.ends_with(".512")) {
3809 Intrinsic::ID IID;
3810 if (Name[17] == 's')
3811 IID = Intrinsic::x86_avx512_sub_ps_512;
3812 else
3813 IID = Intrinsic::x86_avx512_sub_pd_512;
3814
3815 Rep = Builder.CreateIntrinsic(
3816 IID,
3817 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3818 } else {
3819 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3820 }
3821 Rep =
3822 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3823 } else if ((Name.starts_with("avx512.mask.max.p") ||
3824 Name.starts_with("avx512.mask.min.p")) &&
3825 Name.drop_front(18) == ".512") {
3826 bool IsDouble = Name[17] == 'd';
3827 bool IsMin = Name[13] == 'i';
3828 static const Intrinsic::ID MinMaxTbl[2][2] = {
3829 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3830 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3831 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3832
3833 Rep = Builder.CreateIntrinsic(
3834 IID,
3835 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3836 Rep =
3837 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3838 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3839 Rep =
3840 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3841 {CI->getArgOperand(0), Builder.getInt1(false)});
3842 Rep =
3843 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3844 } else if (Name.starts_with("avx512.mask.psll")) {
3845 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3846 bool IsVariable = Name[16] == 'v';
3847 char Size = Name[16] == '.' ? Name[17]
3848 : Name[17] == '.' ? Name[18]
3849 : Name[18] == '.' ? Name[19]
3850 : Name[20];
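// The prefix "avx512.mask.psll" is 16 characters long, so Name[16] is the
// first character after it: 'i' for the immediate forms, 'v' for the
// variable forms, or '.' when the element-size letter follows directly.
// E.g. "avx512.mask.psll.d.128" gives Size = 'd' and "avx512.mask.psllv4.si"
// gives Size = 's'.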
3851
3852 Intrinsic::ID IID;
3853 if (IsVariable && Name[17] != '.') {
3854 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3855 IID = Intrinsic::x86_avx2_psllv_q;
3856 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3857 IID = Intrinsic::x86_avx2_psllv_q_256;
3858 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3859 IID = Intrinsic::x86_avx2_psllv_d;
3860 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3861 IID = Intrinsic::x86_avx2_psllv_d_256;
3862 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3863 IID = Intrinsic::x86_avx512_psllv_w_128;
3864 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3865 IID = Intrinsic::x86_avx512_psllv_w_256;
3866 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3867 IID = Intrinsic::x86_avx512_psllv_w_512;
3868 else
3869 llvm_unreachable("Unexpected size");
3870 } else if (Name.ends_with(".128")) {
3871 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3872 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3873 : Intrinsic::x86_sse2_psll_d;
3874 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3875 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3876 : Intrinsic::x86_sse2_psll_q;
3877 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3878 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3879 : Intrinsic::x86_sse2_psll_w;
3880 else
3881 llvm_unreachable("Unexpected size");
3882 } else if (Name.ends_with(".256")) {
3883 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3884 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3885 : Intrinsic::x86_avx2_psll_d;
3886 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3887 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3888 : Intrinsic::x86_avx2_psll_q;
3889 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3890 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3891 : Intrinsic::x86_avx2_psll_w;
3892 else
3893 llvm_unreachable("Unexpected size");
3894 } else {
3895 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3896 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3897 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3898 : Intrinsic::x86_avx512_psll_d_512;
3899 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3900 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3901 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3902 : Intrinsic::x86_avx512_psll_q_512;
3903 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3904 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3905 : Intrinsic::x86_avx512_psll_w_512;
3906 else
3907 llvm_unreachable("Unexpected size");
3908 }
3909
3910 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3911 } else if (Name.starts_with("avx512.mask.psrl")) {
3912 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3913 bool IsVariable = Name[16] == 'v';
3914 char Size = Name[16] == '.' ? Name[17]
3915 : Name[17] == '.' ? Name[18]
3916 : Name[18] == '.' ? Name[19]
3917 : Name[20];
3918
3919 Intrinsic::ID IID;
3920 if (IsVariable && Name[17] != '.') {
3921 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3922 IID = Intrinsic::x86_avx2_psrlv_q;
3923 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3924 IID = Intrinsic::x86_avx2_psrlv_q_256;
3925 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3926 IID = Intrinsic::x86_avx2_psrlv_d;
3927 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3928 IID = Intrinsic::x86_avx2_psrlv_d_256;
3929 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3930 IID = Intrinsic::x86_avx512_psrlv_w_128;
3931 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3932 IID = Intrinsic::x86_avx512_psrlv_w_256;
3933 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3934 IID = Intrinsic::x86_avx512_psrlv_w_512;
3935 else
3936 llvm_unreachable("Unexpected size");
3937 } else if (Name.ends_with(".128")) {
3938 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3939 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3940 : Intrinsic::x86_sse2_psrl_d;
3941 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3942 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3943 : Intrinsic::x86_sse2_psrl_q;
3944 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3945 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3946 : Intrinsic::x86_sse2_psrl_w;
3947 else
3948 llvm_unreachable("Unexpected size");
3949 } else if (Name.ends_with(".256")) {
3950 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3951 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3952 : Intrinsic::x86_avx2_psrl_d;
3953 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3954 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3955 : Intrinsic::x86_avx2_psrl_q;
3956 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3957 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3958 : Intrinsic::x86_avx2_psrl_w;
3959 else
3960 llvm_unreachable("Unexpected size");
3961 } else {
3962 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrlv.d.512
3963 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3964 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3965 : Intrinsic::x86_avx512_psrl_d_512;
3966 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrlv.q.512
3967 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3968 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3969 : Intrinsic::x86_avx512_psrl_q_512;
3970 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3971 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3972 : Intrinsic::x86_avx512_psrl_w_512;
3973 else
3974 llvm_unreachable("Unexpected size");
3975 }
3976
3977 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3978 } else if (Name.starts_with("avx512.mask.psra")) {
3979 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3980 bool IsVariable = Name[16] == 'v';
3981 char Size = Name[16] == '.' ? Name[17]
3982 : Name[17] == '.' ? Name[18]
3983 : Name[18] == '.' ? Name[19]
3984 : Name[20];
3985
3986 Intrinsic::ID IID;
3987 if (IsVariable && Name[17] != '.') {
3988 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
3989 IID = Intrinsic::x86_avx2_psrav_d;
3990 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
3991 IID = Intrinsic::x86_avx2_psrav_d_256;
3992 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
3993 IID = Intrinsic::x86_avx512_psrav_w_128;
3994 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
3995 IID = Intrinsic::x86_avx512_psrav_w_256;
3996 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
3997 IID = Intrinsic::x86_avx512_psrav_w_512;
3998 else
3999 llvm_unreachable("Unexpected size");
4000 } else if (Name.ends_with(".128")) {
4001 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4002 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4003 : Intrinsic::x86_sse2_psra_d;
4004 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4005 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4006 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4007 : Intrinsic::x86_avx512_psra_q_128;
4008 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4009 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4010 : Intrinsic::x86_sse2_psra_w;
4011 else
4012 llvm_unreachable("Unexpected size");
4013 } else if (Name.ends_with(".256")) {
4014 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4015 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4016 : Intrinsic::x86_avx2_psra_d;
4017 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4018 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4019 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4020 : Intrinsic::x86_avx512_psra_q_256;
4021 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4022 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4023 : Intrinsic::x86_avx2_psra_w;
4024 else
4025 llvm_unreachable("Unexpected size");
4026 } else {
4027 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4028 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4029 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4030 : Intrinsic::x86_avx512_psra_d_512;
4031 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q, psrav.q.512
4032 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4033 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4034 : Intrinsic::x86_avx512_psra_q_512;
4035 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4036 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4037 : Intrinsic::x86_avx512_psra_w_512;
4038 else
4039 llvm_unreachable("Unexpected size");
4040 }
4041
4042 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4043 } else if (Name.starts_with("avx512.mask.move.s")) {
4044 Rep = upgradeMaskedMove(Builder, *CI);
4045 } else if (Name.starts_with("avx512.cvtmask2")) {
4046 Rep = upgradeMaskToInt(Builder, *CI);
4047 } else if (Name.ends_with(".movntdqa")) {
4048 MDNode *Node = MDNode::get(
4049 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4050
4051 LoadInst *LI = Builder.CreateAlignedLoad(
4052 CI->getType(), CI->getArgOperand(0),
4053 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
4054 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4055 Rep = LI;
4056 } else if (Name.starts_with("fma.vfmadd.") ||
4057 Name.starts_with("fma.vfmsub.") ||
4058 Name.starts_with("fma.vfnmadd.") ||
4059 Name.starts_with("fma.vfnmsub.")) {
4060 bool NegMul = Name[6] == 'n';
4061 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4062 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
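// E.g. "fma.vfnmadd.ss" has Name[6] == 'n'; the extra 'n' in the negated
// forms shifts the "sub" and scalar-suffix characters one position to the
// right, hence the two index variants.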
4063
4064 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4065 CI->getArgOperand(2)};
4066
4067 if (IsScalar) {
4068 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4069 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4070 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4071 }
4072
4073 if (NegMul && !IsScalar)
4074 Ops[0] = Builder.CreateFNeg(Ops[0]);
4075 if (NegMul && IsScalar)
4076 Ops[1] = Builder.CreateFNeg(Ops[1]);
4077 if (NegAcc)
4078 Ops[2] = Builder.CreateFNeg(Ops[2]);
4079
4080 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4081
4082 if (IsScalar)
4083 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4084 } else if (Name.starts_with("fma4.vfmadd.s")) {
4085 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4086 CI->getArgOperand(2)};
4087
4088 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4089 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4090 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4091
4092 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4093
4094 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4095 Rep, (uint64_t)0);
4096 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4097 Name.starts_with("avx512.maskz.vfmadd.s") ||
4098 Name.starts_with("avx512.mask3.vfmadd.s") ||
4099 Name.starts_with("avx512.mask3.vfmsub.s") ||
4100 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4101 bool IsMask3 = Name[11] == '3';
4102 bool IsMaskZ = Name[11] == 'z';
4103 // Drop the "avx512.mask." prefix to simplify the indexing below.
4104 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4105 bool NegMul = Name[2] == 'n';
4106 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4107
4108 Value *A = CI->getArgOperand(0);
4109 Value *B = CI->getArgOperand(1);
4110 Value *C = CI->getArgOperand(2);
4111
4112 if (NegMul && (IsMask3 || IsMaskZ))
4113 A = Builder.CreateFNeg(A);
4114 if (NegMul && !(IsMask3 || IsMaskZ))
4115 B = Builder.CreateFNeg(B);
4116 if (NegAcc)
4117 C = Builder.CreateFNeg(C);
4118
4119 A = Builder.CreateExtractElement(A, (uint64_t)0);
4120 B = Builder.CreateExtractElement(B, (uint64_t)0);
4121 C = Builder.CreateExtractElement(C, (uint64_t)0);
4122
4123 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4124 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4125 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4126
4127 Intrinsic::ID IID;
4128 if (Name.back() == 'd')
4129 IID = Intrinsic::x86_avx512_vfmadd_f64;
4130 else
4131 IID = Intrinsic::x86_avx512_vfmadd_f32;
4132 Rep = Builder.CreateIntrinsic(IID, Ops);
4133 } else {
4134 Rep = Builder.CreateFMA(A, B, C);
4135 }
4136
4137 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4138 : IsMask3 ? C
4139 : A;
4140
4141 // For Mask3 with NegAcc, we need to create a new extractelement that
4142 // avoids the negation above.
4143 if (NegAcc && IsMask3)
4144 PassThru =
4145 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4146
4147 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4148 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4149 (uint64_t)0);
4150 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4151 Name.starts_with("avx512.mask.vfnmadd.p") ||
4152 Name.starts_with("avx512.mask.vfnmsub.p") ||
4153 Name.starts_with("avx512.mask3.vfmadd.p") ||
4154 Name.starts_with("avx512.mask3.vfmsub.p") ||
4155 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4156 Name.starts_with("avx512.maskz.vfmadd.p")) {
4157 bool IsMask3 = Name[11] == '3';
4158 bool IsMaskZ = Name[11] == 'z';
4159 // Drop the "avx512.mask." prefix to simplify the indexing below.
4160 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4161 bool NegMul = Name[2] == 'n';
4162 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4163
4164 Value *A = CI->getArgOperand(0);
4165 Value *B = CI->getArgOperand(1);
4166 Value *C = CI->getArgOperand(2);
4167
4168 if (NegMul && (IsMask3 || IsMaskZ))
4169 A = Builder.CreateFNeg(A);
4170 if (NegMul && !(IsMask3 || IsMaskZ))
4171 B = Builder.CreateFNeg(B);
4172 if (NegAcc)
4173 C = Builder.CreateFNeg(C);
4174
4175 if (CI->arg_size() == 5 &&
4176 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4177 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4178 Intrinsic::ID IID;
4179 // Check the character before ".512" in the string.
4180 if (Name[Name.size() - 5] == 's')
4181 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4182 else
4183 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4184
4185 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4186 } else {
4187 Rep = Builder.CreateFMA(A, B, C);
4188 }
4189
4190 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4191 : IsMask3 ? CI->getArgOperand(2)
4192 : CI->getArgOperand(0);
4193
4194 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4195 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4196 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4197 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4198 Intrinsic::ID IID;
4199 if (VecWidth == 128 && EltWidth == 32)
4200 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4201 else if (VecWidth == 256 && EltWidth == 32)
4202 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4203 else if (VecWidth == 128 && EltWidth == 64)
4204 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4205 else if (VecWidth == 256 && EltWidth == 64)
4206 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4207 else
4208 llvm_unreachable("Unexpected intrinsic");
4209
4210 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4211 CI->getArgOperand(2)};
4212 Ops[2] = Builder.CreateFNeg(Ops[2]);
4213 Rep = Builder.CreateIntrinsic(IID, Ops);
4214 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4215 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4216 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4217 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4218 bool IsMask3 = Name[11] == '3';
4219 bool IsMaskZ = Name[11] == 'z';
4220 // Drop the "avx512.mask." prefix to simplify the indexing below.
4221 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4222 bool IsSubAdd = Name[3] == 's';
4223 if (CI->arg_size() == 5) {
4224 Intrinsic::ID IID;
4225 // Check the character before ".512" in the string.
4226 if (Name[Name.size() - 5] == 's')
4227 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4228 else
4229 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4230
4231 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4232 CI->getArgOperand(2), CI->getArgOperand(4)};
4233 if (IsSubAdd)
4234 Ops[2] = Builder.CreateFNeg(Ops[2]);
4235
4236 Rep = Builder.CreateIntrinsic(IID, Ops);
4237 } else {
4238 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4239
4240 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4241 CI->getArgOperand(2)};
4242
4243 Function *FMA = Intrinsic::getOrInsertDeclaration(
4244 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4245 Value *Odd = Builder.CreateCall(FMA, Ops);
4246 Ops[2] = Builder.CreateFNeg(Ops[2]);
4247 Value *Even = Builder.CreateCall(FMA, Ops);
4248
4249 if (IsSubAdd)
4250 std::swap(Even, Odd);
4251
4252 SmallVector<int, 32> Idxs(NumElts);
4253 for (int i = 0; i != NumElts; ++i)
4254 Idxs[i] = i + (i % 2) * NumElts;
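// E.g. for v4f64 this builds <0, 5, 2, 7>, taking even elements from Even
// and odd elements from Odd to form the alternating sub/add (or add/sub)
// result.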
4255
4256 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4257 }
4258
4259 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4260 : IsMask3 ? CI->getArgOperand(2)
4261 : CI->getArgOperand(0);
4262
4263 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4264 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4265 Name.starts_with("avx512.maskz.pternlog.")) {
4266 bool ZeroMask = Name[11] == 'z';
4267 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4268 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4269 Intrinsic::ID IID;
4270 if (VecWidth == 128 && EltWidth == 32)
4271 IID = Intrinsic::x86_avx512_pternlog_d_128;
4272 else if (VecWidth == 256 && EltWidth == 32)
4273 IID = Intrinsic::x86_avx512_pternlog_d_256;
4274 else if (VecWidth == 512 && EltWidth == 32)
4275 IID = Intrinsic::x86_avx512_pternlog_d_512;
4276 else if (VecWidth == 128 && EltWidth == 64)
4277 IID = Intrinsic::x86_avx512_pternlog_q_128;
4278 else if (VecWidth == 256 && EltWidth == 64)
4279 IID = Intrinsic::x86_avx512_pternlog_q_256;
4280 else if (VecWidth == 512 && EltWidth == 64)
4281 IID = Intrinsic::x86_avx512_pternlog_q_512;
4282 else
4283 llvm_unreachable("Unexpected intrinsic");
4284
4285 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4286 CI->getArgOperand(2), CI->getArgOperand(3)};
4287 Rep = Builder.CreateIntrinsic(IID, Args);
4288 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4289 : CI->getArgOperand(0);
4290 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4291 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4292 Name.starts_with("avx512.maskz.vpmadd52")) {
4293 bool ZeroMask = Name[11] == 'z';
4294 bool High = Name[20] == 'h' || Name[21] == 'h';
4295 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4296 Intrinsic::ID IID;
4297 if (VecWidth == 128 && !High)
4298 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4299 else if (VecWidth == 256 && !High)
4300 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4301 else if (VecWidth == 512 && !High)
4302 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4303 else if (VecWidth == 128 && High)
4304 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4305 else if (VecWidth == 256 && High)
4306 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4307 else if (VecWidth == 512 && High)
4308 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4309 else
4310 llvm_unreachable("Unexpected intrinsic");
4311
4312 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4313 CI->getArgOperand(2)};
4314 Rep = Builder.CreateIntrinsic(IID, Args);
4315 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4316 : CI->getArgOperand(0);
4317 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4318 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4319 Name.starts_with("avx512.mask.vpermt2var.") ||
4320 Name.starts_with("avx512.maskz.vpermt2var.")) {
4321 bool ZeroMask = Name[11] == 'z';
4322 bool IndexForm = Name[17] == 'i';
4323 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4324 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4325 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4326 Name.starts_with("avx512.mask.vpdpbusds.") ||
4327 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4328 bool ZeroMask = Name[11] == 'z';
4329 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4330 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4331 Intrinsic::ID IID;
4332 if (VecWidth == 128 && !IsSaturating)
4333 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4334 else if (VecWidth == 256 && !IsSaturating)
4335 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4336 else if (VecWidth == 512 && !IsSaturating)
4337 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4338 else if (VecWidth == 128 && IsSaturating)
4339 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4340 else if (VecWidth == 256 && IsSaturating)
4341 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4342 else if (VecWidth == 512 && IsSaturating)
4343 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4344 else
4345 llvm_unreachable("Unexpected intrinsic");
4346
4347 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4348 CI->getArgOperand(2)};
4349
4350 // Input argument types were incorrectly set to vectors of i32 before, but
4351 // they should be vectors of i8. Insert bitcasts when encountering the old
4352 // types.
4353 if (Args[1]->getType()->isVectorTy() &&
4354 cast<VectorType>(Args[1]->getType())
4355 ->getElementType()
4356 ->isIntegerTy(32) &&
4357 Args[2]->getType()->isVectorTy() &&
4358 cast<VectorType>(Args[2]->getType())
4359 ->getElementType()
4360 ->isIntegerTy(32)) {
4361 Type *NewArgType = nullptr;
4362 if (VecWidth == 128)
4363 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4364 else if (VecWidth == 256)
4365 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4366 else if (VecWidth == 512)
4367 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4368 else
4369 llvm_unreachable("Unexpected vector bit width");
4370
4371 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4372 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4373 }
4374
4375 Rep = Builder.CreateIntrinsic(IID, Args);
4376 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4377 : CI->getArgOperand(0);
4378 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4379 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4380 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4381 Name.starts_with("avx512.mask.vpdpwssds.") ||
4382 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4383 bool ZeroMask = Name[11] == 'z';
4384 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4385 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4386 Intrinsic::ID IID;
4387 if (VecWidth == 128 && !IsSaturating)
4388 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4389 else if (VecWidth == 256 && !IsSaturating)
4390 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4391 else if (VecWidth == 512 && !IsSaturating)
4392 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4393 else if (VecWidth == 128 && IsSaturating)
4394 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4395 else if (VecWidth == 256 && IsSaturating)
4396 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4397 else if (VecWidth == 512 && IsSaturating)
4398 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4399 else
4400 llvm_unreachable("Unexpected intrinsic");
4401
4402 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4403 CI->getArgOperand(2)};
4404
4405 // The input argument types were previously (and incorrectly) vectors of
4406 // i32; they should be vectors of i16. Insert bitcasts when encountering the
4407 // old types.
4408 if (Args[1]->getType()->isVectorTy() &&
4409 cast<VectorType>(Args[1]->getType())
4410 ->getElementType()
4411 ->isIntegerTy(32) &&
4412 Args[2]->getType()->isVectorTy() &&
4413 cast<VectorType>(Args[2]->getType())
4414 ->getElementType()
4415 ->isIntegerTy(32)) {
4416 Type *NewArgType = nullptr;
4417 if (VecWidth == 128)
4418 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4419 else if (VecWidth == 256)
4420 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4421 else if (VecWidth == 512)
4422 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4423 else
4424 llvm_unreachable("Unexpected vector bit width");
4425
4426 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4427 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4428 }
4429
4430 Rep = Builder.CreateIntrinsic(IID, Args);
4431 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4432 : CI->getArgOperand(0);
4433 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4434 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4435 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4436 Name == "subborrow.u32" || Name == "subborrow.u64") {
4437 Intrinsic::ID IID;
4438 if (Name[0] == 'a' && Name.back() == '2')
4439 IID = Intrinsic::x86_addcarry_32;
4440 else if (Name[0] == 'a' && Name.back() == '4')
4441 IID = Intrinsic::x86_addcarry_64;
4442 else if (Name[0] == 's' && Name.back() == '2')
4443 IID = Intrinsic::x86_subborrow_32;
4444 else if (Name[0] == 's' && Name.back() == '4')
4445 IID = Intrinsic::x86_subborrow_64;
4446 else
4447 llvm_unreachable("Unexpected intrinsic");
4448
4449 // Make a call with 3 operands.
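 // For example (illustrative IR), addcarry.u32 becomes
 //   %res = call { i8, i32 } @llvm.x86.addcarry.32(i8 %c, i32 %a, i32 %b)
 // where the i32 sum is stored through the old pointer operand and the i8
 // carry-out replaces the original call result.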
4450 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4451 CI->getArgOperand(2)};
4452 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4453
4454 // Extract the second result and store it.
4455 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4456 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4457 // Replace the original call result with the first result of the new call.
4458 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4459
4460 CI->replaceAllUsesWith(CF);
4461 Rep = nullptr;
4462 } else if (Name.starts_with("avx512.mask.") &&
4463 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4464 // Rep will be updated by the call in the condition.
4465 }
4466
4467 return Rep;
4468}
4469
4470static Value *upgradeAArch64IntrinsicCall(StringRef Name, CallBase *CI,
4471 Function *F, IRBuilder<> &Builder) {
4472 if (Name.starts_with("neon.bfcvt")) {
4473 if (Name.starts_with("neon.bfcvtn2")) {
4474 SmallVector<int, 32> LoMask(4);
4475 std::iota(LoMask.begin(), LoMask.end(), 0);
4476 SmallVector<int, 32> ConcatMask(8);
4477 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4478 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4479 Value *Trunc =
4480 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4481 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4482 } else if (Name.starts_with("neon.bfcvtn")) {
4483 SmallVector<int, 32> ConcatMask(8);
4484 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4485 Type *V4BF16 =
4486 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4487 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4489 return Builder.CreateShuffleVector(
4490 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4491 } else {
4492 return Builder.CreateFPTrunc(CI->getOperand(0),
4493 Type::getBFloatTy(F->getContext()));
4494 }
4495 } else if (Name.starts_with("sve.fcvt")) {
4496 Intrinsic::ID NewID =
4498 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4499 .Case("sve.fcvtnt.bf16f32",
4500 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4501 .Default(Intrinsic::not_intrinsic);
4502 if (NewID == Intrinsic::not_intrinsic)
4503 llvm_unreachable("Unhandled Intrinsic!");
4504
4505 SmallVector<Value *, 3> Args(CI->args());
4506
4507 // The original intrinsics incorrectly used a predicate based on the
4508 // smallest element type rather than the largest.
4509 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4510 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
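 // Reinterpret the <vscale x 8 x i1> predicate as <vscale x 4 x i1> by
 // round-tripping through svbool, e.g. (illustrative IR):
 //   %sv = call <vscale x 16 x i1>
 //         @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg)
 //   %pg4 = call <vscale x 4 x i1>
 //         @llvm.aarch64.sve.convert.from.svbool.nxv4i1(<vscale x 16 x i1> %sv)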
4511
4512 if (Args[1]->getType() != BadPredTy)
4513 llvm_unreachable("Unexpected predicate type!");
4514
4515 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4516 BadPredTy, Args[1]);
4517 Args[1] = Builder.CreateIntrinsic(
4518 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4519
4520 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4521 CI->getName());
4522 }
4523
4524 llvm_unreachable("Unhandled Intrinsic!");
4525}
4526
4527static Value *upgradeARMIntrinsicCall(StringRef Name, CallBase *CI, Function *F,
4528 IRBuilder<> &Builder) {
4529 if (Name == "mve.vctp64.old") {
4530 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4531 // correct type.
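 // For example (illustrative IR):
 //   %vctp = call <2 x i1> @llvm.arm.mve.vctp64(i32 %n)
 //   %bits = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> %vctp)
 //   %pred = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %bits)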
4532 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4533 CI->getArgOperand(0),
4534 /*FMFSource=*/nullptr, CI->getName());
4535 Value *C1 = Builder.CreateIntrinsic(
4536 Intrinsic::arm_mve_pred_v2i,
4537 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4538 return Builder.CreateIntrinsic(
4539 Intrinsic::arm_mve_pred_i2v,
4540 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4541 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4542 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4543 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4544 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4545 Name ==
4546 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4547 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4548 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4549 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4550 Name ==
4551 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4552 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4553 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4554 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4555 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4556 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4557 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4558 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4559 std::vector<Type *> Tys;
4560 unsigned ID = CI->getIntrinsicID();
4561 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4562 switch (ID) {
4563 case Intrinsic::arm_mve_mull_int_predicated:
4564 case Intrinsic::arm_mve_vqdmull_predicated:
4565 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4566 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4567 break;
4568 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4569 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4570 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4571 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4572 V2I1Ty};
4573 break;
4574 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4575 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4576 CI->getOperand(1)->getType(), V2I1Ty};
4577 break;
4578 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4579 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4580 CI->getOperand(2)->getType(), V2I1Ty};
4581 break;
4582 case Intrinsic::arm_cde_vcx1q_predicated:
4583 case Intrinsic::arm_cde_vcx1qa_predicated:
4584 case Intrinsic::arm_cde_vcx2q_predicated:
4585 case Intrinsic::arm_cde_vcx2qa_predicated:
4586 case Intrinsic::arm_cde_vcx3q_predicated:
4587 case Intrinsic::arm_cde_vcx3qa_predicated:
4588 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4589 break;
4590 default:
4591 llvm_unreachable("Unhandled Intrinsic!");
4592 }
4593
4594 std::vector<Value *> Ops;
4595 for (Value *Op : CI->args()) {
4596 Type *Ty = Op->getType();
4597 if (Ty->getScalarSizeInBits() == 1) {
4598 Value *C1 = Builder.CreateIntrinsic(
4599 Intrinsic::arm_mve_pred_v2i,
4600 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4601 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4602 }
4603 Ops.push_back(Op);
4604 }
4605
4606 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4607 CI->getName());
4608 }
4609 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4610}
4611
4612// These are expected to have the arguments:
4613// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4614//
4615// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4616//
4617static Value *upgradeAMDGCNIntrinsicCall(StringRef Name, CallBase *CI,
4618 Function *F, IRBuilder<> &Builder) {
4619 // Legacy WMMA iu intrinsics lacked the optional clamp operand. Append
4620 // clamp=0 for compatibility.
4621 auto UpgradeLegacyWMMAIUIntrinsicCall =
4622 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4623 ArrayRef<Type *> OverloadTys) -> Value * {
4624 // Prepare arguments, append clamp=0 for compatibility
4625 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4626 Args.push_back(Builder.getFalse());
4627
4628 // Insert the declaration for the right overload types
4629 Function *NewDecl = Intrinsic::getOrInsertDeclaration(
4630 F->getParent(), F->getIntrinsicID(), OverloadTys);
4631
4632 // Copy operand bundles if any
4633 SmallVector<OperandBundleDef, 1> Bundles;
4634 CI->getOperandBundlesAsDefs(Bundles);
4635
4636 // Create the new call and copy calling properties
4637 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4638 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4639 NewCall->setCallingConv(CI->getCallingConv());
4640 NewCall->setAttributes(CI->getAttributes());
4641 NewCall->setDebugLoc(CI->getDebugLoc());
4642 NewCall->copyMetadata(*CI);
4643 return NewCall;
4644 };
4645
4646 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4647 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4648 "intrinsic should have 7 arguments");
4649 Type *T1 = CI->getArgOperand(4)->getType();
4650 Type *T2 = CI->getArgOperand(1)->getType();
4651 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4652 }
4653 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4654 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4655 "intrinsic should have 8 arguments");
4656 Type *T1 = CI->getArgOperand(4)->getType();
4657 Type *T2 = CI->getArgOperand(1)->getType();
4658 Type *T3 = CI->getArgOperand(3)->getType();
4659 Type *T4 = CI->getArgOperand(5)->getType();
4660 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4661 }
4662
4663 AtomicRMWInst::BinOp RMWOp =
4665 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4666 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4667 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4668 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4669 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4670 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4671 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4672 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4673 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4674 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4675 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4676 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4677 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4678
4679 unsigned NumOperands = CI->getNumOperands();
4680 if (NumOperands < 3) // Malformed bitcode.
4681 return nullptr;
4682
4683 Value *Ptr = CI->getArgOperand(0);
4684 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4685 if (!PtrTy) // Malformed.
4686 return nullptr;
4687
4688 Value *Val = CI->getArgOperand(1);
4689 if (Val->getType() != CI->getType()) // Malformed.
4690 return nullptr;
4691
4692 ConstantInt *OrderArg = nullptr;
4693 bool IsVolatile = false;
4694
4695 // These should have 5 arguments (plus the callee). A separate version of the
4696 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4697 if (NumOperands > 3)
4698 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4699
4700 // Ignore scope argument at 3
4701
4702 if (NumOperands > 5) {
4703 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4704 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4705 }
4706
4707 AtomicOrdering Order = AtomicOrdering::SequentiallyConsistent;
4708 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4709 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4710 if (Order == AtomicOrdering::NotAtomic || Order == AtomicOrdering::Unordered)
4711 Order = AtomicOrdering::SequentiallyConsistent;
4712
4713 LLVMContext &Ctx = F->getContext();
4714
4715 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4716 Type *RetTy = CI->getType();
4717 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4718 if (VT->getElementType()->isIntegerTy(16)) {
4719 VectorType *AsBF16 =
4720 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4721 Val = Builder.CreateBitCast(Val, AsBF16);
4722 }
4723 }
4724
4725 // The scope argument never really worked correctly. Use agent as the most
4726 // conservative option which should still always produce the instruction.
4727 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4728 AtomicRMWInst *RMW =
4729 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4730
4731 unsigned AddrSpace = PtrTy->getAddressSpace();
4732 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4733 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4734 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4735 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4736 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4737 }
4738
4739 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4740 MDBuilder MDB(F->getContext());
4741 MDNode *RangeNotPrivate =
4743 MDB.createRange(APInt(32, AMDGPUAS::PRIVATE_ADDRESS), APInt(32, AMDGPUAS::PRIVATE_ADDRESS + 1));
4744 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4745 }
4746
4747 if (IsVolatile)
4748 RMW->setVolatile(true);
4749
4750 return Builder.CreateBitCast(RMW, RetTy);
4751}
4752
4753/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4754/// plain MDNode, as it's the verifier's job to check these are the correct
4755/// types later.
4756static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4757 if (Op < CI->arg_size()) {
4758 if (MetadataAsValue *MAV =
4759 dyn_cast<MetadataAsValue>(CI->getArgOperand(Op))) {
4760 Metadata *MD = MAV->getMetadata();
4761 return dyn_cast_if_present<MDNode>(MD);
4762 }
4763 }
4764 return nullptr;
4765}
4766
4767/// Helper to unwrap MetadataAsValue operands that carry plain Metadata, such as the Value field.
4768static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4769 if (Op < CI->arg_size())
4770 if (auto *MAV = dyn_cast<MetadataAsValue>(CI->getArgOperand(Op)))
4771 return MAV->getMetadata();
4772 return nullptr;
4773}
4774
4775static MDNode *getDebugLocSafe(const Instruction *I) {
4776 // The MDNode attached to this instruction might not be the correct type,
4777 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4778 return I->getDebugLoc().getAsMDNode();
4779}
4780
4781/// Convert debug intrinsic calls to non-instruction debug records.
4782/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4783/// \p CI - The debug intrinsic call.
4784static void upgradeDbgIntrinsicToDbgRecord(StringRef Name, CallBase *CI) {
4785 DbgRecord *DR = nullptr;
4786 if (Name == "label") {
4787 DR = DbgLabelRecord::createUnresolvedDbgLabelRecord(unwrapMAVOp(CI, 0),
4788 CI->getDebugLoc());
4789 } else if (Name == "assign") {
4790 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4791 DbgVariableRecord::LocationType::Assign, unwrapMAVMetadataOp(CI, 0),
4792 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4793 unwrapMAVMetadataOp(CI, 4),
4794 /* The address is a Value ref; it will be stored as Metadata. */
4795 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4796 } else if (Name == "declare") {
4797 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4798 DbgVariableRecord::LocationType::Declare, unwrapMAVMetadataOp(CI, 0),
4799 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4800 getDebugLocSafe(CI));
4801 } else if (Name == "addr") {
4802 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
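 // For example (illustrative), llvm.dbg.addr(metadata ptr %p, metadata !var,
 // metadata !DIExpression()) becomes a #dbg_value record whose expression is
 // !DIExpression(DW_OP_deref).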
4803 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4804 // Don't try to add something to the expression if it's not an expression.
4805 // Instead, allow the verifier to fail later.
4806 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4807 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4808 }
4809 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4810 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4811 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4812 getDebugLocSafe(CI));
4813 } else if (Name == "value") {
4814 // An old version of dbg.value had an extra offset argument.
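 // That older form was, e.g. (illustrative):
 //   llvm.dbg.value(metadata %v, i64 <offset>, metadata !var, metadata !expr)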
4815 unsigned VarOp = 1;
4816 unsigned ExprOp = 2;
4817 if (CI->arg_size() == 4) {
4818 auto *Offset = dyn_cast<Constant>(CI->getArgOperand(1));
4819 // Nonzero offset dbg.values get dropped without a replacement.
4820 if (!Offset || !Offset->isZeroValue())
4821 return;
4822 VarOp = 2;
4823 ExprOp = 3;
4824 }
4825 DR = DbgVariableRecord::createUnresolvedDbgVariableRecord(
4826 DbgVariableRecord::LocationType::Value, unwrapMAVMetadataOp(CI, 0),
4827 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4828 nullptr, getDebugLocSafe(CI));
4829 }
4830 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4831 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4832}
4833
4834static Value *upgradeVectorSplice(CallBase *CI, IRBuilder<> &Builder) {
4835 auto *Offset = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4836 if (!Offset)
4837 reportFatalUsageError("Invalid llvm.vector.splice offset argument");
4838 int64_t OffsetVal = Offset->getSExtValue();
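 // For example (illustrative IR), llvm.vector.splice(%a, %b, i32 -4) becomes
 // llvm.vector.splice.right(%a, %b, i32 4).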
4839 return Builder.CreateIntrinsic(OffsetVal >= 0
4840 ? Intrinsic::vector_splice_left
4841 : Intrinsic::vector_splice_right,
4842 CI->getType(),
4843 {CI->getArgOperand(0), CI->getArgOperand(1),
4844 Builder.getInt32(std::abs(OffsetVal))});
4845}
4846
4847/// Upgrade a call to an old intrinsic. All argument and return casting must be
4848/// provided to seamlessly integrate with existing context.
4849void llvm::UpgradeIntrinsicCall(CallBase *CI, Function *NewFn) {
4850 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4851 // checks the callee's function type matches. It's likely we need to handle
4852 // type changes here.
4853 Function *F = dyn_cast<Function>(CI->getCalledOperand());
4854 if (!F)
4855 return;
4856
4857 LLVMContext &C = CI->getContext();
4858 IRBuilder<> Builder(C);
4859 if (isa<FPMathOperator>(CI))
4860 Builder.setFastMathFlags(CI->getFastMathFlags());
4861 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4862
4863 if (!NewFn) {
4864 // Get the Function's name.
4865 StringRef Name = F->getName();
4866
4867 assert(Name.starts_with("llvm.") && "Intrinsic doesn't start with 'llvm.'");
4868 Name = Name.substr(5);
4869
4870 bool IsX86 = Name.consume_front("x86.");
4871 bool IsNVVM = Name.consume_front("nvvm.");
4872 bool IsAArch64 = Name.consume_front("aarch64.");
4873 bool IsARM = Name.consume_front("arm.");
4874 bool IsAMDGCN = Name.consume_front("amdgcn.");
4875 bool IsDbg = Name.consume_front("dbg.");
4876 bool IsOldSplice =
4877 (Name.consume_front("experimental.vector.splice") ||
4878 Name.consume_front("vector.splice")) &&
4879 !(Name.starts_with(".left") || Name.starts_with(".right"));
4880 Value *Rep = nullptr;
4881
4882 if (!IsX86 && Name == "stackprotectorcheck") {
4883 Rep = nullptr;
4884 } else if (IsNVVM) {
4885 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4886 } else if (IsX86) {
4887 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4888 } else if (IsAArch64) {
4889 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4890 } else if (IsARM) {
4891 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4892 } else if (IsAMDGCN) {
4893 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4894 } else if (IsDbg) {
4895 upgradeDbgIntrinsicToDbgRecord(Name, CI);
4896 } else if (IsOldSplice) {
4897 Rep = upgradeVectorSplice(CI, Builder);
4898 } else {
4899 llvm_unreachable("Unknown function for CallBase upgrade.");
4900 }
4901
4902 if (Rep)
4903 CI->replaceAllUsesWith(Rep);
4904 CI->eraseFromParent();
4905 return;
4906 }
4907
4908 const auto &DefaultCase = [&]() -> void {
4909 if (F == NewFn)
4910 return;
4911
4912 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4913 // Handle generic mangling change.
4914 assert(
4915 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4916 "Unknown function for CallBase upgrade and isn't just a name change");
4917 CI->setCalledFunction(NewFn);
4918 return;
4919 }
4920
4921 // This must be an upgrade from a named to a literal struct.
4922 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4923 assert(OldST != NewFn->getReturnType() &&
4924 "Return type must have changed");
4925 assert(OldST->getNumElements() ==
4926 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4927 "Must have same number of elements");
4928
4929 SmallVector<Value *> Args(CI->args());
4930 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4931 NewCI->setAttributes(CI->getAttributes());
4932 Value *Res = PoisonValue::get(OldST);
4933 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4934 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4935 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4936 }
4937 CI->replaceAllUsesWith(Res);
4938 CI->eraseFromParent();
4939 return;
4940 }
4941
4942 // We're probably about to produce something invalid. Let the verifier catch
4943 // it instead of dying here.
4944 CI->setCalledOperand(
4945 ConstantExpr::getPointerCast(NewFn, CI->getCalledOperand()->getType()));
4946 return;
4947 };
4948 CallInst *NewCall = nullptr;
4949 switch (NewFn->getIntrinsicID()) {
4950 default: {
4951 DefaultCase();
4952 return;
4953 }
4954 case Intrinsic::arm_neon_vst1:
4955 case Intrinsic::arm_neon_vst2:
4956 case Intrinsic::arm_neon_vst3:
4957 case Intrinsic::arm_neon_vst4:
4958 case Intrinsic::arm_neon_vst2lane:
4959 case Intrinsic::arm_neon_vst3lane:
4960 case Intrinsic::arm_neon_vst4lane: {
4961 SmallVector<Value *, 4> Args(CI->args());
4962 NewCall = Builder.CreateCall(NewFn, Args);
4963 break;
4964 }
4965 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
4966 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
4967 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
4968 LLVMContext &Ctx = F->getParent()->getContext();
4969 SmallVector<Value *, 4> Args(CI->args());
4970 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
4971 cast<ConstantInt>(Args[3])->getZExtValue());
4972 NewCall = Builder.CreateCall(NewFn, Args);
4973 break;
4974 }
4975 case Intrinsic::aarch64_sve_ld3_sret:
4976 case Intrinsic::aarch64_sve_ld4_sret:
4977 case Intrinsic::aarch64_sve_ld2_sret: {
4978 StringRef Name = F->getName();
4979 Name = Name.substr(5);
4980 unsigned N = StringSwitch<unsigned>(Name)
4981 .StartsWith("aarch64.sve.ld2", 2)
4982 .StartsWith("aarch64.sve.ld3", 3)
4983 .StartsWith("aarch64.sve.ld4", 4)
4984 .Default(0);
4985 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
4986 unsigned MinElts = RetTy->getMinNumElements() / N;
4987 SmallVector<Value *, 2> Args(CI->args());
4988 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
4989 Value *Ret = llvm::PoisonValue::get(RetTy);
4990 for (unsigned I = 0; I < N; I++) {
4991 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
4992 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
4993 }
4994 NewCall = dyn_cast<CallInst>(Ret);
4995 break;
4996 }
4997
4998 case Intrinsic::coro_end: {
4999 SmallVector<Value *, 3> Args(CI->args());
5000 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5001 NewCall = Builder.CreateCall(NewFn, Args);
5002 break;
5003 }
5004
5005 case Intrinsic::vector_extract: {
5006 StringRef Name = F->getName();
5007 Name = Name.substr(5); // Strip llvm
5008 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5009 DefaultCase();
5010 return;
5011 }
5012 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5013 unsigned MinElts = RetTy->getMinNumElements();
5014 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5015 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5016 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5017 break;
5018 }
5019
5020 case Intrinsic::vector_insert: {
5021 StringRef Name = F->getName();
5022 Name = Name.substr(5);
5023 if (!Name.starts_with("aarch64.sve.tuple")) {
5024 DefaultCase();
5025 return;
5026 }
5027 if (Name.starts_with("aarch64.sve.tuple.set")) {
5028 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5029 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5030 Value *NewIdx =
5031 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5032 NewCall = Builder.CreateCall(
5033 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5034 break;
5035 }
5036 if (Name.starts_with("aarch64.sve.tuple.create")) {
5037 unsigned N = StringSwitch<unsigned>(Name)
5038 .StartsWith("aarch64.sve.tuple.create2", 2)
5039 .StartsWith("aarch64.sve.tuple.create3", 3)
5040 .StartsWith("aarch64.sve.tuple.create4", 4)
5041 .Default(0);
5042 assert(N > 1 && "Create is expected to be between 2-4");
5043 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5044 Value *Ret = llvm::PoisonValue::get(RetTy);
5045 unsigned MinElts = RetTy->getMinNumElements() / N;
5046 for (unsigned I = 0; I < N; I++) {
5047 Value *V = CI->getArgOperand(I);
5048 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5049 }
5050 NewCall = dyn_cast<CallInst>(Ret);
5051 }
5052 break;
5053 }
5054
5055 case Intrinsic::arm_neon_bfdot:
5056 case Intrinsic::arm_neon_bfmmla:
5057 case Intrinsic::arm_neon_bfmlalb:
5058 case Intrinsic::arm_neon_bfmlalt:
5059 case Intrinsic::aarch64_neon_bfdot:
5060 case Intrinsic::aarch64_neon_bfmmla:
5061 case Intrinsic::aarch64_neon_bfmlalb:
5062 case Intrinsic::aarch64_neon_bfmlalt: {
5063 SmallVector<Value *, 3> Args;
5064 assert(CI->arg_size() == 3 &&
5065 "Mismatch between function args and call args");
5066 size_t OperandWidth =
5067 CI->getArgOperand(1)->getType()->getPrimitiveSizeInBits();
5068 assert((OperandWidth == 64 || OperandWidth == 128) &&
5069 "Unexpected operand width");
5070 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5071 auto Iter = CI->args().begin();
5072 Args.push_back(*Iter++);
5073 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5074 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5075 NewCall = Builder.CreateCall(NewFn, Args);
5076 break;
5077 }
5078
5079 case Intrinsic::bitreverse:
5080 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5081 break;
5082
5083 case Intrinsic::ctlz:
5084 case Intrinsic::cttz: {
5085 if (CI->arg_size() != 1) {
5086 DefaultCase();
5087 return;
5088 }
5089
5090 NewCall =
5091 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5092 break;
5093 }
5094
5095 case Intrinsic::objectsize: {
5096 Value *NullIsUnknownSize =
5097 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5098 Value *Dynamic =
5099 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5100 NewCall = Builder.CreateCall(
5101 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5102 break;
5103 }
5104
5105 case Intrinsic::ctpop:
5106 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5107 break;
5108
5109 case Intrinsic::convert_from_fp16:
5110 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5111 break;
5112
5113 case Intrinsic::dbg_value: {
5114 StringRef Name = F->getName();
5115 Name = Name.substr(5); // Strip llvm.
5116 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5117 if (Name.starts_with("dbg.addr")) {
5118 DIExpression *Expr = cast<DIExpression>(
5119 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5120 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5121 NewCall =
5122 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5123 MetadataAsValue::get(C, Expr)});
5124 break;
5125 }
5126
5127 // Upgrade from the old version that had an extra offset argument.
5128 assert(CI->arg_size() == 4);
5129 // Drop nonzero offsets instead of attempting to upgrade them.
5130 if (auto *Offset = dyn_cast<Constant>(CI->getArgOperand(1)))
5131 if (Offset->isZeroValue()) {
5132 NewCall = Builder.CreateCall(
5133 NewFn,
5134 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5135 break;
5136 }
5137 CI->eraseFromParent();
5138 return;
5139 }
5140
5141 case Intrinsic::ptr_annotation:
5142 // Upgrade from versions that lacked the annotation attribute argument.
5143 if (CI->arg_size() != 4) {
5144 DefaultCase();
5145 return;
5146 }
5147
5148 // Create a new call with an added null annotation attribute argument.
5149 NewCall = Builder.CreateCall(
5150 NewFn,
5151 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5152 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5153 NewCall->takeName(CI);
5154 CI->replaceAllUsesWith(NewCall);
5155 CI->eraseFromParent();
5156 return;
5157
5158 case Intrinsic::var_annotation:
5159 // Upgrade from versions that lacked the annotation attribute argument.
5160 if (CI->arg_size() != 4) {
5161 DefaultCase();
5162 return;
5163 }
5164 // Create a new call with an added null annotation attribute argument.
5165 NewCall = Builder.CreateCall(
5166 NewFn,
5167 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5168 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5169 NewCall->takeName(CI);
5170 CI->replaceAllUsesWith(NewCall);
5171 CI->eraseFromParent();
5172 return;
5173
5174 case Intrinsic::riscv_aes32dsi:
5175 case Intrinsic::riscv_aes32dsmi:
5176 case Intrinsic::riscv_aes32esi:
5177 case Intrinsic::riscv_aes32esmi:
5178 case Intrinsic::riscv_sm4ks:
5179 case Intrinsic::riscv_sm4ed: {
5180 // The last argument to these intrinsics used to be i8 and changed to i32.
5181 // The type overload for sm4ks and sm4ed was removed.
5182 Value *Arg2 = CI->getArgOperand(2);
5183 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5184 return;
5185
5186 Value *Arg0 = CI->getArgOperand(0);
5187 Value *Arg1 = CI->getArgOperand(1);
5188 if (CI->getType()->isIntegerTy(64)) {
5189 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5190 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5191 }
5192
5193 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5194 cast<ConstantInt>(Arg2)->getZExtValue());
5195
5196 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5197 Value *Res = NewCall;
5198 if (Res->getType() != CI->getType())
5199 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5200 NewCall->takeName(CI);
5201 CI->replaceAllUsesWith(Res);
5202 CI->eraseFromParent();
5203 return;
5204 }
5205 case Intrinsic::nvvm_mapa_shared_cluster: {
5206 // Create a new call with the correct address space.
5207 NewCall =
5208 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5209 Value *Res = NewCall;
5210 Res = Builder.CreateAddrSpaceCast(
5211 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5212 NewCall->takeName(CI);
5213 CI->replaceAllUsesWith(Res);
5214 CI->eraseFromParent();
5215 return;
5216 }
5217 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5218 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5219 // Create a new call with the correct address space.
5220 SmallVector<Value *, 4> Args(CI->args());
5221 Args[0] = Builder.CreateAddrSpaceCast(
5222 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5223
5224 NewCall = Builder.CreateCall(NewFn, Args);
5225 NewCall->takeName(CI);
5226 CI->replaceAllUsesWith(NewCall);
5227 CI->eraseFromParent();
5228 return;
5229 }
5230 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5231 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5232 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5233 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5234 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5235 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5236 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5237 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5238 SmallVector<Value *, 16> Args(CI->args());
5239
5240 // Create AddrSpaceCast to shared_cluster if needed.
5241 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5242 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5243 if (AS != NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER)
5244 Args[0] = Builder.CreateAddrSpaceCast(
5245 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5246
5247 // Attach the flag argument for cta_group, with a
5248 // default value of 0. This handles case (2) in
5249 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5250 size_t NumArgs = CI->arg_size();
5251 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5252 if (!FlagArg->getType()->isIntegerTy(1))
5253 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5254
5255 NewCall = Builder.CreateCall(NewFn, Args);
5256 NewCall->takeName(CI);
5257 CI->replaceAllUsesWith(NewCall);
5258 CI->eraseFromParent();
5259 return;
5260 }
5261 case Intrinsic::riscv_sha256sig0:
5262 case Intrinsic::riscv_sha256sig1:
5263 case Intrinsic::riscv_sha256sum0:
5264 case Intrinsic::riscv_sha256sum1:
5265 case Intrinsic::riscv_sm3p0:
5266 case Intrinsic::riscv_sm3p1: {
5267 // The i64 type overload of these intrinsics was removed; they now only
5268 // operate on i32. Truncate the argument and sign-extend the result.
5269 if (!CI->getType()->isIntegerTy(64))
5270 return;
5271
5272 Value *Arg =
5273 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5274
5275 NewCall = Builder.CreateCall(NewFn, Arg);
5276 Value *Res =
5277 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5278 NewCall->takeName(CI);
5279 CI->replaceAllUsesWith(Res);
5280 CI->eraseFromParent();
5281 return;
5282 }
5283
5284 case Intrinsic::x86_xop_vfrcz_ss:
5285 case Intrinsic::x86_xop_vfrcz_sd:
5286 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5287 break;
5288
5289 case Intrinsic::x86_xop_vpermil2pd:
5290 case Intrinsic::x86_xop_vpermil2ps:
5291 case Intrinsic::x86_xop_vpermil2pd_256:
5292 case Intrinsic::x86_xop_vpermil2ps_256: {
5293 SmallVector<Value *, 4> Args(CI->args());
5294 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5295 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5296 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5297 NewCall = Builder.CreateCall(NewFn, Args);
5298 break;
5299 }
5300
5301 case Intrinsic::x86_sse41_ptestc:
5302 case Intrinsic::x86_sse41_ptestz:
5303 case Intrinsic::x86_sse41_ptestnzc: {
5304 // The arguments for these intrinsics used to be v4f32, and changed
5305 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5306 // So, the only thing required is a bitcast for both arguments.
5307 // First, check the arguments have the old type.
5308 Value *Arg0 = CI->getArgOperand(0);
5309 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5310 return;
5311
5312 // Old intrinsic, add bitcasts
5313 Value *Arg1 = CI->getArgOperand(1);
5314
5315 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5316
5317 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5318 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5319
5320 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5321 break;
5322 }
5323
5324 case Intrinsic::x86_rdtscp: {
5325 // This used to take 1 argument. If we have no arguments, it is already
5326 // upgraded.
5327 if (CI->getNumOperands() == 0)
5328 return;
5329
5330 NewCall = Builder.CreateCall(NewFn);
5331 // Extract the second result and store it.
5332 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5333 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5334 // Replace the original call result with the first result of the new call.
5335 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5336
5337 NewCall->takeName(CI);
5338 CI->replaceAllUsesWith(TSC);
5339 CI->eraseFromParent();
5340 return;
5341 }
5342
5343 case Intrinsic::x86_sse41_insertps:
5344 case Intrinsic::x86_sse41_dppd:
5345 case Intrinsic::x86_sse41_dpps:
5346 case Intrinsic::x86_sse41_mpsadbw:
5347 case Intrinsic::x86_avx_dp_ps_256:
5348 case Intrinsic::x86_avx2_mpsadbw: {
5349 // Need to truncate the last argument from i32 to i8 -- this argument models
5350 // an inherently 8-bit immediate operand to these x86 instructions.
5351 SmallVector<Value *, 4> Args(CI->args());
5352
5353 // Replace the last argument with a trunc.
5354 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5355 NewCall = Builder.CreateCall(NewFn, Args);
5356 break;
5357 }
5358
5359 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5360 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5361 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5362 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5363 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5364 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5365 SmallVector<Value *, 4> Args(CI->args());
5366 unsigned NumElts =
5367 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5368 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5369
5370 NewCall = Builder.CreateCall(NewFn, Args);
5371 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5372
5373 NewCall->takeName(CI);
5374 CI->replaceAllUsesWith(Res);
5375 CI->eraseFromParent();
5376 return;
5377 }
5378
5379 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5380 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5381 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5382 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5383 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5384 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5385 SmallVector<Value *, 4> Args(CI->args());
5386 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5387 if (NewFn->getIntrinsicID() ==
5388 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5389 Args[1] = Builder.CreateBitCast(
5390 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5391
5392 NewCall = Builder.CreateCall(NewFn, Args);
5393 Value *Res = Builder.CreateBitCast(
5394 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5395
5396 NewCall->takeName(CI);
5397 CI->replaceAllUsesWith(Res);
5398 CI->eraseFromParent();
5399 return;
5400 }
5401 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5402 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5403 case Intrinsic::x86_avx512bf16_dpbf16ps_512: {
5404 SmallVector<Value *, 4> Args(CI->args());
5405 unsigned NumElts =
5406 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5407 Args[1] = Builder.CreateBitCast(
5408 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5409 Args[2] = Builder.CreateBitCast(
5410 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5411
5412 NewCall = Builder.CreateCall(NewFn, Args);
5413 break;
5414 }
5415
5416 case Intrinsic::thread_pointer: {
5417 NewCall = Builder.CreateCall(NewFn, {});
5418 break;
5419 }
5420
5421 case Intrinsic::memcpy:
5422 case Intrinsic::memmove:
5423 case Intrinsic::memset: {
5424 // We have to make sure that the call signature is what we're expecting.
5425 // We only want to change the old signatures by removing the alignment arg:
5426 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5427 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5428 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5429 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5430 // Note: i8*'s in the above can be any pointer type
5431 if (CI->arg_size() != 5) {
5432 DefaultCase();
5433 return;
5434 }
5435 // Remove alignment argument (3), and add alignment attributes to the
5436 // dest/src pointers.
5437 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5438 CI->getArgOperand(2), CI->getArgOperand(4)};
5439 NewCall = Builder.CreateCall(NewFn, Args);
5440 AttributeList OldAttrs = CI->getAttributes();
5441 AttributeList NewAttrs = AttributeList::get(
5442 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5443 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5444 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5445 NewCall->setAttributes(NewAttrs);
5446 auto *MemCI = cast<MemIntrinsic>(NewCall);
5447 // All mem intrinsics support dest alignment.
5448 const ConstantInt *Align = cast<ConstantInt>(CI->getArgOperand(3));
5449 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5450 // Memcpy/Memmove also support source alignment.
5451 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5452 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5453 break;
5454 }
5455
5456 case Intrinsic::masked_load:
5457 case Intrinsic::masked_gather:
5458 case Intrinsic::masked_store:
5459 case Intrinsic::masked_scatter: {
5460 if (CI->arg_size() != 4) {
5461 DefaultCase();
5462 return;
5463 }
5464
5465 auto GetMaybeAlign = [](Value *Op) {
5466 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5467 uint64_t Val = CI->getZExtValue();
5468 if (Val == 0)
5469 return MaybeAlign();
5470 if (isPowerOf2_64(Val))
5471 return MaybeAlign(Val);
5472 }
5473 reportFatalUsageError("Invalid alignment argument");
5474 };
5475 auto GetAlign = [&](Value *Op) {
5476 MaybeAlign Align = GetMaybeAlign(Op);
5477 if (Align)
5478 return *Align;
5479 reportFatalUsageError("Invalid zero alignment argument");
5480 };
5481
5482 const DataLayout &DL = CI->getDataLayout();
5483 switch (NewFn->getIntrinsicID()) {
5484 case Intrinsic::masked_load:
5485 NewCall = Builder.CreateMaskedLoad(
5486 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5487 CI->getArgOperand(2), CI->getArgOperand(3));
5488 break;
5489 case Intrinsic::masked_gather:
5490 NewCall = Builder.CreateMaskedGather(
5491 CI->getType(), CI->getArgOperand(0),
5492 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5493 CI->getType()->getScalarType()),
5494 CI->getArgOperand(2), CI->getArgOperand(3));
5495 break;
5496 case Intrinsic::masked_store:
5497 NewCall = Builder.CreateMaskedStore(
5498 CI->getArgOperand(0), CI->getArgOperand(1),
5499 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5500 break;
5501 case Intrinsic::masked_scatter:
5502 NewCall = Builder.CreateMaskedScatter(
5503 CI->getArgOperand(0), CI->getArgOperand(1),
5504 DL.getValueOrABITypeAlignment(
5505 GetMaybeAlign(CI->getArgOperand(2)),
5506 CI->getArgOperand(0)->getType()->getScalarType()),
5507 CI->getArgOperand(3));
5508 break;
5509 default:
5510 llvm_unreachable("Unexpected intrinsic ID");
5511 }
5512 // Previous metadata is still valid.
5513 NewCall->copyMetadata(*CI);
5514 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5515 break;
5516 }
5517
5518 case Intrinsic::lifetime_start:
5519 case Intrinsic::lifetime_end: {
5520 if (CI->arg_size() != 2) {
5521 DefaultCase();
5522 return;
5523 }
5524
5525 Value *Ptr = CI->getArgOperand(1);
5526 // Try to strip pointer casts, such that the lifetime works on an alloca.
5527 Ptr = Ptr->stripPointerCasts();
5528 if (isa<AllocaInst>(Ptr)) {
5529 // Don't use NewFn, as we might have looked through an addrspacecast.
5530 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5531 NewCall = Builder.CreateLifetimeStart(Ptr);
5532 else
5533 NewCall = Builder.CreateLifetimeEnd(Ptr);
5534 break;
5535 }
5536
5537 // Otherwise remove the lifetime marker.
5538 CI->eraseFromParent();
5539 return;
5540 }
5541
5542 case Intrinsic::x86_avx512_vpdpbusd_128:
5543 case Intrinsic::x86_avx512_vpdpbusd_256:
5544 case Intrinsic::x86_avx512_vpdpbusd_512:
5545 case Intrinsic::x86_avx512_vpdpbusds_128:
5546 case Intrinsic::x86_avx512_vpdpbusds_256:
5547 case Intrinsic::x86_avx512_vpdpbusds_512:
5548 case Intrinsic::x86_avx2_vpdpbssd_128:
5549 case Intrinsic::x86_avx2_vpdpbssd_256:
5550 case Intrinsic::x86_avx10_vpdpbssd_512:
5551 case Intrinsic::x86_avx2_vpdpbssds_128:
5552 case Intrinsic::x86_avx2_vpdpbssds_256:
5553 case Intrinsic::x86_avx10_vpdpbssds_512:
5554 case Intrinsic::x86_avx2_vpdpbsud_128:
5555 case Intrinsic::x86_avx2_vpdpbsud_256:
5556 case Intrinsic::x86_avx10_vpdpbsud_512:
5557 case Intrinsic::x86_avx2_vpdpbsuds_128:
5558 case Intrinsic::x86_avx2_vpdpbsuds_256:
5559 case Intrinsic::x86_avx10_vpdpbsuds_512:
5560 case Intrinsic::x86_avx2_vpdpbuud_128:
5561 case Intrinsic::x86_avx2_vpdpbuud_256:
5562 case Intrinsic::x86_avx10_vpdpbuud_512:
5563 case Intrinsic::x86_avx2_vpdpbuuds_128:
5564 case Intrinsic::x86_avx2_vpdpbuuds_256:
5565 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5566 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5567 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5568 CI->getArgOperand(2)};
5569 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5570 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5571 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5572
5573 NewCall = Builder.CreateCall(NewFn, Args);
5574 break;
5575 }
5576 case Intrinsic::x86_avx512_vpdpwssd_128:
5577 case Intrinsic::x86_avx512_vpdpwssd_256:
5578 case Intrinsic::x86_avx512_vpdpwssd_512:
5579 case Intrinsic::x86_avx512_vpdpwssds_128:
5580 case Intrinsic::x86_avx512_vpdpwssds_256:
5581 case Intrinsic::x86_avx512_vpdpwssds_512:
5582 case Intrinsic::x86_avx2_vpdpwsud_128:
5583 case Intrinsic::x86_avx2_vpdpwsud_256:
5584 case Intrinsic::x86_avx10_vpdpwsud_512:
5585 case Intrinsic::x86_avx2_vpdpwsuds_128:
5586 case Intrinsic::x86_avx2_vpdpwsuds_256:
5587 case Intrinsic::x86_avx10_vpdpwsuds_512:
5588 case Intrinsic::x86_avx2_vpdpwusd_128:
5589 case Intrinsic::x86_avx2_vpdpwusd_256:
5590 case Intrinsic::x86_avx10_vpdpwusd_512:
5591 case Intrinsic::x86_avx2_vpdpwusds_128:
5592 case Intrinsic::x86_avx2_vpdpwusds_256:
5593 case Intrinsic::x86_avx10_vpdpwusds_512:
5594 case Intrinsic::x86_avx2_vpdpwuud_128:
5595 case Intrinsic::x86_avx2_vpdpwuud_256:
5596 case Intrinsic::x86_avx10_vpdpwuud_512:
5597 case Intrinsic::x86_avx2_vpdpwuuds_128:
5598 case Intrinsic::x86_avx2_vpdpwuuds_256:
5599 case Intrinsic::x86_avx10_vpdpwuuds_512: {
5600 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5601 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5602 CI->getArgOperand(2)};
5603 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5604 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5605 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5606
5607 NewCall = Builder.CreateCall(NewFn, Args);
5608 break;
5609 }
 }
5610 assert(NewCall && "Should have either set this variable or returned through "
5611 "the default case");
5612 NewCall->takeName(CI);
5613 CI->replaceAllUsesWith(NewCall);
5614 CI->eraseFromParent();
5615}
5616
5617void llvm::UpgradeCallsToIntrinsic(Function *F) {
5618 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5619
5620 // Check if this function should be upgraded and get the replacement function
5621 // if there is one.
5622 Function *NewFn;
5623 if (UpgradeIntrinsicFunction(F, NewFn)) {
5624 // Replace all users of the old function with the new function or new
5625 // instructions. This is not a range loop because the call is deleted.
5626 for (User *U : make_early_inc_range(F->users()))
5627 if (CallBase *CB = dyn_cast<CallBase>(U))
5628 UpgradeIntrinsicCall(CB, NewFn);
5629
5630 // Remove old function, no longer used, from the module.
5631 if (F != NewFn)
5632 F->eraseFromParent();
5633 }
5634}
5635
5636MDNode *llvm::UpgradeTBAANode(MDNode &MD) {
5637 const unsigned NumOperands = MD.getNumOperands();
5638 if (NumOperands == 0)
5639 return &MD; // Invalid, punt to a verifier error.
5640
5641 // Check if the tag uses struct-path aware TBAA format.
5642 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5643 return &MD;
5644
5645 auto &Context = MD.getContext();
5646 if (NumOperands == 3) {
5647 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5648 MDNode *ScalarType = MDNode::get(Context, Elts);
5649 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5650 Metadata *Elts2[] = {ScalarType, ScalarType,
5651 ConstantAsMetadata::get(
5652 Constant::getNullValue(Type::getInt64Ty(Context))),
5653 MD.getOperand(2)};
5654 return MDNode::get(Context, Elts2);
5655 }
5656 // Create a MDNode <MD, MD, offset 0>
5657 Metadata *Elts[] = {&MD, &MD, ConstantAsMetadata::get(Constant::getNullValue(
5658 Type::getInt64Ty(Context)))};
5659 return MDNode::get(Context, Elts);
5660}
5661
5662Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy,
5663 Instruction *&Temp) {
5664 if (Opc != Instruction::BitCast)
5665 return nullptr;
5666
5667 Temp = nullptr;
5668 Type *SrcTy = V->getType();
5669 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5670 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5671 LLVMContext &Context = V->getContext();
5672
5673 // We have no information about the target data layout, so we assume that
5674 // the maximum pointer size is 64 bits.
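 // For example (illustrative IR), an old cross-address-space bitcast
 //   %q = bitcast ptr addrspace(1) %p to ptr
 // is expanded to:
 //   %t = ptrtoint ptr addrspace(1) %p to i64
 //   %q = inttoptr i64 %t to ptr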
5675 Type *MidTy = Type::getInt64Ty(Context);
5676 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5677
5678 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5679 }
5680
5681 return nullptr;
5682}
5683
5684Constant *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) {
5685 if (Opc != Instruction::BitCast)
5686 return nullptr;
5687
5688 Type *SrcTy = C->getType();
5689 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5690 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5691 LLVMContext &Context = C->getContext();
5692
5693 // We have no information about the target data layout, so we assume that
5694 // the maximum pointer size is 64 bits.
5695 Type *MidTy = Type::getInt64Ty(Context);
5696
5697 return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy),
5698 DestTy);
5699 }
5700
5701 return nullptr;
5702}
5703
5704/// Check the debug info version number, if it is out-dated, drop the debug
5705/// info. Return true if module is modified.
5706bool llvm::UpgradeDebugInfo(Module &M) {
5707 if (DisableAutoUpgradeDebugInfo)
5708 return false;
5709
5710 llvm::TimeTraceScope timeScope("Upgrade debug info");
5711 // We need to get metadata before the module is verified (i.e., getModuleFlag
5712 // makes assumptions that we haven't verified yet). Carefully extract the flag
5713 // from the metadata.
5714 unsigned Version = 0;
5715 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5716 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5717 if (Flag->getNumOperands() < 3)
5718 return false;
5719 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5720 return K->getString() == "Debug Info Version";
5721 return false;
5722 });
5723 if (OpIt != ModFlags->op_end()) {
5724 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5725 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5726 Version = CI->getZExtValue();
5727 }
5728 }
5729
5730 if (Version == DEBUG_METADATA_VERSION) {
5731 bool BrokenDebugInfo = false;
5732 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5733 report_fatal_error("Broken module found, compilation aborted!");
5734 if (!BrokenDebugInfo)
5735 // Everything is ok.
5736 return false;
5737 else {
5738 // Diagnose malformed debug info.
5739 DiagnosticInfoIgnoringInvalidDebugMetadata Diag(M);
5740 M.getContext().diagnose(Diag);
5741 }
5742 }
5743 bool Modified = StripDebugInfo(M);
5744 if (Modified && Version != DEBUG_METADATA_VERSION) {
5745 // Diagnose a version mismatch.
5746 DiagnosticInfoDebugMetadataVersion DiagVersion(M, Version);
5747 M.getContext().diagnose(DiagVersion);
5748 }
5749 return Modified;
5750}
5751
5752static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5753 GlobalValue *GV, const Metadata *V) {
5754 Function *F = cast<Function>(GV);
5755
5756 constexpr StringLiteral DefaultValue = "1";
5757 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5758 unsigned Length = 0;
5759
5760 if (F->hasFnAttribute(Attr)) {
5761 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5762 // parse these elements, placing them into Vect3.
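 // For example (hypothetical values), merging a "maxntidy" annotation of 4
 // into an existing "nvvm.maxntid"="2" yields "2,4".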
5763 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5764 for (; Length < 3 && !S.empty(); Length++) {
5765 auto [Part, Rest] = S.split(',');
5766 Vect3[Length] = Part.trim();
5767 S = Rest;
5768 }
5769 }
5770
5771 const unsigned Dim = DimC - 'x';
5772 assert(Dim < 3 && "Unexpected dim char");
5773
5774 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5775
5776 // Local variable required for the StringRef in Vect3 to point to.
5777 const std::string VStr = llvm::utostr(VInt);
5778 Vect3[Dim] = VStr;
5779 Length = std::max(Length, Dim + 1);
5780
5781 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5782 F->addFnAttr(Attr, NewAttr);
5783}
5784
5785static inline bool isXYZ(StringRef S) {
5786 return S == "x" || S == "y" || S == "z";
5787}
5788
5789static bool upgradeSingleNVVMAnnotation(GlobalValue *GV, StringRef K,
5790 const Metadata *V) {
5791 if (K == "kernel") {
5792 if (!mdconst::extract<ConstantInt>(V)->isZero())
5793 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5794 return true;
5795 }
5796 if (K == "align") {
5797 // V is a bitfield specifying two 16-bit values. The alignment value is
5798 // specified in the low 16 bits; the index is specified in the high bits. For
5799 // the index, 0 indicates the return value while higher values correspond to
5800 // each parameter (idx = param + 1).
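 // For example (hypothetical value), V = 0x00020008 requests stackalign(8)
 // at index 2, i.e. on the second function parameter.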
5801 const uint64_t AlignIdxValuePair =
5802 mdconst::extract<ConstantInt>(V)->getZExtValue();
5803 const unsigned Idx = (AlignIdxValuePair >> 16);
5804 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5805 cast<Function>(GV)->addAttributeAtIndex(
5806 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5807 return true;
5808 }
5809 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5810 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5811 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5812 return true;
5813 }
5814 if (K == "minctasm") {
5815 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5816 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5817 return true;
5818 }
5819 if (K == "maxnreg") {
5820 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5821 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5822 return true;
5823 }
5824 if (K.consume_front("maxntid") && isXYZ(K)) {
5825 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5826 return true;
5827 }
5828 if (K.consume_front("reqntid") && isXYZ(K)) {
5829 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5830 return true;
5831 }
5832 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5833 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5834 return true;
5835 }
5836 if (K == "grid_constant") {
5837 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5838 for (const auto &Op : cast<MDNode>(V)->operands()) {
5839 // For some reason, the index is 1-based in the metadata. Good thing we're
5840 // able to auto-upgrade it!
5841 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5842 cast<Function>(GV)->addParamAttr(Index, Attr);
5843 }
5844 return true;
5845 }
5846
5847 return false;
5848}
5849
5850void llvm::UpgradeNVVMAnnotations(Module &M) {
5851 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5852 if (!NamedMD)
5853 return;
5854
5855 SmallVector<MDNode *, 8> NewNodes;
5856 SmallPtrSet<const MDNode *, 8> SeenNodes;
5857 for (MDNode *MD : NamedMD->operands()) {
5858 if (!SeenNodes.insert(MD).second)
5859 continue;
5860
5861 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5862 if (!GV)
5863 continue;
5864
5865 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5866
5867 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5868 // Each nvvm.annotations metadata entry will be of the following form:
5869 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5870 // start index = 1, to skip the global variable key
5871 // increment = 2, to skip the value for each property-value pairs
5872 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5873 MDString *K = cast<MDString>(MD->getOperand(j));
5874 const MDOperand &V = MD->getOperand(j + 1);
5875 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5876 if (!Upgraded)
5877 NewOperands.append({K, V});
5878 }
5879
5880 if (NewOperands.size() > 1)
5881 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5882 }
5883
5884 NamedMD->clearOperands();
5885 for (MDNode *N : NewNodes)
5886 NamedMD->addOperand(N);
5887}
5888
5889/// This checks for objc retain release marker which should be upgraded. It
5890/// returns true if module is modified.
5891static bool upgradeRetainReleaseMarker(Module &M) {
5892 bool Changed = false;
5893 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5894 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5895 if (ModRetainReleaseMarker) {
5896 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5897 if (Op) {
5898 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5899 if (ID) {
5900 SmallVector<StringRef, 4> ValueComp;
5901 ID->getString().split(ValueComp, "#");
5902 if (ValueComp.size() == 2) {
5903 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5904 ID = MDString::get(M.getContext(), NewValue);
5905 }
5906 M.addModuleFlag(Module::Error, MarkerKey, ID);
5907 M.eraseNamedMetadata(ModRetainReleaseMarker);
5908 Changed = true;
5909 }
5910 }
5911 }
5912 return Changed;
5913}
5914
5915void llvm::UpgradeARCRuntime(Module &M) {
5916 // This lambda converts normal function calls to ARC runtime functions to
5917 // intrinsic calls.
5918 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5919 llvm::Intrinsic::ID IntrinsicFunc) {
5920 Function *Fn = M.getFunction(OldFunc);
5921
5922 if (!Fn)
5923 return;
5924
5925 Function *NewFn =
5926 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5927
5928 for (User *U : make_early_inc_range(Fn->users())) {
5930 if (!CI || CI->getCalledFunction() != Fn)
5931 continue;
5932
5933 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5934 FunctionType *NewFuncTy = NewFn->getFunctionType();
5935 SmallVector<Value *, 4> Args;
5936
5937 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5938 // value to the return type of the old function.
5939 if (NewFuncTy->getReturnType() != CI->getType() &&
5940 !CastInst::castIsValid(Instruction::BitCast, CI,
5941 NewFuncTy->getReturnType()))
5942 continue;
5943
5944 bool InvalidCast = false;
5945
5946 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5947 Value *Arg = CI->getArgOperand(I);
5948
5949 // Bitcast argument to the parameter type of the new function if it's
5950 // not a variadic argument.
5951 if (I < NewFuncTy->getNumParams()) {
5952 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5953 // to the parameter type of the new function.
5954 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5955 NewFuncTy->getParamType(I))) {
5956 InvalidCast = true;
5957 break;
5958 }
5959 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
5960 }
5961 Args.push_back(Arg);
5962 }
5963
5964 if (InvalidCast)
5965 continue;
5966
5967 // Create a call instruction that calls the new function.
5968 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
5969 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5970 NewCall->takeName(CI);
5971
5972 // Bitcast the return value back to the type of the old call.
5973 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
5974
5975 if (!CI->use_empty())
5976 CI->replaceAllUsesWith(NewRetVal);
5977 CI->eraseFromParent();
5978 }
5979
5980 if (Fn->use_empty())
5981 Fn->eraseFromParent();
5982 };
5983
5984 // Unconditionally convert a call to "clang.arc.use" to a call to
5985 // "llvm.objc.clang.arc.use".
5986 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
5987
5988 // Upgrade the retain release marker. If there is no need to upgrade
5989 // the marker, that means either the module is already new enough to contain
5990 // new intrinsics or it is not ARC. Either way, the runtime calls need no upgrade.
5991 if (!upgradeRetainReleaseMarker(M))
5992 return;
5993
5994 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
5995 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
5996 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
5997 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
5998 {"objc_autoreleaseReturnValue",
5999 llvm::Intrinsic::objc_autoreleaseReturnValue},
6000 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6001 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6002 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6003 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6004 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6005 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6006 {"objc_release", llvm::Intrinsic::objc_release},
6007 {"objc_retain", llvm::Intrinsic::objc_retain},
6008 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6009 {"objc_retainAutoreleaseReturnValue",
6010 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6011 {"objc_retainAutoreleasedReturnValue",
6012 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6013 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6014 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6015 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6016 {"objc_unsafeClaimAutoreleasedReturnValue",
6017 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6018 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6019 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6020 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6021 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6022 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6023 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6024 {"objc_arc_annotation_topdown_bbstart",
6025 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6026 {"objc_arc_annotation_topdown_bbend",
6027 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6028 {"objc_arc_annotation_bottomup_bbstart",
6029 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6030 {"objc_arc_annotation_bottomup_bbend",
6031 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6032
6033 for (auto &I : RuntimeFuncs)
6034 UpgradeToIntrinsic(I.first, I.second);
6035}
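// [Editorial sketch, not part of the original source.] A typical rewrite made
// through the table above:
//   declare ptr @objc_retain(ptr)
//   %0 = tail call ptr @objc_retain(ptr %obj)
// becomes a call to the intrinsic, keeping the tail-call kind and the name:
//   %0 = tail call ptr @llvm.objc.retain(ptr %obj)
// and the now-dead @objc_retain declaration is then erased.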
6036
6037bool llvm::UpgradeModuleFlags(Module &M) {
6038 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6039 if (!ModFlags)
6040 return false;
6041
6042 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6043 bool HasSwiftVersionFlag = false;
6044 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6045 uint32_t SwiftABIVersion;
6046 auto Int8Ty = Type::getInt8Ty(M.getContext());
6047 auto Int32Ty = Type::getInt32Ty(M.getContext());
6048
6049 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6050 MDNode *Op = ModFlags->getOperand(I);
6051 if (Op->getNumOperands() != 3)
6052 continue;
6053 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6054 if (!ID)
6055 continue;
6056 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6057 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6058 Type::getInt32Ty(M.getContext()), B)),
6059 MDString::get(M.getContext(), ID->getString()),
6060 Op->getOperand(2)};
6061 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6062 Changed = true;
6063 };
6064
6065 if (ID->getString() == "Objective-C Image Info Version")
6066 HasObjCFlag = true;
6067 if (ID->getString() == "Objective-C Class Properties")
6068 HasClassProperties = true;
6069 // Upgrade PIC from Error/Max to Min.
6070 if (ID->getString() == "PIC Level") {
6071 if (auto *Behavior =
6072 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
6073 uint64_t V = Behavior->getLimitedValue();
6074 if (V == Module::Error || V == Module::Max)
6075 SetBehavior(Module::Min);
6076 }
6077 }
6078 // Upgrade "PIE Level" from Error to Max.
6079 if (ID->getString() == "PIE Level")
6080 if (auto *Behavior =
6081 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
6082 if (Behavior->getLimitedValue() == Module::Error)
6083 SetBehavior(Module::Max);
6084
6085 // Upgrade branch protection and return address signing module flags. The
6086 // module flag behavior for these flags was Error and is now Min.
6087 if (ID->getString() == "branch-target-enforcement" ||
6088 ID->getString().starts_with("sign-return-address")) {
6089 if (auto *Behavior =
6090 mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
6091 if (Behavior->getLimitedValue() == Module::Error) {
6092 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6093 Metadata *Ops[3] = {
6094 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6095 Op->getOperand(1), Op->getOperand(2)};
6096 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6097 Changed = true;
6098 }
6099 }
6100 }
6101
6102 // Upgrade the Objective-C Image Info Section. Remove the whitespace in the
6103 // section name so that llvm-lto will not complain about mismatching
6104 // module flags that are functionally the same.
6105 if (ID->getString() == "Objective-C Image Info Section") {
6106 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6107 SmallVector<StringRef, 4> ValueComp;
6108 Value->getString().split(ValueComp, " ");
6109 if (ValueComp.size() != 1) {
6110 std::string NewValue;
6111 for (auto &S : ValueComp)
6112 NewValue += S.str();
6113 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6114 MDString::get(M.getContext(), NewValue)};
6115 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6116 Changed = true;
6117 }
6118 }
6119 }
6120
6121 // The IR upgrader turns an i32 "Objective-C Garbage Collection" module flag
6122 // into an i8 value; if the higher bits are set, it adds new Swift module flags.
6123 if (ID->getString() == "Objective-C Garbage Collection") {
6124 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6125 if (Md) {
6126 assert(Md->getValue() && "Expected non-empty metadata");
6127 auto Type = Md->getValue()->getType();
6128 if (Type == Int8Ty)
6129 continue;
6130 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6131 if ((Val & 0xff) != Val) {
6132 HasSwiftVersionFlag = true;
6133 SwiftABIVersion = (Val & 0xff00) >> 8;
6134 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6135 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6136 }
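      // [Editorial worked example, not part of the original source.]
      // Val == 0x05010700 unpacks as Swift major version 5 (bits 31:24),
      // minor version 1 (bits 23:16), ABI version 7 (bits 15:8); the low
      // byte 0x00 is the actual GC value kept by the rewritten flag below.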
6137 Metadata *Ops[3] = {
6138 Op->getOperand(0),
6139 Op->getOperand(1),
6140 ConstantAsMetadata::get(ConstantInt::get(Int8Ty,Val & 0xff))};
6141 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6142 Changed = true;
6143 }
6144 }
6145
6146 if (ID->getString() == "amdgpu_code_object_version") {
6147 Metadata *Ops[3] = {
6148 Op->getOperand(0),
6149 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6150 Op->getOperand(2)};
6151 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6152 Changed = true;
6153 }
6154 }
6155
6156 // "Objective-C Class Properties" was recently added for Objective-C. We
6157 // upgrade ObjC bitcode to contain an "Objective-C Class Properties" module
6158 // flag of value 0, so we can correctly downgrade this flag when trying to
6159 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6160 // this module flag.
6161 if (HasObjCFlag && !HasClassProperties) {
6162 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6163 (uint32_t)0);
6164 Changed = true;
6165 }
6166
6167 if (HasSwiftVersionFlag) {
6168 M.addModuleFlag(Module::Error, "Swift ABI Version",
6169 SwiftABIVersion);
6170 M.addModuleFlag(Module::Error, "Swift Major Version",
6171 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6172 M.addModuleFlag(Module::Error, "Swift Minor Version",
6173 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6174 Changed = true;
6175 }
6176
6177 return Changed;
6178}
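// [Editorial sketch, not part of the original source; behavior codes follow
// Module::ModFlagBehavior, where Error = 1 and Min = 8.] Two typical rewrites:
//   !{i32 1, !"PIC Level", i32 2}  ->  !{i32 8, !"PIC Level", i32 2}
//   !{i32 1, !"Objective-C Image Info Section", !"__DATA, __objc_imageinfo"}
//     ->  !{i32 1, !"Objective-C Image Info Section", !"__DATA,__objc_imageinfo"}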
6179
6180void llvm::UpgradeSectionAttributes(Module &M) {
6181 auto TrimSpaces = [](StringRef Section) -> std::string {
6182 SmallVector<StringRef, 5> Components;
6183 Section.split(Components, ',');
6184
6185 SmallString<32> Buffer;
6186 raw_svector_ostream OS(Buffer);
6187
6188 for (auto Component : Components)
6189 OS << ',' << Component.trim();
6190
6191 return std::string(OS.str().substr(1));
6192 };
6193
6194 for (auto &GV : M.globals()) {
6195 if (!GV.hasSection())
6196 continue;
6197
6198 StringRef Section = GV.getSection();
6199
6200 if (!Section.starts_with("__DATA, __objc_catlist"))
6201 continue;
6202
6203 // __DATA, __objc_catlist, regular, no_dead_strip
6204 // __DATA,__objc_catlist,regular,no_dead_strip
6205 GV.setSection(TrimSpaces(Section));
6206 }
6207}
6208
6209namespace {
6210// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6211// callsites within a function that did not also have the strictfp attribute.
6212// Since 10.0, if strict FP semantics are needed within a function, the
6213// function must have the strictfp attribute and all calls within the function
6214// must also have the strictfp attribute. This latter restriction is
6215// necessary to prevent unwanted libcall simplification when a function is
6216// being cloned (such as for inlining).
6217//
6218// The "dangling" strictfp attribute usage was only used to prevent constant
6219// folding and other libcall simplification. The nobuiltin attribute on the
6220// callsite has the same effect.
6221struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6222 StrictFPUpgradeVisitor() = default;
6223
6224 void visitCallBase(CallBase &Call) {
6225 if (!Call.isStrictFP())
6226 return;
6227 if (isa<ConstrainedFPIntrinsic>(&Call))
6228 return;
6229 // If we get here, the caller doesn't have the strictfp attribute
6230 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6231 Call.removeFnAttr(Attribute::StrictFP);
6232 Call.addFnAttr(Attribute::NoBuiltin);
6233 }
6234};
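// [Editorial sketch, not part of the original source.] Inside a caller that
// lacks the strictfp attribute, the visitor rewrites a callsite such as
//   %r = call double @pow(double %x, double %y) strictfp
// into
//   %r = call double @pow(double %x, double %y) nobuiltin
// Constrained FP intrinsics are left untouched by the early return above.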
6235
6236/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6237struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6238 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6239 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6240
6241 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6242 if (!RMW.isFloatingPointOperation())
6243 return;
6244
6245 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6246 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6247 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6248 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6249 }
6250};
6251} // namespace
6252
6253void llvm::UpgradeFunctionAttributes(Function &F) {
6254 // If a function definition doesn't have the strictfp attribute,
6255 // convert any callsite strictfp attributes to nobuiltin.
6256 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6257 StrictFPUpgradeVisitor SFPV;
6258 SFPV.visit(F);
6259 }
6260
6261 // Remove all incompatible attributes from the function.
6262 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6263 F.getReturnType(), F.getAttributes().getRetAttrs()));
6264 for (auto &Arg : F.args())
6265 Arg.removeAttrs(
6266 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6267
6268 // Older versions of LLVM treated an "implicit-section-name" attribute
6269 // similarly to directly setting the section on a Function.
6270 if (Attribute A = F.getFnAttribute("implicit-section-name");
6271 A.isValid() && A.isStringAttribute()) {
6272 F.setSection(A.getValueAsString());
6273 F.removeFnAttr("implicit-section-name");
6274 }
6275
6276 if (!F.empty()) {
6277 // For some reason this is called twice, and the first time is before any
6278 // instructions are loaded into the body.
6279
6280 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6281 A.isValid()) {
6282
6283 if (A.getValueAsBool()) {
6284 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6285 Visitor.visit(F);
6286 }
6287
6288 // We will leave behind dead attribute uses on external declarations, but
6289 // clang never added these to declarations anyway.
6290 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
6291 }
6292 }
6293}
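// [Editorial sketch, not part of the original source.] The
// "implicit-section-name" upgrade above turns
//   define void @f() #0 { ret void }
//   attributes #0 = { "implicit-section-name"=".text.hot" }
// into an explicit section on the function:
//   define void @f() section ".text.hot" { ret void }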
6294
6295 // Set the function attribute, with the given value, if it is not already present.
6296static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6297 StringRef Value) {
6298 if (!F.hasFnAttribute(FnAttrName))
6299 F.addFnAttr(FnAttrName, Value);
6300}
6301
6302 // Add the function attribute if it is not already present and Set is true.
6303 // If the attribute is present with value "false", remove it; if it is present
6304 // with value "true", reset it to a valueless attribute.
6305static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6306 if (!F.hasFnAttribute(FnAttrName)) {
6307 if (Set)
6308 F.addFnAttr(FnAttrName);
6309 } else {
6310 auto A = F.getFnAttribute(FnAttrName);
6311 if ("false" == A.getValueAsString())
6312 F.removeFnAttr(FnAttrName);
6313 else if ("true" == A.getValueAsString()) {
6314 F.removeFnAttr(FnAttrName);
6315 F.addFnAttr(FnAttrName);
6316 }
6317 }
6318}
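// [Editorial note, not part of the original source.] Effect of
// ConvertFunctionAttr on, e.g., "branch-target-enforcement":
//   attribute absent and Set is true -> valueless attribute added
//   attribute present as "false"     -> attribute removed
//   attribute present as "true"      -> replaced by a valueless attribute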
6319
6320void llvm::copyModuleAttrToFunctions(Module &M) {
6321 Triple T(M.getTargetTriple());
6322 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6323 return;
6324
6325 uint64_t BTEValue = 0;
6326 uint64_t BPPLRValue = 0;
6327 uint64_t GCSValue = 0;
6328 uint64_t SRAValue = 0;
6329 uint64_t SRAALLValue = 0;
6330 uint64_t SRABKeyValue = 0;
6331
6332 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6333 if (ModFlags) {
6334 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6335 MDNode *Op = ModFlags->getOperand(I);
6336 if (Op->getNumOperands() != 3)
6337 continue;
6338
6339 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6340 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6341 if (!ID || !CI)
6342 continue;
6343
6344 StringRef IDStr = ID->getString();
6345 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6346 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6347 : IDStr == "guarded-control-stack" ? &GCSValue
6348 : IDStr == "sign-return-address" ? &SRAValue
6349 : IDStr == "sign-return-address-all" ? &SRAALLValue
6350 : IDStr == "sign-return-address-with-bkey"
6351 ? &SRABKeyValue
6352 : nullptr;
6353 if (!ValPtr)
6354 continue;
6355
6356 *ValPtr = CI->getZExtValue();
6357 if (*ValPtr == 2)
6358 return;
6359 }
6360 }
6361
6362 bool BTE = BTEValue == 1;
6363 bool BPPLR = BPPLRValue == 1;
6364 bool GCS = GCSValue == 1;
6365 bool SRA = SRAValue == 1;
6366
6367 StringRef SignTypeValue = "non-leaf";
6368 if (SRA && SRAALLValue == 1)
6369 SignTypeValue = "all";
6370
6371 StringRef SignKeyValue = "a_key";
6372 if (SRA && SRABKeyValue == 1)
6373 SignKeyValue = "b_key";
6374
6375 for (Function &F : M.getFunctionList()) {
6376 if (F.isDeclaration())
6377 continue;
6378
6379 if (SRA) {
6380 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6381 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6382 } else {
6383 if (auto A = F.getFnAttribute("sign-return-address");
6384 A.isValid() && "none" == A.getValueAsString()) {
6385 F.removeFnAttr("sign-return-address");
6386 F.removeFnAttr("sign-return-address-key");
6387 }
6388 }
6389 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6390 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6391 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6392 }
6393
6394 if (BTE)
6395 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6396 if (BPPLR)
6397 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6398 if (GCS)
6399 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6400 if (SRA) {
6401 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6402 if (SRAALLValue == 1)
6403 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6404 if (SRABKeyValue == 1)
6405 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6406 }
6407}
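// [Editorial sketch, not part of the original source.] Given only the module
// flag
//   !{i32 1, !"sign-return-address", i32 1}
// every function definition receives
//   "sign-return-address"="non-leaf" "sign-return-address-key"="a_key"
// and the flag is re-emitted as the "already copied" sentinel
//   !{i32 8, !"sign-return-address", i32 2}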
6408
6409static bool isOldLoopArgument(Metadata *MD) {
6410 auto *T = dyn_cast_or_null<MDTuple>(MD);
6411 if (!T)
6412 return false;
6413 if (T->getNumOperands() < 1)
6414 return false;
6415 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6416 if (!S)
6417 return false;
6418 return S->getString().starts_with("llvm.vectorizer.");
6419}
6420
6421static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6422 StringRef OldPrefix = "llvm.vectorizer.";
6423 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6424
6425 if (OldTag == "llvm.vectorizer.unroll")
6426 return MDString::get(C, "llvm.loop.interleave.count");
6427
6428 return MDString::get(
6429 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6430 .str());
6431}
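// [Editorial note, not part of the original source.] The renames performed by
// upgradeLoopTag:
//   !"llvm.vectorizer.unroll" -> !"llvm.loop.interleave.count"
//   !"llvm.vectorizer.width"  -> !"llvm.loop.vectorize.width"
//   !"llvm.vectorizer.enable" -> !"llvm.loop.vectorize.enable"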
6432
6433static Metadata *upgradeLoopArgument(Metadata *MD) {
6434 auto *T = dyn_cast_or_null<MDTuple>(MD);
6435 if (!T)
6436 return MD;
6437 if (T->getNumOperands() < 1)
6438 return MD;
6439 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6440 if (!OldTag)
6441 return MD;
6442 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6443 return MD;
6444
6445 // This has an old tag. Upgrade it.
6446 SmallVector<Metadata *, 4> Ops;
6447 Ops.reserve(T->getNumOperands());
6448 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6449 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6450 Ops.push_back(T->getOperand(I));
6451
6452 return MDTuple::get(T->getContext(), Ops);
6453}
6454
6455MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6456 auto *T = dyn_cast<MDTuple>(&N);
6457 if (!T)
6458 return &N;
6459
6460 if (none_of(T->operands(), isOldLoopArgument))
6461 return &N;
6462
6464 Ops.reserve(T->getNumOperands());
6465 for (Metadata *MD : T->operands())
6466 Ops.push_back(upgradeLoopArgument(MD));
6467
6468 return MDTuple::get(T->getContext(), Ops);
6469}
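// [Editorial sketch, not part of the original source.] A legacy loop
// attachment such as
//   br i1 %c, label %loop, label %exit, !llvm.loop !0
//   !0 = distinct !{!0, !1}
//   !1 = !{!"llvm.vectorizer.width", i32 4}
// comes back from upgradeInstructionLoopAttachment as an equivalent tuple in
// which the old argument has become
//   !{!"llvm.loop.vectorize.width", i32 4}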
6470
6471std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
6472 Triple T(TT);
6473 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6474 // the address space of globals to 1. This does not apply to SPIRV Logical.
6475 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6476 !DL.contains("-G") && !DL.starts_with("G")) {
6477 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6478 }
6479
6480 if (T.isLoongArch64() || T.isRISCV64()) {
6481 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6482 auto I = DL.find("-n64-");
6483 if (I != StringRef::npos)
6484 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6485 return DL.str();
6486 }
6487
6488 // AMDGPU data layout upgrades.
6489 std::string Res = DL.str();
6490 if (T.isAMDGPU()) {
6491 // Define address spaces for constants.
6492 if (!DL.contains("-G") && !DL.starts_with("G"))
6493 Res.append(Res.empty() ? "G1" : "-G1");
6494
6495 // AMDGCN data layout upgrades.
6496 if (T.isAMDGCN()) {
6497
6498 // Add missing non-integral declarations.
6499 // This goes before adding new address spaces to prevent incoherent string
6500 // values.
6501 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6502 Res.append("-ni:7:8:9");
6503 // Update ni:7 to ni:7:8:9.
6504 if (DL.ends_with("ni:7"))
6505 Res.append(":8:9");
6506 if (DL.ends_with("ni:7:8"))
6507 Res.append(":9");
6508
6509 // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6510 // resources). An empty data layout has already been upgraded to G1 by now.
6511 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6512 Res.append("-p7:160:256:256:32");
6513 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6514 Res.append("-p8:128:128:128:48");
6515 constexpr StringRef OldP8("-p8:128:128-");
6516 if (DL.contains(OldP8))
6517 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6518 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6519 Res.append("-p9:192:256:256:32");
6520 }
6521
6522 // Upgrade the ELF mangling mode.
6523 if (!DL.contains("m:e"))
6524 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6525
6526 return Res;
6527 }
6528
6529 if (T.isSystemZ() && !DL.empty()) {
6530 // Make sure the stack alignment is present.
6531 if (!DL.contains("-S64"))
6532 return "E-S64" + DL.drop_front(1).str();
6533 return DL.str();
6534 }
6535
6536 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6537 // If the datalayout matches the expected format, add pointer size address
6538 // spaces to the datalayout.
6539 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6540 if (!DL.contains(AddrSpaces)) {
6541 SmallVector<StringRef, 4> Groups;
6542 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6543 if (R.match(Res, &Groups))
6544 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6545 }
6546 };
6547
6548 // AArch64 data layout upgrades.
6549 if (T.isAArch64()) {
6550 // Add "-Fn32"
6551 if (!DL.empty() && !DL.contains("-Fn32"))
6552 Res.append("-Fn32");
6553 AddPtr32Ptr64AddrSpaces();
6554 return Res;
6555 }
6556
6557 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6558 T.isWasm()) {
6559 // Mips64 with the o32 ABI did not add "-i128:128".
6560 // Add "-i128:128" after "-i64:64" if it is missing.
6561 std::string I64 = "-i64:64";
6562 std::string I128 = "-i128:128";
6563 if (!StringRef(Res).contains(I128)) {
6564 size_t Pos = Res.find(I64);
6565 if (Pos != size_t(-1))
6566 Res.insert(Pos + I64.size(), I128);
6567 }
6568 }
6569
6570 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6571 size_t Pos = Res.find("-S128");
6572 if (Pos == StringRef::npos)
6573 Pos = Res.size();
6574 Res.insert(Pos, "-f64:32:64");
6575 }
6576
6577 if (!T.isX86())
6578 return Res;
6579
6580 AddPtr32Ptr64AddrSpaces();
6581
6582 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6583 // for i128 operations prior to this being reflected in the data layout, and
6584 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6585 // boundaries, so although this is a breaking change, the upgrade is expected
6586 // to fix more IR than it breaks.
6587 // Intel MCU is an exception and uses 4-byte alignment.
6588 if (!T.isOSIAMCU()) {
6589 std::string I128 = "-i128:128";
6590 if (StringRef Ref = Res; !Ref.contains(I128)) {
6591 SmallVector<StringRef, 4> Groups;
6592 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6593 if (R.match(Res, &Groups))
6594 Res = (Groups[1] + I128 + Groups[3]).str();
6595 }
6596 }
6597
6598 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6599 // Raising the alignment is safe because Clang did not produce f80 values in
6600 // the MSVC environment before this upgrade was added.
6601 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6602 StringRef Ref = Res;
6603 auto I = Ref.find("-f80:32-");
6604 if (I != StringRef::npos)
6605 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6606 }
6607
6608 return Res;
6609}
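// [Editorial worked example, not part of the original source.] For the old
// x86-64 layout
//   e-m:e-i64:64-f80:128-n8:16:32:64-S128
// the two regex rewrites above first splice in the pointer address spaces and
// then the i128 alignment, producing
//   e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128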
6610
6611void llvm::UpgradeAttributes(AttrBuilder &B) {
6612 StringRef FramePointer;
6613 Attribute A = B.getAttribute("no-frame-pointer-elim");
6614 if (A.isValid()) {
6615 // The value can be "true" or "false".
6616 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6617 B.removeAttribute("no-frame-pointer-elim");
6618 }
6619 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6620 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6621 if (FramePointer != "all")
6622 FramePointer = "non-leaf";
6623 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6624 }
6625 if (!FramePointer.empty())
6626 B.addAttribute("frame-pointer", FramePointer);
6627
6628 A = B.getAttribute("null-pointer-is-valid");
6629 if (A.isValid()) {
6630 // The value can be "true" or "false".
6631 bool NullPointerIsValid = A.getValueAsString() == "true";
6632 B.removeAttribute("null-pointer-is-valid");
6633 if (NullPointerIsValid)
6634 B.addAttribute(Attribute::NullPointerIsValid);
6635 }
6636}
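// [Editorial note, not part of the original source.] Attribute spellings
// migrated by UpgradeAttributes:
//   "no-frame-pointer-elim"="true"   -> "frame-pointer"="all"
//   "no-frame-pointer-elim"="false"  -> "frame-pointer"="none"
//   "no-frame-pointer-elim-non-leaf" -> "frame-pointer"="non-leaf"
//   "null-pointer-is-valid"="true"   -> null_pointer_is_valid
//   "null-pointer-is-valid"="false"  -> dropped with no replacement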
6637
6638void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6639 // clang.arc.attachedcall bundles are now required to have an operand.
6640 // If they don't, it's okay to drop them entirely: when there is an operand,
6641 // the "attachedcall" is meaningful and required, but without an operand,
6642 // it's just a marker NOP. Dropping it merely prevents an optimization.
6643 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6644 return OBD.getTag() == "clang.arc.attachedcall" &&
6645 OBD.inputs().empty();
6646 });
6647}
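// [Editorial sketch, not part of the original source.] An operand-less bundle
// as in
//   call void @g() [ "clang.arc.attachedcall"() ]
// is dropped, leaving a plain call, while a bundle that names its runtime
// function, e.g.
//   [ "clang.arc.attachedcall"(ptr @objc_retainAutoreleasedReturnValue) ]
// is kept.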