1//===-- AutoUpgrade.cpp - Implement auto-upgrade helper functions ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the auto-upgrade helper functions.
10// This is where deprecated IR intrinsics and other IR features are updated to
11// current specifications.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/IR/AutoUpgrade.h"
16#include "llvm/ADT/ArrayRef.h"
18#include "llvm/ADT/StringRef.h"
19#include "llvm/ADT/StringSwitch.h"
22#include "llvm/IR/Attributes.h"
23#include "llvm/IR/CallingConv.h"
24#include "llvm/IR/Constants.h"
25#include "llvm/IR/DebugInfo.h"
28#include "llvm/IR/Function.h"
29#include "llvm/IR/IRBuilder.h"
30#include "llvm/IR/InstVisitor.h"
31#include "llvm/IR/Instruction.h"
33#include "llvm/IR/Intrinsics.h"
34#include "llvm/IR/IntrinsicsAArch64.h"
35#include "llvm/IR/IntrinsicsAMDGPU.h"
36#include "llvm/IR/IntrinsicsARM.h"
37#include "llvm/IR/IntrinsicsNVPTX.h"
38#include "llvm/IR/IntrinsicsRISCV.h"
39#include "llvm/IR/IntrinsicsWebAssembly.h"
40#include "llvm/IR/IntrinsicsX86.h"
41#include "llvm/IR/LLVMContext.h"
42#include "llvm/IR/MDBuilder.h"
43#include "llvm/IR/Metadata.h"
44#include "llvm/IR/Module.h"
45#include "llvm/IR/Value.h"
46#include "llvm/IR/Verifier.h"
48#include "llvm/Support/CommandLine.h"
51#include "llvm/Support/Regex.h"
54#include <cstdint>
55#include <cstring>
56#include <numeric>
57
58using namespace llvm;
59
60static cl::opt<bool>
61 DisableAutoUpgradeDebugInfo("disable-auto-upgrade-debug-info",
62 cl::desc("Disable autoupgrade of debug info"));
63
64static void rename(GlobalValue *GV) { GV->setName(GV->getName() + ".old"); }
65
66// Upgrade the declarations of the SSE4.1 ptest intrinsics whose arguments have
67// changed their type from v4f32 to v2i64.
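// As an illustrative sketch (declarations paraphrased, not copied from a
// test), the upgrade turns
//   declare i32 @llvm.x86.sse41.ptestc(<4 x float>, <4 x float>)
// into the current form
//   declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>)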
68static bool upgradePTESTIntrinsic(Function *F, Intrinsic::ID IID,
69                                  Function *&NewFn) {
70 // Check whether this is an old version of the function, which received
71 // v4f32 arguments.
72 Type *Arg0Type = F->getFunctionType()->getParamType(0);
73 if (Arg0Type != FixedVectorType::get(Type::getFloatTy(F->getContext()), 4))
74 return false;
75
76 // Yes, it's old, replace it with new version.
77 rename(F);
78 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
79 return true;
80}
81
82// Upgrade the declarations of intrinsic functions whose 8-bit immediate mask
83// arguments have changed their type from i32 to i8.
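// For example (an illustrative sketch), a declaration such as
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i32)
// is re-declared with the immediate narrowed to i8:
//   declare <4 x float> @llvm.x86.sse41.insertps(<4 x float>, <4 x float>, i8)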
84static bool upgradeX86IntrinsicsWith8BitMask(Function *F, Intrinsic::ID IID,
85                                             Function *&NewFn) {
86 // Check that the last argument is an i32.
87 Type *LastArgType = F->getFunctionType()->getParamType(
88 F->getFunctionType()->getNumParams() - 1);
89 if (!LastArgType->isIntegerTy(32))
90 return false;
91
92 // Move this function aside and map down.
93 rename(F);
94 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
95 return true;
96}
97
98// Upgrade the declaration of fp compare intrinsics that change return type
99// from scalar to vXi1 mask.
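// Sketch of the change, using the 128-bit pd variant as an assumed example:
// the old declaration returned a scalar mask (i8), while the current
// x86.avx512.mask.cmp.pd.128 returns a <2 x i1> mask vector.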
100static bool upgradeX86MaskedFPCompare(Function *F, Intrinsic::ID IID,
101                                      Function *&NewFn) {
102 // Check if the return type is a vector.
103 if (F->getReturnType()->isVectorTy())
104 return false;
105
106 rename(F);
107 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
108 return true;
109}
110
111// Upgrade the declaration of multiply and add bytes intrinsics whose input
112// arguments' types have changed from vectors of i32 to vectors of i8
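// Illustrative sketch for the 128-bit case (widths assumed): the two data
// operands of @llvm.x86.avx512.vpdpbusd.128 were declared as <4 x i32> and
// are now declared as <16 x i8>, with the <4 x i32> accumulator unchanged.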
113static bool upgradeX86MultiplyAddBytes(Function *F, Intrinsic::ID IID,
114                                       Function *&NewFn) {
115 // check if input argument type is a vector of i8
116 Type *Arg1Type = F->getFunctionType()->getParamType(1);
117 Type *Arg2Type = F->getFunctionType()->getParamType(2);
118 if (Arg1Type->isVectorTy() &&
119 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(8) &&
120 Arg2Type->isVectorTy() &&
121 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(8))
122 return false;
123
124 rename(F);
125 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
126 return true;
127}
128
129// Upgrade the declaration of multiply and add words intrinsics whose input
130// arguments' types have changed from vectors of i32 to vectors of i16.
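// Illustrative sketch for the 128-bit case (widths assumed): the two data
// operands of @llvm.x86.avx512.vpdpwssd.128 were declared as <4 x i32> and
// are now declared as <8 x i16>, with the <4 x i32> accumulator unchanged.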
131static bool upgradeX86MultiplyAddWords(Function *F, Intrinsic::ID IID,
132                                       Function *&NewFn) {
133 // check if input argument type is a vector of i16
134 Type *Arg1Type = F->getFunctionType()->getParamType(1);
135 Type *Arg2Type = F->getFunctionType()->getParamType(2);
136 if (Arg1Type->isVectorTy() &&
137 cast<VectorType>(Arg1Type)->getElementType()->isIntegerTy(16) &&
138 Arg2Type->isVectorTy() &&
139 cast<VectorType>(Arg2Type)->getElementType()->isIntegerTy(16))
140 return false;
141
142 rename(F);
143 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
144 return true;
145}
146
147static bool upgradeX86BF16Intrinsic(Function *F, Intrinsic::ID IID,
148                                    Function *&NewFn) {
149 if (F->getReturnType()->getScalarType()->isBFloatTy())
150 return false;
151
152 rename(F);
153 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
154 return true;
155}
156
157static bool upgradeX86BF16DPIntrinsic(Function *F, Intrinsic::ID IID,
158                                      Function *&NewFn) {
159 if (F->getFunctionType()->getParamType(1)->getScalarType()->isBFloatTy())
160 return false;
161
162 rename(F);
163 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
164 return true;
165}
166
167static bool shouldUpgradeX86Intrinsic(Function *F, StringRef Name) {
168  // All of the intrinsic matches below should be marked with which llvm
169 // version started autoupgrading them. At some point in the future we would
170 // like to use this information to remove upgrade code for some older
171 // intrinsics. It is currently undecided how we will determine that future
172 // point.
173 if (Name.consume_front("avx."))
174 return (Name.starts_with("blend.p") || // Added in 3.7
175 Name == "cvt.ps2.pd.256" || // Added in 3.9
176 Name == "cvtdq2.pd.256" || // Added in 3.9
177 Name == "cvtdq2.ps.256" || // Added in 7.0
178 Name.starts_with("movnt.") || // Added in 3.2
179 Name.starts_with("sqrt.p") || // Added in 7.0
180 Name.starts_with("storeu.") || // Added in 3.9
181 Name.starts_with("vbroadcast.s") || // Added in 3.5
182 Name.starts_with("vbroadcastf128") || // Added in 4.0
183 Name.starts_with("vextractf128.") || // Added in 3.7
184 Name.starts_with("vinsertf128.") || // Added in 3.7
185 Name.starts_with("vperm2f128.") || // Added in 6.0
186 Name.starts_with("vpermil.")); // Added in 3.1
187
188 if (Name.consume_front("avx2."))
189 return (Name == "movntdqa" || // Added in 5.0
190 Name.starts_with("pabs.") || // Added in 6.0
191 Name.starts_with("padds.") || // Added in 8.0
192 Name.starts_with("paddus.") || // Added in 8.0
193 Name.starts_with("pblendd.") || // Added in 3.7
194 Name == "pblendw" || // Added in 3.7
195 Name.starts_with("pbroadcast") || // Added in 3.8
196 Name.starts_with("pcmpeq.") || // Added in 3.1
197 Name.starts_with("pcmpgt.") || // Added in 3.1
198 Name.starts_with("pmax") || // Added in 3.9
199 Name.starts_with("pmin") || // Added in 3.9
200 Name.starts_with("pmovsx") || // Added in 3.9
201 Name.starts_with("pmovzx") || // Added in 3.9
202 Name == "pmul.dq" || // Added in 7.0
203 Name == "pmulu.dq" || // Added in 7.0
204 Name.starts_with("psll.dq") || // Added in 3.7
205 Name.starts_with("psrl.dq") || // Added in 3.7
206 Name.starts_with("psubs.") || // Added in 8.0
207 Name.starts_with("psubus.") || // Added in 8.0
208 Name.starts_with("vbroadcast") || // Added in 3.8
209 Name == "vbroadcasti128" || // Added in 3.7
210 Name == "vextracti128" || // Added in 3.7
211 Name == "vinserti128" || // Added in 3.7
212 Name == "vperm2i128"); // Added in 6.0
213
214 if (Name.consume_front("avx512.")) {
215 if (Name.consume_front("mask."))
216 // 'avx512.mask.*'
217 return (Name.starts_with("add.p") || // Added in 7.0. 128/256 in 4.0
218 Name.starts_with("and.") || // Added in 3.9
219 Name.starts_with("andn.") || // Added in 3.9
220 Name.starts_with("broadcast.s") || // Added in 3.9
221 Name.starts_with("broadcastf32x4.") || // Added in 6.0
222 Name.starts_with("broadcastf32x8.") || // Added in 6.0
223 Name.starts_with("broadcastf64x2.") || // Added in 6.0
224 Name.starts_with("broadcastf64x4.") || // Added in 6.0
225 Name.starts_with("broadcasti32x4.") || // Added in 6.0
226 Name.starts_with("broadcasti32x8.") || // Added in 6.0
227 Name.starts_with("broadcasti64x2.") || // Added in 6.0
228 Name.starts_with("broadcasti64x4.") || // Added in 6.0
229 Name.starts_with("cmp.b") || // Added in 5.0
230 Name.starts_with("cmp.d") || // Added in 5.0
231 Name.starts_with("cmp.q") || // Added in 5.0
232 Name.starts_with("cmp.w") || // Added in 5.0
233 Name.starts_with("compress.b") || // Added in 9.0
234 Name.starts_with("compress.d") || // Added in 9.0
235 Name.starts_with("compress.p") || // Added in 9.0
236 Name.starts_with("compress.q") || // Added in 9.0
237 Name.starts_with("compress.store.") || // Added in 7.0
238 Name.starts_with("compress.w") || // Added in 9.0
239 Name.starts_with("conflict.") || // Added in 9.0
240 Name.starts_with("cvtdq2pd.") || // Added in 4.0
241 Name.starts_with("cvtdq2ps.") || // Added in 7.0 updated 9.0
242 Name == "cvtpd2dq.256" || // Added in 7.0
243 Name == "cvtpd2ps.256" || // Added in 7.0
244 Name == "cvtps2pd.128" || // Added in 7.0
245 Name == "cvtps2pd.256" || // Added in 7.0
246 Name.starts_with("cvtqq2pd.") || // Added in 7.0 updated 9.0
247 Name == "cvtqq2ps.256" || // Added in 9.0
248 Name == "cvtqq2ps.512" || // Added in 9.0
249 Name == "cvttpd2dq.256" || // Added in 7.0
250 Name == "cvttps2dq.128" || // Added in 7.0
251 Name == "cvttps2dq.256" || // Added in 7.0
252 Name.starts_with("cvtudq2pd.") || // Added in 4.0
253 Name.starts_with("cvtudq2ps.") || // Added in 7.0 updated 9.0
254 Name.starts_with("cvtuqq2pd.") || // Added in 7.0 updated 9.0
255 Name == "cvtuqq2ps.256" || // Added in 9.0
256 Name == "cvtuqq2ps.512" || // Added in 9.0
257 Name.starts_with("dbpsadbw.") || // Added in 7.0
258 Name.starts_with("div.p") || // Added in 7.0. 128/256 in 4.0
259 Name.starts_with("expand.b") || // Added in 9.0
260 Name.starts_with("expand.d") || // Added in 9.0
261 Name.starts_with("expand.load.") || // Added in 7.0
262 Name.starts_with("expand.p") || // Added in 9.0
263 Name.starts_with("expand.q") || // Added in 9.0
264 Name.starts_with("expand.w") || // Added in 9.0
265 Name.starts_with("fpclass.p") || // Added in 7.0
266 Name.starts_with("insert") || // Added in 4.0
267 Name.starts_with("load.") || // Added in 3.9
268 Name.starts_with("loadu.") || // Added in 3.9
269 Name.starts_with("lzcnt.") || // Added in 5.0
270 Name.starts_with("max.p") || // Added in 7.0. 128/256 in 5.0
271 Name.starts_with("min.p") || // Added in 7.0. 128/256 in 5.0
272 Name.starts_with("movddup") || // Added in 3.9
273 Name.starts_with("move.s") || // Added in 4.0
274 Name.starts_with("movshdup") || // Added in 3.9
275 Name.starts_with("movsldup") || // Added in 3.9
276 Name.starts_with("mul.p") || // Added in 7.0. 128/256 in 4.0
277 Name.starts_with("or.") || // Added in 3.9
278 Name.starts_with("pabs.") || // Added in 6.0
279 Name.starts_with("packssdw.") || // Added in 5.0
280 Name.starts_with("packsswb.") || // Added in 5.0
281 Name.starts_with("packusdw.") || // Added in 5.0
282 Name.starts_with("packuswb.") || // Added in 5.0
283 Name.starts_with("padd.") || // Added in 4.0
284 Name.starts_with("padds.") || // Added in 8.0
285 Name.starts_with("paddus.") || // Added in 8.0
286 Name.starts_with("palignr.") || // Added in 3.9
287 Name.starts_with("pand.") || // Added in 3.9
288 Name.starts_with("pandn.") || // Added in 3.9
289 Name.starts_with("pavg") || // Added in 6.0
290 Name.starts_with("pbroadcast") || // Added in 6.0
291 Name.starts_with("pcmpeq.") || // Added in 3.9
292 Name.starts_with("pcmpgt.") || // Added in 3.9
293 Name.starts_with("perm.df.") || // Added in 3.9
294 Name.starts_with("perm.di.") || // Added in 3.9
295 Name.starts_with("permvar.") || // Added in 7.0
296 Name.starts_with("pmaddubs.w.") || // Added in 7.0
297 Name.starts_with("pmaddw.d.") || // Added in 7.0
298 Name.starts_with("pmax") || // Added in 4.0
299 Name.starts_with("pmin") || // Added in 4.0
300 Name == "pmov.qd.256" || // Added in 9.0
301 Name == "pmov.qd.512" || // Added in 9.0
302 Name == "pmov.wb.256" || // Added in 9.0
303 Name == "pmov.wb.512" || // Added in 9.0
304 Name.starts_with("pmovsx") || // Added in 4.0
305 Name.starts_with("pmovzx") || // Added in 4.0
306 Name.starts_with("pmul.dq.") || // Added in 4.0
307 Name.starts_with("pmul.hr.sw.") || // Added in 7.0
308 Name.starts_with("pmulh.w.") || // Added in 7.0
309 Name.starts_with("pmulhu.w.") || // Added in 7.0
310 Name.starts_with("pmull.") || // Added in 4.0
311 Name.starts_with("pmultishift.qb.") || // Added in 8.0
312 Name.starts_with("pmulu.dq.") || // Added in 4.0
313 Name.starts_with("por.") || // Added in 3.9
314 Name.starts_with("prol.") || // Added in 8.0
315 Name.starts_with("prolv.") || // Added in 8.0
316 Name.starts_with("pror.") || // Added in 8.0
317 Name.starts_with("prorv.") || // Added in 8.0
318 Name.starts_with("pshuf.b.") || // Added in 4.0
319 Name.starts_with("pshuf.d.") || // Added in 3.9
320 Name.starts_with("pshufh.w.") || // Added in 3.9
321 Name.starts_with("pshufl.w.") || // Added in 3.9
322 Name.starts_with("psll.d") || // Added in 4.0
323 Name.starts_with("psll.q") || // Added in 4.0
324 Name.starts_with("psll.w") || // Added in 4.0
325 Name.starts_with("pslli") || // Added in 4.0
326 Name.starts_with("psllv") || // Added in 4.0
327 Name.starts_with("psra.d") || // Added in 4.0
328 Name.starts_with("psra.q") || // Added in 4.0
329 Name.starts_with("psra.w") || // Added in 4.0
330 Name.starts_with("psrai") || // Added in 4.0
331 Name.starts_with("psrav") || // Added in 4.0
332 Name.starts_with("psrl.d") || // Added in 4.0
333 Name.starts_with("psrl.q") || // Added in 4.0
334 Name.starts_with("psrl.w") || // Added in 4.0
335 Name.starts_with("psrli") || // Added in 4.0
336 Name.starts_with("psrlv") || // Added in 4.0
337 Name.starts_with("psub.") || // Added in 4.0
338 Name.starts_with("psubs.") || // Added in 8.0
339 Name.starts_with("psubus.") || // Added in 8.0
340 Name.starts_with("pternlog.") || // Added in 7.0
341 Name.starts_with("punpckh") || // Added in 3.9
342 Name.starts_with("punpckl") || // Added in 3.9
343 Name.starts_with("pxor.") || // Added in 3.9
344 Name.starts_with("shuf.f") || // Added in 6.0
345 Name.starts_with("shuf.i") || // Added in 6.0
346 Name.starts_with("shuf.p") || // Added in 4.0
347 Name.starts_with("sqrt.p") || // Added in 7.0
348 Name.starts_with("store.b.") || // Added in 3.9
349 Name.starts_with("store.d.") || // Added in 3.9
350 Name.starts_with("store.p") || // Added in 3.9
351 Name.starts_with("store.q.") || // Added in 3.9
352 Name.starts_with("store.w.") || // Added in 3.9
353 Name == "store.ss" || // Added in 7.0
354 Name.starts_with("storeu.") || // Added in 3.9
355 Name.starts_with("sub.p") || // Added in 7.0. 128/256 in 4.0
356 Name.starts_with("ucmp.") || // Added in 5.0
357 Name.starts_with("unpckh.") || // Added in 3.9
358 Name.starts_with("unpckl.") || // Added in 3.9
359 Name.starts_with("valign.") || // Added in 4.0
360 Name == "vcvtph2ps.128" || // Added in 11.0
361 Name == "vcvtph2ps.256" || // Added in 11.0
362 Name.starts_with("vextract") || // Added in 4.0
363 Name.starts_with("vfmadd.") || // Added in 7.0
364 Name.starts_with("vfmaddsub.") || // Added in 7.0
365 Name.starts_with("vfnmadd.") || // Added in 7.0
366 Name.starts_with("vfnmsub.") || // Added in 7.0
367 Name.starts_with("vpdpbusd.") || // Added in 7.0
368 Name.starts_with("vpdpbusds.") || // Added in 7.0
369 Name.starts_with("vpdpwssd.") || // Added in 7.0
370 Name.starts_with("vpdpwssds.") || // Added in 7.0
371 Name.starts_with("vpermi2var.") || // Added in 7.0
372 Name.starts_with("vpermil.p") || // Added in 3.9
373 Name.starts_with("vpermilvar.") || // Added in 4.0
374 Name.starts_with("vpermt2var.") || // Added in 7.0
375 Name.starts_with("vpmadd52") || // Added in 7.0
376 Name.starts_with("vpshld.") || // Added in 7.0
377 Name.starts_with("vpshldv.") || // Added in 8.0
378 Name.starts_with("vpshrd.") || // Added in 7.0
379 Name.starts_with("vpshrdv.") || // Added in 8.0
380 Name.starts_with("vpshufbitqmb.") || // Added in 8.0
381 Name.starts_with("xor.")); // Added in 3.9
382
383 if (Name.consume_front("mask3."))
384 // 'avx512.mask3.*'
385 return (Name.starts_with("vfmadd.") || // Added in 7.0
386 Name.starts_with("vfmaddsub.") || // Added in 7.0
387 Name.starts_with("vfmsub.") || // Added in 7.0
388 Name.starts_with("vfmsubadd.") || // Added in 7.0
389 Name.starts_with("vfnmsub.")); // Added in 7.0
390
391 if (Name.consume_front("maskz."))
392 // 'avx512.maskz.*'
393 return (Name.starts_with("pternlog.") || // Added in 7.0
394 Name.starts_with("vfmadd.") || // Added in 7.0
395 Name.starts_with("vfmaddsub.") || // Added in 7.0
396 Name.starts_with("vpdpbusd.") || // Added in 7.0
397 Name.starts_with("vpdpbusds.") || // Added in 7.0
398 Name.starts_with("vpdpwssd.") || // Added in 7.0
399 Name.starts_with("vpdpwssds.") || // Added in 7.0
400 Name.starts_with("vpermt2var.") || // Added in 7.0
401 Name.starts_with("vpmadd52") || // Added in 7.0
402 Name.starts_with("vpshldv.") || // Added in 8.0
403 Name.starts_with("vpshrdv.")); // Added in 8.0
404
405 // 'avx512.*'
406 return (Name == "movntdqa" || // Added in 5.0
407 Name == "pmul.dq.512" || // Added in 7.0
408 Name == "pmulu.dq.512" || // Added in 7.0
409 Name.starts_with("broadcastm") || // Added in 6.0
410 Name.starts_with("cmp.p") || // Added in 12.0
411 Name.starts_with("cvtb2mask.") || // Added in 7.0
412 Name.starts_with("cvtd2mask.") || // Added in 7.0
413 Name.starts_with("cvtmask2") || // Added in 5.0
414 Name.starts_with("cvtq2mask.") || // Added in 7.0
415 Name == "cvtusi2sd" || // Added in 7.0
416 Name.starts_with("cvtw2mask.") || // Added in 7.0
417 Name == "kand.w" || // Added in 7.0
418 Name == "kandn.w" || // Added in 7.0
419 Name == "knot.w" || // Added in 7.0
420 Name == "kor.w" || // Added in 7.0
421 Name == "kortestc.w" || // Added in 7.0
422 Name == "kortestz.w" || // Added in 7.0
423 Name.starts_with("kunpck") || // added in 6.0
424 Name == "kxnor.w" || // Added in 7.0
425 Name == "kxor.w" || // Added in 7.0
426 Name.starts_with("padds.") || // Added in 8.0
427 Name.starts_with("pbroadcast") || // Added in 3.9
428 Name.starts_with("prol") || // Added in 8.0
429 Name.starts_with("pror") || // Added in 8.0
430 Name.starts_with("psll.dq") || // Added in 3.9
431 Name.starts_with("psrl.dq") || // Added in 3.9
432 Name.starts_with("psubs.") || // Added in 8.0
433 Name.starts_with("ptestm") || // Added in 6.0
434 Name.starts_with("ptestnm") || // Added in 6.0
435 Name.starts_with("storent.") || // Added in 3.9
436 Name.starts_with("vbroadcast.s") || // Added in 7.0
437 Name.starts_with("vpshld.") || // Added in 8.0
438 Name.starts_with("vpshrd.")); // Added in 8.0
439 }
440
441 if (Name.consume_front("fma."))
442 return (Name.starts_with("vfmadd.") || // Added in 7.0
443 Name.starts_with("vfmsub.") || // Added in 7.0
444 Name.starts_with("vfmsubadd.") || // Added in 7.0
445 Name.starts_with("vfnmadd.") || // Added in 7.0
446 Name.starts_with("vfnmsub.")); // Added in 7.0
447
448 if (Name.consume_front("fma4."))
449 return Name.starts_with("vfmadd.s"); // Added in 7.0
450
451 if (Name.consume_front("sse."))
452 return (Name == "add.ss" || // Added in 4.0
453 Name == "cvtsi2ss" || // Added in 7.0
454 Name == "cvtsi642ss" || // Added in 7.0
455 Name == "div.ss" || // Added in 4.0
456 Name == "mul.ss" || // Added in 4.0
457 Name.starts_with("sqrt.p") || // Added in 7.0
458 Name == "sqrt.ss" || // Added in 7.0
459 Name.starts_with("storeu.") || // Added in 3.9
460 Name == "sub.ss"); // Added in 4.0
461
462 if (Name.consume_front("sse2."))
463 return (Name == "add.sd" || // Added in 4.0
464 Name == "cvtdq2pd" || // Added in 3.9
465 Name == "cvtdq2ps" || // Added in 7.0
466 Name == "cvtps2pd" || // Added in 3.9
467 Name == "cvtsi2sd" || // Added in 7.0
468 Name == "cvtsi642sd" || // Added in 7.0
469 Name == "cvtss2sd" || // Added in 7.0
470 Name == "div.sd" || // Added in 4.0
471 Name == "mul.sd" || // Added in 4.0
472 Name.starts_with("padds.") || // Added in 8.0
473 Name.starts_with("paddus.") || // Added in 8.0
474 Name.starts_with("pcmpeq.") || // Added in 3.1
475 Name.starts_with("pcmpgt.") || // Added in 3.1
476 Name == "pmaxs.w" || // Added in 3.9
477 Name == "pmaxu.b" || // Added in 3.9
478 Name == "pmins.w" || // Added in 3.9
479 Name == "pminu.b" || // Added in 3.9
480 Name == "pmulu.dq" || // Added in 7.0
481 Name.starts_with("pshuf") || // Added in 3.9
482 Name.starts_with("psll.dq") || // Added in 3.7
483 Name.starts_with("psrl.dq") || // Added in 3.7
484 Name.starts_with("psubs.") || // Added in 8.0
485 Name.starts_with("psubus.") || // Added in 8.0
486 Name.starts_with("sqrt.p") || // Added in 7.0
487 Name == "sqrt.sd" || // Added in 7.0
488 Name == "storel.dq" || // Added in 3.9
489 Name.starts_with("storeu.") || // Added in 3.9
490 Name == "sub.sd"); // Added in 4.0
491
492 if (Name.consume_front("sse41."))
493 return (Name.starts_with("blendp") || // Added in 3.7
494 Name == "movntdqa" || // Added in 5.0
495 Name == "pblendw" || // Added in 3.7
496 Name == "pmaxsb" || // Added in 3.9
497 Name == "pmaxsd" || // Added in 3.9
498 Name == "pmaxud" || // Added in 3.9
499 Name == "pmaxuw" || // Added in 3.9
500 Name == "pminsb" || // Added in 3.9
501 Name == "pminsd" || // Added in 3.9
502 Name == "pminud" || // Added in 3.9
503 Name == "pminuw" || // Added in 3.9
504 Name.starts_with("pmovsx") || // Added in 3.8
505 Name.starts_with("pmovzx") || // Added in 3.9
506 Name == "pmuldq"); // Added in 7.0
507
508 if (Name.consume_front("sse42."))
509 return Name == "crc32.64.8"; // Added in 3.4
510
511 if (Name.consume_front("sse4a."))
512 return Name.starts_with("movnt."); // Added in 3.9
513
514 if (Name.consume_front("ssse3."))
515 return (Name == "pabs.b.128" || // Added in 6.0
516 Name == "pabs.d.128" || // Added in 6.0
517 Name == "pabs.w.128"); // Added in 6.0
518
519 if (Name.consume_front("xop."))
520 return (Name == "vpcmov" || // Added in 3.8
521 Name == "vpcmov.256" || // Added in 5.0
522 Name.starts_with("vpcom") || // Added in 3.2, Updated in 9.0
523 Name.starts_with("vprot")); // Added in 8.0
524
525 return (Name == "addcarry.u32" || // Added in 8.0
526 Name == "addcarry.u64" || // Added in 8.0
527 Name == "addcarryx.u32" || // Added in 8.0
528 Name == "addcarryx.u64" || // Added in 8.0
529 Name == "subborrow.u32" || // Added in 8.0
530 Name == "subborrow.u64" || // Added in 8.0
531 Name.starts_with("vcvtph2ps.")); // Added in 11.0
532}
533
535 Function *&NewFn) {
536 // Only handle intrinsics that start with "x86.".
537 if (!Name.consume_front("x86."))
538 return false;
539
540 if (shouldUpgradeX86Intrinsic(F, Name)) {
541 NewFn = nullptr;
542 return true;
543 }
544
545 if (Name == "rdtscp") { // Added in 8.0
546 // If this intrinsic has 0 operands, it's the new version.
547 if (F->getFunctionType()->getNumParams() == 0)
548 return false;
549
550 rename(F);
551 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
552 Intrinsic::x86_rdtscp);
553 return true;
554 }
555
556  Intrinsic::ID ID;
557
558  // SSE4.1 ptest functions may have an old signature.
559  if (Name.consume_front("sse41.ptest")) { // Added in 3.2
560    ID = StringSwitch<Intrinsic::ID>(Name)
561             .Case("c", Intrinsic::x86_sse41_ptestc)
562             .Case("z", Intrinsic::x86_sse41_ptestz)
563             .Case("nzc", Intrinsic::x86_sse41_ptestnzc)
564             .Default(Intrinsic::not_intrinsic);
565    if (ID != Intrinsic::not_intrinsic)
566      return upgradePTESTIntrinsic(F, ID, NewFn);
567
568 return false;
569 }
570
571 // Several blend and other instructions with masks used the wrong number of
572 // bits.
573
574 // Added in 3.6
576 .Case("sse41.insertps", Intrinsic::x86_sse41_insertps)
577 .Case("sse41.dppd", Intrinsic::x86_sse41_dppd)
578 .Case("sse41.dpps", Intrinsic::x86_sse41_dpps)
579 .Case("sse41.mpsadbw", Intrinsic::x86_sse41_mpsadbw)
580 .Case("avx.dp.ps.256", Intrinsic::x86_avx_dp_ps_256)
581 .Case("avx2.mpsadbw", Intrinsic::x86_avx2_mpsadbw)
584 return upgradeX86IntrinsicsWith8BitMask(F, ID, NewFn);
585
586 if (Name.consume_front("avx512.")) {
587 if (Name.consume_front("mask.cmp.")) {
588 // Added in 7.0
590 .Case("pd.128", Intrinsic::x86_avx512_mask_cmp_pd_128)
591 .Case("pd.256", Intrinsic::x86_avx512_mask_cmp_pd_256)
592 .Case("pd.512", Intrinsic::x86_avx512_mask_cmp_pd_512)
593 .Case("ps.128", Intrinsic::x86_avx512_mask_cmp_ps_128)
594 .Case("ps.256", Intrinsic::x86_avx512_mask_cmp_ps_256)
595 .Case("ps.512", Intrinsic::x86_avx512_mask_cmp_ps_512)
598 return upgradeX86MaskedFPCompare(F, ID, NewFn);
599 } else if (Name.starts_with("vpdpbusd.") ||
600 Name.starts_with("vpdpbusds.")) {
601 // Added in 21.1
603 .Case("vpdpbusd.128", Intrinsic::x86_avx512_vpdpbusd_128)
604 .Case("vpdpbusd.256", Intrinsic::x86_avx512_vpdpbusd_256)
605 .Case("vpdpbusd.512", Intrinsic::x86_avx512_vpdpbusd_512)
606 .Case("vpdpbusds.128", Intrinsic::x86_avx512_vpdpbusds_128)
607 .Case("vpdpbusds.256", Intrinsic::x86_avx512_vpdpbusds_256)
608 .Case("vpdpbusds.512", Intrinsic::x86_avx512_vpdpbusds_512)
611 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
612 } else if (Name.starts_with("vpdpwssd.") ||
613 Name.starts_with("vpdpwssds.")) {
614 // Added in 21.1
616 .Case("vpdpwssd.128", Intrinsic::x86_avx512_vpdpwssd_128)
617 .Case("vpdpwssd.256", Intrinsic::x86_avx512_vpdpwssd_256)
618 .Case("vpdpwssd.512", Intrinsic::x86_avx512_vpdpwssd_512)
619 .Case("vpdpwssds.128", Intrinsic::x86_avx512_vpdpwssds_128)
620 .Case("vpdpwssds.256", Intrinsic::x86_avx512_vpdpwssds_256)
621 .Case("vpdpwssds.512", Intrinsic::x86_avx512_vpdpwssds_512)
624 return upgradeX86MultiplyAddWords(F, ID, NewFn);
625 }
626 return false; // No other 'x86.avx512.*'.
627 }
628
629 if (Name.consume_front("avx2.")) {
630 if (Name.consume_front("vpdpb")) {
631 // Added in 21.1
633 .Case("ssd.128", Intrinsic::x86_avx2_vpdpbssd_128)
634 .Case("ssd.256", Intrinsic::x86_avx2_vpdpbssd_256)
635 .Case("ssds.128", Intrinsic::x86_avx2_vpdpbssds_128)
636 .Case("ssds.256", Intrinsic::x86_avx2_vpdpbssds_256)
637 .Case("sud.128", Intrinsic::x86_avx2_vpdpbsud_128)
638 .Case("sud.256", Intrinsic::x86_avx2_vpdpbsud_256)
639 .Case("suds.128", Intrinsic::x86_avx2_vpdpbsuds_128)
640 .Case("suds.256", Intrinsic::x86_avx2_vpdpbsuds_256)
641 .Case("uud.128", Intrinsic::x86_avx2_vpdpbuud_128)
642 .Case("uud.256", Intrinsic::x86_avx2_vpdpbuud_256)
643 .Case("uuds.128", Intrinsic::x86_avx2_vpdpbuuds_128)
644 .Case("uuds.256", Intrinsic::x86_avx2_vpdpbuuds_256)
647 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
648 } else if (Name.consume_front("vpdpw")) {
649 // Added in 21.1
651 .Case("sud.128", Intrinsic::x86_avx2_vpdpwsud_128)
652 .Case("sud.256", Intrinsic::x86_avx2_vpdpwsud_256)
653 .Case("suds.128", Intrinsic::x86_avx2_vpdpwsuds_128)
654 .Case("suds.256", Intrinsic::x86_avx2_vpdpwsuds_256)
655 .Case("usd.128", Intrinsic::x86_avx2_vpdpwusd_128)
656 .Case("usd.256", Intrinsic::x86_avx2_vpdpwusd_256)
657 .Case("usds.128", Intrinsic::x86_avx2_vpdpwusds_128)
658 .Case("usds.256", Intrinsic::x86_avx2_vpdpwusds_256)
659 .Case("uud.128", Intrinsic::x86_avx2_vpdpwuud_128)
660 .Case("uud.256", Intrinsic::x86_avx2_vpdpwuud_256)
661 .Case("uuds.128", Intrinsic::x86_avx2_vpdpwuuds_128)
662 .Case("uuds.256", Intrinsic::x86_avx2_vpdpwuuds_256)
665 return upgradeX86MultiplyAddWords(F, ID, NewFn);
666 }
667 return false; // No other 'x86.avx2.*'
668 }
669
670 if (Name.consume_front("avx10.")) {
671 if (Name.consume_front("vpdpb")) {
672 // Added in 21.1
674 .Case("ssd.512", Intrinsic::x86_avx10_vpdpbssd_512)
675 .Case("ssds.512", Intrinsic::x86_avx10_vpdpbssds_512)
676 .Case("sud.512", Intrinsic::x86_avx10_vpdpbsud_512)
677 .Case("suds.512", Intrinsic::x86_avx10_vpdpbsuds_512)
678 .Case("uud.512", Intrinsic::x86_avx10_vpdpbuud_512)
679 .Case("uuds.512", Intrinsic::x86_avx10_vpdpbuuds_512)
682 return upgradeX86MultiplyAddBytes(F, ID, NewFn);
683 } else if (Name.consume_front("vpdpw")) {
685 .Case("sud.512", Intrinsic::x86_avx10_vpdpwsud_512)
686 .Case("suds.512", Intrinsic::x86_avx10_vpdpwsuds_512)
687 .Case("usd.512", Intrinsic::x86_avx10_vpdpwusd_512)
688 .Case("usds.512", Intrinsic::x86_avx10_vpdpwusds_512)
689 .Case("uud.512", Intrinsic::x86_avx10_vpdpwuud_512)
690 .Case("uuds.512", Intrinsic::x86_avx10_vpdpwuuds_512)
693 return upgradeX86MultiplyAddWords(F, ID, NewFn);
694 }
695 return false; // No other 'x86.avx10.*'
696 }
697
698 if (Name.consume_front("avx512bf16.")) {
699 // Added in 9.0
701 .Case("cvtne2ps2bf16.128",
702 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128)
703 .Case("cvtne2ps2bf16.256",
704 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256)
705 .Case("cvtne2ps2bf16.512",
706 Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512)
707 .Case("mask.cvtneps2bf16.128",
708 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
709 .Case("cvtneps2bf16.256",
710 Intrinsic::x86_avx512bf16_cvtneps2bf16_256)
711 .Case("cvtneps2bf16.512",
712 Intrinsic::x86_avx512bf16_cvtneps2bf16_512)
715 return upgradeX86BF16Intrinsic(F, ID, NewFn);
716
717 // Added in 9.0
719 .Case("dpbf16ps.128", Intrinsic::x86_avx512bf16_dpbf16ps_128)
720 .Case("dpbf16ps.256", Intrinsic::x86_avx512bf16_dpbf16ps_256)
721 .Case("dpbf16ps.512", Intrinsic::x86_avx512bf16_dpbf16ps_512)
724 return upgradeX86BF16DPIntrinsic(F, ID, NewFn);
725 return false; // No other 'x86.avx512bf16.*'.
726 }
727
728 if (Name.consume_front("xop.")) {
730 if (Name.starts_with("vpermil2")) { // Added in 3.9
731 // Upgrade any XOP PERMIL2 index operand still using a float/double
732 // vector.
733 auto Idx = F->getFunctionType()->getParamType(2);
734 if (Idx->isFPOrFPVectorTy()) {
735 unsigned IdxSize = Idx->getPrimitiveSizeInBits();
736 unsigned EltSize = Idx->getScalarSizeInBits();
737 if (EltSize == 64 && IdxSize == 128)
738 ID = Intrinsic::x86_xop_vpermil2pd;
739 else if (EltSize == 32 && IdxSize == 128)
740 ID = Intrinsic::x86_xop_vpermil2ps;
741 else if (EltSize == 64 && IdxSize == 256)
742 ID = Intrinsic::x86_xop_vpermil2pd_256;
743 else
744 ID = Intrinsic::x86_xop_vpermil2ps_256;
745 }
746 } else if (F->arg_size() == 2)
747 // frcz.ss/sd may need to have an argument dropped. Added in 3.2
749 .Case("vfrcz.ss", Intrinsic::x86_xop_vfrcz_ss)
750 .Case("vfrcz.sd", Intrinsic::x86_xop_vfrcz_sd)
752
754 rename(F);
755 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
756 return true;
757 }
758 return false; // No other 'x86.xop.*'
759 }
760
761 if (Name == "seh.recoverfp") {
762 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
763 Intrinsic::eh_recoverfp);
764 return true;
765 }
766
767 return false;
768}
769
770// Upgrade ARM (IsArm) or AArch64 (!IsArm) intrinsic fns. Return true iff upgraded.
771// IsArm: 'arm.*', !IsArm: 'aarch64.*'.
772static bool upgradeArmOrAarch64IntrinsicFunction(bool IsArm, Function *F,
773                                                 StringRef Name,
774                                                 Function *&NewFn) {
775 if (Name.starts_with("rbit")) {
776 // '(arm|aarch64).rbit'.
778 F->getParent(), Intrinsic::bitreverse, F->arg_begin()->getType());
779 return true;
780 }
781
782 if (Name == "thread.pointer") {
783 // '(arm|aarch64).thread.pointer'.
785 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
786 return true;
787 }
788
789 bool Neon = Name.consume_front("neon.");
790 if (Neon) {
791 // '(arm|aarch64).neon.*'.
792    // Changed in 12.0: bfdot accepts v4bf16 and v8bf16 instead of v8i8 and
793 // v16i8 respectively.
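    // Sketch: for the v2f32 variant the two data operands become <4 x bfloat>
    // (OperandWidth / 16 lanes, see below) rather than <8 x i8>; the v4f32
    // variant likewise becomes <8 x bfloat> rather than <16 x i8>.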
794 if (Name.consume_front("bfdot.")) {
795 // (arm|aarch64).neon.bfdot.*'.
798 .Cases({"v2f32.v8i8", "v4f32.v16i8"},
799 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfdot
800 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfdot)
803 size_t OperandWidth = F->getReturnType()->getPrimitiveSizeInBits();
804 assert((OperandWidth == 64 || OperandWidth == 128) &&
805 "Unexpected operand width");
806 LLVMContext &Ctx = F->getParent()->getContext();
807 std::array<Type *, 2> Tys{
808 {F->getReturnType(),
809 FixedVectorType::get(Type::getBFloatTy(Ctx), OperandWidth / 16)}};
810 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
811 return true;
812 }
813 return false; // No other '(arm|aarch64).neon.bfdot.*'.
814 }
815
816 // Changed in 12.0: bfmmla, bfmlalb and bfmlalt are not polymorphic
817 // anymore and accept v8bf16 instead of v16i8.
818 if (Name.consume_front("bfm")) {
819 // (arm|aarch64).neon.bfm*'.
820 if (Name.consume_back(".v4f32.v16i8")) {
821 // (arm|aarch64).neon.bfm*.v4f32.v16i8'.
824 .Case("mla",
825 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmmla
826 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmmla)
827 .Case("lalb",
828 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalb
829 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalb)
830 .Case("lalt",
831 IsArm ? (Intrinsic::ID)Intrinsic::arm_neon_bfmlalt
832 : (Intrinsic::ID)Intrinsic::aarch64_neon_bfmlalt)
835 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
836 return true;
837 }
838 return false; // No other '(arm|aarch64).neon.bfm*.v16i8'.
839 }
840      return false; // No other '(arm|aarch64).neon.bfm*'.
841 }
842 // Continue on to Aarch64 Neon or Arm Neon.
843 }
844 // Continue on to Arm or Aarch64.
845
846 if (IsArm) {
847 // 'arm.*'.
848 if (Neon) {
849 // 'arm.neon.*'.
851 .StartsWith("vclz.", Intrinsic::ctlz)
852 .StartsWith("vcnt.", Intrinsic::ctpop)
853 .StartsWith("vqadds.", Intrinsic::sadd_sat)
854 .StartsWith("vqaddu.", Intrinsic::uadd_sat)
855 .StartsWith("vqsubs.", Intrinsic::ssub_sat)
856 .StartsWith("vqsubu.", Intrinsic::usub_sat)
857 .StartsWith("vrinta.", Intrinsic::round)
858 .StartsWith("vrintn.", Intrinsic::roundeven)
859 .StartsWith("vrintm.", Intrinsic::floor)
860 .StartsWith("vrintp.", Intrinsic::ceil)
861 .StartsWith("vrintx.", Intrinsic::rint)
862 .StartsWith("vrintz.", Intrinsic::trunc)
865 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
866 F->arg_begin()->getType());
867 return true;
868 }
869
870 if (Name.consume_front("vst")) {
871 // 'arm.neon.vst*'.
872 static const Regex vstRegex("^([1234]|[234]lane)\\.v[a-z0-9]*$");
874 if (vstRegex.match(Name, &Groups)) {
875 static const Intrinsic::ID StoreInts[] = {
876 Intrinsic::arm_neon_vst1, Intrinsic::arm_neon_vst2,
877 Intrinsic::arm_neon_vst3, Intrinsic::arm_neon_vst4};
878
879 static const Intrinsic::ID StoreLaneInts[] = {
880 Intrinsic::arm_neon_vst2lane, Intrinsic::arm_neon_vst3lane,
881 Intrinsic::arm_neon_vst4lane};
882
883 auto fArgs = F->getFunctionType()->params();
884 Type *Tys[] = {fArgs[0], fArgs[1]};
885 if (Groups[1].size() == 1)
887 F->getParent(), StoreInts[fArgs.size() - 3], Tys);
888 else
890 F->getParent(), StoreLaneInts[fArgs.size() - 5], Tys);
891 return true;
892 }
893 return false; // No other 'arm.neon.vst*'.
894 }
895
896 return false; // No other 'arm.neon.*'.
897 }
898
899 if (Name.consume_front("mve.")) {
900 // 'arm.mve.*'.
901 if (Name == "vctp64") {
902 if (cast<FixedVectorType>(F->getReturnType())->getNumElements() == 4) {
903 // A vctp64 returning a v4i1 is converted to return a v2i1. Rename
904 // the function and deal with it below in UpgradeIntrinsicCall.
905 rename(F);
906 return true;
907 }
908 return false; // Not 'arm.mve.vctp64'.
909 }
910
911 if (Name.starts_with("vrintn.v")) {
913 F->getParent(), Intrinsic::roundeven, F->arg_begin()->getType());
914 return true;
915 }
916
917 // These too are changed to accept a v2i1 instead of the old v4i1.
918 if (Name.consume_back(".v4i1")) {
919 // 'arm.mve.*.v4i1'.
920 if (Name.consume_back(".predicated.v2i64.v4i32"))
921 // 'arm.mve.*.predicated.v2i64.v4i32.v4i1'
922 return Name == "mull.int" || Name == "vqdmull";
923
924 if (Name.consume_back(".v2i64")) {
925 // 'arm.mve.*.v2i64.v4i1'
926 bool IsGather = Name.consume_front("vldr.gather.");
927 if (IsGather || Name.consume_front("vstr.scatter.")) {
928 if (Name.consume_front("base.")) {
929 // Optional 'wb.' prefix.
930 Name.consume_front("wb.");
931 // 'arm.mve.(vldr.gather|vstr.scatter).base.(wb.)?
932 // predicated.v2i64.v2i64.v4i1'.
933 return Name == "predicated.v2i64";
934 }
935
936 if (Name.consume_front("offset.predicated."))
937 return Name == (IsGather ? "v2i64.p0i64" : "p0i64.v2i64") ||
938 Name == (IsGather ? "v2i64.p0" : "p0.v2i64");
939
940 // No other 'arm.mve.(vldr.gather|vstr.scatter).*.v2i64.v4i1'.
941 return false;
942 }
943
944 return false; // No other 'arm.mve.*.v2i64.v4i1'.
945 }
946 return false; // No other 'arm.mve.*.v4i1'.
947 }
948 return false; // No other 'arm.mve.*'.
949 }
950
951 if (Name.consume_front("cde.vcx")) {
952 // 'arm.cde.vcx*'.
953 if (Name.consume_back(".predicated.v2i64.v4i1"))
954 // 'arm.cde.vcx*.predicated.v2i64.v4i1'.
955 return Name == "1q" || Name == "1qa" || Name == "2q" || Name == "2qa" ||
956 Name == "3q" || Name == "3qa";
957
958 return false; // No other 'arm.cde.vcx*'.
959 }
960 } else {
961 // 'aarch64.*'.
962 if (Neon) {
963 // 'aarch64.neon.*'.
965 .StartsWith("frintn", Intrinsic::roundeven)
966 .StartsWith("rbit", Intrinsic::bitreverse)
969 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
970 F->arg_begin()->getType());
971 return true;
972 }
973
974 if (Name.starts_with("addp")) {
975 // 'aarch64.neon.addp*'.
976 if (F->arg_size() != 2)
977 return false; // Invalid IR.
978 VectorType *Ty = dyn_cast<VectorType>(F->getReturnType());
979 if (Ty && Ty->getElementType()->isFloatingPointTy()) {
981 F->getParent(), Intrinsic::aarch64_neon_faddp, Ty);
982 return true;
983 }
984 }
985
986      // Changed in 20.0: bfcvt/bfcvtn/bfcvtn2 have been replaced with fptrunc.
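      // Sketch: a call such as @llvm.aarch64.neon.bfcvt(float %x) is expected
      // to be rewritten as a plain 'fptrunc float %x to bfloat' later, in the
      // call-upgrade path (NewFn stays null here for that reason).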
987 if (Name.starts_with("bfcvt")) {
988 NewFn = nullptr;
989 return true;
990 }
991
992 return false; // No other 'aarch64.neon.*'.
993 }
994 if (Name.consume_front("sve.")) {
995 // 'aarch64.sve.*'.
996 if (Name.consume_front("bf")) {
997 if (Name.consume_back(".lane")) {
998 // 'aarch64.sve.bf*.lane'.
1001 .Case("dot", Intrinsic::aarch64_sve_bfdot_lane_v2)
1002 .Case("mlalb", Intrinsic::aarch64_sve_bfmlalb_lane_v2)
1003 .Case("mlalt", Intrinsic::aarch64_sve_bfmlalt_lane_v2)
1006 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1007 return true;
1008 }
1009 return false; // No other 'aarch64.sve.bf*.lane'.
1010 }
1011 return false; // No other 'aarch64.sve.bf*'.
1012 }
1013
1014 // 'aarch64.sve.fcvt.bf16f32' || 'aarch64.sve.fcvtnt.bf16f32'
1015 if (Name == "fcvt.bf16f32" || Name == "fcvtnt.bf16f32") {
1016 NewFn = nullptr;
1017 return true;
1018 }
1019
1020 if (Name.consume_front("addqv")) {
1021 // 'aarch64.sve.addqv'.
1022 if (!F->getReturnType()->isFPOrFPVectorTy())
1023 return false;
1024
1025 auto Args = F->getFunctionType()->params();
1026 Type *Tys[] = {F->getReturnType(), Args[1]};
1028 F->getParent(), Intrinsic::aarch64_sve_faddqv, Tys);
1029 return true;
1030 }
1031
1032 if (Name.consume_front("ld")) {
1033 // 'aarch64.sve.ld*'.
1034 static const Regex LdRegex("^[234](.nxv[a-z0-9]+|$)");
1035 if (LdRegex.match(Name)) {
1036 Type *ScalarTy =
1037 cast<VectorType>(F->getReturnType())->getElementType();
1038 ElementCount EC =
1039 cast<VectorType>(F->arg_begin()->getType())->getElementCount();
1040 Type *Ty = VectorType::get(ScalarTy, EC);
1041 static const Intrinsic::ID LoadIDs[] = {
1042 Intrinsic::aarch64_sve_ld2_sret,
1043 Intrinsic::aarch64_sve_ld3_sret,
1044 Intrinsic::aarch64_sve_ld4_sret,
1045 };
1046 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1047 LoadIDs[Name[0] - '2'], Ty);
1048 return true;
1049 }
1050 return false; // No other 'aarch64.sve.ld*'.
1051 }
1052
1053 if (Name.consume_front("tuple.")) {
1054 // 'aarch64.sve.tuple.*'.
1055 if (Name.starts_with("get")) {
1056 // 'aarch64.sve.tuple.get*'.
1057 Type *Tys[] = {F->getReturnType(), F->arg_begin()->getType()};
1059 F->getParent(), Intrinsic::vector_extract, Tys);
1060 return true;
1061 }
1062
1063 if (Name.starts_with("set")) {
1064 // 'aarch64.sve.tuple.set*'.
1065 auto Args = F->getFunctionType()->params();
1066 Type *Tys[] = {Args[0], Args[2], Args[1]};
1068 F->getParent(), Intrinsic::vector_insert, Tys);
1069 return true;
1070 }
1071
1072 static const Regex CreateTupleRegex("^create[234](.nxv[a-z0-9]+|$)");
1073 if (CreateTupleRegex.match(Name)) {
1074 // 'aarch64.sve.tuple.create*'.
1075 auto Args = F->getFunctionType()->params();
1076 Type *Tys[] = {F->getReturnType(), Args[1]};
1078 F->getParent(), Intrinsic::vector_insert, Tys);
1079 return true;
1080 }
1081 return false; // No other 'aarch64.sve.tuple.*'.
1082 }
1083
1084 if (Name.starts_with("rev.nxv")) {
1085 // 'aarch64.sve.rev.<Ty>'
1087 F->getParent(), Intrinsic::vector_reverse, F->getReturnType());
1088 return true;
1089 }
1090
1091 return false; // No other 'aarch64.sve.*'.
1092 }
1093 }
1094 return false; // No other 'arm.*', 'aarch64.*'.
1095}
1096
1098 StringRef Name) {
1099 if (Name.consume_front("cp.async.bulk.tensor.g2s.")) {
1102 .Case("im2col.3d",
1103 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d)
1104 .Case("im2col.4d",
1105 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d)
1106 .Case("im2col.5d",
1107 Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d)
1108 .Case("tile.1d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d)
1109 .Case("tile.2d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d)
1110 .Case("tile.3d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d)
1111 .Case("tile.4d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d)
1112 .Case("tile.5d", Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d)
1114
1116 return ID;
1117
1118 // These intrinsics may need upgrade for two reasons:
1119 // (1) When the address-space of the first argument is shared[AS=3]
1120 // (and we upgrade it to use shared_cluster address-space[AS=7])
1121 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1123 return ID;
1124
1125 // (2) When there are only two boolean flag arguments at the end:
1126 //
1127 // The last three parameters of the older version of these
1128 // intrinsics are: arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag
1129 //
1130 // The newer version reads as:
1131 // arg1, arg2, .. i64 ch, i1 mc_flag, i1 ch_flag, i32 cta_group_flag
1132 //
1133 // So, when the type of the [N-3]rd argument is "not i1", then
1134 // it is the older version and we need to upgrade.
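    // Sketch of the expected rewrite: a call whose trailing operands are
    // (..., i64 %ch, i1 %mc, i1 %cache) is rebuilt with an extra i32
    // cta_group operand appended (assumed to default to 0) when upgraded.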
1135 size_t FlagStartIndex = F->getFunctionType()->getNumParams() - 3;
1136 Type *ArgType = F->getFunctionType()->getParamType(FlagStartIndex);
1137 if (!ArgType->isIntegerTy(1))
1138 return ID;
1139 }
1140
1142}
1143
1145 StringRef Name) {
1146 if (Name.consume_front("mapa.shared.cluster"))
1147 if (F->getReturnType()->getPointerAddressSpace() ==
1149 return Intrinsic::nvvm_mapa_shared_cluster;
1150
1151 if (Name.consume_front("cp.async.bulk.")) {
1154 .Case("global.to.shared.cluster",
1155 Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster)
1156 .Case("shared.cta.to.cluster",
1157 Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster)
1159
1161 if (F->getArg(0)->getType()->getPointerAddressSpace() ==
1163 return ID;
1164 }
1165
1167}
1168
1170 if (Name.consume_front("fma.rn."))
1171 return StringSwitch<Intrinsic::ID>(Name)
1172 .Case("bf16", Intrinsic::nvvm_fma_rn_bf16)
1173 .Case("bf16x2", Intrinsic::nvvm_fma_rn_bf16x2)
1174 .Case("relu.bf16", Intrinsic::nvvm_fma_rn_relu_bf16)
1175 .Case("relu.bf16x2", Intrinsic::nvvm_fma_rn_relu_bf16x2)
1177
1178 if (Name.consume_front("fmax."))
1179 return StringSwitch<Intrinsic::ID>(Name)
1180 .Case("bf16", Intrinsic::nvvm_fmax_bf16)
1181 .Case("bf16x2", Intrinsic::nvvm_fmax_bf16x2)
1182 .Case("ftz.bf16", Intrinsic::nvvm_fmax_ftz_bf16)
1183 .Case("ftz.bf16x2", Intrinsic::nvvm_fmax_ftz_bf16x2)
1184 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmax_ftz_nan_bf16)
1185 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmax_ftz_nan_bf16x2)
1186 .Case("ftz.nan.xorsign.abs.bf16",
1187 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16)
1188 .Case("ftz.nan.xorsign.abs.bf16x2",
1189 Intrinsic::nvvm_fmax_ftz_nan_xorsign_abs_bf16x2)
1190 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16)
1191 .Case("ftz.xorsign.abs.bf16x2",
1192 Intrinsic::nvvm_fmax_ftz_xorsign_abs_bf16x2)
1193 .Case("nan.bf16", Intrinsic::nvvm_fmax_nan_bf16)
1194 .Case("nan.bf16x2", Intrinsic::nvvm_fmax_nan_bf16x2)
1195 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16)
1196 .Case("nan.xorsign.abs.bf16x2",
1197 Intrinsic::nvvm_fmax_nan_xorsign_abs_bf16x2)
1198 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmax_xorsign_abs_bf16)
1199 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmax_xorsign_abs_bf16x2)
1201
1202 if (Name.consume_front("fmin."))
1203 return StringSwitch<Intrinsic::ID>(Name)
1204 .Case("bf16", Intrinsic::nvvm_fmin_bf16)
1205 .Case("bf16x2", Intrinsic::nvvm_fmin_bf16x2)
1206 .Case("ftz.bf16", Intrinsic::nvvm_fmin_ftz_bf16)
1207 .Case("ftz.bf16x2", Intrinsic::nvvm_fmin_ftz_bf16x2)
1208 .Case("ftz.nan.bf16", Intrinsic::nvvm_fmin_ftz_nan_bf16)
1209 .Case("ftz.nan.bf16x2", Intrinsic::nvvm_fmin_ftz_nan_bf16x2)
1210 .Case("ftz.nan.xorsign.abs.bf16",
1211 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16)
1212 .Case("ftz.nan.xorsign.abs.bf16x2",
1213 Intrinsic::nvvm_fmin_ftz_nan_xorsign_abs_bf16x2)
1214 .Case("ftz.xorsign.abs.bf16", Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16)
1215 .Case("ftz.xorsign.abs.bf16x2",
1216 Intrinsic::nvvm_fmin_ftz_xorsign_abs_bf16x2)
1217 .Case("nan.bf16", Intrinsic::nvvm_fmin_nan_bf16)
1218 .Case("nan.bf16x2", Intrinsic::nvvm_fmin_nan_bf16x2)
1219 .Case("nan.xorsign.abs.bf16", Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16)
1220 .Case("nan.xorsign.abs.bf16x2",
1221 Intrinsic::nvvm_fmin_nan_xorsign_abs_bf16x2)
1222 .Case("xorsign.abs.bf16", Intrinsic::nvvm_fmin_xorsign_abs_bf16)
1223 .Case("xorsign.abs.bf16x2", Intrinsic::nvvm_fmin_xorsign_abs_bf16x2)
1225
1226 if (Name.consume_front("neg."))
1227 return StringSwitch<Intrinsic::ID>(Name)
1228 .Case("bf16", Intrinsic::nvvm_neg_bf16)
1229 .Case("bf16x2", Intrinsic::nvvm_neg_bf16x2)
1231
1233}
1234
1235static bool consumeNVVMPtrAddrSpace(StringRef &Name) {
1236  return Name.consume_front("local") || Name.consume_front("shared") ||
1237 Name.consume_front("global") || Name.consume_front("constant") ||
1238 Name.consume_front("param");
1239}
1240
1241static bool convertIntrinsicValidType(StringRef Name,
1242                                      const FunctionType *FuncTy) {
1243 Type *HalfTy = Type::getHalfTy(FuncTy->getContext());
1244 if (Name.starts_with("to.fp16")) {
1245 return CastInst::castIsValid(Instruction::FPTrunc, FuncTy->getParamType(0),
1246 HalfTy) &&
1247 CastInst::castIsValid(Instruction::BitCast, HalfTy,
1248 FuncTy->getReturnType());
1249 }
1250
1251 if (Name.starts_with("from.fp16")) {
1252 return CastInst::castIsValid(Instruction::BitCast, FuncTy->getParamType(0),
1253 HalfTy) &&
1254 CastInst::castIsValid(Instruction::FPExt, HalfTy,
1255 FuncTy->getReturnType());
1256 }
1257
1258 return false;
1259}
1260
1261static bool upgradeIntrinsicFunction1(Function *F, Function *&NewFn,
1262                                      bool CanUpgradeDebugIntrinsicsToRecords) {
1263 assert(F && "Illegal to upgrade a non-existent Function.");
1264
1265 StringRef Name = F->getName();
1266
1267 // Quickly eliminate it, if it's not a candidate.
1268 if (!Name.consume_front("llvm.") || Name.empty())
1269 return false;
1270
1271 switch (Name[0]) {
1272 default: break;
1273 case 'a': {
1274 bool IsArm = Name.consume_front("arm.");
1275 if (IsArm || Name.consume_front("aarch64.")) {
1276 if (upgradeArmOrAarch64IntrinsicFunction(IsArm, F, Name, NewFn))
1277 return true;
1278 break;
1279 }
1280
1281 if (Name.consume_front("amdgcn.")) {
1282 if (Name == "alignbit") {
1283 // Target specific intrinsic became redundant
1285 F->getParent(), Intrinsic::fshr, {F->getReturnType()});
1286 return true;
1287 }
1288
1289 if (Name.consume_front("atomic.")) {
1290 if (Name.starts_with("inc") || Name.starts_with("dec") ||
1291 Name.starts_with("cond.sub") || Name.starts_with("csub")) {
1292 // These were replaced with atomicrmw uinc_wrap, udec_wrap, usub_cond
1293 // and usub_sat so there's no new declaration.
1294 NewFn = nullptr;
1295 return true;
1296 }
1297 break; // No other 'amdgcn.atomic.*'
1298 }
1299
1300 // Legacy wmma iu intrinsics without the optional clamp operand.
1301 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8 &&
1302 F->arg_size() == 7) {
1303 NewFn = nullptr;
1304 return true;
1305 }
1306 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8 &&
1307 F->arg_size() == 8) {
1308 NewFn = nullptr;
1309 return true;
1310 }
1311
1312 if (Name.consume_front("ds.") || Name.consume_front("global.atomic.") ||
1313 Name.consume_front("flat.atomic.")) {
1314 if (Name.starts_with("fadd") ||
1315 // FIXME: We should also remove fmin.num and fmax.num intrinsics.
1316 (Name.starts_with("fmin") && !Name.starts_with("fmin.num")) ||
1317 (Name.starts_with("fmax") && !Name.starts_with("fmax.num"))) {
1318 // Replaced with atomicrmw fadd/fmin/fmax, so there's no new
1319 // declaration.
1320 NewFn = nullptr;
1321 return true;
1322 }
1323 }
1324
1325 if (Name.starts_with("ldexp.")) {
1326 // Target specific intrinsic became redundant
1328 F->getParent(), Intrinsic::ldexp,
1329 {F->getReturnType(), F->getArg(1)->getType()});
1330 return true;
1331 }
1332 break; // No other 'amdgcn.*'
1333 }
1334
1335 break;
1336 }
1337 case 'c': {
1338 if (F->arg_size() == 1) {
1339 if (Name.consume_front("convert.")) {
1340 if (convertIntrinsicValidType(Name, F->getFunctionType())) {
1341 NewFn = nullptr;
1342 return true;
1343 }
1344 }
1345
1347 .StartsWith("ctlz.", Intrinsic::ctlz)
1348 .StartsWith("cttz.", Intrinsic::cttz)
1351 rename(F);
1352 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1353 F->arg_begin()->getType());
1354 return true;
1355 }
1356 }
1357
1358 if (F->arg_size() == 2 && Name == "coro.end") {
1359 rename(F);
1360 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1361 Intrinsic::coro_end);
1362 return true;
1363 }
1364
1365 break;
1366 }
1367 case 'd':
1368 if (Name.consume_front("dbg.")) {
1369 // Mark debug intrinsics for upgrade to new debug format.
1370 if (CanUpgradeDebugIntrinsicsToRecords) {
1371 if (Name == "addr" || Name == "value" || Name == "assign" ||
1372 Name == "declare" || Name == "label") {
1373 // There's no function to replace these with.
1374 NewFn = nullptr;
1375 // But we do want these to get upgraded.
1376 return true;
1377 }
1378 }
1379 // Update llvm.dbg.addr intrinsics even in "new debug mode"; they'll get
1380 // converted to DbgVariableRecords later.
1381 if (Name == "addr" || (Name == "value" && F->arg_size() == 4)) {
1382 rename(F);
1383 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1384 Intrinsic::dbg_value);
1385 return true;
1386 }
1387 break; // No other 'dbg.*'.
1388 }
1389 break;
1390 case 'e':
1391 if (Name.consume_front("experimental.vector.")) {
1394 // Skip over extract.last.active, otherwise it will be 'upgraded'
1395 // to a regular vector extract which is a different operation.
1396 .StartsWith("extract.last.active.", Intrinsic::not_intrinsic)
1397 .StartsWith("extract.", Intrinsic::vector_extract)
1398 .StartsWith("insert.", Intrinsic::vector_insert)
1399 .StartsWith("reverse.", Intrinsic::vector_reverse)
1400 .StartsWith("interleave2.", Intrinsic::vector_interleave2)
1401 .StartsWith("deinterleave2.", Intrinsic::vector_deinterleave2)
1402 .StartsWith("partial.reduce.add",
1403 Intrinsic::vector_partial_reduce_add)
1406 const auto *FT = F->getFunctionType();
1408 if (ID == Intrinsic::vector_extract ||
1409 ID == Intrinsic::vector_interleave2)
1410 // Extracting overloads the return type.
1411 Tys.push_back(FT->getReturnType());
1412 if (ID != Intrinsic::vector_interleave2)
1413 Tys.push_back(FT->getParamType(0));
1414 if (ID == Intrinsic::vector_insert ||
1415 ID == Intrinsic::vector_partial_reduce_add)
1416 // Inserting overloads the inserted type.
1417 Tys.push_back(FT->getParamType(1));
1418 rename(F);
1419 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
1420 return true;
1421 }
1422
1423 if (Name.consume_front("reduce.")) {
1425 static const Regex R("^([a-z]+)\\.[a-z][0-9]+");
1426 if (R.match(Name, &Groups))
1428 .Case("add", Intrinsic::vector_reduce_add)
1429 .Case("mul", Intrinsic::vector_reduce_mul)
1430 .Case("and", Intrinsic::vector_reduce_and)
1431 .Case("or", Intrinsic::vector_reduce_or)
1432 .Case("xor", Intrinsic::vector_reduce_xor)
1433 .Case("smax", Intrinsic::vector_reduce_smax)
1434 .Case("smin", Intrinsic::vector_reduce_smin)
1435 .Case("umax", Intrinsic::vector_reduce_umax)
1436 .Case("umin", Intrinsic::vector_reduce_umin)
1437 .Case("fmax", Intrinsic::vector_reduce_fmax)
1438 .Case("fmin", Intrinsic::vector_reduce_fmin)
1440
1441 bool V2 = false;
1443 static const Regex R2("^v2\\.([a-z]+)\\.[fi][0-9]+");
1444 Groups.clear();
1445 V2 = true;
1446 if (R2.match(Name, &Groups))
1448 .Case("fadd", Intrinsic::vector_reduce_fadd)
1449 .Case("fmul", Intrinsic::vector_reduce_fmul)
1451 }
1453 rename(F);
1454 auto Args = F->getFunctionType()->params();
1455 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1456 {Args[V2 ? 1 : 0]});
1457 return true;
1458 }
1459        break; // No other 'experimental.vector.reduce.*'.
1460 }
1461
1462 if (Name.consume_front("splice"))
1463 return true;
1464 break; // No other 'experimental.vector.*'.
1465 }
1466 if (Name.consume_front("experimental.stepvector.")) {
1467 Intrinsic::ID ID = Intrinsic::stepvector;
1468 rename(F);
1470 F->getParent(), ID, F->getFunctionType()->getReturnType());
1471 return true;
1472 }
1473 break; // No other 'e*'.
1474 case 'f':
1475 if (Name.starts_with("flt.rounds")) {
1476 rename(F);
1477 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1478 Intrinsic::get_rounding);
1479 return true;
1480 }
1481 break;
1482 case 'i':
1483 if (Name.starts_with("invariant.group.barrier")) {
1484 // Rename invariant.group.barrier to launder.invariant.group
1485 auto Args = F->getFunctionType()->params();
1486 Type* ObjectPtr[1] = {Args[0]};
1487 rename(F);
1489 F->getParent(), Intrinsic::launder_invariant_group, ObjectPtr);
1490 return true;
1491 }
1492 break;
1493 case 'l':
1494 if ((Name.starts_with("lifetime.start") ||
1495 Name.starts_with("lifetime.end")) &&
1496 F->arg_size() == 2) {
1497 Intrinsic::ID IID = Name.starts_with("lifetime.start")
1498 ? Intrinsic::lifetime_start
1499 : Intrinsic::lifetime_end;
1500 rename(F);
1501 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1502 F->getArg(0)->getType());
1503 return true;
1504 }
1505 break;
1506 case 'm': {
1507    // Upgrade the memory intrinsics (memcpy/memmove/memset) that take an
1508    // explicit alignment parameter to the form that embeds the alignment as
1509    // an attribute of the pointer args.
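    // Illustrative sketch (opaque-pointer spelling assumed): the old form
    //   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i32 4, i1 false)
    // becomes
    //   call void @llvm.memcpy.p0.p0.i64(ptr align 4 %d, ptr align 4 %s, i64 %n, i1 false)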
1510 if (unsigned ID = StringSwitch<unsigned>(Name)
1511 .StartsWith("memcpy.", Intrinsic::memcpy)
1512 .StartsWith("memmove.", Intrinsic::memmove)
1513 .Default(0)) {
1514 if (F->arg_size() == 5) {
1515 rename(F);
1516 // Get the types of dest, src, and len
1517 ArrayRef<Type *> ParamTypes =
1518 F->getFunctionType()->params().slice(0, 3);
1519 NewFn =
1520 Intrinsic::getOrInsertDeclaration(F->getParent(), ID, ParamTypes);
1521 return true;
1522 }
1523 }
1524 if (Name.starts_with("memset.") && F->arg_size() == 5) {
1525 rename(F);
1526 // Get the types of dest, and len
1527 const auto *FT = F->getFunctionType();
1528 Type *ParamTypes[2] = {
1529 FT->getParamType(0), // Dest
1530 FT->getParamType(2) // len
1531 };
1532 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1533 Intrinsic::memset, ParamTypes);
1534 return true;
1535 }
1536
1537 unsigned MaskedID =
1539 .StartsWith("masked.load", Intrinsic::masked_load)
1540 .StartsWith("masked.gather", Intrinsic::masked_gather)
1541 .StartsWith("masked.store", Intrinsic::masked_store)
1542 .StartsWith("masked.scatter", Intrinsic::masked_scatter)
1543 .Default(0);
1544 if (MaskedID && F->arg_size() == 4) {
1545 rename(F);
1546 if (MaskedID == Intrinsic::masked_load ||
1547 MaskedID == Intrinsic::masked_gather) {
1549 F->getParent(), MaskedID,
1550 {F->getReturnType(), F->getArg(0)->getType()});
1551 return true;
1552 }
1554 F->getParent(), MaskedID,
1555 {F->getArg(0)->getType(), F->getArg(1)->getType()});
1556 return true;
1557 }
1558 break;
1559 }
1560 case 'n': {
1561 if (Name.consume_front("nvvm.")) {
1562 // Check for nvvm intrinsics corresponding exactly to an LLVM intrinsic.
1563 if (F->arg_size() == 1) {
1564 Intrinsic::ID IID =
1566 .Cases({"brev32", "brev64"}, Intrinsic::bitreverse)
1567 .Case("clz.i", Intrinsic::ctlz)
1568 .Case("popc.i", Intrinsic::ctpop)
1570 if (IID != Intrinsic::not_intrinsic) {
1571 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1572 {F->getReturnType()});
1573 return true;
1574 }
1575 } else if (F->arg_size() == 2) {
1576 Intrinsic::ID IID =
1578 .Cases({"max.s", "max.i", "max.ll"}, Intrinsic::smax)
1579 .Cases({"min.s", "min.i", "min.ll"}, Intrinsic::smin)
1580 .Cases({"max.us", "max.ui", "max.ull"}, Intrinsic::umax)
1581 .Cases({"min.us", "min.ui", "min.ull"}, Intrinsic::umin)
1583 if (IID != Intrinsic::not_intrinsic) {
1584 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID,
1585 {F->getReturnType()});
1586 return true;
1587 }
1588 }
1589
1590 // Check for nvvm intrinsics that need a return type adjustment.
1591 if (!F->getReturnType()->getScalarType()->isBFloatTy()) {
1593 if (IID != Intrinsic::not_intrinsic) {
1594 NewFn = nullptr;
1595 return true;
1596 }
1597 }
1598
1599 // Upgrade Distributed Shared Memory Intrinsics
1601 if (IID != Intrinsic::not_intrinsic) {
1602 rename(F);
1603 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1604 return true;
1605 }
1606
1607 // Upgrade TMA copy G2S Intrinsics
1609 if (IID != Intrinsic::not_intrinsic) {
1610 rename(F);
1611 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
1612 return true;
1613 }
1614
1615 // The following nvvm intrinsics correspond exactly to an LLVM idiom, but
1616 // not to an intrinsic alone. We expand them in UpgradeIntrinsicCall.
1617 //
1618 // TODO: We could add lohi.i2d.
1619 bool Expand = false;
1620 if (Name.consume_front("abs."))
1621 // nvvm.abs.{i,ll,bf16,bf16x2}
1622 Expand =
1623 Name == "i" || Name == "ll" || Name == "bf16" || Name == "bf16x2";
1624 else if (Name.consume_front("fabs."))
1625 // nvvm.fabs.{f,ftz.f,d}
1626 Expand = Name == "f" || Name == "ftz.f" || Name == "d";
1627 else if (Name.consume_front("ex2.approx."))
1628 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
1629 Expand =
1630 Name == "f" || Name == "ftz.f" || Name == "d" || Name == "f16x2";
1631 else if (Name.consume_front("atomic.load."))
1632 // nvvm.atomic.load.add.{f32,f64}.p
1633 // nvvm.atomic.load.{inc,dec}.32.p
1634 Expand = StringSwitch<bool>(Name)
1635 .StartsWith("add.f32.p", true)
1636 .StartsWith("add.f64.p", true)
1637 .StartsWith("inc.32.p", true)
1638 .StartsWith("dec.32.p", true)
1639 .Default(false);
1640 else if (Name.consume_front("bitcast."))
1641 // nvvm.bitcast.{f2i,i2f,ll2d,d2ll}
1642 Expand =
1643 Name == "f2i" || Name == "i2f" || Name == "ll2d" || Name == "d2ll";
1644 else if (Name.consume_front("rotate."))
1645 // nvvm.rotate.{b32,b64,right.b64}
1646 Expand = Name == "b32" || Name == "b64" || Name == "right.b64";
1647 else if (Name.consume_front("ptr.gen.to."))
1648 // nvvm.ptr.gen.to.{local,shared,global,constant,param}
1649 Expand = consumeNVVMPtrAddrSpace(Name);
1650 else if (Name.consume_front("ptr."))
1651 // nvvm.ptr.{local,shared,global,constant,param}.to.gen
1652 Expand = consumeNVVMPtrAddrSpace(Name) && Name.starts_with(".to.gen");
1653 else if (Name.consume_front("ldg.global."))
1654 // nvvm.ldg.global.{i,p,f}
1655 Expand = (Name.starts_with("i.") || Name.starts_with("f.") ||
1656 Name.starts_with("p."));
1657 else
1658 Expand = StringSwitch<bool>(Name)
1659 .Case("barrier0", true)
1660 .Case("barrier.n", true)
1661 .Case("barrier.sync.cnt", true)
1662 .Case("barrier.sync", true)
1663 .Case("barrier", true)
1664 .Case("bar.sync", true)
1665 .Case("barrier0.popc", true)
1666 .Case("barrier0.and", true)
1667 .Case("barrier0.or", true)
1668 .Case("clz.ll", true)
1669 .Case("popc.ll", true)
1670 .Case("h2f", true)
1671 .Case("swap.lo.hi.b64", true)
1672 .Case("tanh.approx.f32", true)
1673 .Default(false);
1674
1675 if (Expand) {
1676 NewFn = nullptr;
1677 return true;
1678 }
1679 break; // No other 'nvvm.*'.
1680 }
1681 break;
1682 }
1683 case 'o':
1684 if (Name.starts_with("objectsize.")) {
1685 Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() };
1686 if (F->arg_size() == 2 || F->arg_size() == 3) {
1687 rename(F);
1688 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(),
1689 Intrinsic::objectsize, Tys);
1690 return true;
1691 }
1692 }
1693 break;
1694
1695 case 'p':
1696 if (Name.starts_with("ptr.annotation.") && F->arg_size() == 4) {
1697 rename(F);
1698 NewFn = Intrinsic::getOrInsertDeclaration(
1699 F->getParent(), Intrinsic::ptr_annotation,
1700 {F->arg_begin()->getType(), F->getArg(1)->getType()});
1701 return true;
1702 }
1703 break;
1704
1705 case 'r': {
1706 if (Name.consume_front("riscv.")) {
1707 Intrinsic::ID ID;
1708 ID = StringSwitch<Intrinsic::ID>(Name)
1709 .Case("aes32dsi", Intrinsic::riscv_aes32dsi)
1710 .Case("aes32dsmi", Intrinsic::riscv_aes32dsmi)
1711 .Case("aes32esi", Intrinsic::riscv_aes32esi)
1712 .Case("aes32esmi", Intrinsic::riscv_aes32esmi)
1713 .Default(Intrinsic::not_intrinsic);
1714 if (ID != Intrinsic::not_intrinsic) {
1715 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32)) {
1716 rename(F);
1717 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1718 return true;
1719 }
1720 break; // No other applicable upgrades.
1721 }
1722
1723 ID = StringSwitch<Intrinsic::ID>(Name)
1724 .StartsWith("sm4ks", Intrinsic::riscv_sm4ks)
1725 .StartsWith("sm4ed", Intrinsic::riscv_sm4ed)
1726 .Default(Intrinsic::not_intrinsic);
1727 if (ID != Intrinsic::not_intrinsic) {
1728 if (!F->getFunctionType()->getParamType(2)->isIntegerTy(32) ||
1729 F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1730 rename(F);
1731 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1732 return true;
1733 }
1734 break; // No other applicable upgrades.
1735 }
1736
1737 ID = StringSwitch<Intrinsic::ID>(Name)
1738 .StartsWith("sha256sig0", Intrinsic::riscv_sha256sig0)
1739 .StartsWith("sha256sig1", Intrinsic::riscv_sha256sig1)
1740 .StartsWith("sha256sum0", Intrinsic::riscv_sha256sum0)
1741 .StartsWith("sha256sum1", Intrinsic::riscv_sha256sum1)
1742 .StartsWith("sm3p0", Intrinsic::riscv_sm3p0)
1743 .StartsWith("sm3p1", Intrinsic::riscv_sm3p1)
1744 .Default(Intrinsic::not_intrinsic);
1745 if (ID != Intrinsic::not_intrinsic) {
1746 if (F->getFunctionType()->getReturnType()->isIntegerTy(64)) {
1747 rename(F);
1748 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1749 return true;
1750 }
1751 break; // No other applicable upgrades.
1752 }
1753 break; // No other 'riscv.*' intrinsics
1754 }
1755 } break;
1756
1757 case 's':
1758 if (Name == "stackprotectorcheck") {
1759 NewFn = nullptr;
1760 return true;
1761 }
1762 break;
1763
1764 case 't':
1765 if (Name == "thread.pointer") {
1766 NewFn = Intrinsic::getOrInsertDeclaration(
1767 F->getParent(), Intrinsic::thread_pointer, F->getReturnType());
1768 return true;
1769 }
1770 break;
1771
1772 case 'v': {
1773 if (Name == "var.annotation" && F->arg_size() == 4) {
1774 rename(F);
1775 NewFn = Intrinsic::getOrInsertDeclaration(
1776 F->getParent(), Intrinsic::var_annotation,
1777 {{F->arg_begin()->getType(), F->getArg(1)->getType()}});
1778 return true;
1779 }
1780 if (Name.consume_front("vector.splice")) {
1781 if (Name.starts_with(".left") || Name.starts_with(".right"))
1782 break;
1783 return true;
1784 }
1785 break;
1786 }
1787
1788 case 'w':
1789 if (Name.consume_front("wasm.")) {
1790 Intrinsic::ID ID =
1791 StringSwitch<Intrinsic::ID>(Name)
1792 .StartsWith("fma.", Intrinsic::wasm_relaxed_madd)
1793 .StartsWith("fms.", Intrinsic::wasm_relaxed_nmadd)
1794 .StartsWith("laneselect.", Intrinsic::wasm_relaxed_laneselect)
1795 .Default(Intrinsic::not_intrinsic);
1796 if (ID != Intrinsic::not_intrinsic) {
1797 rename(F);
1798 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID,
1799 F->getReturnType());
1800 return true;
1801 }
1802
1803 if (Name.consume_front("dot.i8x16.i7x16.")) {
1804 Intrinsic::ID ID = StringSwitch<Intrinsic::ID>(Name)
1805 .Case("signed", Intrinsic::wasm_relaxed_dot_i8x16_i7x16_signed)
1806 .Case("add.signed",
1807 Intrinsic::wasm_relaxed_dot_i8x16_i7x16_add_signed)
1808 .Default(Intrinsic::not_intrinsic);
1809 if (ID != Intrinsic::not_intrinsic) {
1810 rename(F);
1811 NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), ID);
1812 return true;
1813 }
1814 break; // No other 'wasm.dot.i8x16.i7x16.*'.
1815 }
1816 break; // No other 'wasm.*'.
1817 }
1818 break;
1819
1820 case 'x':
1821 if (upgradeX86IntrinsicFunction(F, Name, NewFn))
1822 return true;
1823 }
1824
1825 auto *ST = dyn_cast<StructType>(F->getReturnType());
1826 if (ST && (!ST->isLiteral() || ST->isPacked()) &&
1827 F->getIntrinsicID() != Intrinsic::not_intrinsic) {
1828 // Replace return type with literal non-packed struct. Only do this for
1829 // intrinsics declared to return a struct, not for intrinsics with
1830 // overloaded return type, in which case the exact struct type will be
1831 // mangled into the name.
1832 SmallVector<Intrinsic::IITDescriptor> Desc;
1833 Intrinsic::getIntrinsicInfoTableEntries(F->getIntrinsicID(), Desc);
1834 if (Desc.front().Kind == Intrinsic::IITDescriptor::Struct) {
1835 auto *FT = F->getFunctionType();
1836 auto *NewST = StructType::get(ST->getContext(), ST->elements());
1837 auto *NewFT = FunctionType::get(NewST, FT->params(), FT->isVarArg());
1838 std::string Name = F->getName().str();
1839 rename(F);
1840 NewFn = Function::Create(NewFT, F->getLinkage(), F->getAddressSpace(),
1841 Name, F->getParent());
1842
1843 // The new function may also need remangling.
1844 if (auto Result = llvm::Intrinsic::remangleIntrinsicFunction(NewFn))
1845 NewFn = *Result;
1846 return true;
1847 }
1848 }
1849
1850 // Remangle our intrinsic since we upgrade the mangling
1851 auto Result = llvm::Intrinsic::remangleIntrinsicFunction(F);
1852 if (Result != std::nullopt) {
1853 NewFn = *Result;
1854 return true;
1855 }
1856
1857 // This may not belong here. This function is effectively being overloaded
1858 // to both detect an intrinsic which needs upgrading, and to provide the
1859 // upgraded form of the intrinsic. We should perhaps have two separate
1860 // functions for this.
1861 return false;
1862}
1863
1864 bool llvm::UpgradeIntrinsicFunction(Function *F, Function *&NewFn,
1865 bool CanUpgradeDebugIntrinsicsToRecords) {
1866 NewFn = nullptr;
1867 bool Upgraded =
1868 upgradeIntrinsicFunction1(F, NewFn, CanUpgradeDebugIntrinsicsToRecords);
1869
1870 // Upgrade intrinsic attributes. This does not change the function.
1871 if (NewFn)
1872 F = NewFn;
1873 if (Intrinsic::ID id = F->getIntrinsicID()) {
1874 // Only do this if the intrinsic signature is valid.
1875 SmallVector<Type *> OverloadTys;
1876 if (Intrinsic::getIntrinsicSignature(id, F->getFunctionType(), OverloadTys))
1877 F->setAttributes(
1878 Intrinsic::getAttributes(F->getContext(), id, F->getFunctionType()));
1879 }
1880 return Upgraded;
1881}
1882
1883 GlobalVariable *llvm::UpgradeGlobalVariable(GlobalVariable *GV) {
1884 if (!(GV->hasName() && (GV->getName() == "llvm.global_ctors" ||
1885 GV->getName() == "llvm.global_dtors")) ||
1886 !GV->hasInitializer())
1887 return nullptr;
1888 auto *ATy = dyn_cast<ArrayType>(GV->getValueType());
1889 if (!ATy)
1890 return nullptr;
1891 auto *STy = dyn_cast<StructType>(ATy->getElementType());
1892 if (!STy || STy->getNumElements() != 2)
1893 return nullptr;
1894
1895 LLVMContext &C = GV->getContext();
1896 IRBuilder<> IRB(C);
1897 auto EltTy = StructType::get(STy->getElementType(0), STy->getElementType(1),
1898 IRB.getPtrTy());
1899 Constant *Init = GV->getInitializer();
1900 unsigned N = Init->getNumOperands();
1901 std::vector<Constant *> NewCtors(N);
1902 for (unsigned i = 0; i != N; ++i) {
1903 auto Ctor = cast<Constant>(Init->getOperand(i));
1904 NewCtors[i] = ConstantStruct::get(EltTy, Ctor->getAggregateElement(0u),
1905 Ctor->getAggregateElement(1),
1906 Constant::getNullValue(IRB.getPtrTy()));
1907 }
1908 Constant *NewInit = ConstantArray::get(ArrayType::get(EltTy, N), NewCtors);
1909
1910 return new GlobalVariable(NewInit->getType(), false, GV->getLinkage(),
1911 NewInit, GV->getName());
1912}
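// Illustrative sketch (not taken from this file): a two-field initializer entry
//   { i32 65535, ptr @ctor }
// is rebuilt in the three-field form with a null associated-data pointer:
//   { i32 65535, ptr @ctor, ptr null }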
1913
1914// Handles upgrading SSE2/AVX2/AVX512BW PSLLDQ intrinsics by converting them
1915 // to byte shuffles.
1916 static Value *upgradeX86PSLLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1917 unsigned Shift) {
1918 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1919 unsigned NumElts = ResultTy->getNumElements() * 8;
1920
1921 // Bitcast from a 64-bit element type to a byte element type.
1922 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1923 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1924
1925 // We'll be shuffling in zeroes.
1926 Value *Res = Constant::getNullValue(VecTy);
1927
1928 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1929 // we'll just return the zero vector.
1930 if (Shift < 16) {
1931 int Idxs[64];
1932 // 256/512-bit version is split into 2/4 16-byte lanes.
1933 for (unsigned l = 0; l != NumElts; l += 16)
1934 for (unsigned i = 0; i != 16; ++i) {
1935 unsigned Idx = NumElts + i - Shift;
1936 if (Idx < NumElts)
1937 Idx -= NumElts - 16; // end of lane, switch operand.
1938 Idxs[l + i] = Idx + l;
1939 }
1940
1941 Res = Builder.CreateShuffleVector(Res, Op, ArrayRef(Idxs, NumElts));
1942 }
1943
1944 // Bitcast back to a 64-bit element type.
1945 return Builder.CreateBitCast(Res, ResultTy, "cast");
1946}
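// Illustrative sketch (not taken from this file): a 128-bit pslldq by 3 picks the
// last three bytes of the zero vector followed by bytes 0..12 of the input, i.e.
// the input shifted left by 3 bytes with zeros filling the low bytes.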
1947
1948// Handles upgrading SSE2/AVX2/AVX512BW PSRLDQ intrinsics by converting them
1949 // to byte shuffles.
1950 static Value *upgradeX86PSRLDQIntrinsics(IRBuilder<> &Builder, Value *Op,
1951 unsigned Shift) {
1952 auto *ResultTy = cast<FixedVectorType>(Op->getType());
1953 unsigned NumElts = ResultTy->getNumElements() * 8;
1954
1955 // Bitcast from a 64-bit element type to a byte element type.
1956 Type *VecTy = FixedVectorType::get(Builder.getInt8Ty(), NumElts);
1957 Op = Builder.CreateBitCast(Op, VecTy, "cast");
1958
1959 // We'll be shuffling in zeroes.
1960 Value *Res = Constant::getNullValue(VecTy);
1961
1962 // If shift is less than 16, emit a shuffle to move the bytes. Otherwise,
1963 // we'll just return the zero vector.
1964 if (Shift < 16) {
1965 int Idxs[64];
1966 // 256/512-bit version is split into 2/4 16-byte lanes.
1967 for (unsigned l = 0; l != NumElts; l += 16)
1968 for (unsigned i = 0; i != 16; ++i) {
1969 unsigned Idx = i + Shift;
1970 if (Idx >= 16)
1971 Idx += NumElts - 16; // end of lane, switch operand.
1972 Idxs[l + i] = Idx + l;
1973 }
1974
1975 Res = Builder.CreateShuffleVector(Op, Res, ArrayRef(Idxs, NumElts));
1976 }
1977
1978 // Bitcast back to a 64-bit element type.
1979 return Builder.CreateBitCast(Res, ResultTy, "cast");
1980}
1981
1982static Value *getX86MaskVec(IRBuilder<> &Builder, Value *Mask,
1983 unsigned NumElts) {
1984 assert(isPowerOf2_32(NumElts) && "Expected power-of-2 mask elements");
1985 llvm::VectorType *MaskTy = llvm::FixedVectorType::get(
1986 Builder.getInt1Ty(), cast<IntegerType>(Mask->getType())->getBitWidth());
1987 Mask = Builder.CreateBitCast(Mask, MaskTy);
1988
1989 // If we have less than 8 elements (1, 2 or 4), then the starting mask was an
1990 // i8 and we need to extract down to the right number of elements.
1991 if (NumElts <= 4) {
1992 int Indices[4];
1993 for (unsigned i = 0; i != NumElts; ++i)
1994 Indices[i] = i;
1995 Mask = Builder.CreateShuffleVector(Mask, Mask, ArrayRef(Indices, NumElts),
1996 "extract");
1997 }
1998
1999 return Mask;
2000}
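// Illustrative sketch (not taken from this file): an i8 mask feeding a 4-element
// operation is bitcast to <8 x i1> and then shuffled down to <4 x i1>, keeping
// only the low mask bits that correspond to real vector lanes.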
2001
2002static Value *emitX86Select(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2003 Value *Op1) {
2004 // If the mask is all ones just emit the first operation.
2005 if (const auto *C = dyn_cast<Constant>(Mask))
2006 if (C->isAllOnesValue())
2007 return Op0;
2008
2009 Mask = getX86MaskVec(Builder, Mask,
2010 cast<FixedVectorType>(Op0->getType())->getNumElements());
2011 return Builder.CreateSelect(Mask, Op0, Op1);
2012}
2013
2014static Value *emitX86ScalarSelect(IRBuilder<> &Builder, Value *Mask, Value *Op0,
2015 Value *Op1) {
2016 // If the mask is all ones just emit the first operation.
2017 if (const auto *C = dyn_cast<Constant>(Mask))
2018 if (C->isAllOnesValue())
2019 return Op0;
2020
2021 auto *MaskTy = FixedVectorType::get(Builder.getInt1Ty(),
2022 Mask->getType()->getIntegerBitWidth());
2023 Mask = Builder.CreateBitCast(Mask, MaskTy);
2024 Mask = Builder.CreateExtractElement(Mask, (uint64_t)0);
2025 return Builder.CreateSelect(Mask, Op0, Op1);
2026}
2027
2028// Handle autoupgrade for masked PALIGNR and VALIGND/Q intrinsics.
2029// PALIGNR handles large immediates by shifting while VALIGN masks the immediate
2030 // so we need to handle both cases. VALIGN also doesn't have 128-bit lanes.
2031 static Value *upgradeX86ALIGNIntrinsics(IRBuilder<> &Builder, Value *Op0,
2032 Value *Op1, Value *Shift,
2033 Value *Passthru, Value *Mask,
2034 bool IsVALIGN) {
2035 unsigned ShiftVal = cast<llvm::ConstantInt>(Shift)->getZExtValue();
2036
2037 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2038 assert((IsVALIGN || NumElts % 16 == 0) && "Illegal NumElts for PALIGNR!");
2039 assert((!IsVALIGN || NumElts <= 16) && "NumElts too large for VALIGN!");
2040 assert(isPowerOf2_32(NumElts) && "NumElts not a power of 2!");
2041
2042 // Mask the immediate for VALIGN.
2043 if (IsVALIGN)
2044 ShiftVal &= (NumElts - 1);
2045
2046 // If palignr is shifting the pair of vectors more than the size of two
2047 // lanes, emit zero.
2048 if (ShiftVal >= 32)
2049 return llvm::Constant::getNullValue(Op0->getType());
2050
2051 // If palignr is shifting the pair of input vectors more than one lane,
2052 // but less than two lanes, convert to shifting in zeroes.
2053 if (ShiftVal > 16) {
2054 ShiftVal -= 16;
2055 Op1 = Op0;
2056 Op0 = llvm::Constant::getNullValue(Op0->getType());
2057 }
2058
2059 int Indices[64];
2060 // 256-bit palignr operates on 128-bit lanes so we need to handle that
2061 for (unsigned l = 0; l < NumElts; l += 16) {
2062 for (unsigned i = 0; i != 16; ++i) {
2063 unsigned Idx = ShiftVal + i;
2064 if (!IsVALIGN && Idx >= 16) // Disable wrap for VALIGN.
2065 Idx += NumElts - 16; // End of lane, switch operand.
2066 Indices[l + i] = Idx + l;
2067 }
2068 }
2069
2070 Value *Align = Builder.CreateShuffleVector(
2071 Op1, Op0, ArrayRef(Indices, NumElts), "palignr");
2072
2073 return emitX86Select(Builder, Mask, Align, Passthru);
2074}
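// Illustrative sketch (not taken from this file): a 128-bit palignr with an
// immediate of 4 yields bytes 4..15 of Op1 followed by bytes 0..3 of Op0,
// matching a right shift by 4 bytes across the concatenated pair.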
2075
2076 static Value *upgradeX86VPERMT2Intrinsics(IRBuilder<> &Builder, CallBase &CI,
2077 bool ZeroMask, bool IndexForm) {
2078 Type *Ty = CI.getType();
2079 unsigned VecWidth = Ty->getPrimitiveSizeInBits();
2080 unsigned EltWidth = Ty->getScalarSizeInBits();
2081 bool IsFloat = Ty->isFPOrFPVectorTy();
2082 Intrinsic::ID IID;
2083 if (VecWidth == 128 && EltWidth == 32 && IsFloat)
2084 IID = Intrinsic::x86_avx512_vpermi2var_ps_128;
2085 else if (VecWidth == 128 && EltWidth == 32 && !IsFloat)
2086 IID = Intrinsic::x86_avx512_vpermi2var_d_128;
2087 else if (VecWidth == 128 && EltWidth == 64 && IsFloat)
2088 IID = Intrinsic::x86_avx512_vpermi2var_pd_128;
2089 else if (VecWidth == 128 && EltWidth == 64 && !IsFloat)
2090 IID = Intrinsic::x86_avx512_vpermi2var_q_128;
2091 else if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2092 IID = Intrinsic::x86_avx512_vpermi2var_ps_256;
2093 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2094 IID = Intrinsic::x86_avx512_vpermi2var_d_256;
2095 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2096 IID = Intrinsic::x86_avx512_vpermi2var_pd_256;
2097 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2098 IID = Intrinsic::x86_avx512_vpermi2var_q_256;
2099 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2100 IID = Intrinsic::x86_avx512_vpermi2var_ps_512;
2101 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2102 IID = Intrinsic::x86_avx512_vpermi2var_d_512;
2103 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2104 IID = Intrinsic::x86_avx512_vpermi2var_pd_512;
2105 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2106 IID = Intrinsic::x86_avx512_vpermi2var_q_512;
2107 else if (VecWidth == 128 && EltWidth == 16)
2108 IID = Intrinsic::x86_avx512_vpermi2var_hi_128;
2109 else if (VecWidth == 256 && EltWidth == 16)
2110 IID = Intrinsic::x86_avx512_vpermi2var_hi_256;
2111 else if (VecWidth == 512 && EltWidth == 16)
2112 IID = Intrinsic::x86_avx512_vpermi2var_hi_512;
2113 else if (VecWidth == 128 && EltWidth == 8)
2114 IID = Intrinsic::x86_avx512_vpermi2var_qi_128;
2115 else if (VecWidth == 256 && EltWidth == 8)
2116 IID = Intrinsic::x86_avx512_vpermi2var_qi_256;
2117 else if (VecWidth == 512 && EltWidth == 8)
2118 IID = Intrinsic::x86_avx512_vpermi2var_qi_512;
2119 else
2120 llvm_unreachable("Unexpected intrinsic");
2121
2122 Value *Args[] = { CI.getArgOperand(0) , CI.getArgOperand(1),
2123 CI.getArgOperand(2) };
2124
2125 // If this isn't index form we need to swap operand 0 and 1.
2126 if (!IndexForm)
2127 std::swap(Args[0], Args[1]);
2128
2129 Value *V = Builder.CreateIntrinsic(IID, Args);
2130 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(Ty)
2131 : Builder.CreateBitCast(CI.getArgOperand(1),
2132 Ty);
2133 return emitX86Select(Builder, CI.getArgOperand(3), V, PassThru);
2134}
2135
2136 static Value *upgradeX86BinaryIntrinsics(IRBuilder<> &Builder, CallBase &CI,
2137 Intrinsic::ID IID) {
2138 Type *Ty = CI.getType();
2139 Value *Op0 = CI.getOperand(0);
2140 Value *Op1 = CI.getOperand(1);
2141 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1});
2142
2143 if (CI.arg_size() == 4) { // For masked intrinsics.
2144 Value *VecSrc = CI.getOperand(2);
2145 Value *Mask = CI.getOperand(3);
2146 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2147 }
2148 return Res;
2149}
2150
2151 static Value *upgradeX86Rotate(IRBuilder<> &Builder, CallBase &CI,
2152 bool IsRotateRight) {
2153 Type *Ty = CI.getType();
2154 Value *Src = CI.getArgOperand(0);
2155 Value *Amt = CI.getArgOperand(1);
2156
2157 // Amount may be scalar immediate, in which case create a splat vector.
2158 // Funnel shift amounts are treated as modulo, and the types are all power-of-2,
2159 // so we only care about the lowest log2 bits anyway.
2160 if (Amt->getType() != Ty) {
2161 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2162 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2163 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2164 }
2165
2166 Intrinsic::ID IID = IsRotateRight ? Intrinsic::fshr : Intrinsic::fshl;
2167 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Src, Src, Amt});
2168
2169 if (CI.arg_size() == 4) { // For masked intrinsics.
2170 Value *VecSrc = CI.getOperand(2);
2171 Value *Mask = CI.getOperand(3);
2172 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2173 }
2174 return Res;
2175}
2176
2177static Value *upgradeX86vpcom(IRBuilder<> &Builder, CallBase &CI, unsigned Imm,
2178 bool IsSigned) {
2179 Type *Ty = CI.getType();
2180 Value *LHS = CI.getArgOperand(0);
2181 Value *RHS = CI.getArgOperand(1);
2182
2183 CmpInst::Predicate Pred;
2184 switch (Imm) {
2185 case 0x0:
2186 Pred = IsSigned ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT;
2187 break;
2188 case 0x1:
2189 Pred = IsSigned ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE;
2190 break;
2191 case 0x2:
2192 Pred = IsSigned ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT;
2193 break;
2194 case 0x3:
2195 Pred = IsSigned ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE;
2196 break;
2197 case 0x4:
2198 Pred = ICmpInst::ICMP_EQ;
2199 break;
2200 case 0x5:
2201 Pred = ICmpInst::ICMP_NE;
2202 break;
2203 case 0x6:
2204 return Constant::getNullValue(Ty); // FALSE
2205 case 0x7:
2206 return Constant::getAllOnesValue(Ty); // TRUE
2207 default:
2208 llvm_unreachable("Unknown XOP vpcom/vpcomu predicate");
2209 }
2210
2211 Value *Cmp = Builder.CreateICmp(Pred, LHS, RHS);
2212 Value *Ext = Builder.CreateSExt(Cmp, Ty);
2213 return Ext;
2214}
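// Illustrative sketch (not taken from this file): the signed form with immediate 0
// (e.g. xop.vpcomltb) becomes an "icmp slt" on the two operands followed by a sext
// of the i1 vector back to the original element type.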
2215
2216 static Value *upgradeX86ConcatShift(IRBuilder<> &Builder, CallBase &CI,
2217 bool IsShiftRight, bool ZeroMask) {
2218 Type *Ty = CI.getType();
2219 Value *Op0 = CI.getArgOperand(0);
2220 Value *Op1 = CI.getArgOperand(1);
2221 Value *Amt = CI.getArgOperand(2);
2222
2223 if (IsShiftRight)
2224 std::swap(Op0, Op1);
2225
2226 // Amount may be scalar immediate, in which case create a splat vector.
2227 // Funnel shift amounts are treated as modulo, and the types are all power-of-2,
2228 // so we only care about the lowest log2 bits anyway.
2229 if (Amt->getType() != Ty) {
2230 unsigned NumElts = cast<FixedVectorType>(Ty)->getNumElements();
2231 Amt = Builder.CreateIntCast(Amt, Ty->getScalarType(), false);
2232 Amt = Builder.CreateVectorSplat(NumElts, Amt);
2233 }
2234
2235 Intrinsic::ID IID = IsShiftRight ? Intrinsic::fshr : Intrinsic::fshl;
2236 Value *Res = Builder.CreateIntrinsic(IID, Ty, {Op0, Op1, Amt});
2237
2238 unsigned NumArgs = CI.arg_size();
2239 if (NumArgs >= 4) { // For masked intrinsics.
2240 Value *VecSrc = NumArgs == 5 ? CI.getArgOperand(3) :
2241 ZeroMask ? ConstantAggregateZero::get(CI.getType()) :
2242 CI.getArgOperand(0);
2243 Value *Mask = CI.getOperand(NumArgs - 1);
2244 Res = emitX86Select(Builder, Mask, Res, VecSrc);
2245 }
2246 return Res;
2247}
2248
2249 static Value *upgradeMaskedStore(IRBuilder<> &Builder, Value *Ptr, Value *Data,
2250 Value *Mask, bool Aligned) {
2251 const Align Alignment =
2252 Aligned
2253 ? Align(Data->getType()->getPrimitiveSizeInBits().getFixedValue() / 8)
2254 : Align(1);
2255
2256 // If the mask is all ones just emit a regular store.
2257 if (const auto *C = dyn_cast<Constant>(Mask))
2258 if (C->isAllOnesValue())
2259 return Builder.CreateAlignedStore(Data, Ptr, Alignment);
2260
2261 // Convert the mask from an integer type to a vector of i1.
2262 unsigned NumElts = cast<FixedVectorType>(Data->getType())->getNumElements();
2263 Mask = getX86MaskVec(Builder, Mask, NumElts);
2264 return Builder.CreateMaskedStore(Data, Ptr, Alignment, Mask);
2265}
2266
2267 static Value *upgradeMaskedLoad(IRBuilder<> &Builder, Value *Ptr,
2268 Value *Passthru, Value *Mask, bool Aligned) {
2269 Type *ValTy = Passthru->getType();
2270 const Align Alignment =
2271 Aligned
2272 ? Align(
2273 ValTy->getPrimitiveSizeInBits().getFixedValue() /
2274 8)
2275 : Align(1);
2276
2277 // If the mask is all ones just emit a regular load.
2278 if (const auto *C = dyn_cast<Constant>(Mask))
2279 if (C->isAllOnesValue())
2280 return Builder.CreateAlignedLoad(ValTy, Ptr, Alignment);
2281
2282 // Convert the mask from an integer type to a vector of i1.
2283 unsigned NumElts = cast<FixedVectorType>(ValTy)->getNumElements();
2284 Mask = getX86MaskVec(Builder, Mask, NumElts);
2285 return Builder.CreateMaskedLoad(ValTy, Ptr, Alignment, Mask, Passthru);
2286}
2287
2288static Value *upgradeAbs(IRBuilder<> &Builder, CallBase &CI) {
2289 Type *Ty = CI.getType();
2290 Value *Op0 = CI.getArgOperand(0);
2291 Value *Res = Builder.CreateIntrinsic(Intrinsic::abs, Ty,
2292 {Op0, Builder.getInt1(false)});
2293 if (CI.arg_size() == 3)
2294 Res = emitX86Select(Builder, CI.getArgOperand(2), Res, CI.getArgOperand(1));
2295 return Res;
2296}
2297
2298static Value *upgradePMULDQ(IRBuilder<> &Builder, CallBase &CI, bool IsSigned) {
2299 Type *Ty = CI.getType();
2300
2301 // Arguments have a vXi32 type so cast to vXi64.
2302 Value *LHS = Builder.CreateBitCast(CI.getArgOperand(0), Ty);
2303 Value *RHS = Builder.CreateBitCast(CI.getArgOperand(1), Ty);
2304
2305 if (IsSigned) {
2306 // Shift left then arithmetic shift right.
2307 Constant *ShiftAmt = ConstantInt::get(Ty, 32);
2308 LHS = Builder.CreateShl(LHS, ShiftAmt);
2309 LHS = Builder.CreateAShr(LHS, ShiftAmt);
2310 RHS = Builder.CreateShl(RHS, ShiftAmt);
2311 RHS = Builder.CreateAShr(RHS, ShiftAmt);
2312 } else {
2313 // Clear the upper bits.
2314 Constant *Mask = ConstantInt::get(Ty, 0xffffffff);
2315 LHS = Builder.CreateAnd(LHS, Mask);
2316 RHS = Builder.CreateAnd(RHS, Mask);
2317 }
2318
2319 Value *Res = Builder.CreateMul(LHS, RHS);
2320
2321 if (CI.arg_size() == 4)
2322 Res = emitX86Select(Builder, CI.getArgOperand(3), Res, CI.getArgOperand(2));
2323
2324 return Res;
2325}
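// Illustrative sketch (not taken from this file): for the signed pmuldq form the
// low 32 bits of each 64-bit lane are sign-extended in place with a shl/ashr pair
// before the multiply; the unsigned form instead masks each lane with 0xffffffff.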
2326
2327 // Apply a mask to a vector of i1s and make sure the result is at least 8 bits wide.
2328 static Value *applyX86MaskOn1BitsVec(IRBuilder<> &Builder, Value *Vec,
2329 Value *Mask) {
2330 unsigned NumElts = cast<FixedVectorType>(Vec->getType())->getNumElements();
2331 if (Mask) {
2332 const auto *C = dyn_cast<Constant>(Mask);
2333 if (!C || !C->isAllOnesValue())
2334 Vec = Builder.CreateAnd(Vec, getX86MaskVec(Builder, Mask, NumElts));
2335 }
2336
2337 if (NumElts < 8) {
2338 int Indices[8];
2339 for (unsigned i = 0; i != NumElts; ++i)
2340 Indices[i] = i;
2341 for (unsigned i = NumElts; i != 8; ++i)
2342 Indices[i] = NumElts + i % NumElts;
2343 Vec = Builder.CreateShuffleVector(Vec,
2344 Constant::getNullValue(Vec->getType()),
2345 Indices);
2346 }
2347 return Builder.CreateBitCast(Vec, Builder.getIntNTy(std::max(NumElts, 8U)));
2348}
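// Illustrative sketch (not taken from this file): a <4 x i1> compare result is
// padded with zero lanes up to <8 x i1> and then bitcast, so the caller always
// gets an integer mask of at least i8.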
2349
2350 static Value *upgradeMaskedCompare(IRBuilder<> &Builder, CallBase &CI,
2351 unsigned CC, bool Signed) {
2352 Value *Op0 = CI.getArgOperand(0);
2353 unsigned NumElts = cast<FixedVectorType>(Op0->getType())->getNumElements();
2354
2355 Value *Cmp;
2356 if (CC == 3) {
2357 Cmp = Constant::getNullValue(
2358 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2359 } else if (CC == 7) {
2360 Cmp = Constant::getAllOnesValue(
2361 FixedVectorType::get(Builder.getInt1Ty(), NumElts));
2362 } else {
2363 ICmpInst::Predicate Pred;
2364 switch (CC) {
2365 default: llvm_unreachable("Unknown condition code");
2366 case 0: Pred = ICmpInst::ICMP_EQ; break;
2367 case 1: Pred = Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT; break;
2368 case 2: Pred = Signed ? ICmpInst::ICMP_SLE : ICmpInst::ICMP_ULE; break;
2369 case 4: Pred = ICmpInst::ICMP_NE; break;
2370 case 5: Pred = Signed ? ICmpInst::ICMP_SGE : ICmpInst::ICMP_UGE; break;
2371 case 6: Pred = Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT; break;
2372 }
2373 Cmp = Builder.CreateICmp(Pred, Op0, CI.getArgOperand(1));
2374 }
2375
2376 Value *Mask = CI.getArgOperand(CI.arg_size() - 1);
2377
2378 return applyX86MaskOn1BitsVec(Builder, Cmp, Mask);
2379}
2380
2381 // Replace a masked intrinsic with an older unmasked intrinsic.
2382 static Value *upgradeX86MaskedShift(IRBuilder<> &Builder, CallBase &CI,
2383 Intrinsic::ID IID) {
2384 Value *Rep =
2385 Builder.CreateIntrinsic(IID, {CI.getArgOperand(0), CI.getArgOperand(1)});
2386 return emitX86Select(Builder, CI.getArgOperand(3), Rep, CI.getArgOperand(2));
2387}
2388
2389 static Value *upgradeMaskedMove(IRBuilder<> &Builder, CallBase &CI) {
2390 Value* A = CI.getArgOperand(0);
2391 Value* B = CI.getArgOperand(1);
2392 Value* Src = CI.getArgOperand(2);
2393 Value* Mask = CI.getArgOperand(3);
2394
2395 Value* AndNode = Builder.CreateAnd(Mask, APInt(8, 1));
2396 Value* Cmp = Builder.CreateIsNotNull(AndNode);
2397 Value* Extract1 = Builder.CreateExtractElement(B, (uint64_t)0);
2398 Value* Extract2 = Builder.CreateExtractElement(Src, (uint64_t)0);
2399 Value* Select = Builder.CreateSelect(Cmp, Extract1, Extract2);
2400 return Builder.CreateInsertElement(A, Select, (uint64_t)0);
2401}
2402
2403 static Value *upgradeMaskToInt(IRBuilder<> &Builder, CallBase &CI) {
2404 Value* Op = CI.getArgOperand(0);
2405 Type* ReturnOp = CI.getType();
2406 unsigned NumElts = cast<FixedVectorType>(CI.getType())->getNumElements();
2407 Value *Mask = getX86MaskVec(Builder, Op, NumElts);
2408 return Builder.CreateSExt(Mask, ReturnOp, "vpmovm2");
2409}
2410
2411 // Replace intrinsic with unmasked version and a select.
2412 static bool upgradeAVX512MaskToSelect(StringRef Name, IRBuilder<> &Builder,
2413 CallBase &CI, Value *&Rep) {
2414 Name = Name.substr(12); // Remove avx512.mask.
2415
2416 unsigned VecWidth = CI.getType()->getPrimitiveSizeInBits();
2417 unsigned EltWidth = CI.getType()->getScalarSizeInBits();
2418 Intrinsic::ID IID;
2419 if (Name.starts_with("max.p")) {
2420 if (VecWidth == 128 && EltWidth == 32)
2421 IID = Intrinsic::x86_sse_max_ps;
2422 else if (VecWidth == 128 && EltWidth == 64)
2423 IID = Intrinsic::x86_sse2_max_pd;
2424 else if (VecWidth == 256 && EltWidth == 32)
2425 IID = Intrinsic::x86_avx_max_ps_256;
2426 else if (VecWidth == 256 && EltWidth == 64)
2427 IID = Intrinsic::x86_avx_max_pd_256;
2428 else
2429 llvm_unreachable("Unexpected intrinsic");
2430 } else if (Name.starts_with("min.p")) {
2431 if (VecWidth == 128 && EltWidth == 32)
2432 IID = Intrinsic::x86_sse_min_ps;
2433 else if (VecWidth == 128 && EltWidth == 64)
2434 IID = Intrinsic::x86_sse2_min_pd;
2435 else if (VecWidth == 256 && EltWidth == 32)
2436 IID = Intrinsic::x86_avx_min_ps_256;
2437 else if (VecWidth == 256 && EltWidth == 64)
2438 IID = Intrinsic::x86_avx_min_pd_256;
2439 else
2440 llvm_unreachable("Unexpected intrinsic");
2441 } else if (Name.starts_with("pshuf.b.")) {
2442 if (VecWidth == 128)
2443 IID = Intrinsic::x86_ssse3_pshuf_b_128;
2444 else if (VecWidth == 256)
2445 IID = Intrinsic::x86_avx2_pshuf_b;
2446 else if (VecWidth == 512)
2447 IID = Intrinsic::x86_avx512_pshuf_b_512;
2448 else
2449 llvm_unreachable("Unexpected intrinsic");
2450 } else if (Name.starts_with("pmul.hr.sw.")) {
2451 if (VecWidth == 128)
2452 IID = Intrinsic::x86_ssse3_pmul_hr_sw_128;
2453 else if (VecWidth == 256)
2454 IID = Intrinsic::x86_avx2_pmul_hr_sw;
2455 else if (VecWidth == 512)
2456 IID = Intrinsic::x86_avx512_pmul_hr_sw_512;
2457 else
2458 llvm_unreachable("Unexpected intrinsic");
2459 } else if (Name.starts_with("pmulh.w.")) {
2460 if (VecWidth == 128)
2461 IID = Intrinsic::x86_sse2_pmulh_w;
2462 else if (VecWidth == 256)
2463 IID = Intrinsic::x86_avx2_pmulh_w;
2464 else if (VecWidth == 512)
2465 IID = Intrinsic::x86_avx512_pmulh_w_512;
2466 else
2467 llvm_unreachable("Unexpected intrinsic");
2468 } else if (Name.starts_with("pmulhu.w.")) {
2469 if (VecWidth == 128)
2470 IID = Intrinsic::x86_sse2_pmulhu_w;
2471 else if (VecWidth == 256)
2472 IID = Intrinsic::x86_avx2_pmulhu_w;
2473 else if (VecWidth == 512)
2474 IID = Intrinsic::x86_avx512_pmulhu_w_512;
2475 else
2476 llvm_unreachable("Unexpected intrinsic");
2477 } else if (Name.starts_with("pmaddw.d.")) {
2478 if (VecWidth == 128)
2479 IID = Intrinsic::x86_sse2_pmadd_wd;
2480 else if (VecWidth == 256)
2481 IID = Intrinsic::x86_avx2_pmadd_wd;
2482 else if (VecWidth == 512)
2483 IID = Intrinsic::x86_avx512_pmaddw_d_512;
2484 else
2485 llvm_unreachable("Unexpected intrinsic");
2486 } else if (Name.starts_with("pmaddubs.w.")) {
2487 if (VecWidth == 128)
2488 IID = Intrinsic::x86_ssse3_pmadd_ub_sw_128;
2489 else if (VecWidth == 256)
2490 IID = Intrinsic::x86_avx2_pmadd_ub_sw;
2491 else if (VecWidth == 512)
2492 IID = Intrinsic::x86_avx512_pmaddubs_w_512;
2493 else
2494 llvm_unreachable("Unexpected intrinsic");
2495 } else if (Name.starts_with("packsswb.")) {
2496 if (VecWidth == 128)
2497 IID = Intrinsic::x86_sse2_packsswb_128;
2498 else if (VecWidth == 256)
2499 IID = Intrinsic::x86_avx2_packsswb;
2500 else if (VecWidth == 512)
2501 IID = Intrinsic::x86_avx512_packsswb_512;
2502 else
2503 llvm_unreachable("Unexpected intrinsic");
2504 } else if (Name.starts_with("packssdw.")) {
2505 if (VecWidth == 128)
2506 IID = Intrinsic::x86_sse2_packssdw_128;
2507 else if (VecWidth == 256)
2508 IID = Intrinsic::x86_avx2_packssdw;
2509 else if (VecWidth == 512)
2510 IID = Intrinsic::x86_avx512_packssdw_512;
2511 else
2512 llvm_unreachable("Unexpected intrinsic");
2513 } else if (Name.starts_with("packuswb.")) {
2514 if (VecWidth == 128)
2515 IID = Intrinsic::x86_sse2_packuswb_128;
2516 else if (VecWidth == 256)
2517 IID = Intrinsic::x86_avx2_packuswb;
2518 else if (VecWidth == 512)
2519 IID = Intrinsic::x86_avx512_packuswb_512;
2520 else
2521 llvm_unreachable("Unexpected intrinsic");
2522 } else if (Name.starts_with("packusdw.")) {
2523 if (VecWidth == 128)
2524 IID = Intrinsic::x86_sse41_packusdw;
2525 else if (VecWidth == 256)
2526 IID = Intrinsic::x86_avx2_packusdw;
2527 else if (VecWidth == 512)
2528 IID = Intrinsic::x86_avx512_packusdw_512;
2529 else
2530 llvm_unreachable("Unexpected intrinsic");
2531 } else if (Name.starts_with("vpermilvar.")) {
2532 if (VecWidth == 128 && EltWidth == 32)
2533 IID = Intrinsic::x86_avx_vpermilvar_ps;
2534 else if (VecWidth == 128 && EltWidth == 64)
2535 IID = Intrinsic::x86_avx_vpermilvar_pd;
2536 else if (VecWidth == 256 && EltWidth == 32)
2537 IID = Intrinsic::x86_avx_vpermilvar_ps_256;
2538 else if (VecWidth == 256 && EltWidth == 64)
2539 IID = Intrinsic::x86_avx_vpermilvar_pd_256;
2540 else if (VecWidth == 512 && EltWidth == 32)
2541 IID = Intrinsic::x86_avx512_vpermilvar_ps_512;
2542 else if (VecWidth == 512 && EltWidth == 64)
2543 IID = Intrinsic::x86_avx512_vpermilvar_pd_512;
2544 else
2545 llvm_unreachable("Unexpected intrinsic");
2546 } else if (Name == "cvtpd2dq.256") {
2547 IID = Intrinsic::x86_avx_cvt_pd2dq_256;
2548 } else if (Name == "cvtpd2ps.256") {
2549 IID = Intrinsic::x86_avx_cvt_pd2_ps_256;
2550 } else if (Name == "cvttpd2dq.256") {
2551 IID = Intrinsic::x86_avx_cvtt_pd2dq_256;
2552 } else if (Name == "cvttps2dq.128") {
2553 IID = Intrinsic::x86_sse2_cvttps2dq;
2554 } else if (Name == "cvttps2dq.256") {
2555 IID = Intrinsic::x86_avx_cvtt_ps2dq_256;
2556 } else if (Name.starts_with("permvar.")) {
2557 bool IsFloat = CI.getType()->isFPOrFPVectorTy();
2558 if (VecWidth == 256 && EltWidth == 32 && IsFloat)
2559 IID = Intrinsic::x86_avx2_permps;
2560 else if (VecWidth == 256 && EltWidth == 32 && !IsFloat)
2561 IID = Intrinsic::x86_avx2_permd;
2562 else if (VecWidth == 256 && EltWidth == 64 && IsFloat)
2563 IID = Intrinsic::x86_avx512_permvar_df_256;
2564 else if (VecWidth == 256 && EltWidth == 64 && !IsFloat)
2565 IID = Intrinsic::x86_avx512_permvar_di_256;
2566 else if (VecWidth == 512 && EltWidth == 32 && IsFloat)
2567 IID = Intrinsic::x86_avx512_permvar_sf_512;
2568 else if (VecWidth == 512 && EltWidth == 32 && !IsFloat)
2569 IID = Intrinsic::x86_avx512_permvar_si_512;
2570 else if (VecWidth == 512 && EltWidth == 64 && IsFloat)
2571 IID = Intrinsic::x86_avx512_permvar_df_512;
2572 else if (VecWidth == 512 && EltWidth == 64 && !IsFloat)
2573 IID = Intrinsic::x86_avx512_permvar_di_512;
2574 else if (VecWidth == 128 && EltWidth == 16)
2575 IID = Intrinsic::x86_avx512_permvar_hi_128;
2576 else if (VecWidth == 256 && EltWidth == 16)
2577 IID = Intrinsic::x86_avx512_permvar_hi_256;
2578 else if (VecWidth == 512 && EltWidth == 16)
2579 IID = Intrinsic::x86_avx512_permvar_hi_512;
2580 else if (VecWidth == 128 && EltWidth == 8)
2581 IID = Intrinsic::x86_avx512_permvar_qi_128;
2582 else if (VecWidth == 256 && EltWidth == 8)
2583 IID = Intrinsic::x86_avx512_permvar_qi_256;
2584 else if (VecWidth == 512 && EltWidth == 8)
2585 IID = Intrinsic::x86_avx512_permvar_qi_512;
2586 else
2587 llvm_unreachable("Unexpected intrinsic");
2588 } else if (Name.starts_with("dbpsadbw.")) {
2589 if (VecWidth == 128)
2590 IID = Intrinsic::x86_avx512_dbpsadbw_128;
2591 else if (VecWidth == 256)
2592 IID = Intrinsic::x86_avx512_dbpsadbw_256;
2593 else if (VecWidth == 512)
2594 IID = Intrinsic::x86_avx512_dbpsadbw_512;
2595 else
2596 llvm_unreachable("Unexpected intrinsic");
2597 } else if (Name.starts_with("pmultishift.qb.")) {
2598 if (VecWidth == 128)
2599 IID = Intrinsic::x86_avx512_pmultishift_qb_128;
2600 else if (VecWidth == 256)
2601 IID = Intrinsic::x86_avx512_pmultishift_qb_256;
2602 else if (VecWidth == 512)
2603 IID = Intrinsic::x86_avx512_pmultishift_qb_512;
2604 else
2605 llvm_unreachable("Unexpected intrinsic");
2606 } else if (Name.starts_with("conflict.")) {
2607 if (Name[9] == 'd' && VecWidth == 128)
2608 IID = Intrinsic::x86_avx512_conflict_d_128;
2609 else if (Name[9] == 'd' && VecWidth == 256)
2610 IID = Intrinsic::x86_avx512_conflict_d_256;
2611 else if (Name[9] == 'd' && VecWidth == 512)
2612 IID = Intrinsic::x86_avx512_conflict_d_512;
2613 else if (Name[9] == 'q' && VecWidth == 128)
2614 IID = Intrinsic::x86_avx512_conflict_q_128;
2615 else if (Name[9] == 'q' && VecWidth == 256)
2616 IID = Intrinsic::x86_avx512_conflict_q_256;
2617 else if (Name[9] == 'q' && VecWidth == 512)
2618 IID = Intrinsic::x86_avx512_conflict_q_512;
2619 else
2620 llvm_unreachable("Unexpected intrinsic");
2621 } else if (Name.starts_with("pavg.")) {
2622 if (Name[5] == 'b' && VecWidth == 128)
2623 IID = Intrinsic::x86_sse2_pavg_b;
2624 else if (Name[5] == 'b' && VecWidth == 256)
2625 IID = Intrinsic::x86_avx2_pavg_b;
2626 else if (Name[5] == 'b' && VecWidth == 512)
2627 IID = Intrinsic::x86_avx512_pavg_b_512;
2628 else if (Name[5] == 'w' && VecWidth == 128)
2629 IID = Intrinsic::x86_sse2_pavg_w;
2630 else if (Name[5] == 'w' && VecWidth == 256)
2631 IID = Intrinsic::x86_avx2_pavg_w;
2632 else if (Name[5] == 'w' && VecWidth == 512)
2633 IID = Intrinsic::x86_avx512_pavg_w_512;
2634 else
2635 llvm_unreachable("Unexpected intrinsic");
2636 } else
2637 return false;
2638
2639 SmallVector<Value *, 4> Args(CI.args());
2640 Args.pop_back();
2641 Args.pop_back();
2642 Rep = Builder.CreateIntrinsic(IID, Args);
2643 unsigned NumArgs = CI.arg_size();
2644 Rep = emitX86Select(Builder, CI.getArgOperand(NumArgs - 1), Rep,
2645 CI.getArgOperand(NumArgs - 2));
2646 return true;
2647}
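// Illustrative sketch (not taken from this file): a masked call such as
//   avx512.mask.pmulh.w.512(a, b, passthru, k)
// becomes the unmasked x86.avx512.pmulh.w.512(a, b) followed by a lane-wise
// select on the expanded k mask between the new result and the passthru value.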
2648
2649/// Upgrade comment in call to inline asm that represents an objc retain release
2650/// marker.
2651void llvm::UpgradeInlineAsmString(std::string *AsmStr) {
2652 size_t Pos;
2653 if (AsmStr->find("mov\tfp") == 0 &&
2654 AsmStr->find("objc_retainAutoreleaseReturnValue") != std::string::npos &&
2655 (Pos = AsmStr->find("# marker")) != std::string::npos) {
2656 AsmStr->replace(Pos, 1, ";");
2657 }
2658}
2659
2660 static Value *upgradeNVVMIntrinsicCall(StringRef Name, CallBase *CI,
2661 Function *F, IRBuilder<> &Builder) {
2662 Value *Rep = nullptr;
2663
2664 if (Name == "abs.i" || Name == "abs.ll") {
2665 Value *Arg = CI->getArgOperand(0);
2666 Value *Neg = Builder.CreateNeg(Arg, "neg");
2667 Value *Cmp = Builder.CreateICmpSGE(
2668 Arg, llvm::Constant::getNullValue(Arg->getType()), "abs.cond");
2669 Rep = Builder.CreateSelect(Cmp, Arg, Neg, "abs");
2670 } else if (Name == "abs.bf16" || Name == "abs.bf16x2") {
2671 Type *Ty = (Name == "abs.bf16")
2672 ? Builder.getBFloatTy()
2673 : FixedVectorType::get(Builder.getBFloatTy(), 2);
2674 Value *Arg = Builder.CreateBitCast(CI->getArgOperand(0), Ty);
2675 Value *Abs = Builder.CreateUnaryIntrinsic(Intrinsic::nvvm_fabs, Arg);
2676 Rep = Builder.CreateBitCast(Abs, CI->getType());
2677 } else if (Name == "fabs.f" || Name == "fabs.ftz.f" || Name == "fabs.d") {
2678 Intrinsic::ID IID = (Name == "fabs.ftz.f") ? Intrinsic::nvvm_fabs_ftz
2679 : Intrinsic::nvvm_fabs;
2680 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2681 } else if (Name.consume_front("ex2.approx.")) {
2682 // nvvm.ex2.approx.{f,ftz.f,d,f16x2}
2683 Intrinsic::ID IID = Name.starts_with("ftz") ? Intrinsic::nvvm_ex2_approx_ftz
2684 : Intrinsic::nvvm_ex2_approx;
2685 Rep = Builder.CreateUnaryIntrinsic(IID, CI->getArgOperand(0));
2686 } else if (Name.starts_with("atomic.load.add.f32.p") ||
2687 Name.starts_with("atomic.load.add.f64.p")) {
2688 Value *Ptr = CI->getArgOperand(0);
2689 Value *Val = CI->getArgOperand(1);
2690 Rep = Builder.CreateAtomicRMW(AtomicRMWInst::FAdd, Ptr, Val, MaybeAlign(),
2691 AtomicOrdering::SequentiallyConsistent);
2692 } else if (Name.starts_with("atomic.load.inc.32.p") ||
2693 Name.starts_with("atomic.load.dec.32.p")) {
2694 Value *Ptr = CI->getArgOperand(0);
2695 Value *Val = CI->getArgOperand(1);
2696 auto Op = Name.starts_with("atomic.load.inc") ? AtomicRMWInst::UIncWrap
2697 : AtomicRMWInst::UDecWrap;
2698 Rep = Builder.CreateAtomicRMW(Op, Ptr, Val, MaybeAlign(),
2699 AtomicOrdering::SequentiallyConsistent);
2700 } else if (Name == "clz.ll") {
2701 // llvm.nvvm.clz.ll returns an i32, but llvm.ctlz.i64 returns an i64.
2702 Value *Arg = CI->getArgOperand(0);
2703 Value *Ctlz = Builder.CreateIntrinsic(Intrinsic::ctlz, {Arg->getType()},
2704 {Arg, Builder.getFalse()},
2705 /*FMFSource=*/nullptr, "ctlz");
2706 Rep = Builder.CreateTrunc(Ctlz, Builder.getInt32Ty(), "ctlz.trunc");
2707 } else if (Name == "popc.ll") {
2708 // llvm.nvvm.popc.ll returns an i32, but llvm.ctpop.i64 returns an
2709 // i64.
2710 Value *Arg = CI->getArgOperand(0);
2711 Value *Popc = Builder.CreateIntrinsic(Intrinsic::ctpop, {Arg->getType()},
2712 Arg, /*FMFSource=*/nullptr, "ctpop");
2713 Rep = Builder.CreateTrunc(Popc, Builder.getInt32Ty(), "ctpop.trunc");
2714 } else if (Name == "h2f") {
2715 Value *Cast =
2716 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
2717 Rep = Builder.CreateFPExt(Cast, Builder.getFloatTy());
2718 } else if (Name.consume_front("bitcast.") &&
2719 (Name == "f2i" || Name == "i2f" || Name == "ll2d" ||
2720 Name == "d2ll")) {
2721 Rep = Builder.CreateBitCast(CI->getArgOperand(0), CI->getType());
2722 } else if (Name == "rotate.b32") {
2723 Value *Arg = CI->getOperand(0);
2724 Value *ShiftAmt = CI->getOperand(1);
2725 Rep = Builder.CreateIntrinsic(Builder.getInt32Ty(), Intrinsic::fshl,
2726 {Arg, Arg, ShiftAmt});
2727 } else if (Name == "rotate.b64") {
2728 Type *Int64Ty = Builder.getInt64Ty();
2729 Value *Arg = CI->getOperand(0);
2730 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2731 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2732 {Arg, Arg, ZExtShiftAmt});
2733 } else if (Name == "rotate.right.b64") {
2734 Type *Int64Ty = Builder.getInt64Ty();
2735 Value *Arg = CI->getOperand(0);
2736 Value *ZExtShiftAmt = Builder.CreateZExt(CI->getOperand(1), Int64Ty);
2737 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshr,
2738 {Arg, Arg, ZExtShiftAmt});
2739 } else if (Name == "swap.lo.hi.b64") {
2740 Type *Int64Ty = Builder.getInt64Ty();
2741 Value *Arg = CI->getOperand(0);
2742 Rep = Builder.CreateIntrinsic(Int64Ty, Intrinsic::fshl,
2743 {Arg, Arg, Builder.getInt64(32)});
2744 } else if ((Name.consume_front("ptr.gen.to.") &&
2745 consumeNVVMPtrAddrSpace(Name)) ||
2746 (Name.consume_front("ptr.") && consumeNVVMPtrAddrSpace(Name) &&
2747 Name.starts_with(".to.gen"))) {
2748 Rep = Builder.CreateAddrSpaceCast(CI->getArgOperand(0), CI->getType());
2749 } else if (Name.consume_front("ldg.global")) {
2750 Value *Ptr = CI->getArgOperand(0);
2751 Align PtrAlign = cast<ConstantInt>(CI->getArgOperand(1))->getAlignValue();
2752 // Use addrspace(1) for NVPTX ADDRESS_SPACE_GLOBAL
2753 Value *ASC = Builder.CreateAddrSpaceCast(Ptr, Builder.getPtrTy(1));
2754 Instruction *LD = Builder.CreateAlignedLoad(CI->getType(), ASC, PtrAlign);
2755 MDNode *MD = MDNode::get(Builder.getContext(), {});
2756 LD->setMetadata(LLVMContext::MD_invariant_load, MD);
2757 return LD;
2758 } else if (Name == "tanh.approx.f32") {
2759 // nvvm.tanh.approx.f32 -> afn llvm.tanh.f32
2760 FastMathFlags FMF;
2761 FMF.setApproxFunc();
2762 Rep = Builder.CreateUnaryIntrinsic(Intrinsic::tanh, CI->getArgOperand(0),
2763 FMF);
2764 } else if (Name == "barrier0" || Name == "barrier.n" || Name == "bar.sync") {
2765 Value *Arg =
2766 Name.ends_with('0') ? Builder.getInt32(0) : CI->getArgOperand(0);
2767 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_aligned_all,
2768 {}, {Arg});
2769 } else if (Name == "barrier") {
2770 Rep = Builder.CreateIntrinsic(
2771 Intrinsic::nvvm_barrier_cta_sync_aligned_count, {},
2772 {CI->getArgOperand(0), CI->getArgOperand(1)});
2773 } else if (Name == "barrier.sync") {
2774 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_all, {},
2775 {CI->getArgOperand(0)});
2776 } else if (Name == "barrier.sync.cnt") {
2777 Rep = Builder.CreateIntrinsic(Intrinsic::nvvm_barrier_cta_sync_count, {},
2778 {CI->getArgOperand(0), CI->getArgOperand(1)});
2779 } else if (Name == "barrier0.popc" || Name == "barrier0.and" ||
2780 Name == "barrier0.or") {
2781 Value *C = CI->getArgOperand(0);
2782 C = Builder.CreateICmpNE(C, Builder.getInt32(0));
2783
2784 Intrinsic::ID IID =
2785 StringSwitch<Intrinsic::ID>(Name)
2786 .Case("barrier0.popc",
2787 Intrinsic::nvvm_barrier_cta_red_popc_aligned_all)
2788 .Case("barrier0.and",
2789 Intrinsic::nvvm_barrier_cta_red_and_aligned_all)
2790 .Case("barrier0.or",
2791 Intrinsic::nvvm_barrier_cta_red_or_aligned_all);
2792 Value *Bar = Builder.CreateIntrinsic(IID, {}, {Builder.getInt32(0), C});
2793 Rep = Builder.CreateZExt(Bar, CI->getType());
2794 } else {
2795 Intrinsic::ID IID = shouldUpgradeNVPTXBF16Intrinsic(Name);
2796 if (IID != Intrinsic::not_intrinsic &&
2797 !F->getReturnType()->getScalarType()->isBFloatTy()) {
2798 rename(F);
2799 Function *NewFn = Intrinsic::getOrInsertDeclaration(F->getParent(), IID);
2800 SmallVector<Value *, 2> Args;
2801 for (size_t I = 0; I < NewFn->arg_size(); ++I) {
2802 Value *Arg = CI->getArgOperand(I);
2803 Type *OldType = Arg->getType();
2804 Type *NewType = NewFn->getArg(I)->getType();
2805 Args.push_back(
2806 (OldType->isIntegerTy() && NewType->getScalarType()->isBFloatTy())
2807 ? Builder.CreateBitCast(Arg, NewType)
2808 : Arg);
2809 }
2810 Rep = Builder.CreateCall(NewFn, Args);
2811 if (F->getReturnType()->isIntegerTy())
2812 Rep = Builder.CreateBitCast(Rep, F->getReturnType());
2813 }
2814 }
2815
2816 return Rep;
2817}
2818
2819 static Value *upgradeX86IntrinsicCall(StringRef Name, CallBase *CI, Function *F,
2820 IRBuilder<> &Builder) {
2821 LLVMContext &C = F->getContext();
2822 Value *Rep = nullptr;
2823
2824 if (Name.starts_with("sse4a.movnt.")) {
2825 SmallVector<Metadata *, 1> Elts;
2826 Elts.push_back(
2827 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2828 MDNode *Node = MDNode::get(C, Elts);
2829
2830 Value *Arg0 = CI->getArgOperand(0);
2831 Value *Arg1 = CI->getArgOperand(1);
2832
2833 // Nontemporal (unaligned) store of the 0'th element of the float/double
2834 // vector.
2835 Value *Extract =
2836 Builder.CreateExtractElement(Arg1, (uint64_t)0, "extractelement");
2837
2838 StoreInst *SI = Builder.CreateAlignedStore(Extract, Arg0, Align(1));
2839 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2840 } else if (Name.starts_with("avx.movnt.") ||
2841 Name.starts_with("avx512.storent.")) {
2842 SmallVector<Metadata *, 1> Elts;
2843 Elts.push_back(
2844 ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
2845 MDNode *Node = MDNode::get(C, Elts);
2846
2847 Value *Arg0 = CI->getArgOperand(0);
2848 Value *Arg1 = CI->getArgOperand(1);
2849
2850 StoreInst *SI = Builder.CreateAlignedStore(
2851 Arg1, Arg0,
2852 Align(Arg1->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
2853 SI->setMetadata(LLVMContext::MD_nontemporal, Node);
2854 } else if (Name == "sse2.storel.dq") {
2855 Value *Arg0 = CI->getArgOperand(0);
2856 Value *Arg1 = CI->getArgOperand(1);
2857
2858 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
2859 Value *BC0 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
2860 Value *Elt = Builder.CreateExtractElement(BC0, (uint64_t)0);
2861 Builder.CreateAlignedStore(Elt, Arg0, Align(1));
2862 } else if (Name.starts_with("sse.storeu.") ||
2863 Name.starts_with("sse2.storeu.") ||
2864 Name.starts_with("avx.storeu.")) {
2865 Value *Arg0 = CI->getArgOperand(0);
2866 Value *Arg1 = CI->getArgOperand(1);
2867 Builder.CreateAlignedStore(Arg1, Arg0, Align(1));
2868 } else if (Name == "avx512.mask.store.ss") {
2869 Value *Mask = Builder.CreateAnd(CI->getArgOperand(2), Builder.getInt8(1));
2870 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2871 Mask, false);
2872 } else if (Name.starts_with("avx512.mask.store")) {
2873 // "avx512.mask.storeu." or "avx512.mask.store."
2874 bool Aligned = Name[17] != 'u'; // "avx512.mask.storeu".
2875 upgradeMaskedStore(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
2876 CI->getArgOperand(2), Aligned);
2877 } else if (Name.starts_with("sse2.pcmp") || Name.starts_with("avx2.pcmp")) {
2878 // Upgrade packed integer vector compare intrinsics to compare instructions.
2879 // "sse2.pcmpeq." "sse2.pcmpgt." "avx2.pcmpeq." or "avx2.pcmpgt."
2880 bool CmpEq = Name[9] == 'e';
2881 Rep = Builder.CreateICmp(CmpEq ? ICmpInst::ICMP_EQ : ICmpInst::ICMP_SGT,
2882 CI->getArgOperand(0), CI->getArgOperand(1));
2883 Rep = Builder.CreateSExt(Rep, CI->getType(), "");
2884 } else if (Name.starts_with("avx512.broadcastm")) {
2885 Type *ExtTy = Type::getInt32Ty(C);
2886 if (CI->getOperand(0)->getType()->isIntegerTy(8))
2887 ExtTy = Type::getInt64Ty(C);
2888 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() /
2889 ExtTy->getPrimitiveSizeInBits();
2890 Rep = Builder.CreateZExt(CI->getArgOperand(0), ExtTy);
2891 Rep = Builder.CreateVectorSplat(NumElts, Rep);
2892 } else if (Name == "sse.sqrt.ss" || Name == "sse2.sqrt.sd") {
2893 Value *Vec = CI->getArgOperand(0);
2894 Value *Elt0 = Builder.CreateExtractElement(Vec, (uint64_t)0);
2895 Elt0 = Builder.CreateIntrinsic(Intrinsic::sqrt, Elt0->getType(), Elt0);
2896 Rep = Builder.CreateInsertElement(Vec, Elt0, (uint64_t)0);
2897 } else if (Name.starts_with("avx.sqrt.p") ||
2898 Name.starts_with("sse2.sqrt.p") ||
2899 Name.starts_with("sse.sqrt.p")) {
2900 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2901 {CI->getArgOperand(0)});
2902 } else if (Name.starts_with("avx512.mask.sqrt.p")) {
2903 if (CI->arg_size() == 4 &&
2904 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
2905 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
2906 Intrinsic::ID IID = Name[18] == 's' ? Intrinsic::x86_avx512_sqrt_ps_512
2907 : Intrinsic::x86_avx512_sqrt_pd_512;
2908
2909 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(3)};
2910 Rep = Builder.CreateIntrinsic(IID, Args);
2911 } else {
2912 Rep = Builder.CreateIntrinsic(Intrinsic::sqrt, CI->getType(),
2913 {CI->getArgOperand(0)});
2914 }
2915 Rep =
2916 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2917 } else if (Name.starts_with("avx512.ptestm") ||
2918 Name.starts_with("avx512.ptestnm")) {
2919 Value *Op0 = CI->getArgOperand(0);
2920 Value *Op1 = CI->getArgOperand(1);
2921 Value *Mask = CI->getArgOperand(2);
2922 Rep = Builder.CreateAnd(Op0, Op1);
2923 llvm::Type *Ty = Op0->getType();
2924 Value *Zero = llvm::Constant::getNullValue(Ty);
2925 ICmpInst::Predicate Pred = Name.starts_with("avx512.ptestm")
2926 ? ICmpInst::ICMP_NE
2927 : ICmpInst::ICMP_EQ;
2928 Rep = Builder.CreateICmp(Pred, Rep, Zero);
2929 Rep = applyX86MaskOn1BitsVec(Builder, Rep, Mask);
2930 } else if (Name.starts_with("avx512.mask.pbroadcast")) {
2931 unsigned NumElts = cast<FixedVectorType>(CI->getArgOperand(1)->getType())
2932 ->getNumElements();
2933 Rep = Builder.CreateVectorSplat(NumElts, CI->getArgOperand(0));
2934 Rep =
2935 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
2936 } else if (Name.starts_with("avx512.kunpck")) {
2937 unsigned NumElts = CI->getType()->getScalarSizeInBits();
2938 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), NumElts);
2939 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), NumElts);
2940 int Indices[64];
2941 for (unsigned i = 0; i != NumElts; ++i)
2942 Indices[i] = i;
2943
2944 // First extract half of each vector. This gives better codegen than
2945 // doing it in a single shuffle.
2946 LHS = Builder.CreateShuffleVector(LHS, LHS, ArrayRef(Indices, NumElts / 2));
2947 RHS = Builder.CreateShuffleVector(RHS, RHS, ArrayRef(Indices, NumElts / 2));
2948 // Concat the vectors.
2949 // NOTE: Operands have to be swapped to match intrinsic definition.
2950 Rep = Builder.CreateShuffleVector(RHS, LHS, ArrayRef(Indices, NumElts));
2951 Rep = Builder.CreateBitCast(Rep, CI->getType());
2952 } else if (Name == "avx512.kand.w") {
2953 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2954 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2955 Rep = Builder.CreateAnd(LHS, RHS);
2956 Rep = Builder.CreateBitCast(Rep, CI->getType());
2957 } else if (Name == "avx512.kandn.w") {
2958 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2959 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2960 LHS = Builder.CreateNot(LHS);
2961 Rep = Builder.CreateAnd(LHS, RHS);
2962 Rep = Builder.CreateBitCast(Rep, CI->getType());
2963 } else if (Name == "avx512.kor.w") {
2964 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2965 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2966 Rep = Builder.CreateOr(LHS, RHS);
2967 Rep = Builder.CreateBitCast(Rep, CI->getType());
2968 } else if (Name == "avx512.kxor.w") {
2969 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2970 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2971 Rep = Builder.CreateXor(LHS, RHS);
2972 Rep = Builder.CreateBitCast(Rep, CI->getType());
2973 } else if (Name == "avx512.kxnor.w") {
2974 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2975 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2976 LHS = Builder.CreateNot(LHS);
2977 Rep = Builder.CreateXor(LHS, RHS);
2978 Rep = Builder.CreateBitCast(Rep, CI->getType());
2979 } else if (Name == "avx512.knot.w") {
2980 Rep = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2981 Rep = Builder.CreateNot(Rep);
2982 Rep = Builder.CreateBitCast(Rep, CI->getType());
2983 } else if (Name == "avx512.kortestz.w" || Name == "avx512.kortestc.w") {
2984 Value *LHS = getX86MaskVec(Builder, CI->getArgOperand(0), 16);
2985 Value *RHS = getX86MaskVec(Builder, CI->getArgOperand(1), 16);
2986 Rep = Builder.CreateOr(LHS, RHS);
2987 Rep = Builder.CreateBitCast(Rep, Builder.getInt16Ty());
2988 Value *C;
2989 if (Name[14] == 'c')
2990 C = ConstantInt::getAllOnesValue(Builder.getInt16Ty());
2991 else
2992 C = ConstantInt::getNullValue(Builder.getInt16Ty());
2993 Rep = Builder.CreateICmpEQ(Rep, C);
2994 Rep = Builder.CreateZExt(Rep, Builder.getInt32Ty());
2995 } else if (Name == "sse.add.ss" || Name == "sse2.add.sd" ||
2996 Name == "sse.sub.ss" || Name == "sse2.sub.sd" ||
2997 Name == "sse.mul.ss" || Name == "sse2.mul.sd" ||
2998 Name == "sse.div.ss" || Name == "sse2.div.sd") {
2999 Type *I32Ty = Type::getInt32Ty(C);
3000 Value *Elt0 = Builder.CreateExtractElement(CI->getArgOperand(0),
3001 ConstantInt::get(I32Ty, 0));
3002 Value *Elt1 = Builder.CreateExtractElement(CI->getArgOperand(1),
3003 ConstantInt::get(I32Ty, 0));
3004 Value *EltOp;
3005 if (Name.contains(".add."))
3006 EltOp = Builder.CreateFAdd(Elt0, Elt1);
3007 else if (Name.contains(".sub."))
3008 EltOp = Builder.CreateFSub(Elt0, Elt1);
3009 else if (Name.contains(".mul."))
3010 EltOp = Builder.CreateFMul(Elt0, Elt1);
3011 else
3012 EltOp = Builder.CreateFDiv(Elt0, Elt1);
3013 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), EltOp,
3014 ConstantInt::get(I32Ty, 0));
3015 } else if (Name.starts_with("avx512.mask.pcmp")) {
3016 // "avx512.mask.pcmpeq." or "avx512.mask.pcmpgt."
3017 bool CmpEq = Name[16] == 'e';
3018 Rep = upgradeMaskedCompare(Builder, *CI, CmpEq ? 0 : 6, true);
3019 } else if (Name.starts_with("avx512.mask.vpshufbitqmb.")) {
3020 Type *OpTy = CI->getArgOperand(0)->getType();
3021 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3022 Intrinsic::ID IID;
3023 switch (VecWidth) {
3024 default:
3025 llvm_unreachable("Unexpected intrinsic");
3026 case 128:
3027 IID = Intrinsic::x86_avx512_vpshufbitqmb_128;
3028 break;
3029 case 256:
3030 IID = Intrinsic::x86_avx512_vpshufbitqmb_256;
3031 break;
3032 case 512:
3033 IID = Intrinsic::x86_avx512_vpshufbitqmb_512;
3034 break;
3035 }
3036
3037 Rep =
3038 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3039 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3040 } else if (Name.starts_with("avx512.mask.fpclass.p")) {
3041 Type *OpTy = CI->getArgOperand(0)->getType();
3042 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3043 unsigned EltWidth = OpTy->getScalarSizeInBits();
3044 Intrinsic::ID IID;
3045 if (VecWidth == 128 && EltWidth == 32)
3046 IID = Intrinsic::x86_avx512_fpclass_ps_128;
3047 else if (VecWidth == 256 && EltWidth == 32)
3048 IID = Intrinsic::x86_avx512_fpclass_ps_256;
3049 else if (VecWidth == 512 && EltWidth == 32)
3050 IID = Intrinsic::x86_avx512_fpclass_ps_512;
3051 else if (VecWidth == 128 && EltWidth == 64)
3052 IID = Intrinsic::x86_avx512_fpclass_pd_128;
3053 else if (VecWidth == 256 && EltWidth == 64)
3054 IID = Intrinsic::x86_avx512_fpclass_pd_256;
3055 else if (VecWidth == 512 && EltWidth == 64)
3056 IID = Intrinsic::x86_avx512_fpclass_pd_512;
3057 else
3058 llvm_unreachable("Unexpected intrinsic");
3059
3060 Rep =
3061 Builder.CreateIntrinsic(IID, {CI->getOperand(0), CI->getArgOperand(1)});
3062 Rep = applyX86MaskOn1BitsVec(Builder, Rep, CI->getArgOperand(2));
3063 } else if (Name.starts_with("avx512.cmp.p")) {
3064 SmallVector<Value *, 4> Args(CI->args());
3065 Type *OpTy = Args[0]->getType();
3066 unsigned VecWidth = OpTy->getPrimitiveSizeInBits();
3067 unsigned EltWidth = OpTy->getScalarSizeInBits();
3068 Intrinsic::ID IID;
3069 if (VecWidth == 128 && EltWidth == 32)
3070 IID = Intrinsic::x86_avx512_mask_cmp_ps_128;
3071 else if (VecWidth == 256 && EltWidth == 32)
3072 IID = Intrinsic::x86_avx512_mask_cmp_ps_256;
3073 else if (VecWidth == 512 && EltWidth == 32)
3074 IID = Intrinsic::x86_avx512_mask_cmp_ps_512;
3075 else if (VecWidth == 128 && EltWidth == 64)
3076 IID = Intrinsic::x86_avx512_mask_cmp_pd_128;
3077 else if (VecWidth == 256 && EltWidth == 64)
3078 IID = Intrinsic::x86_avx512_mask_cmp_pd_256;
3079 else if (VecWidth == 512 && EltWidth == 64)
3080 IID = Intrinsic::x86_avx512_mask_cmp_pd_512;
3081 else
3082 llvm_unreachable("Unexpected intrinsic");
3083
3085 if (VecWidth == 512)
3086 std::swap(Mask, Args.back());
3087 Args.push_back(Mask);
3088
3089 Rep = Builder.CreateIntrinsic(IID, Args);
3090 } else if (Name.starts_with("avx512.mask.cmp.")) {
3091 // Integer compare intrinsics.
3092 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3093 Rep = upgradeMaskedCompare(Builder, *CI, Imm, true);
3094 } else if (Name.starts_with("avx512.mask.ucmp.")) {
3095 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3096 Rep = upgradeMaskedCompare(Builder, *CI, Imm, false);
3097 } else if (Name.starts_with("avx512.cvtb2mask.") ||
3098 Name.starts_with("avx512.cvtw2mask.") ||
3099 Name.starts_with("avx512.cvtd2mask.") ||
3100 Name.starts_with("avx512.cvtq2mask.")) {
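// The mask bit for each element is its sign bit, i.e. whether the element
// is negative.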
3101 Value *Op = CI->getArgOperand(0);
3102 Value *Zero = llvm::Constant::getNullValue(Op->getType());
3103 Rep = Builder.CreateICmp(ICmpInst::ICMP_SLT, Op, Zero);
3104 Rep = applyX86MaskOn1BitsVec(Builder, Rep, nullptr);
3105 } else if (Name == "ssse3.pabs.b.128" || Name == "ssse3.pabs.w.128" ||
3106 Name == "ssse3.pabs.d.128" || Name.starts_with("avx2.pabs") ||
3107 Name.starts_with("avx512.mask.pabs")) {
3108 Rep = upgradeAbs(Builder, *CI);
3109 } else if (Name == "sse41.pmaxsb" || Name == "sse2.pmaxs.w" ||
3110 Name == "sse41.pmaxsd" || Name.starts_with("avx2.pmaxs") ||
3111 Name.starts_with("avx512.mask.pmaxs")) {
3112 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smax);
3113 } else if (Name == "sse2.pmaxu.b" || Name == "sse41.pmaxuw" ||
3114 Name == "sse41.pmaxud" || Name.starts_with("avx2.pmaxu") ||
3115 Name.starts_with("avx512.mask.pmaxu")) {
3116 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umax);
3117 } else if (Name == "sse41.pminsb" || Name == "sse2.pmins.w" ||
3118 Name == "sse41.pminsd" || Name.starts_with("avx2.pmins") ||
3119 Name.starts_with("avx512.mask.pmins")) {
3120 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::smin);
3121 } else if (Name == "sse2.pminu.b" || Name == "sse41.pminuw" ||
3122 Name == "sse41.pminud" || Name.starts_with("avx2.pminu") ||
3123 Name.starts_with("avx512.mask.pminu")) {
3124 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::umin);
3125 } else if (Name == "sse2.pmulu.dq" || Name == "avx2.pmulu.dq" ||
3126 Name == "avx512.pmulu.dq.512" ||
3127 Name.starts_with("avx512.mask.pmulu.dq.")) {
3128 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ false);
3129 } else if (Name == "sse41.pmuldq" || Name == "avx2.pmul.dq" ||
3130 Name == "avx512.pmul.dq.512" ||
3131 Name.starts_with("avx512.mask.pmul.dq.")) {
3132 Rep = upgradePMULDQ(Builder, *CI, /*Signed*/ true);
3133 } else if (Name == "sse.cvtsi2ss" || Name == "sse2.cvtsi2sd" ||
3134 Name == "sse.cvtsi642ss" || Name == "sse2.cvtsi642sd") {
3135 Rep =
3136 Builder.CreateSIToFP(CI->getArgOperand(1),
3137 cast<VectorType>(CI->getType())->getElementType());
3138 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3139 } else if (Name == "avx512.cvtusi2sd") {
3140 Rep =
3141 Builder.CreateUIToFP(CI->getArgOperand(1),
3142 cast<VectorType>(CI->getType())->getElementType());
3143 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3144 } else if (Name == "sse2.cvtss2sd") {
3145 Rep = Builder.CreateExtractElement(CI->getArgOperand(1), (uint64_t)0);
3146 Rep = Builder.CreateFPExt(
3147 Rep, cast<VectorType>(CI->getType())->getElementType());
3148 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
3149 } else if (Name == "sse2.cvtdq2pd" || Name == "sse2.cvtdq2ps" ||
3150 Name == "avx.cvtdq2.pd.256" || Name == "avx.cvtdq2.ps.256" ||
3151 Name.starts_with("avx512.mask.cvtdq2pd.") ||
3152 Name.starts_with("avx512.mask.cvtudq2pd.") ||
3153 Name.starts_with("avx512.mask.cvtdq2ps.") ||
3154 Name.starts_with("avx512.mask.cvtudq2ps.") ||
3155 Name.starts_with("avx512.mask.cvtqq2pd.") ||
3156 Name.starts_with("avx512.mask.cvtuqq2pd.") ||
3157 Name == "avx512.mask.cvtqq2ps.256" ||
3158 Name == "avx512.mask.cvtqq2ps.512" ||
3159 Name == "avx512.mask.cvtuqq2ps.256" ||
3160 Name == "avx512.mask.cvtuqq2ps.512" || Name == "sse2.cvtps2pd" ||
3161 Name == "avx.cvt.ps2.pd.256" ||
3162 Name == "avx512.mask.cvtps2pd.128" ||
3163 Name == "avx512.mask.cvtps2pd.256") {
3164 auto *DstTy = cast<FixedVectorType>(CI->getType());
3165 Rep = CI->getArgOperand(0);
3166 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3167
3168 unsigned NumDstElts = DstTy->getNumElements();
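// If the destination has fewer elements than the source (e.g. v4i32 to
// v2f64), only the low elements are converted.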
3169 if (NumDstElts < SrcTy->getNumElements()) {
3170 assert(NumDstElts == 2 && "Unexpected vector size");
3171 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1});
3172 }
3173
3174 bool IsPS2PD = SrcTy->getElementType()->isFloatTy();
3175 bool IsUnsigned = Name.contains("cvtu");
3176 if (IsPS2PD)
3177 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtps2pd");
3178 else if (CI->arg_size() == 4 &&
3179 (!isa<ConstantInt>(CI->getArgOperand(3)) ||
3180 cast<ConstantInt>(CI->getArgOperand(3))->getZExtValue() != 4)) {
3181 Intrinsic::ID IID = IsUnsigned ? Intrinsic::x86_avx512_uitofp_round
3182 : Intrinsic::x86_avx512_sitofp_round;
3183 Rep = Builder.CreateIntrinsic(IID, {DstTy, SrcTy},
3184 {Rep, CI->getArgOperand(3)});
3185 } else {
3186 Rep = IsUnsigned ? Builder.CreateUIToFP(Rep, DstTy, "cvt")
3187 : Builder.CreateSIToFP(Rep, DstTy, "cvt");
3188 }
3189
3190 if (CI->arg_size() >= 3)
3191 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3192 CI->getArgOperand(1));
3193 } else if (Name.starts_with("avx512.mask.vcvtph2ps.") ||
3194 Name.starts_with("vcvtph2ps.")) {
3195 auto *DstTy = cast<FixedVectorType>(CI->getType());
3196 Rep = CI->getArgOperand(0);
3197 auto *SrcTy = cast<FixedVectorType>(Rep->getType());
3198 unsigned NumDstElts = DstTy->getNumElements();
3199 if (NumDstElts != SrcTy->getNumElements()) {
3200 assert(NumDstElts == 4 && "Unexpected vector size");
3201 Rep = Builder.CreateShuffleVector(Rep, Rep, ArrayRef<int>{0, 1, 2, 3});
3202 }
3203 Rep = Builder.CreateBitCast(
3204 Rep, FixedVectorType::get(Type::getHalfTy(C), NumDstElts));
3205 Rep = Builder.CreateFPExt(Rep, DstTy, "cvtph2ps");
3206 if (CI->arg_size() >= 3)
3207 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3208 CI->getArgOperand(1));
3209 } else if (Name.starts_with("avx512.mask.load")) {
3210 // "avx512.mask.loadu." or "avx512.mask.load."
3211 bool Aligned = Name[16] != 'u'; // "avx512.mask.loadu".
3212 Rep = upgradeMaskedLoad(Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3213 CI->getArgOperand(2), Aligned);
3214 } else if (Name.starts_with("avx512.mask.expand.load.")) {
3215 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3216 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3217 ResultTy->getNumElements());
3218
3219 Rep = Builder.CreateIntrinsic(
3220 Intrinsic::masked_expandload, ResultTy,
3221 {CI->getOperand(0), MaskVec, CI->getOperand(1)});
3222 } else if (Name.starts_with("avx512.mask.compress.store.")) {
3223 auto *ResultTy = cast<VectorType>(CI->getArgOperand(1)->getType());
3224 Value *MaskVec =
3225 getX86MaskVec(Builder, CI->getArgOperand(2),
3226 cast<FixedVectorType>(ResultTy)->getNumElements());
3227
3228 Rep = Builder.CreateIntrinsic(
3229 Intrinsic::masked_compressstore, ResultTy,
3230 {CI->getArgOperand(1), CI->getArgOperand(0), MaskVec});
3231 } else if (Name.starts_with("avx512.mask.compress.") ||
3232 Name.starts_with("avx512.mask.expand.")) {
3233 auto *ResultTy = cast<FixedVectorType>(CI->getType());
3234
3235 Value *MaskVec = getX86MaskVec(Builder, CI->getArgOperand(2),
3236 ResultTy->getNumElements());
3237
3238 bool IsCompress = Name[12] == 'c';
3239 Intrinsic::ID IID = IsCompress ? Intrinsic::x86_avx512_mask_compress
3240 : Intrinsic::x86_avx512_mask_expand;
3241 Rep = Builder.CreateIntrinsic(
3242 IID, ResultTy, {CI->getOperand(0), CI->getOperand(1), MaskVec});
3243 } else if (Name.starts_with("xop.vpcom")) {
3244 bool IsSigned;
3245 if (Name.ends_with("ub") || Name.ends_with("uw") || Name.ends_with("ud") ||
3246 Name.ends_with("uq"))
3247 IsSigned = false;
3248 else if (Name.ends_with("b") || Name.ends_with("w") ||
3249 Name.ends_with("d") || Name.ends_with("q"))
3250 IsSigned = true;
3251 else
3252 llvm_unreachable("Unknown suffix");
3253
3254 unsigned Imm;
3255 if (CI->arg_size() == 3) {
3256 Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3257 } else {
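// Immediate-less variants encode the comparison predicate in the name
// suffix instead of in a third operand.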
3258 Name = Name.substr(9); // strip off "xop.vpcom"
3259 if (Name.starts_with("lt"))
3260 Imm = 0;
3261 else if (Name.starts_with("le"))
3262 Imm = 1;
3263 else if (Name.starts_with("gt"))
3264 Imm = 2;
3265 else if (Name.starts_with("ge"))
3266 Imm = 3;
3267 else if (Name.starts_with("eq"))
3268 Imm = 4;
3269 else if (Name.starts_with("ne"))
3270 Imm = 5;
3271 else if (Name.starts_with("false"))
3272 Imm = 6;
3273 else if (Name.starts_with("true"))
3274 Imm = 7;
3275 else
3276 llvm_unreachable("Unknown condition");
3277 }
3278
3279 Rep = upgradeX86vpcom(Builder, *CI, Imm, IsSigned);
3280 } else if (Name.starts_with("xop.vpcmov")) {
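// vpcmov is a bitwise select: (Op0 & Sel) | (Op1 & ~Sel).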
3281 Value *Sel = CI->getArgOperand(2);
3282 Value *NotSel = Builder.CreateNot(Sel);
3283 Value *Sel0 = Builder.CreateAnd(CI->getArgOperand(0), Sel);
3284 Value *Sel1 = Builder.CreateAnd(CI->getArgOperand(1), NotSel);
3285 Rep = Builder.CreateOr(Sel0, Sel1);
3286 } else if (Name.starts_with("xop.vprot") || Name.starts_with("avx512.prol") ||
3287 Name.starts_with("avx512.mask.prol")) {
3288 Rep = upgradeX86Rotate(Builder, *CI, false);
3289 } else if (Name.starts_with("avx512.pror") ||
3290 Name.starts_with("avx512.mask.pror")) {
3291 Rep = upgradeX86Rotate(Builder, *CI, true);
3292 } else if (Name.starts_with("avx512.vpshld.") ||
3293 Name.starts_with("avx512.mask.vpshld") ||
3294 Name.starts_with("avx512.maskz.vpshld")) {
3295 bool ZeroMask = Name[11] == 'z';
3296 Rep = upgradeX86ConcatShift(Builder, *CI, false, ZeroMask);
3297 } else if (Name.starts_with("avx512.vpshrd.") ||
3298 Name.starts_with("avx512.mask.vpshrd") ||
3299 Name.starts_with("avx512.maskz.vpshrd")) {
3300 bool ZeroMask = Name[11] == 'z';
3301 Rep = upgradeX86ConcatShift(Builder, *CI, true, ZeroMask);
3302 } else if (Name == "sse42.crc32.64.8") {
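// Only the low 32 bits of the accumulator participate in the CRC;
// truncate, use the 32-bit intrinsic, then zero-extend the result to i64.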
3303 Value *Trunc0 =
3304 Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C));
3305 Rep = Builder.CreateIntrinsic(Intrinsic::x86_sse42_crc32_32_8,
3306 {Trunc0, CI->getArgOperand(1)});
3307 Rep = Builder.CreateZExt(Rep, CI->getType(), "");
3308 } else if (Name.starts_with("avx.vbroadcast.s") ||
3309 Name.starts_with("avx512.vbroadcast.s")) {
3310 // Replace broadcasts with a series of insertelements.
3311 auto *VecTy = cast<FixedVectorType>(CI->getType());
3312 Type *EltTy = VecTy->getElementType();
3313 unsigned EltNum = VecTy->getNumElements();
3314 Value *Load = Builder.CreateLoad(EltTy, CI->getArgOperand(0));
3315 Type *I32Ty = Type::getInt32Ty(C);
3316 Rep = PoisonValue::get(VecTy);
3317 for (unsigned I = 0; I < EltNum; ++I)
3318 Rep = Builder.CreateInsertElement(Rep, Load, ConstantInt::get(I32Ty, I));
3319 } else if (Name.starts_with("sse41.pmovsx") ||
3320 Name.starts_with("sse41.pmovzx") ||
3321 Name.starts_with("avx2.pmovsx") ||
3322 Name.starts_with("avx2.pmovzx") ||
3323 Name.starts_with("avx512.mask.pmovsx") ||
3324 Name.starts_with("avx512.mask.pmovzx")) {
3325 auto *DstTy = cast<FixedVectorType>(CI->getType());
3326 unsigned NumDstElts = DstTy->getNumElements();
3327
3328 // Extract a subvector of the first NumDstElts lanes and sign/zero extend.
3329 SmallVector<int, 8> ShuffleMask(NumDstElts);
3330 for (unsigned i = 0; i != NumDstElts; ++i)
3331 ShuffleMask[i] = i;
3332
3333 Value *SV = Builder.CreateShuffleVector(CI->getArgOperand(0), ShuffleMask);
3334
3335 bool DoSext = Name.contains("pmovsx");
3336 Rep =
3337 DoSext ? Builder.CreateSExt(SV, DstTy) : Builder.CreateZExt(SV, DstTy);
3338 // If there are 3 arguments, it's a masked intrinsic so we need a select.
3339 if (CI->arg_size() == 3)
3340 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3341 CI->getArgOperand(1));
3342 } else if (Name == "avx512.mask.pmov.qd.256" ||
3343 Name == "avx512.mask.pmov.qd.512" ||
3344 Name == "avx512.mask.pmov.wb.256" ||
3345 Name == "avx512.mask.pmov.wb.512") {
3346 Type *Ty = CI->getArgOperand(1)->getType();
3347 Rep = Builder.CreateTrunc(CI->getArgOperand(0), Ty);
3348 Rep =
3349 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3350 } else if (Name.starts_with("avx.vbroadcastf128") ||
3351 Name == "avx2.vbroadcasti128") {
3352 // Replace vbroadcastf128/vbroadcasti128 with a vector load+shuffle.
3353 Type *EltTy = cast<VectorType>(CI->getType())->getElementType();
3354 unsigned NumSrcElts = 128 / EltTy->getPrimitiveSizeInBits();
3355 auto *VT = FixedVectorType::get(EltTy, NumSrcElts);
3356 Value *Load = Builder.CreateAlignedLoad(VT, CI->getArgOperand(0), Align(1));
3357 if (NumSrcElts == 2)
3358 Rep = Builder.CreateShuffleVector(Load, ArrayRef<int>{0, 1, 0, 1});
3359 else
3360 Rep = Builder.CreateShuffleVector(Load,
3361 ArrayRef<int>{0, 1, 2, 3, 0, 1, 2, 3});
3362 } else if (Name.starts_with("avx512.mask.shuf.i") ||
3363 Name.starts_with("avx512.mask.shuf.f")) {
3364 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3365 Type *VT = CI->getType();
3366 unsigned NumLanes = VT->getPrimitiveSizeInBits() / 128;
3367 unsigned NumElementsInLane = 128 / VT->getScalarSizeInBits();
3368 unsigned ControlBitsMask = NumLanes - 1;
3369 unsigned NumControlBits = NumLanes / 2;
3370 SmallVector<int, 8> ShuffleMask(0);
3371
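// The low result lanes come from the first source and the high result
// lanes from the second; each lane is picked by its control bits.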
3372 for (unsigned l = 0; l != NumLanes; ++l) {
3373 unsigned LaneMask = (Imm >> (l * NumControlBits)) & ControlBitsMask;
3374 // We actually need the other source.
3375 if (l >= NumLanes / 2)
3376 LaneMask += NumLanes;
3377 for (unsigned i = 0; i != NumElementsInLane; ++i)
3378 ShuffleMask.push_back(LaneMask * NumElementsInLane + i);
3379 }
3380 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3381 CI->getArgOperand(1), ShuffleMask);
3382 Rep =
3383 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3384 } else if (Name.starts_with("avx512.mask.broadcastf") ||
3385 Name.starts_with("avx512.mask.broadcasti")) {
3386 unsigned NumSrcElts = cast<FixedVectorType>(CI->getArgOperand(0)->getType())
3387 ->getNumElements();
3388 unsigned NumDstElts =
3389 cast<FixedVectorType>(CI->getType())->getNumElements();
3390
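// Repeat the source subvector's elements across the wider destination.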
3391 SmallVector<int, 8> ShuffleMask(NumDstElts);
3392 for (unsigned i = 0; i != NumDstElts; ++i)
3393 ShuffleMask[i] = i % NumSrcElts;
3394
3395 Rep = Builder.CreateShuffleVector(CI->getArgOperand(0),
3396 CI->getArgOperand(0), ShuffleMask);
3397 Rep =
3398 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3399 } else if (Name.starts_with("avx2.pbroadcast") ||
3400 Name.starts_with("avx2.vbroadcast") ||
3401 Name.starts_with("avx512.pbroadcast") ||
3402 Name.starts_with("avx512.mask.broadcast.s")) {
3403 // Replace vp?broadcasts with a vector shuffle.
3404 Value *Op = CI->getArgOperand(0);
3405 ElementCount EC = cast<VectorType>(CI->getType())->getElementCount();
3406 Type *MaskTy = VectorType::get(Type::getInt32Ty(C), EC);
3407 SmallVector<int, 8> M;
3408 ShuffleVectorInst::getShuffleMask(Constant::getNullValue(MaskTy), M);
3409 Rep = Builder.CreateShuffleVector(Op, M);
3410
3411 if (CI->arg_size() == 3)
3412 Rep = emitX86Select(Builder, CI->getArgOperand(2), Rep,
3413 CI->getArgOperand(1));
3414 } else if (Name.starts_with("sse2.padds.") ||
3415 Name.starts_with("avx2.padds.") ||
3416 Name.starts_with("avx512.padds.") ||
3417 Name.starts_with("avx512.mask.padds.")) {
3418 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::sadd_sat);
3419 } else if (Name.starts_with("sse2.psubs.") ||
3420 Name.starts_with("avx2.psubs.") ||
3421 Name.starts_with("avx512.psubs.") ||
3422 Name.starts_with("avx512.mask.psubs.")) {
3423 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::ssub_sat);
3424 } else if (Name.starts_with("sse2.paddus.") ||
3425 Name.starts_with("avx2.paddus.") ||
3426 Name.starts_with("avx512.mask.paddus.")) {
3427 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::uadd_sat);
3428 } else if (Name.starts_with("sse2.psubus.") ||
3429 Name.starts_with("avx2.psubus.") ||
3430 Name.starts_with("avx512.mask.psubus.")) {
3431 Rep = upgradeX86BinaryIntrinsics(Builder, *CI, Intrinsic::usub_sat);
3432 } else if (Name.starts_with("avx512.mask.palignr.")) {
3433 Rep = upgradeX86ALIGNIntrinsics(Builder, CI->getArgOperand(0),
3434 CI->getArgOperand(1), CI->getArgOperand(2),
3435 CI->getArgOperand(3), CI->getArgOperand(4),
3436 false);
3437 } else if (Name.starts_with("avx512.mask.valign.")) {
3438 Rep = upgradeX86ALIGNIntrinsics(
3439 Builder, CI->getArgOperand(0), CI->getArgOperand(1),
3440 CI->getArgOperand(2), CI->getArgOperand(3), CI->getArgOperand(4), true);
3441 } else if (Name == "sse2.psll.dq" || Name == "avx2.psll.dq") {
3442 // 128/256-bit shift left specified in bits.
3443 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3444 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0),
3445 Shift / 8); // Shift is in bits.
3446 } else if (Name == "sse2.psrl.dq" || Name == "avx2.psrl.dq") {
3447 // 128/256-bit shift right specified in bits.
3448 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3449 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0),
3450 Shift / 8); // Shift is in bits.
3451 } else if (Name == "sse2.psll.dq.bs" || Name == "avx2.psll.dq.bs" ||
3452 Name == "avx512.psll.dq.512") {
3453 // 128/256/512-bit shift left specified in bytes.
3454 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3455 Rep = upgradeX86PSLLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3456 } else if (Name == "sse2.psrl.dq.bs" || Name == "avx2.psrl.dq.bs" ||
3457 Name == "avx512.psrl.dq.512") {
3458 // 128/256/512-bit shift right specified in bytes.
3459 unsigned Shift = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3460 Rep = upgradeX86PSRLDQIntrinsics(Builder, CI->getArgOperand(0), Shift);
3461 } else if (Name == "sse41.pblendw" || Name.starts_with("sse41.blendp") ||
3462 Name.starts_with("avx.blend.p") || Name == "avx2.pblendw" ||
3463 Name.starts_with("avx2.pblendd.")) {
3464 Value *Op0 = CI->getArgOperand(0);
3465 Value *Op1 = CI->getArgOperand(1);
3466 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3467 auto *VecTy = cast<FixedVectorType>(CI->getType());
3468 unsigned NumElts = VecTy->getNumElements();
3469
3470 SmallVector<int, 16> Idxs(NumElts);
3471 for (unsigned i = 0; i != NumElts; ++i)
3472 Idxs[i] = ((Imm >> (i % 8)) & 1) ? i + NumElts : i;
3473
3474 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3475 } else if (Name.starts_with("avx.vinsertf128.") ||
3476 Name == "avx2.vinserti128" ||
3477 Name.starts_with("avx512.mask.insert")) {
3478 Value *Op0 = CI->getArgOperand(0);
3479 Value *Op1 = CI->getArgOperand(1);
3480 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3481 unsigned DstNumElts =
3482 cast<FixedVectorType>(CI->getType())->getNumElements();
3483 unsigned SrcNumElts =
3484 cast<FixedVectorType>(Op1->getType())->getNumElements();
3485 unsigned Scale = DstNumElts / SrcNumElts;
3486
3487 // Mask off the high bits of the immediate value; hardware ignores those.
3488 Imm = Imm % Scale;
3489
3490 // Extend the second operand into a vector the size of the destination.
3491 SmallVector<int, 8> Idxs(DstNumElts);
3492 for (unsigned i = 0; i != SrcNumElts; ++i)
3493 Idxs[i] = i;
3494 for (unsigned i = SrcNumElts; i != DstNumElts; ++i)
3495 Idxs[i] = SrcNumElts;
3496 Rep = Builder.CreateShuffleVector(Op1, Idxs);
3497
3498 // Insert the second operand into the first operand.
3499
3500 // Note that there is no guarantee that instruction lowering will actually
3501 // produce a vinsertf128 instruction for the created shuffles. In
3502 // particular, the 0 immediate case involves no lane changes, so it can
3503 // be handled as a blend.
3504
3505 // Example of shuffle mask for 32-bit elements:
3506 // Imm = 1 <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11>
3507 // Imm = 0 <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7 >
3508
3509 // First fill with the identity mask.
3510 for (unsigned i = 0; i != DstNumElts; ++i)
3511 Idxs[i] = i;
3512 // Then replace the elements where we need to insert.
3513 for (unsigned i = 0; i != SrcNumElts; ++i)
3514 Idxs[i + Imm * SrcNumElts] = i + DstNumElts;
3515 Rep = Builder.CreateShuffleVector(Op0, Rep, Idxs);
3516
3517 // If the intrinsic has a mask operand, handle that.
3518 if (CI->arg_size() == 5)
3519 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep,
3520 CI->getArgOperand(3));
3521 } else if (Name.starts_with("avx.vextractf128.") ||
3522 Name == "avx2.vextracti128" ||
3523 Name.starts_with("avx512.mask.vextract")) {
3524 Value *Op0 = CI->getArgOperand(0);
3525 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3526 unsigned DstNumElts =
3527 cast<FixedVectorType>(CI->getType())->getNumElements();
3528 unsigned SrcNumElts =
3529 cast<FixedVectorType>(Op0->getType())->getNumElements();
3530 unsigned Scale = SrcNumElts / DstNumElts;
3531
3532 // Mask off the high bits of the immediate value; hardware ignores those.
3533 Imm = Imm % Scale;
3534
3535 // Get indexes for the subvector of the input vector.
3536 SmallVector<int, 8> Idxs(DstNumElts);
3537 for (unsigned i = 0; i != DstNumElts; ++i) {
3538 Idxs[i] = i + (Imm * DstNumElts);
3539 }
3540 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3541
3542 // If the intrinsic has a mask operand, handle that.
3543 if (CI->arg_size() == 4)
3544 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3545 CI->getArgOperand(2));
3546 } else if (Name.starts_with("avx512.mask.perm.df.") ||
3547 Name.starts_with("avx512.mask.perm.di.")) {
3548 Value *Op0 = CI->getArgOperand(0);
3549 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3550 auto *VecTy = cast<FixedVectorType>(CI->getType());
3551 unsigned NumElts = VecTy->getNumElements();
3552
3553 SmallVector<int, 8> Idxs(NumElts);
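// Permute within each group of 4 elements; an element's position in its
// group selects a 2-bit field of the immediate.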
3554 for (unsigned i = 0; i != NumElts; ++i)
3555 Idxs[i] = (i & ~0x3) + ((Imm >> (2 * (i & 0x3))) & 3);
3556
3557 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3558
3559 if (CI->arg_size() == 4)
3560 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3561 CI->getArgOperand(2));
3562 } else if (Name.starts_with("avx.vperm2f128.") || Name == "avx2.vperm2i128") {
3563 // The immediate permute control byte looks like this:
3564 // [1:0] - select 128 bits from sources for low half of destination
3565 // [2] - ignore
3566 // [3] - zero low half of destination
3567 // [5:4] - select 128 bits from sources for high half of destination
3568 // [6] - ignore
3569 // [7] - zero high half of destination
3570
3571 uint8_t Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3572
3573 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3574 unsigned HalfSize = NumElts / 2;
3575 SmallVector<int, 8> ShuffleMask(NumElts);
3576
3577 // Determine which operand(s) are actually in use for this instruction.
3578 Value *V0 = (Imm & 0x02) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3579 Value *V1 = (Imm & 0x20) ? CI->getArgOperand(1) : CI->getArgOperand(0);
3580
3581 // If needed, replace operands based on zero mask.
3582 V0 = (Imm & 0x08) ? ConstantAggregateZero::get(CI->getType()) : V0;
3583 V1 = (Imm & 0x80) ? ConstantAggregateZero::get(CI->getType()) : V1;
3584
3585 // Permute low half of result.
3586 unsigned StartIndex = (Imm & 0x01) ? HalfSize : 0;
3587 for (unsigned i = 0; i < HalfSize; ++i)
3588 ShuffleMask[i] = StartIndex + i;
3589
3590 // Permute high half of result.
3591 StartIndex = (Imm & 0x10) ? HalfSize : 0;
3592 for (unsigned i = 0; i < HalfSize; ++i)
3593 ShuffleMask[i + HalfSize] = NumElts + StartIndex + i;
3594
3595 Rep = Builder.CreateShuffleVector(V0, V1, ShuffleMask);
3596
3597 } else if (Name.starts_with("avx.vpermil.") || Name == "sse2.pshuf.d" ||
3598 Name.starts_with("avx512.mask.vpermil.p") ||
3599 Name.starts_with("avx512.mask.pshuf.d.")) {
3600 Value *Op0 = CI->getArgOperand(0);
3601 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3602 auto *VecTy = cast<FixedVectorType>(CI->getType());
3603 unsigned NumElts = VecTy->getNumElements();
3604 // Calculate the size of each index in the immediate.
3605 unsigned IdxSize = 64 / VecTy->getScalarSizeInBits();
3606 unsigned IdxMask = ((1 << IdxSize) - 1);
3607
3608 SmallVector<int, 8> Idxs(NumElts);
3609 // Look up the bits for this element, wrapping around the immediate every
3610 // 8 bits. Elements are grouped into sets of 2 or 4 elements, so we need
3611 // to offset by the first index of each group.
3612 for (unsigned i = 0; i != NumElts; ++i)
3613 Idxs[i] = ((Imm >> ((i * IdxSize) % 8)) & IdxMask) | (i & ~IdxMask);
3614
3615 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3616
3617 if (CI->arg_size() == 4)
3618 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3619 CI->getArgOperand(2));
3620 } else if (Name == "sse2.pshufl.w" ||
3621 Name.starts_with("avx512.mask.pshufl.w.")) {
3622 Value *Op0 = CI->getArgOperand(0);
3623 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3624 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3625
3626 SmallVector<int, 16> Idxs(NumElts);
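// Shuffle the low 4 words of each 128-bit lane by the immediate; the high
// 4 words pass through unchanged.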
3627 for (unsigned l = 0; l != NumElts; l += 8) {
3628 for (unsigned i = 0; i != 4; ++i)
3629 Idxs[i + l] = ((Imm >> (2 * i)) & 0x3) + l;
3630 for (unsigned i = 4; i != 8; ++i)
3631 Idxs[i + l] = i + l;
3632 }
3633
3634 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3635
3636 if (CI->arg_size() == 4)
3637 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3638 CI->getArgOperand(2));
3639 } else if (Name == "sse2.pshufh.w" ||
3640 Name.starts_with("avx512.mask.pshufh.w.")) {
3641 Value *Op0 = CI->getArgOperand(0);
3642 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
3643 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3644
3645 SmallVector<int, 16> Idxs(NumElts);
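// The low 4 words of each 128-bit lane pass through unchanged; the high 4
// words are shuffled by the immediate.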
3646 for (unsigned l = 0; l != NumElts; l += 8) {
3647 for (unsigned i = 0; i != 4; ++i)
3648 Idxs[i + l] = i + l;
3649 for (unsigned i = 0; i != 4; ++i)
3650 Idxs[i + l + 4] = ((Imm >> (2 * i)) & 0x3) + 4 + l;
3651 }
3652
3653 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3654
3655 if (CI->arg_size() == 4)
3656 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep,
3657 CI->getArgOperand(2));
3658 } else if (Name.starts_with("avx512.mask.shuf.p")) {
3659 Value *Op0 = CI->getArgOperand(0);
3660 Value *Op1 = CI->getArgOperand(1);
3661 unsigned Imm = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
3662 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3663
3664 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3665 unsigned HalfLaneElts = NumLaneElts / 2;
3666
3667 SmallVector<int, 16> Idxs(NumElts);
3668 for (unsigned i = 0; i != NumElts; ++i) {
3669 // Base index is the starting element of the lane.
3670 Idxs[i] = i - (i % NumLaneElts);
3671 // If we are halfway through the lane, switch to the other source.
3672 if ((i % NumLaneElts) >= HalfLaneElts)
3673 Idxs[i] += NumElts;
3674 // Now select the specific element by adding HalfLaneElts bits from
3675 // the immediate, wrapping around the immediate every 8 bits.
3676 Idxs[i] += (Imm >> ((i * HalfLaneElts) % 8)) & ((1 << HalfLaneElts) - 1);
3677 }
3678
3679 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3680
3681 Rep =
3682 emitX86Select(Builder, CI->getArgOperand(4), Rep, CI->getArgOperand(3));
3683 } else if (Name.starts_with("avx512.mask.movddup") ||
3684 Name.starts_with("avx512.mask.movshdup") ||
3685 Name.starts_with("avx512.mask.movsldup")) {
3686 Value *Op0 = CI->getArgOperand(0);
3687 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3688 unsigned NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3689
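// movddup/movsldup duplicate the even element of each pair; movshdup
// duplicates the odd element (Offset = 1).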
3690 unsigned Offset = 0;
3691 if (Name.starts_with("avx512.mask.movshdup."))
3692 Offset = 1;
3693
3694 SmallVector<int, 16> Idxs(NumElts);
3695 for (unsigned l = 0; l != NumElts; l += NumLaneElts)
3696 for (unsigned i = 0; i != NumLaneElts; i += 2) {
3697 Idxs[i + l + 0] = i + l + Offset;
3698 Idxs[i + l + 1] = i + l + Offset;
3699 }
3700
3701 Rep = Builder.CreateShuffleVector(Op0, Op0, Idxs);
3702
3703 Rep =
3704 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3705 } else if (Name.starts_with("avx512.mask.punpckl") ||
3706 Name.starts_with("avx512.mask.unpckl.")) {
3707 Value *Op0 = CI->getArgOperand(0);
3708 Value *Op1 = CI->getArgOperand(1);
3709 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3710 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3711
3712 SmallVector<int, 64> Idxs(NumElts);
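// Interleave the low halves of each 128-bit lane from the two sources.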
3713 for (int l = 0; l != NumElts; l += NumLaneElts)
3714 for (int i = 0; i != NumLaneElts; ++i)
3715 Idxs[i + l] = l + (i / 2) + NumElts * (i % 2);
3716
3717 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3718
3719 Rep =
3720 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3721 } else if (Name.starts_with("avx512.mask.punpckh") ||
3722 Name.starts_with("avx512.mask.unpckh.")) {
3723 Value *Op0 = CI->getArgOperand(0);
3724 Value *Op1 = CI->getArgOperand(1);
3725 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
3726 int NumLaneElts = 128 / CI->getType()->getScalarSizeInBits();
3727
3728 SmallVector<int, 64> Idxs(NumElts);
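// Interleave the high halves of each 128-bit lane from the two sources.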
3729 for (int l = 0; l != NumElts; l += NumLaneElts)
3730 for (int i = 0; i != NumLaneElts; ++i)
3731 Idxs[i + l] = (NumLaneElts / 2) + l + (i / 2) + NumElts * (i % 2);
3732
3733 Rep = Builder.CreateShuffleVector(Op0, Op1, Idxs);
3734
3735 Rep =
3736 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3737 } else if (Name.starts_with("avx512.mask.and.") ||
3738 Name.starts_with("avx512.mask.pand.")) {
3739 VectorType *FTy = cast<VectorType>(CI->getType());
3740 VectorType *ITy = VectorType::getInteger(FTy);
3741 Rep = Builder.CreateAnd(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3742 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3743 Rep = Builder.CreateBitCast(Rep, FTy);
3744 Rep =
3745 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3746 } else if (Name.starts_with("avx512.mask.andn.") ||
3747 Name.starts_with("avx512.mask.pandn.")) {
3748 VectorType *FTy = cast<VectorType>(CI->getType());
3749 VectorType *ITy = VectorType::getInteger(FTy);
3750 Rep = Builder.CreateNot(Builder.CreateBitCast(CI->getArgOperand(0), ITy));
3751 Rep = Builder.CreateAnd(Rep,
3752 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3753 Rep = Builder.CreateBitCast(Rep, FTy);
3754 Rep =
3755 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3756 } else if (Name.starts_with("avx512.mask.or.") ||
3757 Name.starts_with("avx512.mask.por.")) {
3758 VectorType *FTy = cast<VectorType>(CI->getType());
3759 VectorType *ITy = VectorType::getInteger(FTy);
3760 Rep = Builder.CreateOr(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3761 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3762 Rep = Builder.CreateBitCast(Rep, FTy);
3763 Rep =
3764 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3765 } else if (Name.starts_with("avx512.mask.xor.") ||
3766 Name.starts_with("avx512.mask.pxor.")) {
3767 VectorType *FTy = cast<VectorType>(CI->getType());
3768 VectorType *ITy = VectorType::getInteger(FTy);
3769 Rep = Builder.CreateXor(Builder.CreateBitCast(CI->getArgOperand(0), ITy),
3770 Builder.CreateBitCast(CI->getArgOperand(1), ITy));
3771 Rep = Builder.CreateBitCast(Rep, FTy);
3772 Rep =
3773 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3774 } else if (Name.starts_with("avx512.mask.padd.")) {
3775 Rep = Builder.CreateAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3776 Rep =
3777 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3778 } else if (Name.starts_with("avx512.mask.psub.")) {
3779 Rep = Builder.CreateSub(CI->getArgOperand(0), CI->getArgOperand(1));
3780 Rep =
3781 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3782 } else if (Name.starts_with("avx512.mask.pmull.")) {
3783 Rep = Builder.CreateMul(CI->getArgOperand(0), CI->getArgOperand(1));
3784 Rep =
3785 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3786 } else if (Name.starts_with("avx512.mask.add.p")) {
3787 if (Name.ends_with(".512")) {
3788 Intrinsic::ID IID;
3789 if (Name[17] == 's')
3790 IID = Intrinsic::x86_avx512_add_ps_512;
3791 else
3792 IID = Intrinsic::x86_avx512_add_pd_512;
3793
3794 Rep = Builder.CreateIntrinsic(
3795 IID,
3796 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3797 } else {
3798 Rep = Builder.CreateFAdd(CI->getArgOperand(0), CI->getArgOperand(1));
3799 }
3800 Rep =
3801 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3802 } else if (Name.starts_with("avx512.mask.div.p")) {
3803 if (Name.ends_with(".512")) {
3804 Intrinsic::ID IID;
3805 if (Name[17] == 's')
3806 IID = Intrinsic::x86_avx512_div_ps_512;
3807 else
3808 IID = Intrinsic::x86_avx512_div_pd_512;
3809
3810 Rep = Builder.CreateIntrinsic(
3811 IID,
3812 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3813 } else {
3814 Rep = Builder.CreateFDiv(CI->getArgOperand(0), CI->getArgOperand(1));
3815 }
3816 Rep =
3817 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3818 } else if (Name.starts_with("avx512.mask.mul.p")) {
3819 if (Name.ends_with(".512")) {
3820 Intrinsic::ID IID;
3821 if (Name[17] == 's')
3822 IID = Intrinsic::x86_avx512_mul_ps_512;
3823 else
3824 IID = Intrinsic::x86_avx512_mul_pd_512;
3825
3826 Rep = Builder.CreateIntrinsic(
3827 IID,
3828 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3829 } else {
3830 Rep = Builder.CreateFMul(CI->getArgOperand(0), CI->getArgOperand(1));
3831 }
3832 Rep =
3833 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3834 } else if (Name.starts_with("avx512.mask.sub.p")) {
3835 if (Name.ends_with(".512")) {
3836 Intrinsic::ID IID;
3837 if (Name[17] == 's')
3838 IID = Intrinsic::x86_avx512_sub_ps_512;
3839 else
3840 IID = Intrinsic::x86_avx512_sub_pd_512;
3841
3842 Rep = Builder.CreateIntrinsic(
3843 IID,
3844 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3845 } else {
3846 Rep = Builder.CreateFSub(CI->getArgOperand(0), CI->getArgOperand(1));
3847 }
3848 Rep =
3849 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3850 } else if ((Name.starts_with("avx512.mask.max.p") ||
3851 Name.starts_with("avx512.mask.min.p")) &&
3852 Name.drop_front(18) == ".512") {
3853 bool IsDouble = Name[17] == 'd';
3854 bool IsMin = Name[13] == 'i';
3855 static const Intrinsic::ID MinMaxTbl[2][2] = {
3856 {Intrinsic::x86_avx512_max_ps_512, Intrinsic::x86_avx512_max_pd_512},
3857 {Intrinsic::x86_avx512_min_ps_512, Intrinsic::x86_avx512_min_pd_512}};
3858 Intrinsic::ID IID = MinMaxTbl[IsMin][IsDouble];
3859
3860 Rep = Builder.CreateIntrinsic(
3861 IID,
3862 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(4)});
3863 Rep =
3864 emitX86Select(Builder, CI->getArgOperand(3), Rep, CI->getArgOperand(2));
3865 } else if (Name.starts_with("avx512.mask.lzcnt.")) {
3866 Rep =
3867 Builder.CreateIntrinsic(Intrinsic::ctlz, CI->getType(),
3868 {CI->getArgOperand(0), Builder.getInt1(false)});
3869 Rep =
3870 emitX86Select(Builder, CI->getArgOperand(2), Rep, CI->getArgOperand(1));
3871 } else if (Name.starts_with("avx512.mask.psll")) {
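// Decode from the mangled name whether this is the immediate ('i') or
// variable ('v') shift form and which element size it operates on.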
3872 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3873 bool IsVariable = Name[16] == 'v';
3874 char Size = Name[16] == '.' ? Name[17]
3875 : Name[17] == '.' ? Name[18]
3876 : Name[18] == '.' ? Name[19]
3877 : Name[20];
3878
3879 Intrinsic::ID IID;
3880 if (IsVariable && Name[17] != '.') {
3881 if (Size == 'd' && Name[17] == '2') // avx512.mask.psllv2.di
3882 IID = Intrinsic::x86_avx2_psllv_q;
3883 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psllv4.di
3884 IID = Intrinsic::x86_avx2_psllv_q_256;
3885 else if (Size == 's' && Name[17] == '4') // avx512.mask.psllv4.si
3886 IID = Intrinsic::x86_avx2_psllv_d;
3887 else if (Size == 's' && Name[17] == '8') // avx512.mask.psllv8.si
3888 IID = Intrinsic::x86_avx2_psllv_d_256;
3889 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psllv8.hi
3890 IID = Intrinsic::x86_avx512_psllv_w_128;
3891 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psllv16.hi
3892 IID = Intrinsic::x86_avx512_psllv_w_256;
3893 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psllv32hi
3894 IID = Intrinsic::x86_avx512_psllv_w_512;
3895 else
3896 llvm_unreachable("Unexpected size");
3897 } else if (Name.ends_with(".128")) {
3898 if (Size == 'd') // avx512.mask.psll.d.128, avx512.mask.psll.di.128
3899 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_d
3900 : Intrinsic::x86_sse2_psll_d;
3901 else if (Size == 'q') // avx512.mask.psll.q.128, avx512.mask.psll.qi.128
3902 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_q
3903 : Intrinsic::x86_sse2_psll_q;
3904 else if (Size == 'w') // avx512.mask.psll.w.128, avx512.mask.psll.wi.128
3905 IID = IsImmediate ? Intrinsic::x86_sse2_pslli_w
3906 : Intrinsic::x86_sse2_psll_w;
3907 else
3908 llvm_unreachable("Unexpected size");
3909 } else if (Name.ends_with(".256")) {
3910 if (Size == 'd') // avx512.mask.psll.d.256, avx512.mask.psll.di.256
3911 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_d
3912 : Intrinsic::x86_avx2_psll_d;
3913 else if (Size == 'q') // avx512.mask.psll.q.256, avx512.mask.psll.qi.256
3914 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_q
3915 : Intrinsic::x86_avx2_psll_q;
3916 else if (Size == 'w') // avx512.mask.psll.w.256, avx512.mask.psll.wi.256
3917 IID = IsImmediate ? Intrinsic::x86_avx2_pslli_w
3918 : Intrinsic::x86_avx2_psll_w;
3919 else
3920 llvm_unreachable("Unexpected size");
3921 } else {
3922 if (Size == 'd') // psll.di.512, pslli.d, psll.d, psllv.d.512
3923 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_d_512
3924 : IsVariable ? Intrinsic::x86_avx512_psllv_d_512
3925 : Intrinsic::x86_avx512_psll_d_512;
3926 else if (Size == 'q') // psll.qi.512, pslli.q, psll.q, psllv.q.512
3927 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_q_512
3928 : IsVariable ? Intrinsic::x86_avx512_psllv_q_512
3929 : Intrinsic::x86_avx512_psll_q_512;
3930 else if (Size == 'w') // psll.wi.512, pslli.w, psll.w
3931 IID = IsImmediate ? Intrinsic::x86_avx512_pslli_w_512
3932 : Intrinsic::x86_avx512_psll_w_512;
3933 else
3934 llvm_unreachable("Unexpected size");
3935 }
3936
3937 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
3938 } else if (Name.starts_with("avx512.mask.psrl")) {
3939 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
3940 bool IsVariable = Name[16] == 'v';
3941 char Size = Name[16] == '.' ? Name[17]
3942 : Name[17] == '.' ? Name[18]
3943 : Name[18] == '.' ? Name[19]
3944 : Name[20];
3945
3946 Intrinsic::ID IID;
3947 if (IsVariable && Name[17] != '.') {
3948 if (Size == 'd' && Name[17] == '2') // avx512.mask.psrlv2.di
3949 IID = Intrinsic::x86_avx2_psrlv_q;
3950 else if (Size == 'd' && Name[17] == '4') // avx512.mask.psrlv4.di
3951 IID = Intrinsic::x86_avx2_psrlv_q_256;
3952 else if (Size == 's' && Name[17] == '4') // avx512.mask.psrlv4.si
3953 IID = Intrinsic::x86_avx2_psrlv_d;
3954 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrlv8.si
3955 IID = Intrinsic::x86_avx2_psrlv_d_256;
3956 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrlv8.hi
3957 IID = Intrinsic::x86_avx512_psrlv_w_128;
3958 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrlv16.hi
3959 IID = Intrinsic::x86_avx512_psrlv_w_256;
3960 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrlv32hi
3961 IID = Intrinsic::x86_avx512_psrlv_w_512;
3962 else
3963 llvm_unreachable("Unexpected size");
3964 } else if (Name.ends_with(".128")) {
3965 if (Size == 'd') // avx512.mask.psrl.d.128, avx512.mask.psrl.di.128
3966 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_d
3967 : Intrinsic::x86_sse2_psrl_d;
3968 else if (Size == 'q') // avx512.mask.psrl.q.128, avx512.mask.psrl.qi.128
3969 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_q
3970 : Intrinsic::x86_sse2_psrl_q;
3971 else if (Size == 'w') // avx512.mask.psrl.w.128, avx512.mask.psrl.wi.128
3972 IID = IsImmediate ? Intrinsic::x86_sse2_psrli_w
3973 : Intrinsic::x86_sse2_psrl_w;
3974 else
3975 llvm_unreachable("Unexpected size");
3976 } else if (Name.ends_with(".256")) {
3977 if (Size == 'd') // avx512.mask.psrl.d.256, avx512.mask.psrl.di.256
3978 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_d
3979 : Intrinsic::x86_avx2_psrl_d;
3980 else if (Size == 'q') // avx512.mask.psrl.q.256, avx512.mask.psrl.qi.256
3981 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_q
3982 : Intrinsic::x86_avx2_psrl_q;
3983 else if (Size == 'w') // avx512.mask.psrl.w.256, avx512.mask.psrl.wi.256
3984 IID = IsImmediate ? Intrinsic::x86_avx2_psrli_w
3985 : Intrinsic::x86_avx2_psrl_w;
3986 else
3987 llvm_unreachable("Unexpected size");
3988 } else {
3989 if (Size == 'd') // psrl.di.512, psrli.d, psrl.d, psrl.d.512
3990 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_d_512
3991 : IsVariable ? Intrinsic::x86_avx512_psrlv_d_512
3992 : Intrinsic::x86_avx512_psrl_d_512;
3993 else if (Size == 'q') // psrl.qi.512, psrli.q, psrl.q, psrl.q.512
3994 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_q_512
3995 : IsVariable ? Intrinsic::x86_avx512_psrlv_q_512
3996 : Intrinsic::x86_avx512_psrl_q_512;
3997 else if (Size == 'w') // psrl.wi.512, psrli.w, psrl.w
3998 IID = IsImmediate ? Intrinsic::x86_avx512_psrli_w_512
3999 : Intrinsic::x86_avx512_psrl_w_512;
4000 else
4001 llvm_unreachable("Unexpected size");
4002 }
4003
4004 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4005 } else if (Name.starts_with("avx512.mask.psra")) {
4006 bool IsImmediate = Name[16] == 'i' || (Name.size() > 18 && Name[18] == 'i');
4007 bool IsVariable = Name[16] == 'v';
4008 char Size = Name[16] == '.' ? Name[17]
4009 : Name[17] == '.' ? Name[18]
4010 : Name[18] == '.' ? Name[19]
4011 : Name[20];
4012
4013 Intrinsic::ID IID;
4014 if (IsVariable && Name[17] != '.') {
4015 if (Size == 's' && Name[17] == '4') // avx512.mask.psrav4.si
4016 IID = Intrinsic::x86_avx2_psrav_d;
4017 else if (Size == 's' && Name[17] == '8') // avx512.mask.psrav8.si
4018 IID = Intrinsic::x86_avx2_psrav_d_256;
4019 else if (Size == 'h' && Name[17] == '8') // avx512.mask.psrav8.hi
4020 IID = Intrinsic::x86_avx512_psrav_w_128;
4021 else if (Size == 'h' && Name[17] == '1') // avx512.mask.psrav16.hi
4022 IID = Intrinsic::x86_avx512_psrav_w_256;
4023 else if (Name[17] == '3' && Name[18] == '2') // avx512.mask.psrav32hi
4024 IID = Intrinsic::x86_avx512_psrav_w_512;
4025 else
4026 llvm_unreachable("Unexpected size");
4027 } else if (Name.ends_with(".128")) {
4028 if (Size == 'd') // avx512.mask.psra.d.128, avx512.mask.psra.di.128
4029 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_d
4030 : Intrinsic::x86_sse2_psra_d;
4031 else if (Size == 'q') // avx512.mask.psra.q.128, avx512.mask.psra.qi.128
4032 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_128
4033 : IsVariable ? Intrinsic::x86_avx512_psrav_q_128
4034 : Intrinsic::x86_avx512_psra_q_128;
4035 else if (Size == 'w') // avx512.mask.psra.w.128, avx512.mask.psra.wi.128
4036 IID = IsImmediate ? Intrinsic::x86_sse2_psrai_w
4037 : Intrinsic::x86_sse2_psra_w;
4038 else
4039 llvm_unreachable("Unexpected size");
4040 } else if (Name.ends_with(".256")) {
4041 if (Size == 'd') // avx512.mask.psra.d.256, avx512.mask.psra.di.256
4042 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_d
4043 : Intrinsic::x86_avx2_psra_d;
4044 else if (Size == 'q') // avx512.mask.psra.q.256, avx512.mask.psra.qi.256
4045 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_256
4046 : IsVariable ? Intrinsic::x86_avx512_psrav_q_256
4047 : Intrinsic::x86_avx512_psra_q_256;
4048 else if (Size == 'w') // avx512.mask.psra.w.256, avx512.mask.psra.wi.256
4049 IID = IsImmediate ? Intrinsic::x86_avx2_psrai_w
4050 : Intrinsic::x86_avx2_psra_w;
4051 else
4052 llvm_unreachable("Unexpected size");
4053 } else {
4054 if (Size == 'd') // psra.di.512, psrai.d, psra.d, psrav.d.512
4055 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_d_512
4056 : IsVariable ? Intrinsic::x86_avx512_psrav_d_512
4057 : Intrinsic::x86_avx512_psra_d_512;
4058 else if (Size == 'q') // psra.qi.512, psrai.q, psra.q
4059 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_q_512
4060 : IsVariable ? Intrinsic::x86_avx512_psrav_q_512
4061 : Intrinsic::x86_avx512_psra_q_512;
4062 else if (Size == 'w') // psra.wi.512, psrai.w, psra.w
4063 IID = IsImmediate ? Intrinsic::x86_avx512_psrai_w_512
4064 : Intrinsic::x86_avx512_psra_w_512;
4065 else
4066 llvm_unreachable("Unexpected size");
4067 }
4068
4069 Rep = upgradeX86MaskedShift(Builder, *CI, IID);
4070 } else if (Name.starts_with("avx512.mask.move.s")) {
4071 Rep = upgradeMaskedMove(Builder, *CI);
4072 } else if (Name.starts_with("avx512.cvtmask2")) {
4073 Rep = upgradeMaskToInt(Builder, *CI);
4074 } else if (Name.ends_with(".movntdqa")) {
4075 MDNode *Node = MDNode::get(
4076 C, ConstantAsMetadata::get(ConstantInt::get(Type::getInt32Ty(C), 1)));
4077
4078 LoadInst *LI = Builder.CreateAlignedLoad(
4079 CI->getType(), CI->getArgOperand(0),
4080 Align(CI->getType()->getPrimitiveSizeInBits().getFixedValue() / 8));
4081 LI->setMetadata(LLVMContext::MD_nontemporal, Node);
4082 Rep = LI;
4083 } else if (Name.starts_with("fma.vfmadd.") ||
4084 Name.starts_with("fma.vfmsub.") ||
4085 Name.starts_with("fma.vfnmadd.") ||
4086 Name.starts_with("fma.vfnmsub.")) {
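// Decode negated-multiply (vfnmadd/vfnmsub), negated-accumulator
// (vfmsub/vfnmsub) and scalar ('.s*') forms from the name.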
4087 bool NegMul = Name[6] == 'n';
4088 bool NegAcc = NegMul ? Name[8] == 's' : Name[7] == 's';
4089 bool IsScalar = NegMul ? Name[12] == 's' : Name[11] == 's';
4090
4091 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4092 CI->getArgOperand(2)};
4093
4094 if (IsScalar) {
4095 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4096 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4097 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4098 }
4099
4100 if (NegMul && !IsScalar)
4101 Ops[0] = Builder.CreateFNeg(Ops[0]);
4102 if (NegMul && IsScalar)
4103 Ops[1] = Builder.CreateFNeg(Ops[1]);
4104 if (NegAcc)
4105 Ops[2] = Builder.CreateFNeg(Ops[2]);
4106
4107 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4108
4109 if (IsScalar)
4110 Rep = Builder.CreateInsertElement(CI->getArgOperand(0), Rep, (uint64_t)0);
4111 } else if (Name.starts_with("fma4.vfmadd.s")) {
4112 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4113 CI->getArgOperand(2)};
4114
4115 Ops[0] = Builder.CreateExtractElement(Ops[0], (uint64_t)0);
4116 Ops[1] = Builder.CreateExtractElement(Ops[1], (uint64_t)0);
4117 Ops[2] = Builder.CreateExtractElement(Ops[2], (uint64_t)0);
4118
4119 Rep = Builder.CreateIntrinsic(Intrinsic::fma, Ops[0]->getType(), Ops);
4120
4121 Rep = Builder.CreateInsertElement(Constant::getNullValue(CI->getType()),
4122 Rep, (uint64_t)0);
4123 } else if (Name.starts_with("avx512.mask.vfmadd.s") ||
4124 Name.starts_with("avx512.maskz.vfmadd.s") ||
4125 Name.starts_with("avx512.mask3.vfmadd.s") ||
4126 Name.starts_with("avx512.mask3.vfmsub.s") ||
4127 Name.starts_with("avx512.mask3.vfnmsub.s")) {
4128 bool IsMask3 = Name[11] == '3';
4129 bool IsMaskZ = Name[11] == 'z';
4130 // Drop the "avx512.mask." to make it easier.
4131 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4132 bool NegMul = Name[2] == 'n';
4133 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4134
4135 Value *A = CI->getArgOperand(0);
4136 Value *B = CI->getArgOperand(1);
4137 Value *C = CI->getArgOperand(2);
4138
4139 if (NegMul && (IsMask3 || IsMaskZ))
4140 A = Builder.CreateFNeg(A);
4141 if (NegMul && !(IsMask3 || IsMaskZ))
4142 B = Builder.CreateFNeg(B);
4143 if (NegAcc)
4144 C = Builder.CreateFNeg(C);
4145
4146 A = Builder.CreateExtractElement(A, (uint64_t)0);
4147 B = Builder.CreateExtractElement(B, (uint64_t)0);
4148 C = Builder.CreateExtractElement(C, (uint64_t)0);
4149
4150 if (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4151 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4) {
4152 Value *Ops[] = {A, B, C, CI->getArgOperand(4)};
4153
4154 Intrinsic::ID IID;
4155 if (Name.back() == 'd')
4156 IID = Intrinsic::x86_avx512_vfmadd_f64;
4157 else
4158 IID = Intrinsic::x86_avx512_vfmadd_f32;
4159 Rep = Builder.CreateIntrinsic(IID, Ops);
4160 } else {
4161 Rep = Builder.CreateFMA(A, B, C);
4162 }
4163
4164 Value *PassThru = IsMaskZ ? Constant::getNullValue(Rep->getType())
4165 : IsMask3 ? C
4166 : A;
4167
4168 // For Mask3 with NegAcc, we need to create a new extractelement that
4169 // avoids the negation above.
4170 if (NegAcc && IsMask3)
4171 PassThru =
4172 Builder.CreateExtractElement(CI->getArgOperand(2), (uint64_t)0);
4173
4174 Rep = emitX86ScalarSelect(Builder, CI->getArgOperand(3), Rep, PassThru);
4175 Rep = Builder.CreateInsertElement(CI->getArgOperand(IsMask3 ? 2 : 0), Rep,
4176 (uint64_t)0);
4177 } else if (Name.starts_with("avx512.mask.vfmadd.p") ||
4178 Name.starts_with("avx512.mask.vfnmadd.p") ||
4179 Name.starts_with("avx512.mask.vfnmsub.p") ||
4180 Name.starts_with("avx512.mask3.vfmadd.p") ||
4181 Name.starts_with("avx512.mask3.vfmsub.p") ||
4182 Name.starts_with("avx512.mask3.vfnmsub.p") ||
4183 Name.starts_with("avx512.maskz.vfmadd.p")) {
4184 bool IsMask3 = Name[11] == '3';
4185 bool IsMaskZ = Name[11] == 'z';
4186 // Drop the "avx512.mask." to make it easier.
4187 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4188 bool NegMul = Name[2] == 'n';
4189 bool NegAcc = NegMul ? Name[4] == 's' : Name[3] == 's';
4190
4191 Value *A = CI->getArgOperand(0);
4192 Value *B = CI->getArgOperand(1);
4193 Value *C = CI->getArgOperand(2);
4194
4195 if (NegMul && (IsMask3 || IsMaskZ))
4196 A = Builder.CreateFNeg(A);
4197 if (NegMul && !(IsMask3 || IsMaskZ))
4198 B = Builder.CreateFNeg(B);
4199 if (NegAcc)
4200 C = Builder.CreateFNeg(C);
4201
4202 if (CI->arg_size() == 5 &&
4203 (!isa<ConstantInt>(CI->getArgOperand(4)) ||
4204 cast<ConstantInt>(CI->getArgOperand(4))->getZExtValue() != 4)) {
4205 Intrinsic::ID IID;
4206 // Check the character before ".512" in the string.
4207 if (Name[Name.size() - 5] == 's')
4208 IID = Intrinsic::x86_avx512_vfmadd_ps_512;
4209 else
4210 IID = Intrinsic::x86_avx512_vfmadd_pd_512;
4211
4212 Rep = Builder.CreateIntrinsic(IID, {A, B, C, CI->getArgOperand(4)});
4213 } else {
4214 Rep = Builder.CreateFMA(A, B, C);
4215 }
4216
4217 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4218 : IsMask3 ? CI->getArgOperand(2)
4219 : CI->getArgOperand(0);
4220
4221 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4222 } else if (Name.starts_with("fma.vfmsubadd.p")) {
4223 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4224 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4225 Intrinsic::ID IID;
4226 if (VecWidth == 128 && EltWidth == 32)
4227 IID = Intrinsic::x86_fma_vfmaddsub_ps;
4228 else if (VecWidth == 256 && EltWidth == 32)
4229 IID = Intrinsic::x86_fma_vfmaddsub_ps_256;
4230 else if (VecWidth == 128 && EltWidth == 64)
4231 IID = Intrinsic::x86_fma_vfmaddsub_pd;
4232 else if (VecWidth == 256 && EltWidth == 64)
4233 IID = Intrinsic::x86_fma_vfmaddsub_pd_256;
4234 else
4235 llvm_unreachable("Unexpected intrinsic");
4236
4237 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4238 CI->getArgOperand(2)};
4239 Ops[2] = Builder.CreateFNeg(Ops[2]);
4240 Rep = Builder.CreateIntrinsic(IID, Ops);
4241 } else if (Name.starts_with("avx512.mask.vfmaddsub.p") ||
4242 Name.starts_with("avx512.mask3.vfmaddsub.p") ||
4243 Name.starts_with("avx512.maskz.vfmaddsub.p") ||
4244 Name.starts_with("avx512.mask3.vfmsubadd.p")) {
4245 bool IsMask3 = Name[11] == '3';
4246 bool IsMaskZ = Name[11] == 'z';
4247 // Drop the "avx512.mask." to make it easier.
4248 Name = Name.drop_front(IsMask3 || IsMaskZ ? 13 : 12);
4249 bool IsSubAdd = Name[3] == 's';
4250 if (CI->arg_size() == 5) {
4251 Intrinsic::ID IID;
4252 // Check the character before ".512" in the string.
4253 if (Name[Name.size() - 5] == 's')
4254 IID = Intrinsic::x86_avx512_vfmaddsub_ps_512;
4255 else
4256 IID = Intrinsic::x86_avx512_vfmaddsub_pd_512;
4257
4258 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4259 CI->getArgOperand(2), CI->getArgOperand(4)};
4260 if (IsSubAdd)
4261 Ops[2] = Builder.CreateFNeg(Ops[2]);
4262
4263 Rep = Builder.CreateIntrinsic(IID, Ops);
4264 } else {
4265 int NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
4266
4267 Value *Ops[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4268 CI->getArgOperand(2)};
4269
4270 Function *FMA = Intrinsic::getOrInsertDeclaration(
4271 CI->getModule(), Intrinsic::fma, Ops[0]->getType());
4272 Value *Odd = Builder.CreateCall(FMA, Ops);
4273 Ops[2] = Builder.CreateFNeg(Ops[2]);
4274 Value *Even = Builder.CreateCall(FMA, Ops);
4275
4276 if (IsSubAdd)
4277 std::swap(Even, Odd);
4278
4279 SmallVector<int, 32> Idxs(NumElts);
4280 for (int i = 0; i != NumElts; ++i)
4281 Idxs[i] = i + (i % 2) * NumElts;
4282
4283 Rep = Builder.CreateShuffleVector(Even, Odd, Idxs);
4284 }
4285
4286 Value *PassThru = IsMaskZ ? llvm::Constant::getNullValue(CI->getType())
4287 : IsMask3 ? CI->getArgOperand(2)
4288 : CI->getArgOperand(0);
4289
4290 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4291 } else if (Name.starts_with("avx512.mask.pternlog.") ||
4292 Name.starts_with("avx512.maskz.pternlog.")) {
4293 bool ZeroMask = Name[11] == 'z';
4294 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4295 unsigned EltWidth = CI->getType()->getScalarSizeInBits();
4296 Intrinsic::ID IID;
4297 if (VecWidth == 128 && EltWidth == 32)
4298 IID = Intrinsic::x86_avx512_pternlog_d_128;
4299 else if (VecWidth == 256 && EltWidth == 32)
4300 IID = Intrinsic::x86_avx512_pternlog_d_256;
4301 else if (VecWidth == 512 && EltWidth == 32)
4302 IID = Intrinsic::x86_avx512_pternlog_d_512;
4303 else if (VecWidth == 128 && EltWidth == 64)
4304 IID = Intrinsic::x86_avx512_pternlog_q_128;
4305 else if (VecWidth == 256 && EltWidth == 64)
4306 IID = Intrinsic::x86_avx512_pternlog_q_256;
4307 else if (VecWidth == 512 && EltWidth == 64)
4308 IID = Intrinsic::x86_avx512_pternlog_q_512;
4309 else
4310 llvm_unreachable("Unexpected intrinsic");
4311
4312 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4313 CI->getArgOperand(2), CI->getArgOperand(3)};
4314 Rep = Builder.CreateIntrinsic(IID, Args);
4315 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4316 : CI->getArgOperand(0);
4317 Rep = emitX86Select(Builder, CI->getArgOperand(4), Rep, PassThru);
4318 } else if (Name.starts_with("avx512.mask.vpmadd52") ||
4319 Name.starts_with("avx512.maskz.vpmadd52")) {
4320 bool ZeroMask = Name[11] == 'z';
4321 bool High = Name[20] == 'h' || Name[21] == 'h';
4322 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4323 Intrinsic::ID IID;
4324 if (VecWidth == 128 && !High)
4325 IID = Intrinsic::x86_avx512_vpmadd52l_uq_128;
4326 else if (VecWidth == 256 && !High)
4327 IID = Intrinsic::x86_avx512_vpmadd52l_uq_256;
4328 else if (VecWidth == 512 && !High)
4329 IID = Intrinsic::x86_avx512_vpmadd52l_uq_512;
4330 else if (VecWidth == 128 && High)
4331 IID = Intrinsic::x86_avx512_vpmadd52h_uq_128;
4332 else if (VecWidth == 256 && High)
4333 IID = Intrinsic::x86_avx512_vpmadd52h_uq_256;
4334 else if (VecWidth == 512 && High)
4335 IID = Intrinsic::x86_avx512_vpmadd52h_uq_512;
4336 else
4337 llvm_unreachable("Unexpected intrinsic");
4338
4339 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4340 CI->getArgOperand(2)};
4341 Rep = Builder.CreateIntrinsic(IID, Args);
4342 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4343 : CI->getArgOperand(0);
4344 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4345 } else if (Name.starts_with("avx512.mask.vpermi2var.") ||
4346 Name.starts_with("avx512.mask.vpermt2var.") ||
4347 Name.starts_with("avx512.maskz.vpermt2var.")) {
4348 bool ZeroMask = Name[11] == 'z';
4349 bool IndexForm = Name[17] == 'i';
4350 Rep = upgradeX86VPERMT2Intrinsics(Builder, *CI, ZeroMask, IndexForm);
4351 } else if (Name.starts_with("avx512.mask.vpdpbusd.") ||
4352 Name.starts_with("avx512.maskz.vpdpbusd.") ||
4353 Name.starts_with("avx512.mask.vpdpbusds.") ||
4354 Name.starts_with("avx512.maskz.vpdpbusds.")) {
4355 bool ZeroMask = Name[11] == 'z';
4356 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4357 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4358 Intrinsic::ID IID;
4359 if (VecWidth == 128 && !IsSaturating)
4360 IID = Intrinsic::x86_avx512_vpdpbusd_128;
4361 else if (VecWidth == 256 && !IsSaturating)
4362 IID = Intrinsic::x86_avx512_vpdpbusd_256;
4363 else if (VecWidth == 512 && !IsSaturating)
4364 IID = Intrinsic::x86_avx512_vpdpbusd_512;
4365 else if (VecWidth == 128 && IsSaturating)
4366 IID = Intrinsic::x86_avx512_vpdpbusds_128;
4367 else if (VecWidth == 256 && IsSaturating)
4368 IID = Intrinsic::x86_avx512_vpdpbusds_256;
4369 else if (VecWidth == 512 && IsSaturating)
4370 IID = Intrinsic::x86_avx512_vpdpbusds_512;
4371 else
4372 llvm_unreachable("Unexpected intrinsic");
4373
4374 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4375 CI->getArgOperand(2)};
4376
4377 // Input argument types were incorrectly set to vectors of i32 before, but
4378 // they should be vectors of i8. Insert a bitcast when encountering the old
4379 // types.
4380 if (Args[1]->getType()->isVectorTy() &&
4381 cast<VectorType>(Args[1]->getType())
4382 ->getElementType()
4383 ->isIntegerTy(32) &&
4384 Args[2]->getType()->isVectorTy() &&
4385 cast<VectorType>(Args[2]->getType())
4386 ->getElementType()
4387 ->isIntegerTy(32)) {
4388 Type *NewArgType = nullptr;
4389 if (VecWidth == 128)
4390 NewArgType = VectorType::get(Builder.getInt8Ty(), 16, false);
4391 else if (VecWidth == 256)
4392 NewArgType = VectorType::get(Builder.getInt8Ty(), 32, false);
4393 else if (VecWidth == 512)
4394 NewArgType = VectorType::get(Builder.getInt8Ty(), 64, false);
4395 else
4396 llvm_unreachable("Unexpected vector bit width");
4397
4398 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4399 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4400 }
4401
4402 Rep = Builder.CreateIntrinsic(IID, Args);
4403 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4404 : CI->getArgOperand(0);
4405 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4406 } else if (Name.starts_with("avx512.mask.vpdpwssd.") ||
4407 Name.starts_with("avx512.maskz.vpdpwssd.") ||
4408 Name.starts_with("avx512.mask.vpdpwssds.") ||
4409 Name.starts_with("avx512.maskz.vpdpwssds.")) {
4410 bool ZeroMask = Name[11] == 'z';
4411 bool IsSaturating = Name[ZeroMask ? 21 : 20] == 's';
4412 unsigned VecWidth = CI->getType()->getPrimitiveSizeInBits();
4413 Intrinsic::ID IID;
4414 if (VecWidth == 128 && !IsSaturating)
4415 IID = Intrinsic::x86_avx512_vpdpwssd_128;
4416 else if (VecWidth == 256 && !IsSaturating)
4417 IID = Intrinsic::x86_avx512_vpdpwssd_256;
4418 else if (VecWidth == 512 && !IsSaturating)
4419 IID = Intrinsic::x86_avx512_vpdpwssd_512;
4420 else if (VecWidth == 128 && IsSaturating)
4421 IID = Intrinsic::x86_avx512_vpdpwssds_128;
4422 else if (VecWidth == 256 && IsSaturating)
4423 IID = Intrinsic::x86_avx512_vpdpwssds_256;
4424 else if (VecWidth == 512 && IsSaturating)
4425 IID = Intrinsic::x86_avx512_vpdpwssds_512;
4426 else
4427 llvm_unreachable("Unexpected intrinsic");
4428
4429 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4430 CI->getArgOperand(2)};
4431
4432 // The input argument types were incorrectly set to vectors of i32; they
4433 // should be vectors of i16. Insert a bitcast when encountering the old
4434 // types.
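// Similarly (schematic, 128-bit case): the old <4 x i32> %a/%b operands of
// @llvm.x86.avx512.mask.vpdpwssd.128 are bitcast to <8 x i16> before calling
// the new unmasked intrinsic, with the mask applied by the select below.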
4435 if (Args[1]->getType()->isVectorTy() &&
4436 cast<VectorType>(Args[1]->getType())
4437 ->getElementType()
4438 ->isIntegerTy(32) &&
4439 Args[2]->getType()->isVectorTy() &&
4440 cast<VectorType>(Args[2]->getType())
4441 ->getElementType()
4442 ->isIntegerTy(32)) {
4443 Type *NewArgType = nullptr;
4444 if (VecWidth == 128)
4445 NewArgType = VectorType::get(Builder.getInt16Ty(), 8, false);
4446 else if (VecWidth == 256)
4447 NewArgType = VectorType::get(Builder.getInt16Ty(), 16, false);
4448 else if (VecWidth == 512)
4449 NewArgType = VectorType::get(Builder.getInt16Ty(), 32, false);
4450 else
4451 llvm_unreachable("Unexpected vector bit width");
4452
4453 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
4454 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
4455 }
4456
4457 Rep = Builder.CreateIntrinsic(IID, Args);
4458 Value *PassThru = ZeroMask ? ConstantAggregateZero::get(CI->getType())
4459 : CI->getArgOperand(0);
4460 Rep = emitX86Select(Builder, CI->getArgOperand(3), Rep, PassThru);
4461 } else if (Name == "addcarryx.u32" || Name == "addcarryx.u64" ||
4462 Name == "addcarry.u32" || Name == "addcarry.u64" ||
4463 Name == "subborrow.u32" || Name == "subborrow.u64") {
4464 Intrinsic::ID IID;
4465 if (Name[0] == 'a' && Name.back() == '2')
4466 IID = Intrinsic::x86_addcarry_32;
4467 else if (Name[0] == 'a' && Name.back() == '4')
4468 IID = Intrinsic::x86_addcarry_64;
4469 else if (Name[0] == 's' && Name.back() == '2')
4470 IID = Intrinsic::x86_subborrow_32;
4471 else if (Name[0] == 's' && Name.back() == '4')
4472 IID = Intrinsic::x86_subborrow_64;
4473 else
4474 llvm_unreachable("Unexpected intrinsic");
4475
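// Schematically, the old form returned only the carry flag and wrote the
// arithmetic result through its pointer operand, roughly
//   %cf = call i8 @llvm.x86.addcarry.u32(i8 %c, i32 %a, i32 %b, ptr %out)
// whereas the new intrinsic returns an { i8, i32 } pair; the second element
// is stored to the old pointer argument below and the first element replaces
// the original result.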
4476 // Make a call with 3 operands.
4477 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
4478 CI->getArgOperand(2)};
4479 Value *NewCall = Builder.CreateIntrinsic(IID, Args);
4480
4481 // Extract the second result and store it.
4482 Value *Data = Builder.CreateExtractValue(NewCall, 1);
4483 Builder.CreateAlignedStore(Data, CI->getArgOperand(3), Align(1));
4484 // Replace the original call result with the first result of the new call.
4485 Value *CF = Builder.CreateExtractValue(NewCall, 0);
4486
4487 CI->replaceAllUsesWith(CF);
4488 Rep = nullptr;
4489 } else if (Name.starts_with("avx512.mask.") &&
4490 upgradeAVX512MaskToSelect(Name, Builder, *CI, Rep)) {
4491 // Rep will be updated by the call in the condition.
4492 }
4493
4494 return Rep;
4495}
4496
4498 Function *F, IRBuilder<> &Builder) {
4499 if (Name.starts_with("neon.bfcvt")) {
4500 if (Name.starts_with("neon.bfcvtn2")) {
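// bfcvtn2 keeps the low four bfloat lanes of operand 0 and concatenates them
// with operand 1 truncated from <4 x float> to <4 x bfloat>, producing the
// full <8 x bfloat> result.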
4501 SmallVector<int, 32> LoMask(4);
4502 std::iota(LoMask.begin(), LoMask.end(), 0);
4503 SmallVector<int, 32> ConcatMask(8);
4504 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4505 Value *Inactive = Builder.CreateShuffleVector(CI->getOperand(0), LoMask);
4506 Value *Trunc =
4507 Builder.CreateFPTrunc(CI->getOperand(1), Inactive->getType());
4508 return Builder.CreateShuffleVector(Inactive, Trunc, ConcatMask);
4509 } else if (Name.starts_with("neon.bfcvtn")) {
4510 SmallVector<int, 32> ConcatMask(8);
4511 std::iota(ConcatMask.begin(), ConcatMask.end(), 0);
4512 Type *V4BF16 =
4513 FixedVectorType::get(Type::getBFloatTy(F->getContext()), 4);
4514 Value *Trunc = Builder.CreateFPTrunc(CI->getOperand(0), V4BF16);
4516 return Builder.CreateShuffleVector(
4517 Trunc, ConstantAggregateZero::get(V4BF16), ConcatMask);
4518 } else {
4519 return Builder.CreateFPTrunc(CI->getOperand(0),
4520 Type::getBFloatTy(F->getContext()));
4521 }
4522 } else if (Name.starts_with("sve.fcvt")) {
4523 Intrinsic::ID NewID =
4525 .Case("sve.fcvt.bf16f32", Intrinsic::aarch64_sve_fcvt_bf16f32_v2)
4526 .Case("sve.fcvtnt.bf16f32",
4527 Intrinsic::aarch64_sve_fcvtnt_bf16f32_v2)
4529 if (NewID == Intrinsic::not_intrinsic)
4530 llvm_unreachable("Unhandled Intrinsic!");
4531
4532 SmallVector<Value *, 3> Args(CI->args());
4533
4534 // The original intrinsics incorrectly used a predicate based on the
4535 // smallest element type rather than the largest.
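// For example, the old llvm.aarch64.sve.fcvt.bf16f32 took a <vscale x 8 x i1>
// predicate; the replacement expects <vscale x 4 x i1>, so reinterpret the
// operand through convert_to/from_svbool below.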
4536 Type *BadPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 8);
4537 Type *GoodPredTy = ScalableVectorType::get(Builder.getInt1Ty(), 4);
4538
4539 if (Args[1]->getType() != BadPredTy)
4540 llvm_unreachable("Unexpected predicate type!");
4541
4542 Args[1] = Builder.CreateIntrinsic(Intrinsic::aarch64_sve_convert_to_svbool,
4543 BadPredTy, Args[1]);
4544 Args[1] = Builder.CreateIntrinsic(
4545 Intrinsic::aarch64_sve_convert_from_svbool, GoodPredTy, Args[1]);
4546
4547 return Builder.CreateIntrinsic(NewID, Args, /*FMFSource=*/nullptr,
4548 CI->getName());
4549 }
4550
4551 llvm_unreachable("Unhandled Intrinsic!");
4552}
4553
4555 IRBuilder<> &Builder) {
4556 if (Name == "mve.vctp64.old") {
4557 // Replace the old v4i1 vctp64 with a v2i1 vctp and predicate-casts to the
4558 // correct type.
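// Schematically, the old <4 x i1> result of @llvm.arm.mve.vctp64(i32 %n)
// becomes:
//   %p = call <2 x i1> @llvm.arm.mve.vctp64(i32 %n)
//   %i = call i32 @llvm.arm.mve.pred.v2i.v2i1(<2 x i1> %p)
//   %r = call <4 x i1> @llvm.arm.mve.pred.i2v.v4i1(i32 %i)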
4559 Value *VCTP = Builder.CreateIntrinsic(Intrinsic::arm_mve_vctp64, {},
4560 CI->getArgOperand(0),
4561 /*FMFSource=*/nullptr, CI->getName());
4562 Value *C1 = Builder.CreateIntrinsic(
4563 Intrinsic::arm_mve_pred_v2i,
4564 {VectorType::get(Builder.getInt1Ty(), 2, false)}, VCTP);
4565 return Builder.CreateIntrinsic(
4566 Intrinsic::arm_mve_pred_i2v,
4567 {VectorType::get(Builder.getInt1Ty(), 4, false)}, C1);
4568 } else if (Name == "mve.mull.int.predicated.v2i64.v4i32.v4i1" ||
4569 Name == "mve.vqdmull.predicated.v2i64.v4i32.v4i1" ||
4570 Name == "mve.vldr.gather.base.predicated.v2i64.v2i64.v4i1" ||
4571 Name == "mve.vldr.gather.base.wb.predicated.v2i64.v2i64.v4i1" ||
4572 Name ==
4573 "mve.vldr.gather.offset.predicated.v2i64.p0i64.v2i64.v4i1" ||
4574 Name == "mve.vldr.gather.offset.predicated.v2i64.p0.v2i64.v4i1" ||
4575 Name == "mve.vstr.scatter.base.predicated.v2i64.v2i64.v4i1" ||
4576 Name == "mve.vstr.scatter.base.wb.predicated.v2i64.v2i64.v4i1" ||
4577 Name ==
4578 "mve.vstr.scatter.offset.predicated.p0i64.v2i64.v2i64.v4i1" ||
4579 Name == "mve.vstr.scatter.offset.predicated.p0.v2i64.v2i64.v4i1" ||
4580 Name == "cde.vcx1q.predicated.v2i64.v4i1" ||
4581 Name == "cde.vcx1qa.predicated.v2i64.v4i1" ||
4582 Name == "cde.vcx2q.predicated.v2i64.v4i1" ||
4583 Name == "cde.vcx2qa.predicated.v2i64.v4i1" ||
4584 Name == "cde.vcx3q.predicated.v2i64.v4i1" ||
4585 Name == "cde.vcx3qa.predicated.v2i64.v4i1") {
4586 std::vector<Type *> Tys;
4587 unsigned ID = CI->getIntrinsicID();
4588 Type *V2I1Ty = FixedVectorType::get(Builder.getInt1Ty(), 2);
4589 switch (ID) {
4590 case Intrinsic::arm_mve_mull_int_predicated:
4591 case Intrinsic::arm_mve_vqdmull_predicated:
4592 case Intrinsic::arm_mve_vldr_gather_base_predicated:
4593 Tys = {CI->getType(), CI->getOperand(0)->getType(), V2I1Ty};
4594 break;
4595 case Intrinsic::arm_mve_vldr_gather_base_wb_predicated:
4596 case Intrinsic::arm_mve_vstr_scatter_base_predicated:
4597 case Intrinsic::arm_mve_vstr_scatter_base_wb_predicated:
4598 Tys = {CI->getOperand(0)->getType(), CI->getOperand(0)->getType(),
4599 V2I1Ty};
4600 break;
4601 case Intrinsic::arm_mve_vldr_gather_offset_predicated:
4602 Tys = {CI->getType(), CI->getOperand(0)->getType(),
4603 CI->getOperand(1)->getType(), V2I1Ty};
4604 break;
4605 case Intrinsic::arm_mve_vstr_scatter_offset_predicated:
4606 Tys = {CI->getOperand(0)->getType(), CI->getOperand(1)->getType(),
4607 CI->getOperand(2)->getType(), V2I1Ty};
4608 break;
4609 case Intrinsic::arm_cde_vcx1q_predicated:
4610 case Intrinsic::arm_cde_vcx1qa_predicated:
4611 case Intrinsic::arm_cde_vcx2q_predicated:
4612 case Intrinsic::arm_cde_vcx2qa_predicated:
4613 case Intrinsic::arm_cde_vcx3q_predicated:
4614 case Intrinsic::arm_cde_vcx3qa_predicated:
4615 Tys = {CI->getOperand(1)->getType(), V2I1Ty};
4616 break;
4617 default:
4618 llvm_unreachable("Unhandled Intrinsic!");
4619 }
4620
4621 std::vector<Value *> Ops;
4622 for (Value *Op : CI->args()) {
4623 Type *Ty = Op->getType();
4624 if (Ty->getScalarSizeInBits() == 1) {
4625 Value *C1 = Builder.CreateIntrinsic(
4626 Intrinsic::arm_mve_pred_v2i,
4627 {VectorType::get(Builder.getInt1Ty(), 4, false)}, Op);
4628 Op = Builder.CreateIntrinsic(Intrinsic::arm_mve_pred_i2v, {V2I1Ty}, C1);
4629 }
4630 Ops.push_back(Op);
4631 }
4632
4633 return Builder.CreateIntrinsic(ID, Tys, Ops, /*FMFSource=*/nullptr,
4634 CI->getName());
4635 }
4636 llvm_unreachable("Unknown function for ARM CallBase upgrade.");
4637}
4638
4639// These are expected to have the arguments:
4640// atomic.intrin (ptr, rmw_value, ordering, scope, isVolatile)
4641//
4642// Except for int_amdgcn_ds_fadd_v2bf16 which only has (ptr, rmw_value).
4643//
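// For instance, a legacy call such as (roughly)
//   %r = call float @llvm.amdgcn.ds.fadd.f32(ptr addrspace(3) %p, float %v,
//                                            i32 %ord, i32 %scope, i1 false)
// is rewritten below into an atomicrmw fadd instruction on %p.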
4645 Function *F, IRBuilder<> &Builder) {
4646 // Legacy WMMA iu intrinsics lacked the optional clamp operand. Append clamp=0
4647 // for compatibility.
4648 auto UpgradeLegacyWMMAIUIntrinsicCall =
4649 [](Function *F, CallBase *CI, IRBuilder<> &Builder,
4650 ArrayRef<Type *> OverloadTys) -> Value * {
4651 // Prepare arguments, append clamp=0 for compatibility
4652 SmallVector<Value *, 10> Args(CI->args().begin(), CI->args().end());
4653 Args.push_back(Builder.getFalse());
4654
4655 // Insert the declaration for the right overload types
4657 F->getParent(), F->getIntrinsicID(), OverloadTys);
4658
4659 // Copy operand bundles if any
4661 CI->getOperandBundlesAsDefs(Bundles);
4662
4663 // Create the new call and copy calling properties
4664 auto *NewCall = cast<CallInst>(Builder.CreateCall(NewDecl, Args, Bundles));
4665 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
4666 NewCall->setCallingConv(CI->getCallingConv());
4667 NewCall->setAttributes(CI->getAttributes());
4668 NewCall->setDebugLoc(CI->getDebugLoc());
4669 NewCall->copyMetadata(*CI);
4670 return NewCall;
4671 };
4672
4673 if (F->getIntrinsicID() == Intrinsic::amdgcn_wmma_i32_16x16x64_iu8) {
4674 assert(CI->arg_size() == 7 && "Legacy int_amdgcn_wmma_i32_16x16x64_iu8 "
4675 "intrinsic should have 7 arguments");
4676 Type *T1 = CI->getArgOperand(4)->getType();
4677 Type *T2 = CI->getArgOperand(1)->getType();
4678 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2});
4679 }
4680 if (F->getIntrinsicID() == Intrinsic::amdgcn_swmmac_i32_16x16x128_iu8) {
4681 assert(CI->arg_size() == 8 && "Legacy int_amdgcn_swmmac_i32_16x16x128_iu8 "
4682 "intrinsic should have 8 arguments");
4683 Type *T1 = CI->getArgOperand(4)->getType();
4684 Type *T2 = CI->getArgOperand(1)->getType();
4685 Type *T3 = CI->getArgOperand(3)->getType();
4686 Type *T4 = CI->getArgOperand(5)->getType();
4687 return UpgradeLegacyWMMAIUIntrinsicCall(F, CI, Builder, {T1, T2, T3, T4});
4688 }
4689
4690 AtomicRMWInst::BinOp RMWOp =
4692 .StartsWith("ds.fadd", AtomicRMWInst::FAdd)
4693 .StartsWith("ds.fmin", AtomicRMWInst::FMin)
4694 .StartsWith("ds.fmax", AtomicRMWInst::FMax)
4695 .StartsWith("atomic.inc.", AtomicRMWInst::UIncWrap)
4696 .StartsWith("atomic.dec.", AtomicRMWInst::UDecWrap)
4697 .StartsWith("global.atomic.fadd", AtomicRMWInst::FAdd)
4698 .StartsWith("flat.atomic.fadd", AtomicRMWInst::FAdd)
4699 .StartsWith("global.atomic.fmin", AtomicRMWInst::FMin)
4700 .StartsWith("flat.atomic.fmin", AtomicRMWInst::FMin)
4701 .StartsWith("global.atomic.fmax", AtomicRMWInst::FMax)
4702 .StartsWith("flat.atomic.fmax", AtomicRMWInst::FMax)
4703 .StartsWith("atomic.cond.sub", AtomicRMWInst::USubCond)
4704 .StartsWith("atomic.csub", AtomicRMWInst::USubSat);
4705
4706 unsigned NumOperands = CI->getNumOperands();
4707 if (NumOperands < 3) // Malformed bitcode.
4708 return nullptr;
4709
4710 Value *Ptr = CI->getArgOperand(0);
4711 PointerType *PtrTy = dyn_cast<PointerType>(Ptr->getType());
4712 if (!PtrTy) // Malformed.
4713 return nullptr;
4714
4715 Value *Val = CI->getArgOperand(1);
4716 if (Val->getType() != CI->getType()) // Malformed.
4717 return nullptr;
4718
4719 ConstantInt *OrderArg = nullptr;
4720 bool IsVolatile = false;
4721
4722 // These should have 5 arguments (plus the callee). A separate version of the
4723 // ds_fadd intrinsic was defined for bf16 which was missing arguments.
4724 if (NumOperands > 3)
4725 OrderArg = dyn_cast<ConstantInt>(CI->getArgOperand(2));
4726
4727 // Ignore scope argument at 3
4728
4729 if (NumOperands > 5) {
4730 ConstantInt *VolatileArg = dyn_cast<ConstantInt>(CI->getArgOperand(4));
4731 IsVolatile = !VolatileArg || !VolatileArg->isZero();
4732 }
4733
4735 if (OrderArg && isValidAtomicOrdering(OrderArg->getZExtValue()))
4736 Order = static_cast<AtomicOrdering>(OrderArg->getZExtValue());
4739
4740 LLVMContext &Ctx = F->getContext();
4741
4742 // Handle the v2bf16 intrinsic which used <2 x i16> instead of <2 x bfloat>
4743 Type *RetTy = CI->getType();
4744 if (VectorType *VT = dyn_cast<VectorType>(RetTy)) {
4745 if (VT->getElementType()->isIntegerTy(16)) {
4746 VectorType *AsBF16 =
4747 VectorType::get(Type::getBFloatTy(Ctx), VT->getElementCount());
4748 Val = Builder.CreateBitCast(Val, AsBF16);
4749 }
4750 }
4751
4752 // The scope argument never really worked correctly. Use agent as the most
4753 // conservative option which should still always produce the instruction.
4754 SyncScope::ID SSID = Ctx.getOrInsertSyncScopeID("agent");
4755 AtomicRMWInst *RMW =
4756 Builder.CreateAtomicRMW(RMWOp, Ptr, Val, std::nullopt, Order, SSID);
4757
4758 unsigned AddrSpace = PtrTy->getAddressSpace();
4759 if (AddrSpace != AMDGPUAS::LOCAL_ADDRESS) {
4760 MDNode *EmptyMD = MDNode::get(F->getContext(), {});
4761 RMW->setMetadata("amdgpu.no.fine.grained.memory", EmptyMD);
4762 if (RMWOp == AtomicRMWInst::FAdd && RetTy->isFloatTy())
4763 RMW->setMetadata("amdgpu.ignore.denormal.mode", EmptyMD);
4764 }
4765
4766 if (AddrSpace == AMDGPUAS::FLAT_ADDRESS) {
4767 MDBuilder MDB(F->getContext());
4768 MDNode *RangeNotPrivate =
4771 RMW->setMetadata(LLVMContext::MD_noalias_addrspace, RangeNotPrivate);
4772 }
4773
4774 if (IsVolatile)
4775 RMW->setVolatile(true);
4776
4777 return Builder.CreateBitCast(RMW, RetTy);
4778}
4779
4780/// Helper to unwrap intrinsic call MetadataAsValue operands. Return as a
4781/// plain MDNode, as it's the verifier's job to check these are the correct
4782/// types later.
4783static MDNode *unwrapMAVOp(CallBase *CI, unsigned Op) {
4784 if (Op < CI->arg_size()) {
4785 if (MetadataAsValue *MAV =
4787 Metadata *MD = MAV->getMetadata();
4788 return dyn_cast_if_present<MDNode>(MD);
4789 }
4790 }
4791 return nullptr;
4792}
4793
4794/// Helper to unwrap a MetadataAsValue operand to its underlying Metadata, such as the Value field.
4795static Metadata *unwrapMAVMetadataOp(CallBase *CI, unsigned Op) {
4796 if (Op < CI->arg_size())
4798 return MAV->getMetadata();
4799 return nullptr;
4800}
4801
4803 // The MDNode attached to this instruction might not be the correct type,
4804 // as the verifier has not yet been run. Fetch it as a bare MDNode.
4805 return I->getDebugLoc().getAsMDNode();
4806}
4807
4808/// Convert debug intrinsic calls to non-instruction debug records.
4809/// \p Name - Final part of the intrinsic name, e.g. 'value' in llvm.dbg.value.
4810/// \p CI - The debug intrinsic call.
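/// For example, a call like llvm.dbg.value(metadata i32 %x, metadata !var,
/// metadata !expr) is converted into the equivalent non-instruction debug
/// record and inserted immediately before the call's position.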
4812 DbgRecord *DR = nullptr;
4813 if (Name == "label") {
4815 CI->getDebugLoc());
4816 } else if (Name == "assign") {
4819 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), unwrapMAVOp(CI, 3),
4820 unwrapMAVMetadataOp(CI, 4),
4821 /* The address is a Value ref; it will be stored as Metadata. */
4822 unwrapMAVOp(CI, 5), getDebugLocSafe(CI));
4823 } else if (Name == "declare") {
4826 unwrapMAVOp(CI, 1), unwrapMAVOp(CI, 2), nullptr, nullptr, nullptr,
4827 getDebugLocSafe(CI));
4828 } else if (Name == "addr") {
4829 // Upgrade dbg.addr to dbg.value with DW_OP_deref.
4830 MDNode *ExprNode = unwrapMAVOp(CI, 2);
4831 // Don't try to add something to the expression if it's not an expression.
4832 // Instead, allow the verifier to fail later.
4833 if (DIExpression *Expr = dyn_cast<DIExpression>(ExprNode)) {
4834 ExprNode = DIExpression::append(Expr, dwarf::DW_OP_deref);
4835 }
4838 unwrapMAVOp(CI, 1), ExprNode, nullptr, nullptr, nullptr,
4839 getDebugLocSafe(CI));
4840 } else if (Name == "value") {
4841 // An old version of dbg.value had an extra offset argument.
4842 unsigned VarOp = 1;
4843 unsigned ExprOp = 2;
4844 if (CI->arg_size() == 4) {
4846 // Nonzero offset dbg.values get dropped without a replacement.
4847 if (!Offset || !Offset->isZeroValue())
4848 return;
4849 VarOp = 2;
4850 ExprOp = 3;
4851 }
4854 unwrapMAVOp(CI, VarOp), unwrapMAVOp(CI, ExprOp), nullptr, nullptr,
4855 nullptr, getDebugLocSafe(CI));
4856 }
4857 assert(DR && "Unhandled intrinsic kind in upgrade to DbgRecord");
4858 CI->getParent()->insertDbgRecordBefore(DR, CI->getIterator());
4859}
4860
4863 if (!Offset)
4864 reportFatalUsageError("Invalid llvm.vector.splice offset argument");
4865 int64_t OffsetVal = Offset->getSExtValue();
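// A non-negative immediate maps onto vector.splice.left and a negative one
// onto vector.splice.right, which takes the magnitude of the offset instead.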
4866 return Builder.CreateIntrinsic(OffsetVal >= 0
4867 ? Intrinsic::vector_splice_left
4868 : Intrinsic::vector_splice_right,
4869 CI->getType(),
4870 {CI->getArgOperand(0), CI->getArgOperand(1),
4871 Builder.getInt32(std::abs(OffsetVal))});
4872}
4873
4875 Function *F, IRBuilder<> &Builder) {
4876 if (Name.starts_with("to.fp16")) {
4877 Value *Cast =
4878 Builder.CreateFPTrunc(CI->getArgOperand(0), Builder.getHalfTy());
4879 return Builder.CreateBitCast(Cast, CI->getType());
4880 }
4881
4882 if (Name.starts_with("from.fp16")) {
4883 Value *Cast =
4884 Builder.CreateBitCast(CI->getArgOperand(0), Builder.getHalfTy());
4885 return Builder.CreateFPExt(Cast, CI->getType());
4886 }
4887
4888 return nullptr;
4889}
4890
4891/// Upgrade a call to an old intrinsic. All argument and return casting must be
4892/// provided to seamlessly integrate with existing context.
4894 // Note dyn_cast to Function is not quite the same as getCalledFunction, which
4895 // checks that the callee's function type matches. It's likely we need to
4896 // handle type changes here.
4898 if (!F)
4899 return;
4900
4901 LLVMContext &C = CI->getContext();
4902 IRBuilder<> Builder(C);
4903 if (isa<FPMathOperator>(CI))
4904 Builder.setFastMathFlags(CI->getFastMathFlags());
4905 Builder.SetInsertPoint(CI->getParent(), CI->getIterator());
4906
4907 if (!NewFn) {
4908 // Get the Function's name.
4909 StringRef Name = F->getName();
4910 if (!Name.consume_front("llvm."))
4911 llvm_unreachable("intrinsic doesn't start with 'llvm.'");
4912
4913 bool IsX86 = Name.consume_front("x86.");
4914 bool IsNVVM = Name.consume_front("nvvm.");
4915 bool IsAArch64 = Name.consume_front("aarch64.");
4916 bool IsARM = Name.consume_front("arm.");
4917 bool IsAMDGCN = Name.consume_front("amdgcn.");
4918 bool IsDbg = Name.consume_front("dbg.");
4919 bool IsOldSplice =
4920 (Name.consume_front("experimental.vector.splice") ||
4921 Name.consume_front("vector.splice")) &&
4922 !(Name.starts_with(".left") || Name.starts_with(".right"));
4923 Value *Rep = nullptr;
4924
4925 if (!IsX86 && Name == "stackprotectorcheck") {
4926 Rep = nullptr;
4927 } else if (IsNVVM) {
4928 Rep = upgradeNVVMIntrinsicCall(Name, CI, F, Builder);
4929 } else if (IsX86) {
4930 Rep = upgradeX86IntrinsicCall(Name, CI, F, Builder);
4931 } else if (IsAArch64) {
4932 Rep = upgradeAArch64IntrinsicCall(Name, CI, F, Builder);
4933 } else if (IsARM) {
4934 Rep = upgradeARMIntrinsicCall(Name, CI, F, Builder);
4935 } else if (IsAMDGCN) {
4936 Rep = upgradeAMDGCNIntrinsicCall(Name, CI, F, Builder);
4937 } else if (IsDbg) {
4939 } else if (IsOldSplice) {
4940 Rep = upgradeVectorSplice(CI, Builder);
4941 } else if (Name.consume_front("convert.")) {
4942 Rep = upgradeConvertIntrinsicCall(Name, CI, F, Builder);
4943 } else {
4944 llvm_unreachable("Unknown function for CallBase upgrade.");
4945 }
4946
4947 if (Rep)
4948 CI->replaceAllUsesWith(Rep);
4949 CI->eraseFromParent();
4950 return;
4951 }
4952
4953 const auto &DefaultCase = [&]() -> void {
4954 if (F == NewFn)
4955 return;
4956
4957 if (CI->getFunctionType() == NewFn->getFunctionType()) {
4958 // Handle generic mangling change.
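// For example, only the overload suffix of the intrinsic's mangled name may
// have changed (such as a pointer-type suffix), so the call can simply be
// redirected to the new declaration.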
4959 assert(
4960 (CI->getCalledFunction()->getName() != NewFn->getName()) &&
4961 "Unknown function for CallBase upgrade and isn't just a name change");
4962 CI->setCalledFunction(NewFn);
4963 return;
4964 }
4965
4966 // This must be an upgrade from a named to a literal struct.
4967 if (auto *OldST = dyn_cast<StructType>(CI->getType())) {
4968 assert(OldST != NewFn->getReturnType() &&
4969 "Return type must have changed");
4970 assert(OldST->getNumElements() ==
4971 cast<StructType>(NewFn->getReturnType())->getNumElements() &&
4972 "Must have same number of elements");
4973
4974 SmallVector<Value *> Args(CI->args());
4975 CallInst *NewCI = Builder.CreateCall(NewFn, Args);
4976 NewCI->setAttributes(CI->getAttributes());
4977 Value *Res = PoisonValue::get(OldST);
4978 for (unsigned Idx = 0; Idx < OldST->getNumElements(); ++Idx) {
4979 Value *Elem = Builder.CreateExtractValue(NewCI, Idx);
4980 Res = Builder.CreateInsertValue(Res, Elem, Idx);
4981 }
4982 CI->replaceAllUsesWith(Res);
4983 CI->eraseFromParent();
4984 return;
4985 }
4986
4987 // We're probably about to produce something invalid. Let the verifier catch
4988 // it instead of dying here.
4989 CI->setCalledOperand(
4991 return;
4992 };
4993 CallInst *NewCall = nullptr;
4994 switch (NewFn->getIntrinsicID()) {
4995 default: {
4996 DefaultCase();
4997 return;
4998 }
4999 case Intrinsic::arm_neon_vst1:
5000 case Intrinsic::arm_neon_vst2:
5001 case Intrinsic::arm_neon_vst3:
5002 case Intrinsic::arm_neon_vst4:
5003 case Intrinsic::arm_neon_vst2lane:
5004 case Intrinsic::arm_neon_vst3lane:
5005 case Intrinsic::arm_neon_vst4lane: {
5006 SmallVector<Value *, 4> Args(CI->args());
5007 NewCall = Builder.CreateCall(NewFn, Args);
5008 break;
5009 }
5010 case Intrinsic::aarch64_sve_bfmlalb_lane_v2:
5011 case Intrinsic::aarch64_sve_bfmlalt_lane_v2:
5012 case Intrinsic::aarch64_sve_bfdot_lane_v2: {
5013 LLVMContext &Ctx = F->getParent()->getContext();
5014 SmallVector<Value *, 4> Args(CI->args());
5015 Args[3] = ConstantInt::get(Type::getInt32Ty(Ctx),
5016 cast<ConstantInt>(Args[3])->getZExtValue());
5017 NewCall = Builder.CreateCall(NewFn, Args);
5018 break;
5019 }
5020 case Intrinsic::aarch64_sve_ld3_sret:
5021 case Intrinsic::aarch64_sve_ld4_sret:
5022 case Intrinsic::aarch64_sve_ld2_sret: {
5023 StringRef Name = F->getName();
5024 Name = Name.substr(5);
5025 unsigned N = StringSwitch<unsigned>(Name)
5026 .StartsWith("aarch64.sve.ld2", 2)
5027 .StartsWith("aarch64.sve.ld3", 3)
5028 .StartsWith("aarch64.sve.ld4", 4)
5029 .Default(0);
5030 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5031 unsigned MinElts = RetTy->getMinNumElements() / N;
5032 SmallVector<Value *, 2> Args(CI->args());
5033 Value *NewLdCall = Builder.CreateCall(NewFn, Args);
5034 Value *Ret = llvm::PoisonValue::get(RetTy);
5035 for (unsigned I = 0; I < N; I++) {
5036 Value *SRet = Builder.CreateExtractValue(NewLdCall, I);
5037 Ret = Builder.CreateInsertVector(RetTy, Ret, SRet, I * MinElts);
5038 }
5039 NewCall = dyn_cast<CallInst>(Ret);
5040 break;
5041 }
5042
5043 case Intrinsic::coro_end: {
5044 SmallVector<Value *, 3> Args(CI->args());
5045 Args.push_back(ConstantTokenNone::get(CI->getContext()));
5046 NewCall = Builder.CreateCall(NewFn, Args);
5047 break;
5048 }
5049
5050 case Intrinsic::vector_extract: {
5051 StringRef Name = F->getName();
5052 Name = Name.substr(5); // Strip llvm
5053 if (!Name.starts_with("aarch64.sve.tuple.get")) {
5054 DefaultCase();
5055 return;
5056 }
5057 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5058 unsigned MinElts = RetTy->getMinNumElements();
5059 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5060 Value *NewIdx = ConstantInt::get(Type::getInt64Ty(C), I * MinElts);
5061 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0), NewIdx});
5062 break;
5063 }
5064
5065 case Intrinsic::vector_insert: {
5066 StringRef Name = F->getName();
5067 Name = Name.substr(5);
5068 if (!Name.starts_with("aarch64.sve.tuple")) {
5069 DefaultCase();
5070 return;
5071 }
5072 if (Name.starts_with("aarch64.sve.tuple.set")) {
5073 unsigned I = cast<ConstantInt>(CI->getArgOperand(1))->getZExtValue();
5074 auto *Ty = cast<ScalableVectorType>(CI->getArgOperand(2)->getType());
5075 Value *NewIdx =
5076 ConstantInt::get(Type::getInt64Ty(C), I * Ty->getMinNumElements());
5077 NewCall = Builder.CreateCall(
5078 NewFn, {CI->getArgOperand(0), CI->getArgOperand(2), NewIdx});
5079 break;
5080 }
5081 if (Name.starts_with("aarch64.sve.tuple.create")) {
5082 unsigned N = StringSwitch<unsigned>(Name)
5083 .StartsWith("aarch64.sve.tuple.create2", 2)
5084 .StartsWith("aarch64.sve.tuple.create3", 3)
5085 .StartsWith("aarch64.sve.tuple.create4", 4)
5086 .Default(0);
5087 assert(N > 1 && "Create is expected to be between 2-4");
5088 auto *RetTy = cast<ScalableVectorType>(F->getReturnType());
5089 Value *Ret = llvm::PoisonValue::get(RetTy);
5090 unsigned MinElts = RetTy->getMinNumElements() / N;
5091 for (unsigned I = 0; I < N; I++) {
5092 Value *V = CI->getArgOperand(I);
5093 Ret = Builder.CreateInsertVector(RetTy, Ret, V, I * MinElts);
5094 }
5095 NewCall = dyn_cast<CallInst>(Ret);
5096 }
5097 break;
5098 }
5099
5100 case Intrinsic::arm_neon_bfdot:
5101 case Intrinsic::arm_neon_bfmmla:
5102 case Intrinsic::arm_neon_bfmlalb:
5103 case Intrinsic::arm_neon_bfmlalt:
5104 case Intrinsic::aarch64_neon_bfdot:
5105 case Intrinsic::aarch64_neon_bfmmla:
5106 case Intrinsic::aarch64_neon_bfmlalb:
5107 case Intrinsic::aarch64_neon_bfmlalt: {
5109 assert(CI->arg_size() == 3 &&
5110 "Mismatch between function args and call args");
5111 size_t OperandWidth =
5113 assert((OperandWidth == 64 || OperandWidth == 128) &&
5114 "Unexpected operand width");
5115 Type *NewTy = FixedVectorType::get(Type::getBFloatTy(C), OperandWidth / 16);
5116 auto Iter = CI->args().begin();
5117 Args.push_back(*Iter++);
5118 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5119 Args.push_back(Builder.CreateBitCast(*Iter++, NewTy));
5120 NewCall = Builder.CreateCall(NewFn, Args);
5121 break;
5122 }
5123
5124 case Intrinsic::bitreverse:
5125 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5126 break;
5127
5128 case Intrinsic::ctlz:
5129 case Intrinsic::cttz: {
5130 if (CI->arg_size() != 1) {
5131 DefaultCase();
5132 return;
5133 }
5134
5135 NewCall =
5136 Builder.CreateCall(NewFn, {CI->getArgOperand(0), Builder.getFalse()});
5137 break;
5138 }
5139
5140 case Intrinsic::objectsize: {
5141 Value *NullIsUnknownSize =
5142 CI->arg_size() == 2 ? Builder.getFalse() : CI->getArgOperand(2);
5143 Value *Dynamic =
5144 CI->arg_size() < 4 ? Builder.getFalse() : CI->getArgOperand(3);
5145 NewCall = Builder.CreateCall(
5146 NewFn, {CI->getArgOperand(0), CI->getArgOperand(1), NullIsUnknownSize, Dynamic});
5147 break;
5148 }
5149
5150 case Intrinsic::ctpop:
5151 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(0)});
5152 break;
5153 case Intrinsic::dbg_value: {
5154 StringRef Name = F->getName();
5155 Name = Name.substr(5); // Strip llvm.
5156 // Upgrade `dbg.addr` to `dbg.value` with `DW_OP_deref`.
5157 if (Name.starts_with("dbg.addr")) {
5159 cast<MetadataAsValue>(CI->getArgOperand(2))->getMetadata());
5160 Expr = DIExpression::append(Expr, dwarf::DW_OP_deref);
5161 NewCall =
5162 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1),
5163 MetadataAsValue::get(C, Expr)});
5164 break;
5165 }
5166
5167 // Upgrade from the old version that had an extra offset argument.
5168 assert(CI->arg_size() == 4);
5169 // Drop nonzero offsets instead of attempting to upgrade them.
5171 if (Offset->isZeroValue()) {
5172 NewCall = Builder.CreateCall(
5173 NewFn,
5174 {CI->getArgOperand(0), CI->getArgOperand(2), CI->getArgOperand(3)});
5175 break;
5176 }
5177 CI->eraseFromParent();
5178 return;
5179 }
5180
5181 case Intrinsic::ptr_annotation:
5182 // Upgrade from versions that lacked the annotation attribute argument.
5183 if (CI->arg_size() != 4) {
5184 DefaultCase();
5185 return;
5186 }
5187
5188 // Create a new call with an added null annotation attribute argument.
5189 NewCall = Builder.CreateCall(
5190 NewFn,
5191 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5192 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5193 NewCall->takeName(CI);
5194 CI->replaceAllUsesWith(NewCall);
5195 CI->eraseFromParent();
5196 return;
5197
5198 case Intrinsic::var_annotation:
5199 // Upgrade from versions that lacked the annotation attribute argument.
5200 if (CI->arg_size() != 4) {
5201 DefaultCase();
5202 return;
5203 }
5204 // Create a new call with an added null annotation attribute argument.
5205 NewCall = Builder.CreateCall(
5206 NewFn,
5207 {CI->getArgOperand(0), CI->getArgOperand(1), CI->getArgOperand(2),
5208 CI->getArgOperand(3), ConstantPointerNull::get(Builder.getPtrTy())});
5209 NewCall->takeName(CI);
5210 CI->replaceAllUsesWith(NewCall);
5211 CI->eraseFromParent();
5212 return;
5213
5214 case Intrinsic::riscv_aes32dsi:
5215 case Intrinsic::riscv_aes32dsmi:
5216 case Intrinsic::riscv_aes32esi:
5217 case Intrinsic::riscv_aes32esmi:
5218 case Intrinsic::riscv_sm4ks:
5219 case Intrinsic::riscv_sm4ed: {
5220 // The last argument to these intrinsics used to be i8 and changed to i32.
5221 // The type overload for sm4ks and sm4ed was removed.
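// For example, on RV64 a call to the old i64 overload is upgraded by
// truncating the data operands to i32, calling the i32 intrinsic, and
// sign-extending the result back to i64.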
5222 Value *Arg2 = CI->getArgOperand(2);
5223 if (Arg2->getType()->isIntegerTy(32) && !CI->getType()->isIntegerTy(64))
5224 return;
5225
5226 Value *Arg0 = CI->getArgOperand(0);
5227 Value *Arg1 = CI->getArgOperand(1);
5228 if (CI->getType()->isIntegerTy(64)) {
5229 Arg0 = Builder.CreateTrunc(Arg0, Builder.getInt32Ty());
5230 Arg1 = Builder.CreateTrunc(Arg1, Builder.getInt32Ty());
5231 }
5232
5233 Arg2 = ConstantInt::get(Type::getInt32Ty(C),
5234 cast<ConstantInt>(Arg2)->getZExtValue());
5235
5236 NewCall = Builder.CreateCall(NewFn, {Arg0, Arg1, Arg2});
5237 Value *Res = NewCall;
5238 if (Res->getType() != CI->getType())
5239 Res = Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5240 NewCall->takeName(CI);
5241 CI->replaceAllUsesWith(Res);
5242 CI->eraseFromParent();
5243 return;
5244 }
5245 case Intrinsic::nvvm_mapa_shared_cluster: {
5246 // Create a new call with the correct address space.
5247 NewCall =
5248 Builder.CreateCall(NewFn, {CI->getArgOperand(0), CI->getArgOperand(1)});
5249 Value *Res = NewCall;
5250 Res = Builder.CreateAddrSpaceCast(
5251 Res, Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED));
5252 NewCall->takeName(CI);
5253 CI->replaceAllUsesWith(Res);
5254 CI->eraseFromParent();
5255 return;
5256 }
5257 case Intrinsic::nvvm_cp_async_bulk_global_to_shared_cluster:
5258 case Intrinsic::nvvm_cp_async_bulk_shared_cta_to_cluster: {
5259 // Create a new call with the correct address space.
5260 SmallVector<Value *, 4> Args(CI->args());
5261 Args[0] = Builder.CreateAddrSpaceCast(
5262 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5263
5264 NewCall = Builder.CreateCall(NewFn, Args);
5265 NewCall->takeName(CI);
5266 CI->replaceAllUsesWith(NewCall);
5267 CI->eraseFromParent();
5268 return;
5269 }
5270 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_3d:
5271 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_4d:
5272 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_im2col_5d:
5273 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_1d:
5274 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_2d:
5275 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_3d:
5276 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_4d:
5277 case Intrinsic::nvvm_cp_async_bulk_tensor_g2s_tile_5d: {
5278 SmallVector<Value *, 16> Args(CI->args());
5279
5280 // Create AddrSpaceCast to shared_cluster if needed.
5281 // This handles case (1) in shouldUpgradeNVPTXTMAG2SIntrinsics().
5282 unsigned AS = CI->getArgOperand(0)->getType()->getPointerAddressSpace();
5284 Args[0] = Builder.CreateAddrSpaceCast(
5285 Args[0], Builder.getPtrTy(NVPTXAS::ADDRESS_SPACE_SHARED_CLUSTER));
5286
5287 // Attach the flag argument for cta_group, with a
5288 // default value of 0. This handles case (2) in
5289 // shouldUpgradeNVPTXTMAG2SIntrinsics().
5290 size_t NumArgs = CI->arg_size();
5291 Value *FlagArg = CI->getArgOperand(NumArgs - 3);
5292 if (!FlagArg->getType()->isIntegerTy(1))
5293 Args.push_back(ConstantInt::get(Builder.getInt32Ty(), 0));
5294
5295 NewCall = Builder.CreateCall(NewFn, Args);
5296 NewCall->takeName(CI);
5297 CI->replaceAllUsesWith(NewCall);
5298 CI->eraseFromParent();
5299 return;
5300 }
5301 case Intrinsic::riscv_sha256sig0:
5302 case Intrinsic::riscv_sha256sig1:
5303 case Intrinsic::riscv_sha256sum0:
5304 case Intrinsic::riscv_sha256sum1:
5305 case Intrinsic::riscv_sm3p0:
5306 case Intrinsic::riscv_sm3p1: {
5307 // These intrinsics used to have an additional i64 type overload, which was
5308 // removed; only the i32 form remains.
5309 if (!CI->getType()->isIntegerTy(64))
5310 return;
5311
5312 Value *Arg =
5313 Builder.CreateTrunc(CI->getArgOperand(0), Builder.getInt32Ty());
5314
5315 NewCall = Builder.CreateCall(NewFn, Arg);
5316 Value *Res =
5317 Builder.CreateIntCast(NewCall, CI->getType(), /*isSigned*/ true);
5318 NewCall->takeName(CI);
5319 CI->replaceAllUsesWith(Res);
5320 CI->eraseFromParent();
5321 return;
5322 }
5323
5324 case Intrinsic::x86_xop_vfrcz_ss:
5325 case Intrinsic::x86_xop_vfrcz_sd:
5326 NewCall = Builder.CreateCall(NewFn, {CI->getArgOperand(1)});
5327 break;
5328
5329 case Intrinsic::x86_xop_vpermil2pd:
5330 case Intrinsic::x86_xop_vpermil2ps:
5331 case Intrinsic::x86_xop_vpermil2pd_256:
5332 case Intrinsic::x86_xop_vpermil2ps_256: {
5333 SmallVector<Value *, 4> Args(CI->args());
5334 VectorType *FltIdxTy = cast<VectorType>(Args[2]->getType());
5335 VectorType *IntIdxTy = VectorType::getInteger(FltIdxTy);
5336 Args[2] = Builder.CreateBitCast(Args[2], IntIdxTy);
5337 NewCall = Builder.CreateCall(NewFn, Args);
5338 break;
5339 }
5340
5341 case Intrinsic::x86_sse41_ptestc:
5342 case Intrinsic::x86_sse41_ptestz:
5343 case Intrinsic::x86_sse41_ptestnzc: {
5344 // The arguments for these intrinsics used to be v4f32, and changed
5345 // to v2i64. This is purely a nop, since those are bitwise intrinsics.
5346 // So, the only thing required is a bitcast for both arguments.
5347 // First, check the arguments have the old type.
5348 Value *Arg0 = CI->getArgOperand(0);
5349 if (Arg0->getType() != FixedVectorType::get(Type::getFloatTy(C), 4))
5350 return;
5351
5352 // Old intrinsic, add bitcasts
5353 Value *Arg1 = CI->getArgOperand(1);
5354
5355 auto *NewVecTy = FixedVectorType::get(Type::getInt64Ty(C), 2);
5356
5357 Value *BC0 = Builder.CreateBitCast(Arg0, NewVecTy, "cast");
5358 Value *BC1 = Builder.CreateBitCast(Arg1, NewVecTy, "cast");
5359
5360 NewCall = Builder.CreateCall(NewFn, {BC0, BC1});
5361 break;
5362 }
5363
5364 case Intrinsic::x86_rdtscp: {
5365 // This used to take 1 argument. If we have no arguments, it is already
5366 // upgraded.
5367 if (CI->getNumOperands() == 0)
5368 return;
5369
5370 NewCall = Builder.CreateCall(NewFn);
5371 // Extract the second result and store it.
5372 Value *Data = Builder.CreateExtractValue(NewCall, 1);
5373 Builder.CreateAlignedStore(Data, CI->getArgOperand(0), Align(1));
5374 // Replace the original call result with the first result of the new call.
5375 Value *TSC = Builder.CreateExtractValue(NewCall, 0);
5376
5377 NewCall->takeName(CI);
5378 CI->replaceAllUsesWith(TSC);
5379 CI->eraseFromParent();
5380 return;
5381 }
5382
5383 case Intrinsic::x86_sse41_insertps:
5384 case Intrinsic::x86_sse41_dppd:
5385 case Intrinsic::x86_sse41_dpps:
5386 case Intrinsic::x86_sse41_mpsadbw:
5387 case Intrinsic::x86_avx_dp_ps_256:
5388 case Intrinsic::x86_avx2_mpsadbw: {
5389 // Need to truncate the last argument from i32 to i8 -- this argument models
5390 // an inherently 8-bit immediate operand to these x86 instructions.
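// For example, a final i32 immediate argument %imm is replaced below with
// trunc i32 %imm to i8 before calling the new declaration.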
5391 SmallVector<Value *, 4> Args(CI->args());
5392
5393 // Replace the last argument with a trunc.
5394 Args.back() = Builder.CreateTrunc(Args.back(), Type::getInt8Ty(C), "trunc");
5395 NewCall = Builder.CreateCall(NewFn, Args);
5396 break;
5397 }
5398
5399 case Intrinsic::x86_avx512_mask_cmp_pd_128:
5400 case Intrinsic::x86_avx512_mask_cmp_pd_256:
5401 case Intrinsic::x86_avx512_mask_cmp_pd_512:
5402 case Intrinsic::x86_avx512_mask_cmp_ps_128:
5403 case Intrinsic::x86_avx512_mask_cmp_ps_256:
5404 case Intrinsic::x86_avx512_mask_cmp_ps_512: {
5405 SmallVector<Value *, 4> Args(CI->args());
5406 unsigned NumElts =
5407 cast<FixedVectorType>(Args[0]->getType())->getNumElements();
5408 Args[3] = getX86MaskVec(Builder, Args[3], NumElts);
5409
5410 NewCall = Builder.CreateCall(NewFn, Args);
5411 Value *Res = applyX86MaskOn1BitsVec(Builder, NewCall, nullptr);
5412
5413 NewCall->takeName(CI);
5414 CI->replaceAllUsesWith(Res);
5415 CI->eraseFromParent();
5416 return;
5417 }
5418
5419 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_128:
5420 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_256:
5421 case Intrinsic::x86_avx512bf16_cvtne2ps2bf16_512:
5422 case Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128:
5423 case Intrinsic::x86_avx512bf16_cvtneps2bf16_256:
5424 case Intrinsic::x86_avx512bf16_cvtneps2bf16_512: {
5425 SmallVector<Value *, 4> Args(CI->args());
5426 unsigned NumElts = cast<FixedVectorType>(CI->getType())->getNumElements();
5427 if (NewFn->getIntrinsicID() ==
5428 Intrinsic::x86_avx512bf16_mask_cvtneps2bf16_128)
5429 Args[1] = Builder.CreateBitCast(
5430 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5431
5432 NewCall = Builder.CreateCall(NewFn, Args);
5433 Value *Res = Builder.CreateBitCast(
5434 NewCall, FixedVectorType::get(Builder.getInt16Ty(), NumElts));
5435
5436 NewCall->takeName(CI);
5437 CI->replaceAllUsesWith(Res);
5438 CI->eraseFromParent();
5439 return;
5440 }
5441 case Intrinsic::x86_avx512bf16_dpbf16ps_128:
5442 case Intrinsic::x86_avx512bf16_dpbf16ps_256:
5443 case Intrinsic::x86_avx512bf16_dpbf16ps_512:{
5444 SmallVector<Value *, 4> Args(CI->args());
5445 unsigned NumElts =
5446 cast<FixedVectorType>(CI->getType())->getNumElements() * 2;
5447 Args[1] = Builder.CreateBitCast(
5448 Args[1], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5449 Args[2] = Builder.CreateBitCast(
5450 Args[2], FixedVectorType::get(Builder.getBFloatTy(), NumElts));
5451
5452 NewCall = Builder.CreateCall(NewFn, Args);
5453 break;
5454 }
5455
5456 case Intrinsic::thread_pointer: {
5457 NewCall = Builder.CreateCall(NewFn, {});
5458 break;
5459 }
5460
5461 case Intrinsic::memcpy:
5462 case Intrinsic::memmove:
5463 case Intrinsic::memset: {
5464 // We have to make sure that the call signature is what we're expecting.
5465 // We only want to change the old signatures by removing the alignment arg:
5466 // @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i32, i1)
5467 // -> @llvm.mem[cpy|move]...(i8*, i8*, i[32|i64], i1)
5468 // @llvm.memset...(i8*, i8, i[32|64], i32, i1)
5469 // -> @llvm.memset...(i8*, i8, i[32|64], i1)
5470 // Note: i8*'s in the above can be any pointer type
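// For example (roughly):
//   call void @llvm.memcpy.p0.p0.i64(ptr %d, ptr %s, i64 %n, i32 16, i1 false)
// becomes
//   call void @llvm.memcpy.p0.p0.i64(ptr align 16 %d, ptr align 16 %s,
//                                    i64 %n, i1 false)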
5471 if (CI->arg_size() != 5) {
5472 DefaultCase();
5473 return;
5474 }
5475 // Remove alignment argument (3), and add alignment attributes to the
5476 // dest/src pointers.
5477 Value *Args[4] = {CI->getArgOperand(0), CI->getArgOperand(1),
5478 CI->getArgOperand(2), CI->getArgOperand(4)};
5479 NewCall = Builder.CreateCall(NewFn, Args);
5480 AttributeList OldAttrs = CI->getAttributes();
5481 AttributeList NewAttrs = AttributeList::get(
5482 C, OldAttrs.getFnAttrs(), OldAttrs.getRetAttrs(),
5483 {OldAttrs.getParamAttrs(0), OldAttrs.getParamAttrs(1),
5484 OldAttrs.getParamAttrs(2), OldAttrs.getParamAttrs(4)});
5485 NewCall->setAttributes(NewAttrs);
5486 auto *MemCI = cast<MemIntrinsic>(NewCall);
5487 // All mem intrinsics support dest alignment.
5489 MemCI->setDestAlignment(Align->getMaybeAlignValue());
5490 // Memcpy/Memmove also support source alignment.
5491 if (auto *MTI = dyn_cast<MemTransferInst>(MemCI))
5492 MTI->setSourceAlignment(Align->getMaybeAlignValue());
5493 break;
5494 }
5495
5496 case Intrinsic::masked_load:
5497 case Intrinsic::masked_gather:
5498 case Intrinsic::masked_store:
5499 case Intrinsic::masked_scatter: {
5500 if (CI->arg_size() != 4) {
5501 DefaultCase();
5502 return;
5503 }
5504
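// The old intrinsics carried the alignment as an explicit integer argument;
// translate it into the alignment parameter of the IRBuilder helpers below
// (a zero argument means "use the ABI alignment" for the gather/scatter
// forms).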
5505 auto GetMaybeAlign = [](Value *Op) {
5506 if (auto *CI = dyn_cast<ConstantInt>(Op)) {
5507 uint64_t Val = CI->getZExtValue();
5508 if (Val == 0)
5509 return MaybeAlign();
5510 if (isPowerOf2_64(Val))
5511 return MaybeAlign(Val);
5512 }
5513 reportFatalUsageError("Invalid alignment argument");
5514 };
5515 auto GetAlign = [&](Value *Op) {
5516 MaybeAlign Align = GetMaybeAlign(Op);
5517 if (Align)
5518 return *Align;
5519 reportFatalUsageError("Invalid zero alignment argument");
5520 };
5521
5522 const DataLayout &DL = CI->getDataLayout();
5523 switch (NewFn->getIntrinsicID()) {
5524 case Intrinsic::masked_load:
5525 NewCall = Builder.CreateMaskedLoad(
5526 CI->getType(), CI->getArgOperand(0), GetAlign(CI->getArgOperand(1)),
5527 CI->getArgOperand(2), CI->getArgOperand(3));
5528 break;
5529 case Intrinsic::masked_gather:
5530 NewCall = Builder.CreateMaskedGather(
5531 CI->getType(), CI->getArgOperand(0),
5532 DL.getValueOrABITypeAlignment(GetMaybeAlign(CI->getArgOperand(1)),
5533 CI->getType()->getScalarType()),
5534 CI->getArgOperand(2), CI->getArgOperand(3));
5535 break;
5536 case Intrinsic::masked_store:
5537 NewCall = Builder.CreateMaskedStore(
5538 CI->getArgOperand(0), CI->getArgOperand(1),
5539 GetAlign(CI->getArgOperand(2)), CI->getArgOperand(3));
5540 break;
5541 case Intrinsic::masked_scatter:
5542 NewCall = Builder.CreateMaskedScatter(
5543 CI->getArgOperand(0), CI->getArgOperand(1),
5544 DL.getValueOrABITypeAlignment(
5545 GetMaybeAlign(CI->getArgOperand(2)),
5546 CI->getArgOperand(0)->getType()->getScalarType()),
5547 CI->getArgOperand(3));
5548 break;
5549 default:
5550 llvm_unreachable("Unexpected intrinsic ID");
5551 }
5552 // Previous metadata is still valid.
5553 NewCall->copyMetadata(*CI);
5554 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
5555 break;
5556 }
5557
5558 case Intrinsic::lifetime_start:
5559 case Intrinsic::lifetime_end: {
5560 if (CI->arg_size() != 2) {
5561 DefaultCase();
5562 return;
5563 }
5564
5565 Value *Ptr = CI->getArgOperand(1);
5566 // Try to strip pointer casts, such that the lifetime works on an alloca.
5567 Ptr = Ptr->stripPointerCasts();
5568 if (isa<AllocaInst>(Ptr)) {
5569 // Don't use NewFn, as we might have looked through an addrspacecast.
5570 if (NewFn->getIntrinsicID() == Intrinsic::lifetime_start)
5571 NewCall = Builder.CreateLifetimeStart(Ptr);
5572 else
5573 NewCall = Builder.CreateLifetimeEnd(Ptr);
5574 break;
5575 }
5576
5577 // Otherwise remove the lifetime marker.
5578 CI->eraseFromParent();
5579 return;
5580 }
5581
5582 case Intrinsic::x86_avx512_vpdpbusd_128:
5583 case Intrinsic::x86_avx512_vpdpbusd_256:
5584 case Intrinsic::x86_avx512_vpdpbusd_512:
5585 case Intrinsic::x86_avx512_vpdpbusds_128:
5586 case Intrinsic::x86_avx512_vpdpbusds_256:
5587 case Intrinsic::x86_avx512_vpdpbusds_512:
5588 case Intrinsic::x86_avx2_vpdpbssd_128:
5589 case Intrinsic::x86_avx2_vpdpbssd_256:
5590 case Intrinsic::x86_avx10_vpdpbssd_512:
5591 case Intrinsic::x86_avx2_vpdpbssds_128:
5592 case Intrinsic::x86_avx2_vpdpbssds_256:
5593 case Intrinsic::x86_avx10_vpdpbssds_512:
5594 case Intrinsic::x86_avx2_vpdpbsud_128:
5595 case Intrinsic::x86_avx2_vpdpbsud_256:
5596 case Intrinsic::x86_avx10_vpdpbsud_512:
5597 case Intrinsic::x86_avx2_vpdpbsuds_128:
5598 case Intrinsic::x86_avx2_vpdpbsuds_256:
5599 case Intrinsic::x86_avx10_vpdpbsuds_512:
5600 case Intrinsic::x86_avx2_vpdpbuud_128:
5601 case Intrinsic::x86_avx2_vpdpbuud_256:
5602 case Intrinsic::x86_avx10_vpdpbuud_512:
5603 case Intrinsic::x86_avx2_vpdpbuuds_128:
5604 case Intrinsic::x86_avx2_vpdpbuuds_256:
5605 case Intrinsic::x86_avx10_vpdpbuuds_512: {
5606 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 8;
5607 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5608 CI->getArgOperand(2)};
5609 Type *NewArgType = VectorType::get(Builder.getInt8Ty(), NumElts, false);
5610 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5611 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5612
5613 NewCall = Builder.CreateCall(NewFn, Args);
5614 break;
5615 }
5616 case Intrinsic::x86_avx512_vpdpwssd_128:
5617 case Intrinsic::x86_avx512_vpdpwssd_256:
5618 case Intrinsic::x86_avx512_vpdpwssd_512:
5619 case Intrinsic::x86_avx512_vpdpwssds_128:
5620 case Intrinsic::x86_avx512_vpdpwssds_256:
5621 case Intrinsic::x86_avx512_vpdpwssds_512:
5622 case Intrinsic::x86_avx2_vpdpwsud_128:
5623 case Intrinsic::x86_avx2_vpdpwsud_256:
5624 case Intrinsic::x86_avx10_vpdpwsud_512:
5625 case Intrinsic::x86_avx2_vpdpwsuds_128:
5626 case Intrinsic::x86_avx2_vpdpwsuds_256:
5627 case Intrinsic::x86_avx10_vpdpwsuds_512:
5628 case Intrinsic::x86_avx2_vpdpwusd_128:
5629 case Intrinsic::x86_avx2_vpdpwusd_256:
5630 case Intrinsic::x86_avx10_vpdpwusd_512:
5631 case Intrinsic::x86_avx2_vpdpwusds_128:
5632 case Intrinsic::x86_avx2_vpdpwusds_256:
5633 case Intrinsic::x86_avx10_vpdpwusds_512:
5634 case Intrinsic::x86_avx2_vpdpwuud_128:
5635 case Intrinsic::x86_avx2_vpdpwuud_256:
5636 case Intrinsic::x86_avx10_vpdpwuud_512:
5637 case Intrinsic::x86_avx2_vpdpwuuds_128:
5638 case Intrinsic::x86_avx2_vpdpwuuds_256:
5639 case Intrinsic::x86_avx10_vpdpwuuds_512: {
5640 unsigned NumElts = CI->getType()->getPrimitiveSizeInBits() / 16;
5641 Value *Args[] = {CI->getArgOperand(0), CI->getArgOperand(1),
5642 CI->getArgOperand(2)};
5643 Type *NewArgType = VectorType::get(Builder.getInt16Ty(), NumElts, false);
5644 Args[1] = Builder.CreateBitCast(Args[1], NewArgType);
5645 Args[2] = Builder.CreateBitCast(Args[2], NewArgType);
5646
5647 NewCall = Builder.CreateCall(NewFn, Args);
5648 break;
 }
5649 }
5650 assert(NewCall && "Should have either set this variable or returned through "
5651 "the default case");
5652 NewCall->takeName(CI);
5653 CI->replaceAllUsesWith(NewCall);
5654 CI->eraseFromParent();
5655}
5656
5658 assert(F && "Illegal attempt to upgrade a non-existent intrinsic.");
5659
5660 // Check if this function should be upgraded and get the replacement function
5661 // if there is one.
5662 Function *NewFn;
5663 if (UpgradeIntrinsicFunction(F, NewFn)) {
5664 // Replace all users of the old function with the new function or new
5665 // instructions. This is not a range loop because the call is deleted.
5666 for (User *U : make_early_inc_range(F->users()))
5667 if (CallBase *CB = dyn_cast<CallBase>(U))
5668 UpgradeIntrinsicCall(CB, NewFn);
5669
5670 // Remove old function, no longer used, from the module.
5671 if (F != NewFn)
5672 F->eraseFromParent();
5673 }
5674}
5675
5677 const unsigned NumOperands = MD.getNumOperands();
5678 if (NumOperands == 0)
5679 return &MD; // Invalid, punt to a verifier error.
5680
5681 // Check if the tag uses struct-path aware TBAA format.
5682 if (isa<MDNode>(MD.getOperand(0)) && NumOperands >= 3)
5683 return &MD;
5684
5685 auto &Context = MD.getContext();
5686 if (NumOperands == 3) {
5687 Metadata *Elts[] = {MD.getOperand(0), MD.getOperand(1)};
5688 MDNode *ScalarType = MDNode::get(Context, Elts);
5689 // Create a MDNode <ScalarType, ScalarType, offset 0, const>
5690 Metadata *Elts2[] = {ScalarType, ScalarType,
5693 MD.getOperand(2)};
5694 return MDNode::get(Context, Elts2);
5695 }
5696 // Create a MDNode <MD, MD, offset 0>
5698 Type::getInt64Ty(Context)))};
5699 return MDNode::get(Context, Elts);
5700}
5701
5703 Instruction *&Temp) {
5704 if (Opc != Instruction::BitCast)
5705 return nullptr;
5706
5707 Temp = nullptr;
5708 Type *SrcTy = V->getType();
5709 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5710 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5711 LLVMContext &Context = V->getContext();
5712
5713 // We have no information about the target data layout, so we assume that
5714 // the maximum pointer size is 64 bits.
5715 Type *MidTy = Type::getInt64Ty(Context);
5716 Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy);
5717
5718 return CastInst::Create(Instruction::IntToPtr, Temp, DestTy);
5719 }
5720
5721 return nullptr;
5722}
5723
5725 if (Opc != Instruction::BitCast)
5726 return nullptr;
5727
5728 Type *SrcTy = C->getType();
5729 if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() &&
5730 SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) {
5731 LLVMContext &Context = C->getContext();
5732
5733 // We have no information about the target data layout, so we assume that
5734 // the maximum pointer size is 64 bits.
5735 Type *MidTy = Type::getInt64Ty(Context);
5736
5738 DestTy);
5739 }
5740
5741 return nullptr;
5742}
5743
5744/// Check the debug info version number; if it is out-dated, drop the debug
5745/// info. Return true if the module is modified.
5748 return false;
5749
5750 llvm::TimeTraceScope timeScope("Upgrade debug info");
5751 // We need to get metadata before the module is verified (i.e., getModuleFlag
5752 // makes assumptions that we haven't verified yet). Carefully extract the flag
5753 // from the metadata.
5754 unsigned Version = 0;
5755 if (NamedMDNode *ModFlags = M.getModuleFlagsMetadata()) {
5756 auto OpIt = find_if(ModFlags->operands(), [](const MDNode *Flag) {
5757 if (Flag->getNumOperands() < 3)
5758 return false;
5759 if (MDString *K = dyn_cast_or_null<MDString>(Flag->getOperand(1)))
5760 return K->getString() == "Debug Info Version";
5761 return false;
5762 });
5763 if (OpIt != ModFlags->op_end()) {
5764 const MDOperand &ValOp = (*OpIt)->getOperand(2);
5765 if (auto *CI = mdconst::dyn_extract_or_null<ConstantInt>(ValOp))
5766 Version = CI->getZExtValue();
5767 }
5768 }
5769
5771 bool BrokenDebugInfo = false;
5772 if (verifyModule(M, &llvm::errs(), &BrokenDebugInfo))
5773 report_fatal_error("Broken module found, compilation aborted!");
5774 if (!BrokenDebugInfo)
5775 // Everything is ok.
5776 return false;
5777 else {
5778 // Diagnose malformed debug info.
5780 M.getContext().diagnose(Diag);
5781 }
5782 }
5783 bool Modified = StripDebugInfo(M);
5785 // Diagnose a version mismatch.
5787 M.getContext().diagnose(DiagVersion);
5788 }
5789 return Modified;
5790}
5791
5792static void upgradeNVVMFnVectorAttr(const StringRef Attr, const char DimC,
5793 GlobalValue *GV, const Metadata *V) {
5794 Function *F = cast<Function>(GV);
5795
5796 constexpr StringLiteral DefaultValue = "1";
5797 StringRef Vect3[3] = {DefaultValue, DefaultValue, DefaultValue};
5798 unsigned Length = 0;
5799
5800 if (F->hasFnAttribute(Attr)) {
5801 // We expect the existing attribute to have the form "x[,y[,z]]". Here we
5802 // parse those elements, placing them into Vect3.
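// For example (hypothetical values): if the attribute already holds "16,8"
// and the 'z' dimension is upgraded to 2, the result is "16,8,2"; dimensions
// that were never specified default to "1".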
5803 StringRef S = F->getFnAttribute(Attr).getValueAsString();
5804 for (; Length < 3 && !S.empty(); Length++) {
5805 auto [Part, Rest] = S.split(',');
5806 Vect3[Length] = Part.trim();
5807 S = Rest;
5808 }
5809 }
5810
5811 const unsigned Dim = DimC - 'x';
5812 assert(Dim < 3 && "Unexpected dim char");
5813
5814 const uint64_t VInt = mdconst::extract<ConstantInt>(V)->getZExtValue();
5815
5816 // local variable required for StringRef in Vect3 to point to.
5817 const std::string VStr = llvm::utostr(VInt);
5818 Vect3[Dim] = VStr;
5819 Length = std::max(Length, Dim + 1);
5820
5821 const std::string NewAttr = llvm::join(ArrayRef(Vect3, Length), ",");
5822 F->addFnAttr(Attr, NewAttr);
5823}
5824
5825static inline bool isXYZ(StringRef S) {
5826 return S == "x" || S == "y" || S == "z";
5827}
5828
5830 const Metadata *V) {
5831 if (K == "kernel") {
5833 cast<Function>(GV)->setCallingConv(CallingConv::PTX_Kernel);
5834 return true;
5835 }
5836 if (K == "align") {
5837 // V is a bitfield specifying two 16-bit values. The alignment value is
5838 // specified in the low 16 bits; the index is specified in the high bits. For
5839 // the index, 0 indicates the return value while higher values correspond to
5840 // each parameter (idx = param + 1).
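// For example (hypothetical value), 0x00010008 requests stack alignment 8 on
// the first parameter (attribute index 1).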
5841 const uint64_t AlignIdxValuePair =
5842 mdconst::extract<ConstantInt>(V)->getZExtValue();
5843 const unsigned Idx = (AlignIdxValuePair >> 16);
5844 const Align StackAlign = Align(AlignIdxValuePair & 0xFFFF);
5845 cast<Function>(GV)->addAttributeAtIndex(
5846 Idx, Attribute::getWithStackAlignment(GV->getContext(), StackAlign));
5847 return true;
5848 }
5849 if (K == "maxclusterrank" || K == "cluster_max_blocks") {
5850 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5851 cast<Function>(GV)->addFnAttr("nvvm.maxclusterrank", llvm::utostr(CV));
5852 return true;
5853 }
5854 if (K == "minctasm") {
5855 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5856 cast<Function>(GV)->addFnAttr("nvvm.minctasm", llvm::utostr(CV));
5857 return true;
5858 }
5859 if (K == "maxnreg") {
5860 const auto CV = mdconst::extract<ConstantInt>(V)->getZExtValue();
5861 cast<Function>(GV)->addFnAttr("nvvm.maxnreg", llvm::utostr(CV));
5862 return true;
5863 }
5864 if (K.consume_front("maxntid") && isXYZ(K)) {
5865 upgradeNVVMFnVectorAttr("nvvm.maxntid", K[0], GV, V);
5866 return true;
5867 }
5868 if (K.consume_front("reqntid") && isXYZ(K)) {
5869 upgradeNVVMFnVectorAttr("nvvm.reqntid", K[0], GV, V);
5870 return true;
5871 }
5872 if (K.consume_front("cluster_dim_") && isXYZ(K)) {
5873 upgradeNVVMFnVectorAttr("nvvm.cluster_dim", K[0], GV, V);
5874 return true;
5875 }
5876 if (K == "grid_constant") {
5877 const auto Attr = Attribute::get(GV->getContext(), "nvvm.grid_constant");
5878 for (const auto &Op : cast<MDNode>(V)->operands()) {
5879 // For some reason, the index is 1-based in the metadata. Good thing we're
5880 // able to auto-upgrade it!
5881 const auto Index = mdconst::extract<ConstantInt>(Op)->getZExtValue() - 1;
5882 cast<Function>(GV)->addParamAttr(Index, Attr);
5883 }
5884 return true;
5885 }
5886
5887 return false;
5888}
5889
5891 NamedMDNode *NamedMD = M.getNamedMetadata("nvvm.annotations");
5892 if (!NamedMD)
5893 return;
5894
5895 SmallVector<MDNode *, 8> NewNodes;
5897 for (MDNode *MD : NamedMD->operands()) {
5898 if (!SeenNodes.insert(MD).second)
5899 continue;
5900
5901 auto *GV = mdconst::dyn_extract_or_null<GlobalValue>(MD->getOperand(0));
5902 if (!GV)
5903 continue;
5904
5905 assert((MD->getNumOperands() % 2) == 1 && "Invalid number of operands");
5906
5907 SmallVector<Metadata *, 8> NewOperands{MD->getOperand(0)};
5908 // Each nvvm.annotations metadata entry will be of the following form:
5909 // !{ ptr @gv, !"key1", value1, !"key2", value2, ... }
5910 // start index = 1, to skip the global variable key
5911 // increment = 2, to skip the value of each property-value pair
5912 for (unsigned j = 1, je = MD->getNumOperands(); j < je; j += 2) {
5913 MDString *K = cast<MDString>(MD->getOperand(j));
5914 const MDOperand &V = MD->getOperand(j + 1);
5915 bool Upgraded = upgradeSingleNVVMAnnotation(GV, K->getString(), V);
5916 if (!Upgraded)
5917 NewOperands.append({K, V});
5918 }
5919
5920 if (NewOperands.size() > 1)
5921 NewNodes.push_back(MDNode::get(M.getContext(), NewOperands));
5922 }
5923
5924 NamedMD->clearOperands();
5925 for (MDNode *N : NewNodes)
5926 NamedMD->addOperand(N);
5927}
5928
5929/// This checks for the objc retain/release marker which should be upgraded. It
5930/// returns true if the module is modified.
5932 bool Changed = false;
5933 const char *MarkerKey = "clang.arc.retainAutoreleasedReturnValueMarker";
5934 NamedMDNode *ModRetainReleaseMarker = M.getNamedMetadata(MarkerKey);
5935 if (ModRetainReleaseMarker) {
5936 MDNode *Op = ModRetainReleaseMarker->getOperand(0);
5937 if (Op) {
5938 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(0));
5939 if (ID) {
5940 SmallVector<StringRef, 4> ValueComp;
5941 ID->getString().split(ValueComp, "#");
5942 if (ValueComp.size() == 2) {
5943 std::string NewValue = ValueComp[0].str() + ";" + ValueComp[1].str();
5944 ID = MDString::get(M.getContext(), NewValue);
5945 }
5946 M.addModuleFlag(Module::Error, MarkerKey, ID);
5947 M.eraseNamedMetadata(ModRetainReleaseMarker);
5948 Changed = true;
5949 }
5950 }
5951 }
5952 return Changed;
5953}
5954
5955void llvm::UpgradeARCRuntime(Module &M) {
5956  // This lambda converts calls to ARC runtime functions into
5957  // intrinsic calls.
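  // For example, a call to the runtime function "objc_retain" is rewritten as
  // a call to the llvm.objc.retain intrinsic, with the arguments and return
  // value bitcast as needed to match the intrinsic's signature.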
5958 auto UpgradeToIntrinsic = [&](const char *OldFunc,
5959 llvm::Intrinsic::ID IntrinsicFunc) {
5960 Function *Fn = M.getFunction(OldFunc);
5961
5962 if (!Fn)
5963 return;
5964
5965 Function *NewFn =
5966 llvm::Intrinsic::getOrInsertDeclaration(&M, IntrinsicFunc);
5967
5968    for (User *U : make_early_inc_range(Fn->users())) {
5969      CallInst *CI = dyn_cast<CallInst>(U);
5970 if (!CI || CI->getCalledFunction() != Fn)
5971 continue;
5972
5973 IRBuilder<> Builder(CI->getParent(), CI->getIterator());
5974      FunctionType *NewFuncTy = NewFn->getFunctionType();
5975      SmallVector<Value *, 4> Args;
5976
5977 // Don't upgrade the intrinsic if it's not valid to bitcast the return
5978 // value to the return type of the old function.
5979 if (NewFuncTy->getReturnType() != CI->getType() &&
5980 !CastInst::castIsValid(Instruction::BitCast, CI,
5981 NewFuncTy->getReturnType()))
5982 continue;
5983
5984 bool InvalidCast = false;
5985
5986 for (unsigned I = 0, E = CI->arg_size(); I != E; ++I) {
5987 Value *Arg = CI->getArgOperand(I);
5988
5989 // Bitcast argument to the parameter type of the new function if it's
5990 // not a variadic argument.
5991 if (I < NewFuncTy->getNumParams()) {
5992 // Don't upgrade the intrinsic if it's not valid to bitcast the argument
5993 // to the parameter type of the new function.
5994 if (!CastInst::castIsValid(Instruction::BitCast, Arg,
5995 NewFuncTy->getParamType(I))) {
5996 InvalidCast = true;
5997 break;
5998 }
5999 Arg = Builder.CreateBitCast(Arg, NewFuncTy->getParamType(I));
6000 }
6001 Args.push_back(Arg);
6002 }
6003
6004 if (InvalidCast)
6005 continue;
6006
6007 // Create a call instruction that calls the new function.
6008 CallInst *NewCall = Builder.CreateCall(NewFuncTy, NewFn, Args);
6009 NewCall->setTailCallKind(cast<CallInst>(CI)->getTailCallKind());
6010 NewCall->takeName(CI);
6011
6012 // Bitcast the return value back to the type of the old call.
6013 Value *NewRetVal = Builder.CreateBitCast(NewCall, CI->getType());
6014
6015 if (!CI->use_empty())
6016 CI->replaceAllUsesWith(NewRetVal);
6017 CI->eraseFromParent();
6018 }
6019
6020 if (Fn->use_empty())
6021 Fn->eraseFromParent();
6022 };
6023
6024 // Unconditionally convert a call to "clang.arc.use" to a call to
6025 // "llvm.objc.clang.arc.use".
6026 UpgradeToIntrinsic("clang.arc.use", llvm::Intrinsic::objc_clang_arc_use);
6027
6028  // Upgrade the retain/release marker. If there is no need to upgrade the
6029  // marker, that means either the module is already new enough to contain the
6030  // new intrinsics or it is not ARC; there is no need to upgrade the runtime calls.
6031  if (!upgradeRetainReleaseMarker(M))
6032 return;
6033
6034 std::pair<const char *, llvm::Intrinsic::ID> RuntimeFuncs[] = {
6035 {"objc_autorelease", llvm::Intrinsic::objc_autorelease},
6036 {"objc_autoreleasePoolPop", llvm::Intrinsic::objc_autoreleasePoolPop},
6037 {"objc_autoreleasePoolPush", llvm::Intrinsic::objc_autoreleasePoolPush},
6038 {"objc_autoreleaseReturnValue",
6039 llvm::Intrinsic::objc_autoreleaseReturnValue},
6040 {"objc_copyWeak", llvm::Intrinsic::objc_copyWeak},
6041 {"objc_destroyWeak", llvm::Intrinsic::objc_destroyWeak},
6042 {"objc_initWeak", llvm::Intrinsic::objc_initWeak},
6043 {"objc_loadWeak", llvm::Intrinsic::objc_loadWeak},
6044 {"objc_loadWeakRetained", llvm::Intrinsic::objc_loadWeakRetained},
6045 {"objc_moveWeak", llvm::Intrinsic::objc_moveWeak},
6046 {"objc_release", llvm::Intrinsic::objc_release},
6047 {"objc_retain", llvm::Intrinsic::objc_retain},
6048 {"objc_retainAutorelease", llvm::Intrinsic::objc_retainAutorelease},
6049 {"objc_retainAutoreleaseReturnValue",
6050 llvm::Intrinsic::objc_retainAutoreleaseReturnValue},
6051 {"objc_retainAutoreleasedReturnValue",
6052 llvm::Intrinsic::objc_retainAutoreleasedReturnValue},
6053 {"objc_retainBlock", llvm::Intrinsic::objc_retainBlock},
6054 {"objc_storeStrong", llvm::Intrinsic::objc_storeStrong},
6055 {"objc_storeWeak", llvm::Intrinsic::objc_storeWeak},
6056 {"objc_unsafeClaimAutoreleasedReturnValue",
6057 llvm::Intrinsic::objc_unsafeClaimAutoreleasedReturnValue},
6058 {"objc_retainedObject", llvm::Intrinsic::objc_retainedObject},
6059 {"objc_unretainedObject", llvm::Intrinsic::objc_unretainedObject},
6060 {"objc_unretainedPointer", llvm::Intrinsic::objc_unretainedPointer},
6061 {"objc_retain_autorelease", llvm::Intrinsic::objc_retain_autorelease},
6062 {"objc_sync_enter", llvm::Intrinsic::objc_sync_enter},
6063 {"objc_sync_exit", llvm::Intrinsic::objc_sync_exit},
6064 {"objc_arc_annotation_topdown_bbstart",
6065 llvm::Intrinsic::objc_arc_annotation_topdown_bbstart},
6066 {"objc_arc_annotation_topdown_bbend",
6067 llvm::Intrinsic::objc_arc_annotation_topdown_bbend},
6068 {"objc_arc_annotation_bottomup_bbstart",
6069 llvm::Intrinsic::objc_arc_annotation_bottomup_bbstart},
6070 {"objc_arc_annotation_bottomup_bbend",
6071 llvm::Intrinsic::objc_arc_annotation_bottomup_bbend}};
6072
6073 for (auto &I : RuntimeFuncs)
6074 UpgradeToIntrinsic(I.first, I.second);
6075}
6076
6077bool llvm::UpgradeModuleFlags(Module &M) {
6078 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6079 if (!ModFlags)
6080 return false;
6081
6082 bool HasObjCFlag = false, HasClassProperties = false, Changed = false;
6083 bool HasSwiftVersionFlag = false;
6084 uint8_t SwiftMajorVersion, SwiftMinorVersion;
6085 uint32_t SwiftABIVersion;
6086 auto Int8Ty = Type::getInt8Ty(M.getContext());
6087 auto Int32Ty = Type::getInt32Ty(M.getContext());
6088
6089 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6090 MDNode *Op = ModFlags->getOperand(I);
6091 if (Op->getNumOperands() != 3)
6092 continue;
6093 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6094 if (!ID)
6095 continue;
6096 auto SetBehavior = [&](Module::ModFlagBehavior B) {
6097 Metadata *Ops[3] = {ConstantAsMetadata::get(ConstantInt::get(
6098 Type::getInt32Ty(M.getContext()), B)),
6099 MDString::get(M.getContext(), ID->getString()),
6100 Op->getOperand(2)};
6101 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6102 Changed = true;
6103 };
6104
6105 if (ID->getString() == "Objective-C Image Info Version")
6106 HasObjCFlag = true;
6107 if (ID->getString() == "Objective-C Class Properties")
6108 HasClassProperties = true;
6109 // Upgrade PIC from Error/Max to Min.
6110 if (ID->getString() == "PIC Level") {
6111      if (auto *Behavior =
6112              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
6113 uint64_t V = Behavior->getLimitedValue();
6114 if (V == Module::Error || V == Module::Max)
6115 SetBehavior(Module::Min);
6116 }
6117 }
6118 // Upgrade "PIE Level" from Error to Max.
6119 if (ID->getString() == "PIE Level")
6120      if (auto *Behavior =
6121              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0)))
6122 if (Behavior->getLimitedValue() == Module::Error)
6123 SetBehavior(Module::Max);
6124
6125    // Upgrade branch protection and return address signing module flags. The
6126    // module flag behavior for these flags was Error and is now Min.
6127 if (ID->getString() == "branch-target-enforcement" ||
6128 ID->getString().starts_with("sign-return-address")) {
6129      if (auto *Behavior =
6130              mdconst::dyn_extract_or_null<ConstantInt>(Op->getOperand(0))) {
6131 if (Behavior->getLimitedValue() == Module::Error) {
6132 Type *Int32Ty = Type::getInt32Ty(M.getContext());
6133 Metadata *Ops[3] = {
6134 ConstantAsMetadata::get(ConstantInt::get(Int32Ty, Module::Min)),
6135 Op->getOperand(1), Op->getOperand(2)};
6136 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6137 Changed = true;
6138 }
6139 }
6140 }
6141
6142    // Upgrade Objective-C Image Info Section. Remove the whitespace in the
6143    // section name so that llvm-lto will not complain about mismatching
6144    // module flags that are functionally the same.
6145 if (ID->getString() == "Objective-C Image Info Section") {
6146 if (auto *Value = dyn_cast_or_null<MDString>(Op->getOperand(2))) {
6147 SmallVector<StringRef, 4> ValueComp;
6148 Value->getString().split(ValueComp, " ");
6149 if (ValueComp.size() != 1) {
6150 std::string NewValue;
6151 for (auto &S : ValueComp)
6152 NewValue += S.str();
6153 Metadata *Ops[3] = {Op->getOperand(0), Op->getOperand(1),
6154 MDString::get(M.getContext(), NewValue)};
6155 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6156 Changed = true;
6157 }
6158 }
6159 }
6160
6161    // The IR upgrader turns an i32-typed "Objective-C Garbage Collection" flag into an i8 value.
6162    // If the higher bits are set, it adds new module flags for the Swift info.
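    // For example, a packed value of 0x05010700 yields SwiftABIVersion = 7,
    // SwiftMajorVersion = 5, and SwiftMinorVersion = 1, and the flag itself is
    // rewritten to the i8 value 0.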
6163 if (ID->getString() == "Objective-C Garbage Collection") {
6164 auto Md = dyn_cast<ConstantAsMetadata>(Op->getOperand(2));
6165 if (Md) {
6166 assert(Md->getValue() && "Expected non-empty metadata");
6167 auto Type = Md->getValue()->getType();
6168 if (Type == Int8Ty)
6169 continue;
6170 unsigned Val = Md->getValue()->getUniqueInteger().getZExtValue();
6171 if ((Val & 0xff) != Val) {
6172 HasSwiftVersionFlag = true;
6173 SwiftABIVersion = (Val & 0xff00) >> 8;
6174 SwiftMajorVersion = (Val & 0xff000000) >> 24;
6175 SwiftMinorVersion = (Val & 0xff0000) >> 16;
6176 }
6177        Metadata *Ops[3] = {
6178            Op->getOperand(0),
6179 Op->getOperand(1),
6180            ConstantAsMetadata::get(ConstantInt::get(Int8Ty, Val & 0xff))};
6181 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6182 Changed = true;
6183 }
6184 }
6185
6186 if (ID->getString() == "amdgpu_code_object_version") {
6187 Metadata *Ops[3] = {
6188 Op->getOperand(0),
6189 MDString::get(M.getContext(), "amdhsa_code_object_version"),
6190 Op->getOperand(2)};
6191 ModFlags->setOperand(I, MDNode::get(M.getContext(), Ops));
6192 Changed = true;
6193 }
6194 }
6195
6196  // "Objective-C Class Properties" was recently added for Objective-C. We
6197  // upgrade ObjC bitcodes to contain an "Objective-C Class Properties" module
6198  // flag of value 0, so we can correctly downgrade this flag when trying to
6199 // link an ObjC bitcode without this module flag with an ObjC bitcode with
6200 // this module flag.
6201 if (HasObjCFlag && !HasClassProperties) {
6202 M.addModuleFlag(llvm::Module::Override, "Objective-C Class Properties",
6203 (uint32_t)0);
6204 Changed = true;
6205 }
6206
6207 if (HasSwiftVersionFlag) {
6208 M.addModuleFlag(Module::Error, "Swift ABI Version",
6209 SwiftABIVersion);
6210 M.addModuleFlag(Module::Error, "Swift Major Version",
6211 ConstantInt::get(Int8Ty, SwiftMajorVersion));
6212 M.addModuleFlag(Module::Error, "Swift Minor Version",
6213 ConstantInt::get(Int8Ty, SwiftMinorVersion));
6214 Changed = true;
6215 }
6216
6217 return Changed;
6218}
6219
6220void llvm::UpgradeSectionAttributes(Module &M) {
6221 auto TrimSpaces = [](StringRef Section) -> std::string {
6222 SmallVector<StringRef, 5> Components;
6223 Section.split(Components, ',');
6224
6225 SmallString<32> Buffer;
6226 raw_svector_ostream OS(Buffer);
6227
6228 for (auto Component : Components)
6229 OS << ',' << Component.trim();
6230
6231 return std::string(OS.str().substr(1));
6232 };
6233
6234 for (auto &GV : M.globals()) {
6235 if (!GV.hasSection())
6236 continue;
6237
6238 StringRef Section = GV.getSection();
6239
6240 if (!Section.starts_with("__DATA, __objc_catlist"))
6241 continue;
6242
6243 // __DATA, __objc_catlist, regular, no_dead_strip
6244 // __DATA,__objc_catlist,regular,no_dead_strip
6245 GV.setSection(TrimSpaces(Section));
6246 }
6247}
6248
6249namespace {
6250// Prior to LLVM 10.0, the strictfp attribute could be used on individual
6251// callsites within a function that did not also have the strictfp attribute.
6252// Since 10.0, if strict FP semantics are needed within a function, the
6253// function must have the strictfp attribute and all calls within the function
6254// must also have the strictfp attribute. This latter restriction is
6255// necessary to prevent unwanted libcall simplification when a function is
6256// being cloned (such as for inlining).
6257//
6258// The "dangling" strictfp attribute usage was only used to prevent constant
6259// folding and other libcall simplification. The nobuiltin attribute on the
6260// callsite has the same effect.
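// For example, given a caller that lacks the strictfp attribute, a callsite
// such as
//   %r = call double @sqrt(double %x) strictfp
// has its strictfp callsite attribute replaced with nobuiltin, which still
// blocks libcall simplification without violating the newer rule.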
6261struct StrictFPUpgradeVisitor : public InstVisitor<StrictFPUpgradeVisitor> {
6262 StrictFPUpgradeVisitor() = default;
6263
6264 void visitCallBase(CallBase &Call) {
6265 if (!Call.isStrictFP())
6266      return;
6267    if (isa<ConstrainedFPIntrinsic>(&Call))
6268 return;
6269 // If we get here, the caller doesn't have the strictfp attribute
6270 // but this callsite does. Replace the strictfp attribute with nobuiltin.
6271 Call.removeFnAttr(Attribute::StrictFP);
6272 Call.addFnAttr(Attribute::NoBuiltin);
6273 }
6274};
6275
6276/// Replace "amdgpu-unsafe-fp-atomics" metadata with atomicrmw metadata
6277struct AMDGPUUnsafeFPAtomicsUpgradeVisitor
6278 : public InstVisitor<AMDGPUUnsafeFPAtomicsUpgradeVisitor> {
6279 AMDGPUUnsafeFPAtomicsUpgradeVisitor() = default;
6280
6281 void visitAtomicRMWInst(AtomicRMWInst &RMW) {
6282 if (!RMW.isFloatingPointOperation())
6283 return;
6284
6285 MDNode *Empty = MDNode::get(RMW.getContext(), {});
6286 RMW.setMetadata("amdgpu.no.fine.grained.host.memory", Empty);
6287 RMW.setMetadata("amdgpu.no.remote.memory.access", Empty);
6288 RMW.setMetadata("amdgpu.ignore.denormal.mode", Empty);
6289 }
6290};
6291} // namespace
6292
6293void llvm::UpgradeFunctionAttributes(Function &F) {
6294 // If a function definition doesn't have the strictfp attribute,
6295 // convert any callsite strictfp attributes to nobuiltin.
6296 if (!F.isDeclaration() && !F.hasFnAttribute(Attribute::StrictFP)) {
6297 StrictFPUpgradeVisitor SFPV;
6298 SFPV.visit(F);
6299 }
6300
6301  // Remove all incompatible attributes from the function.
6302 F.removeRetAttrs(AttributeFuncs::typeIncompatible(
6303 F.getReturnType(), F.getAttributes().getRetAttrs()));
6304 for (auto &Arg : F.args())
6305 Arg.removeAttrs(
6306 AttributeFuncs::typeIncompatible(Arg.getType(), Arg.getAttributes()));
6307
6308 // Older versions of LLVM treated an "implicit-section-name" attribute
6309 // similarly to directly setting the section on a Function.
6310 if (Attribute A = F.getFnAttribute("implicit-section-name");
6311 A.isValid() && A.isStringAttribute()) {
6312 F.setSection(A.getValueAsString());
6313 F.removeFnAttr("implicit-section-name");
6314 }
6315
6316 if (!F.empty()) {
6317 // For some reason this is called twice, and the first time is before any
6318 // instructions are loaded into the body.
6319
6320 if (Attribute A = F.getFnAttribute("amdgpu-unsafe-fp-atomics");
6321 A.isValid()) {
6322
6323 if (A.getValueAsBool()) {
6324 AMDGPUUnsafeFPAtomicsUpgradeVisitor Visitor;
6325 Visitor.visit(F);
6326 }
6327
6328 // We will leave behind dead attribute uses on external declarations, but
6329 // clang never added these to declarations anyway.
6330 F.removeFnAttr("amdgpu-unsafe-fp-atomics");
6331 }
6332 }
6333}
6334
6335// Set the function attribute if it is not already present.
6336static void setFunctionAttrIfNotSet(Function &F, StringRef FnAttrName,
6337 StringRef Value) {
6338 if (!F.hasFnAttribute(FnAttrName))
6339 F.addFnAttr(FnAttrName, Value);
6340}
6341
6342// Add the function attribute if it is not already present and Set is true.
6343// If the attribute is present with value "false", remove it.
6344// If it is present with value "true", reset it to a valueless attribute.
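// For example, ConvertFunctionAttr(F, true, "branch-target-enforcement") adds
// the valueless attribute when it is absent, drops an existing
// "branch-target-enforcement"="false", and rewrites an existing
// "branch-target-enforcement"="true" to the valueless form.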
6345static void ConvertFunctionAttr(Function &F, bool Set, StringRef FnAttrName) {
6346 if (!F.hasFnAttribute(FnAttrName)) {
6347 if (Set)
6348 F.addFnAttr(FnAttrName);
6349 } else {
6350 auto A = F.getFnAttribute(FnAttrName);
6351 if ("false" == A.getValueAsString())
6352 F.removeFnAttr(FnAttrName);
6353 else if ("true" == A.getValueAsString()) {
6354 F.removeFnAttr(FnAttrName);
6355 F.addFnAttr(FnAttrName);
6356 }
6357 }
6358}
6359
6360void llvm::copyModuleAttrToFunctions(Module &M) {
6361 Triple T(M.getTargetTriple());
6362 if (!T.isThumb() && !T.isARM() && !T.isAArch64())
6363 return;
6364
6365 uint64_t BTEValue = 0;
6366 uint64_t BPPLRValue = 0;
6367 uint64_t GCSValue = 0;
6368 uint64_t SRAValue = 0;
6369 uint64_t SRAALLValue = 0;
6370 uint64_t SRABKeyValue = 0;
6371
6372 NamedMDNode *ModFlags = M.getModuleFlagsMetadata();
6373 if (ModFlags) {
6374 for (unsigned I = 0, E = ModFlags->getNumOperands(); I != E; ++I) {
6375 MDNode *Op = ModFlags->getOperand(I);
6376 if (Op->getNumOperands() != 3)
6377 continue;
6378
6379 MDString *ID = dyn_cast_or_null<MDString>(Op->getOperand(1));
6380 auto *CI = mdconst::dyn_extract<ConstantInt>(Op->getOperand(2));
6381 if (!ID || !CI)
6382 continue;
6383
6384 StringRef IDStr = ID->getString();
6385 uint64_t *ValPtr = IDStr == "branch-target-enforcement" ? &BTEValue
6386 : IDStr == "branch-protection-pauth-lr" ? &BPPLRValue
6387 : IDStr == "guarded-control-stack" ? &GCSValue
6388 : IDStr == "sign-return-address" ? &SRAValue
6389 : IDStr == "sign-return-address-all" ? &SRAALLValue
6390 : IDStr == "sign-return-address-with-bkey"
6391 ? &SRABKeyValue
6392 : nullptr;
6393 if (!ValPtr)
6394 continue;
6395
6396 *ValPtr = CI->getZExtValue();
6397 if (*ValPtr == 2)
6398 return;
6399 }
6400 }
6401
6402 bool BTE = BTEValue == 1;
6403 bool BPPLR = BPPLRValue == 1;
6404 bool GCS = GCSValue == 1;
6405 bool SRA = SRAValue == 1;
6406
6407 StringRef SignTypeValue = "non-leaf";
6408 if (SRA && SRAALLValue == 1)
6409 SignTypeValue = "all";
6410
6411 StringRef SignKeyValue = "a_key";
6412 if (SRA && SRABKeyValue == 1)
6413 SignKeyValue = "b_key";
6414
6415 for (Function &F : M.getFunctionList()) {
6416 if (F.isDeclaration())
6417 continue;
6418
6419 if (SRA) {
6420 setFunctionAttrIfNotSet(F, "sign-return-address", SignTypeValue);
6421 setFunctionAttrIfNotSet(F, "sign-return-address-key", SignKeyValue);
6422 } else {
6423 if (auto A = F.getFnAttribute("sign-return-address");
6424 A.isValid() && "none" == A.getValueAsString()) {
6425 F.removeFnAttr("sign-return-address");
6426 F.removeFnAttr("sign-return-address-key");
6427 }
6428 }
6429 ConvertFunctionAttr(F, BTE, "branch-target-enforcement");
6430 ConvertFunctionAttr(F, BPPLR, "branch-protection-pauth-lr");
6431 ConvertFunctionAttr(F, GCS, "guarded-control-stack");
6432 }
6433
6434 if (BTE)
6435 M.setModuleFlag(llvm::Module::Min, "branch-target-enforcement", 2);
6436 if (BPPLR)
6437 M.setModuleFlag(llvm::Module::Min, "branch-protection-pauth-lr", 2);
6438 if (GCS)
6439 M.setModuleFlag(llvm::Module::Min, "guarded-control-stack", 2);
6440 if (SRA) {
6441 M.setModuleFlag(llvm::Module::Min, "sign-return-address", 2);
6442 if (SRAALLValue == 1)
6443 M.setModuleFlag(llvm::Module::Min, "sign-return-address-all", 2);
6444 if (SRABKeyValue == 1)
6445 M.setModuleFlag(llvm::Module::Min, "sign-return-address-with-bkey", 2);
6446 }
6447}
6448
6449static bool isOldLoopArgument(Metadata *MD) {
6450 auto *T = dyn_cast_or_null<MDTuple>(MD);
6451 if (!T)
6452 return false;
6453 if (T->getNumOperands() < 1)
6454 return false;
6455 auto *S = dyn_cast_or_null<MDString>(T->getOperand(0));
6456 if (!S)
6457 return false;
6458 return S->getString().starts_with("llvm.vectorizer.");
6459}
6460
6461static MDString *upgradeLoopTag(LLVMContext &C, StringRef OldTag) {
6462 StringRef OldPrefix = "llvm.vectorizer.";
6463 assert(OldTag.starts_with(OldPrefix) && "Expected old prefix");
6464
6465 if (OldTag == "llvm.vectorizer.unroll")
6466 return MDString::get(C, "llvm.loop.interleave.count");
6467
6468 return MDString::get(
6469 C, (Twine("llvm.loop.vectorize.") + OldTag.drop_front(OldPrefix.size()))
6470 .str());
6471}
6472
6473static Metadata *upgradeLoopArgument(Metadata *MD) {
6474 auto *T = dyn_cast_or_null<MDTuple>(MD);
6475 if (!T)
6476 return MD;
6477 if (T->getNumOperands() < 1)
6478 return MD;
6479 auto *OldTag = dyn_cast_or_null<MDString>(T->getOperand(0));
6480 if (!OldTag)
6481 return MD;
6482 if (!OldTag->getString().starts_with("llvm.vectorizer."))
6483 return MD;
6484
6485  // This has an old tag. Upgrade it.
6486  SmallVector<Metadata *, 8> Ops;
6487 Ops.reserve(T->getNumOperands());
6488 Ops.push_back(upgradeLoopTag(T->getContext(), OldTag->getString()));
6489 for (unsigned I = 1, E = T->getNumOperands(); I != E; ++I)
6490 Ops.push_back(T->getOperand(I));
6491
6492 return MDTuple::get(T->getContext(), Ops);
6493}
6494
6495MDNode *llvm::upgradeInstructionLoopAttachment(MDNode &N) {
6496 auto *T = dyn_cast<MDTuple>(&N);
6497 if (!T)
6498 return &N;
6499
6500 if (none_of(T->operands(), isOldLoopArgument))
6501 return &N;
6502
6503  SmallVector<Metadata *, 4> Ops;
6504 Ops.reserve(T->getNumOperands());
6505 for (Metadata *MD : T->operands())
6506 Ops.push_back(upgradeLoopArgument(MD));
6507
6508 return MDTuple::get(T->getContext(), Ops);
6509}
6510
6511std::string llvm::UpgradeDataLayoutString(StringRef DL, StringRef TT) {
6512 Triple T(TT);
6513 // The only data layout upgrades needed for pre-GCN, SPIR or SPIRV are setting
6514 // the address space of globals to 1. This does not apply to SPIRV Logical.
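  // For example, an empty datalayout string becomes "G1", and
  // "e-i64:64-v16:16-n8:16:32:64" becomes "e-i64:64-v16:16-n8:16:32:64-G1".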
6515 if ((T.isSPIR() || (T.isSPIRV() && !T.isSPIRVLogical())) &&
6516 !DL.contains("-G") && !DL.starts_with("G")) {
6517 return DL.empty() ? std::string("G1") : (DL + "-G1").str();
6518 }
6519
6520 if (T.isLoongArch64() || T.isRISCV64()) {
6521 // Make i32 a native type for 64-bit LoongArch and RISC-V.
6522 auto I = DL.find("-n64-");
6523 if (I != StringRef::npos)
6524 return (DL.take_front(I) + "-n32:64-" + DL.drop_front(I + 5)).str();
6525 return DL.str();
6526 }
6527
6528 // AMDGPU data layout upgrades.
6529 std::string Res = DL.str();
6530 if (T.isAMDGPU()) {
6531 // Define address spaces for constants.
6532 if (!DL.contains("-G") && !DL.starts_with("G"))
6533 Res.append(Res.empty() ? "G1" : "-G1");
6534
6535 // AMDGCN data layout upgrades.
6536 if (T.isAMDGCN()) {
6537
6538 // Add missing non-integral declarations.
6539 // This goes before adding new address spaces to prevent incoherent string
6540 // values.
6541 if (!DL.contains("-ni") && !DL.starts_with("ni"))
6542 Res.append("-ni:7:8:9");
6543 // Update ni:7 to ni:7:8:9.
6544 if (DL.ends_with("ni:7"))
6545 Res.append(":8:9");
6546 if (DL.ends_with("ni:7:8"))
6547 Res.append(":9");
6548
6549      // Add sizing for address spaces 7 and 8 (fat raw buffers and buffer
6550      // resources). An empty data layout has already been upgraded to G1 by now.
6551 if (!DL.contains("-p7") && !DL.starts_with("p7"))
6552 Res.append("-p7:160:256:256:32");
6553 if (!DL.contains("-p8") && !DL.starts_with("p8"))
6554 Res.append("-p8:128:128:128:48");
6555 constexpr StringRef OldP8("-p8:128:128-");
6556 if (DL.contains(OldP8))
6557 Res.replace(Res.find(OldP8), OldP8.size(), "-p8:128:128:128:48-");
6558 if (!DL.contains("-p9") && !DL.starts_with("p9"))
6559 Res.append("-p9:192:256:256:32");
6560 }
6561
6562 // Upgrade the ELF mangling mode.
6563 if (!DL.contains("m:e"))
6564 Res = Res.empty() ? "m:e" : "m:e-" + Res;
6565
6566 return Res;
6567 }
6568
6569 if (T.isSystemZ() && !DL.empty()) {
6570 // Make sure the stack alignment is present.
6571 if (!DL.contains("-S64"))
6572 return "E-S64" + DL.drop_front(1).str();
6573 return DL.str();
6574 }
6575
6576 auto AddPtr32Ptr64AddrSpaces = [&DL, &Res]() {
6577 // If the datalayout matches the expected format, add pointer size address
6578 // spaces to the datalayout.
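    // For example, "e-m:e-p:32:32-i64:64" becomes
    // "e-m:e-p:32:32-p270:32:32-p271:32:32-p272:64:64-i64:64".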
6579 StringRef AddrSpaces{"-p270:32:32-p271:32:32-p272:64:64"};
6580    if (!DL.contains(AddrSpaces)) {
6581      SmallVector<StringRef, 4> Groups;
6582 Regex R("^([Ee]-m:[a-z](-p:32:32)?)(-.*)$");
6583 if (R.match(Res, &Groups))
6584 Res = (Groups[1] + AddrSpaces + Groups[3]).str();
6585 }
6586 };
6587
6588 // AArch64 data layout upgrades.
6589 if (T.isAArch64()) {
6590 // Add "-Fn32"
6591 if (!DL.empty() && !DL.contains("-Fn32"))
6592 Res.append("-Fn32");
6593 AddPtr32Ptr64AddrSpaces();
6594 return Res;
6595 }
6596
6597 if (T.isSPARC() || (T.isMIPS64() && !DL.contains("m:m")) || T.isPPC64() ||
6598 T.isWasm()) {
6599 // Mips64 with o32 ABI did not add "-i128:128".
6600 // Add "-i128:128"
6601 std::string I64 = "-i64:64";
6602 std::string I128 = "-i128:128";
6603 if (!StringRef(Res).contains(I128)) {
6604 size_t Pos = Res.find(I64);
6605 if (Pos != size_t(-1))
6606 Res.insert(Pos + I64.size(), I128);
6607 }
6608 }
6609
6610 if (T.isPPC() && T.isOSAIX() && !DL.contains("f64:32:64") && !DL.empty()) {
6611 size_t Pos = Res.find("-S128");
6612 if (Pos == StringRef::npos)
6613 Pos = Res.size();
6614 Res.insert(Pos, "-f64:32:64");
6615 }
6616
6617 if (!T.isX86())
6618 return Res;
6619
6620 AddPtr32Ptr64AddrSpaces();
6621
6622 // i128 values need to be 16-byte-aligned. LLVM already called into libgcc
6623 // for i128 operations prior to this being reflected in the data layout, and
6624 // clang mostly produced LLVM IR that already aligned i128 to 16 byte
6625 // boundaries, so although this is a breaking change, the upgrade is expected
6626 // to fix more IR than it breaks.
6627 // Intel MCU is an exception and uses 4-byte-alignment.
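  // For example, "e-m:e-i64:64-f80:128-n8:16:32:64-S128" becomes
  // "e-m:e-i64:64-i128:128-f80:128-n8:16:32:64-S128".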
6628 if (!T.isOSIAMCU()) {
6629 std::string I128 = "-i128:128";
6630    if (StringRef Ref = Res; !Ref.contains(I128)) {
6631      SmallVector<StringRef, 4> Groups;
6632 Regex R("^(e(-[mpi][^-]*)*)((-[^mpi][^-]*)*)$");
6633 if (R.match(Res, &Groups))
6634 Res = (Groups[1] + I128 + Groups[3]).str();
6635 }
6636 }
6637
6638 // For 32-bit MSVC targets, raise the alignment of f80 values to 16 bytes.
6639 // Raising the alignment is safe because Clang did not produce f80 values in
6640 // the MSVC environment before this upgrade was added.
6641 if (T.isWindowsMSVCEnvironment() && !T.isArch64Bit()) {
6642 StringRef Ref = Res;
6643 auto I = Ref.find("-f80:32-");
6644 if (I != StringRef::npos)
6645 Res = (Ref.take_front(I) + "-f80:128-" + Ref.drop_front(I + 8)).str();
6646 }
6647
6648 return Res;
6649}
6650
6651void llvm::UpgradeAttributes(AttrBuilder &B) {
6652 StringRef FramePointer;
6653 Attribute A = B.getAttribute("no-frame-pointer-elim");
6654 if (A.isValid()) {
6655 // The value can be "true" or "false".
6656 FramePointer = A.getValueAsString() == "true" ? "all" : "none";
6657 B.removeAttribute("no-frame-pointer-elim");
6658 }
6659 if (B.contains("no-frame-pointer-elim-non-leaf")) {
6660 // The value is ignored. "no-frame-pointer-elim"="true" takes priority.
6661 if (FramePointer != "all")
6662 FramePointer = "non-leaf";
6663 B.removeAttribute("no-frame-pointer-elim-non-leaf");
6664 }
6665 if (!FramePointer.empty())
6666 B.addAttribute("frame-pointer", FramePointer);
6667
6668 A = B.getAttribute("null-pointer-is-valid");
6669 if (A.isValid()) {
6670 // The value can be "true" or "false".
6671 bool NullPointerIsValid = A.getValueAsString() == "true";
6672 B.removeAttribute("null-pointer-is-valid");
6673 if (NullPointerIsValid)
6674 B.addAttribute(Attribute::NullPointerIsValid);
6675 }
6676}
6677
6678void llvm::UpgradeOperandBundles(std::vector<OperandBundleDef> &Bundles) {
6679 // clang.arc.attachedcall bundles are now required to have an operand.
6680 // If they don't, it's okay to drop them entirely: when there is an operand,
6681 // the "attachedcall" is meaningful and required, but without an operand,
6682 // it's just a marker NOP. Dropping it merely prevents an optimization.
6683 erase_if(Bundles, [&](OperandBundleDef &OBD) {
6684 return OBD.getTag() == "clang.arc.attachedcall" &&
6685 OBD.inputs().empty();
6686 });
6687}